from picalo import *                           # import the Picalo libraries
import sys, re, random, os, os.path, urllib    # import commonly-used Python libraries

# Detectlet metadata constants.
# NOTE(review): presumably read by the Picalo detectlet loader to identify the
# detectlet API standard and this detectlet's own version -- confirm against
# the Picalo documentation.
DETECTLET_STANDARD = 1.0
DETECTLET_VERSION = 1.0

#1. grab the payroll table
#2. separate it out by employeeId
#3. sort each employee's entries by date
#4. go from one total pay to the next, looking for
#    a positive percentage change that exceeds a cap set by the user


# Wizard definition (XML).  Each <page> asks the user one question, and the
# "variable" attribute of each <parameter> matches the name of the run()
# argument that the answer is meant for.
wizard = '''
<wizard>
    <page>
        Select the TABLE containing payroll information:

        Specifically, the TABLE will need a column that contains the
        total pay for a payment to an employee for wages.  This
        column could be named TotalPay or CheckAmount or Amount.  This
        is the column that will be examined for levels of
        increase that might indicate an employee is being paid too much
        and might be a flag for fraud.

        <parameter type="Table" variable="payrollTable" />
    </page>
    <page>
        Select the COLUMN representing the payment for one pay period
        to an employee:

        As explained before, this might be called TotalPayment or
        CheckAmount.

        <parameter type="Column" table="payrollTable" variable="paymentColumn" />
    </page>
    <page>
        Select the COLUMN that identifies the employee:

        Usually this
        will be an ID number.  But it could also be a name.  The Detectlet
        just needs to know how it can tie each entry to a certain employee,
        so if it is a name the name has to be exactly the same for multiple entries.
        This means that if "Joe Worker" has more than one payroll entry in the table,
        and the column that identifies the employee is his/her name, "Joe Worker"
        (spelled exactly like that) has to be in each entry that pertains to Joe.

        <parameter type="Column" table="payrollTable" variable="idColumn" />
    </page>
    <page>
        Select the COLUMN containing dates so that the Detectlet can sort
        the entries by date.  This will facilitate finding jumps in payment to
        employees.

        <parameter type="Column" table="payrollTable" variable="dateColumn" />
    </page>
    <page>
        Enter the maximum allowable percentage jump in pay that an employee
        can have:

        For example, if my company has a policy that an employee can only
        have a maximum of a 20% raise, I would enter 20 here and the results
        would give me all the entries of employees who had a percentage
        increase in pay GREATER THAN 20%.

        <parameter type="int" min="1" max="1000" default="20" variable="minPercentage" />
    </page>
</wizard>'''


# Explanatory text that run() returns alongside its result table.
# The backslash right after the opening quotes keeps the text from starting
# with an empty line.
RESULTS_TEXT = '''\
The table produced represents all payroll entries that were a jump in
total payment to the employee GREATER THAN the tolerance you entered.

The rows that are in this new table represent the payment AFTER the increase.
You can go back into the original table to look at the payment before the jump
to do a comparison if you desire, but the most suspicion will come from
the actual payment when the increase occured.

This new table has a new column named "PercentageJump". This shows how much
the row is increased from the row previous.
'''

def run(payrollTable, paymentColumn, idColumn, dateColumn, minPercentage):
    '''Payroll frauds often happen in small amounts.  An employee's pay
       can be changed after an approved amount has been set.  Or, the
       hourly rate of pay can be changed to inflate the total payment
       to an employee.

       This detectlet will help investigators find unusually large jumps
       in total payment (NOT the pay rate per hour or per week).  This is
       done because a change in the total payment is more pertinent to
       a business than a change in pay rate, because the total payment
       is what is actually paid to an employee, and the pay rate could
       remain unaltered but the total payment could be changed, thus
       deceiving an investigator who only looks at the pay rate information.

       The example data below will open an example payroll table that
       you can use to familiarize yourself with how this Detectlet
       works.  Each row represents a payment to an employee.  The total
       payment column is the amount paid to the employee for the pay
       period represented in the begin date through the end date.
       The employeeId identifies each row, connecting that row (or payment)
       to a certain employee.

       Parameters:
         payrollTable  -- table holding one row per payment.
         paymentColumn -- column with the amount paid for the pay period.
         idColumn      -- column whose value identifies the employee.
         dateColumn    -- column used to order each employee's payments.
         minPercentage -- threshold; a payment is flagged when it exceeds
                          the employee's previous payment by MORE than
                          this percentage.

       Returns:
         A (table, text) tuple.  The table has the columns of payrollTable
         plus an integer "PercentageJump" column, holds one row per flagged
         payment (the payment AFTER the jump) and is sorted by
         PercentageJump, largest first.  The text is RESULTS_TEXT.

       A previous payment of exactly 0 is never used as a baseline, because
       a percentage change from zero is undefined.
    '''
    # (row after the jump, percentage jump) pairs
    redFlagRows = []

    # one sub-table per employee, each examined in date order
    for employeeTable in Grouping.stratify_by_value(payrollTable, idColumn):
        Simple.sort(employeeTable, True, dateColumn)

        # compare every payment with the one that follows it
        for currRowIdx in range(len(employeeTable) - 1):
            firstPayment = employeeTable[currRowIdx][paymentColumn]
            nextRow = employeeTable[currRowIdx + 1]
            secondPayment = nextRow[paymentColumn]

            if firstPayment == 0:
                continue

            # Force float arithmetic: with integer payments a plain "/" is
            # integer division on Python 2, which turned every raise below
            # 100% into 0.  Multiplying before dividing keeps percentages
            # that are exact (e.g. 20%) from picking up rounding noise.
            percentageIncrease = (float(secondPayment - firstPayment) * 100
                                  / float(firstPayment))
            if percentageIncrease > minPercentage:
                redFlagRows.append((nextRow, percentageIncrease))

    # Build the result table: same columns as the source plus PercentageJump.
    # NOTE(review): a bare Table() apparently starts out with three default
    # columns, which are removed here before adding the real ones -- confirm.
    res = Table()
    for _ in range(3):
        res.delete_column(0)

    for col in payrollTable.get_columns():
        colType = col.get_type()
        if colType is None:
            colType = unicode   # untyped columns are treated as text
        res.append_column(col.name, colType)

    res.append_column("PercentageJump", int)
    jumpColIdx = len(res.get_column_names()) - 1

    for rowIdx, (flaggedRow, percentageIncrease) in enumerate(redFlagRows):
        res.append()
        for colIdx in range(len(flaggedRow)):
            res[rowIdx][colIdx] = flaggedRow[colIdx]
        # the column is declared as int, so truncate explicitly
        res[rowIdx][jumpColIdx] = int(percentageIncrease)

    # sort descending by PercentageJump column
    Simple.sort(res, False, "PercentageJump")

    return res, RESULTS_TEXT
    
def example_input():
    '''Load the bundled sample CSV (csvdata) into a table, assign a type
       to each of its columns, and return the table.
    '''
    import StringIO

    sampleTable = load_csv(StringIO.StringIO(csvdata))

    # (column name, type) pairs, applied in this order
    columnTypes = (
        ('PayrollId', int),
        ('EmployeeId', int),
        ('TotalHours', int),
        ('BeginDate', Date),
        ('EndDate', Date),
        ('TotalPay', int),
    )
    for columnName, columnType in columnTypes:
        sampleTable.set_type(columnName, columnType)

    return sampleTable
    
      
# Sample payroll data in CSV form, parsed by example_input().
# One header row followed by 100 payment rows (PayrollId 0-99).  TotalPay
# rises by 100 on every row, and some EmployeeIds occur more than once
# (e.g. 242), so the sample contains pay jumps for the detectlet to find.
csvdata = '''\
PayrollId,EmployeeId,TotalHours,BeginDate,EndDate,TotalPay
0,236,15,2003-09-01,2003-09-15,1000
1,259,15,2003-09-15,2003-09-29,1100
2,252,40,2003-09-29,2003-10-13,1200
3,242,55,2003-10-13,2003-10-27,1300
4,311,40,2003-10-27,2003-11-10,1400
5,291,70,2003-11-10,2003-11-24,1500
6,310,45,2003-11-24,2003-12-08,1600
7,317,10,2003-12-08,2003-12-22,1700
8,298,35,2003-12-22,2004-01-05,1800
9,269,20,2004-01-05,2004-01-19,1900
10,242,12,2004-01-19,2004-02-02,2000
11,284,35,2004-02-02,2004-02-16,2100
12,305,65,2004-02-16,2004-03-01,2200
13,316,12,2004-03-01,2004-03-15,2300
14,244,40,2004-03-15,2004-03-29,2400
15,291,20,2004-03-29,2004-04-12,2500
16,304,70,2004-04-12,2004-04-26,2600
17,239,15,2004-04-26,2004-05-10,2700
18,283,55,2004-05-10,2004-05-24,2800
19,257,60,2004-05-24,2004-06-07,2900
20,283,10,2004-06-07,2004-06-21,3000
21,308,55,2004-06-21,2004-07-05,3100
22,305,25,2004-07-05,2004-07-19,3200
23,303,45,2004-07-19,2004-08-02,3300
24,313,25,2004-08-02,2004-08-16,3400
25,258,70,2004-08-16,2004-08-30,3500
26,287,60,2004-08-30,2004-09-13,3600
27,298,45,2004-09-13,2004-09-27,3700
28,295,20,2004-09-27,2004-10-11,3800
29,283,25,2004-10-11,2004-10-25,3900
30,299,10,2004-10-25,2004-11-08,4000
31,272,25,2004-11-08,2004-11-22,4100
32,234,65,2004-11-22,2004-12-06,4200
33,269,60,2004-12-06,2004-12-20,4300
34,307,25,2004-12-20,2005-01-03,4400
35,252,60,2005-01-03,2005-01-17,4500
36,294,35,2005-01-17,2005-01-31,4600
37,286,15,2005-01-31,2005-02-14,4700
38,317,25,2005-02-14,2005-02-28,4800
39,285,15,2005-02-28,2005-03-14,4900
40,298,45,2005-03-14,2005-03-28,5000
41,298,70,2005-03-28,2005-04-11,5100
42,303,25,2005-04-11,2005-04-25,5200
43,277,65,2005-04-25,2005-05-09,5300
44,285,35,2005-05-09,2005-05-23,5400
45,239,70,2005-05-23,2005-06-06,5500
46,309,15,2005-06-06,2005-06-20,5600
47,323,25,2005-06-20,2005-07-04,5700
48,314,20,2005-07-04,2005-07-18,5800
49,295,35,2005-07-18,2005-08-01,5900
50,319,10,2005-08-01,2005-08-15,6000
51,304,25,2005-08-15,2005-08-29,6100
52,246,30,2005-08-29,2005-09-12,6200
53,316,12,2005-09-12,2005-09-26,6300
54,286,15,2005-09-26,2005-10-10,6400
55,242,40,2005-10-10,2005-10-24,6500
56,268,70,2005-10-24,2005-11-07,6600
57,275,45,2005-11-07,2005-11-21,6700
58,302,15,2005-11-21,2005-12-05,6800
59,282,15,2005-12-05,2005-12-19,6900
60,320,25,2005-12-19,2006-01-02,7000
61,269,12,2006-01-02,2006-01-16,7100
62,289,30,2006-01-16,2006-01-30,7200
63,300,35,2006-01-30,2006-02-13,7300
64,277,60,2006-02-13,2006-02-27,7400
65,265,50,2006-02-27,2006-03-13,7500
66,258,50,2006-03-13,2006-03-27,7600
67,300,12,2006-03-27,2006-04-10,7700
68,268,25,2006-04-10,2006-04-24,7800
69,235,20,2006-04-24,2006-05-08,7900
70,276,45,2006-05-08,2006-05-22,8000
71,250,10,2006-05-22,2006-06-05,8100
72,316,65,2006-06-05,2006-06-19,8200
73,315,20,2006-06-19,2006-07-03,8300
74,279,40,2006-07-03,2006-07-17,8400
75,260,45,2006-07-17,2006-07-31,8500
76,320,30,2006-07-31,2006-08-14,8600
77,294,15,2006-08-14,2006-08-28,8700
78,296,50,2006-08-28,2006-09-11,8800
79,287,30,2006-09-11,2006-09-25,8900
80,244,60,2006-09-25,2006-10-09,9000
81,300,40,2006-10-09,2006-10-23,9100
82,316,10,2006-10-23,2006-11-06,9200
83,288,35,2006-11-06,2006-11-20,9300
84,281,60,2006-11-20,2006-12-04,9400
85,239,55,2006-12-04,2006-12-18,9500
86,306,15,2006-12-18,2007-01-01,9600
87,276,70,2007-01-01,2007-01-15,9700
88,262,35,2007-01-15,2007-01-29,9800
89,277,40,2007-01-29,2007-02-12,9900
90,259,45,2007-02-12,2007-02-26,10000
91,236,45,2007-02-26,2007-03-12,10100
92,245,35,2007-03-12,2007-03-26,10200
93,293,40,2007-03-26,2007-04-09,10300
94,318,15,2007-04-09,2007-04-23,10400
95,248,50,2007-04-23,2007-05-07,10500
96,250,35,2007-05-07,2007-05-21,10600
97,243,60,2007-05-21,2007-06-04,10700
98,255,35,2007-06-04,2007-06-18,10800
99,286,60,2007-06-18,2007-07-02,10900
'''