###################################################################
#
# Author: Dan Austin
#
###################################################################
# Current Version: 1.0
###################################################################
# Version Notes
#
# 4/13/2008 - Version 1.0 complete
#
###################################################################

# Version of the detectlet standard this module declares itself against.
# NOTE(review): presumably read by the Picalo detectlet loader -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *
# XML description of the wizard pages.  Each <parameter> element names a
# variable (table, employeeCol, routingCol, acctCol) that matches one of the
# parameters of run() below.
wizard = '''
<wizard>
  <page>
    Select the table containing direct deposit information.
    This table should look similar to the example input data (see the previous
    page) and should have columns for the employee, routing number,
    and account numbers.
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Select the column containing employee IDs:    

    This information will be used to identify the employees with
    matching direct deposit information.
    
    <parameter type="column" table="table" variable="employeeCol"/>
  </page>
  <page>
    Select the column containing routing numbers:

    Your table should contain a column identifying the routing number of
    the bank the direct deposit is being made to.  This number identifies
    the bank the account belongs to and is unique to each bank or credit
    union.
    
    <parameter type="column" table="table" variable="routingCol"/>
  </page>
  <page>
    Select the column containing account numbers:

    Your table should contain a column indicating the account number to
    which the direct deposit will be made.  Combined with the bank routing
    number, this number will uniquely identify the employee's bank account.
        
    <parameter type="column" table="table" variable="acctCol"/>
  </page>
</wizard>
'''
# Explanatory text that run() returns alongside the results table.  The
# backslash after the opening quotes keeps the text from starting with a
# blank line.
RESULTS_TEXT = '''\
    The displayed table shows all of the employees who have direct deposit accounts
    matching those of other employees.
    
    The results found in this table should be investigated immediately to determine
    if a fake employee exists and whether or not another employee is receiving the 
    excess pay.
'''

def run(table, employeeCol, routingCol, acctCol):
  '''Employees who commit fraud against their employer often use the
     payroll system to create fake employees and cash in on these fraudulently
     prepared paychecks.

     In some instances, the fake employee will have the same address as the
     employee committing the fraud or have similar initials/name of
     the employee. Since many businesses have moved to the direct deposit
     of paychecks, finding a fraudster cashing in on a fake employee by
     giving them the same direct deposit information is not uncommon.

     This detectlet compares the direct deposit information of each employee
     by looking for any two employees that share the same bank routing number
     and account number and flags them for potential fraud.

     This detectlet goes through the following process:
     - Reads the routing number and account number from each row.
     - Groups the employee IDs by routing number and account number.
     - Displays every row whose routing/account pair is also used by a
         different employee ID (one result row per flagged input row).
     - Sorts the results by account number so that matching employees can be
         seen with each other.

     Parameters:
     - table:       the table of direct deposit records to examine.
     - employeeCol: the column of table holding the employee ID.
     - routingCol:  the column of table holding the bank routing number.
     - acctCol:     the column of table holding the account number.

     Returns a (results, RESULTS_TEXT) tuple, where results is a new table
     with the columns employeeID, routingNum and acctNum.

     Raises AssertionError if any two of the three columns are the same.
  '''

  # validate the data
  # Every pair is compared explicitly: a chained "a != b != c" never compares
  # a with c, so it would accept the same column for employee and account.
  assert employeeCol != routingCol and routingCol != acctCol and employeeCol != acctCol, 'The Employee, Routing Number, and Account Number columns must be different.  Please ensure you haven\'t selected the same column for two of these items.'

  # run the analysis
  results = Table([
    ( 'employeeID',         unicode ),
    ( 'routingNum',         unicode ),
    ( 'acctNum',            unicode ),
  ])

  # Collect the distinct employee IDs that use each (routing, account) pair.
  # One pass over the table replaces comparing every row with every other row.
  employeesByAccount = {}
  for row in table:
    account = (row[routingCol], row[acctCol])
    employeesByAccount.setdefault(account, set()).add(row[employeeCol])

  # A row is flagged when at least one other employee ID uses the same account.
  # Rows that merely repeat the same employee ID are not flagged.
  for row in table:
    account = (row[routingCol], row[acctCol])
    if len(employeesByAccount[account]) > 1:
      rec = results.append()
      rec['employeeID'] = row[employeeCol]
      rec['routingNum'] = row[routingCol]
      rec['acctNum'] = row[acctCol]

  # Sort by account number.  The column name must be the results table's own
  # 'acctNum'; acctCol names a column of the input table, which may differ.
  Simple.sort(results, True, 'acctNum')

  return results, RESULTS_TEXT

def example_input():
  '''Builds the example input table from the CSV text held in csvdata.

     The three columns (employeeID, routingNum, acctNum) are all typed as
     unicode before the table is returned.
  '''
  from StringIO import StringIO  # wraps the CSV text in a file-like object for load_csv
  sample = load_csv(StringIO(csvdata))
  for column in ('employeeID', 'routingNum', 'acctNum'):
    sample.set_type(column, unicode)
  return sample

# Sample data loaded by example_input(): a header line followed by one
# employeeID,routingNum,acctNum record per line.
# Employees 1 and 36 share both routing number and account number, so they
# form a match.  Employees 39 and 95 share an account number but have
# different routing numbers.
csvdata = '''\
employeeID,routingNum,acctNum
1,121000497,1234567890
2,273857398,2243265106
3,123415156,6019950165
4,467786543,3528246091
5,987762373,3926706037
6,121000497,9858218163
7,273857398,1747032412
8,123415156,9279926055
9,467786543,5869703918
10,987762373,3460983661
11,121000497,4596659910
12,273857398,8850057708
13,123415156,4346477100
14,467786543,7371891261
15,987762373,1961791667
16,121000497,4758912242
17,273857398,9537199291
18,123415156,8621615292
19,467786543,9945172239
20,987762373,2219216755
21,121000497,4942576808
22,273857398,2687356835
23,123415156,1568686012
24,467786543,9254491000
25,987762373,6593413014
26,121000497,1012702183
27,273857398,1914016895
28,123415156,1376157830
29,467786543,3797279706
30,987762373,4428616099
31,121000497,9732612490
32,273857398,7020121225
33,467786543,1339138813
34,467786543,1559012535
35,987762373,4030223408
36,121000497,1234567890
37,273857398,7432065689
38,123815156,7649954001
39,462286543,4039138813
40,987762373,6931474754
41,121000497,7333196416
42,273857398,1864098033
43,123415156,7569577619
44,467786543,2074839672
45,987762373,3077063961
46,121000497,2000543294
47,273857398,5537240663
48,123415156,9924453769
49,467786543,1698308124
50,987762373,7144621978
51,121000497,8672245854
52,273857398,2885577626
53,123415156,7100975646
54,467786543,1480196417
55,987762373,7746967647
56,121000497,4793107483
57,273857398,5149210171
58,123415156,9113892370
59,467786543,1934654382
60,987762373,6414576122
61,121000497,6635910142
62,273857398,3854041880
63,123415156,7731221216
64,467786543,1133407396
65,987762373,1390331160
66,121000497,8493270794
67,273857398,3677115987
68,123415156,2178817973
69,467786543,8267219604
70,987762373,6767651910
71,121000497,2827644373
72,273857398,2214716571
73,123415156,7026554380
74,467786543,3344040937
75,467786543,4121138813
76,121000497,2415465151
77,273857398,2956122249
78,123415156,1154930620
79,467786543,2771627197
80,987762373,8667253060
81,121000497,2621978263
82,273857398,8615137918
83,123415156,9985444990
84,467786543,1043913721
85,987762373,4794119671
86,121000497,7974174969
87,273857398,7022280665
88,123415156,4768843670
89,467786543,3550267950
90,987762373,1315918612
91,121000497,3425255090
92,273857398,3976965635
93,123415156,2420209954
94,467786543,5041409046
95,467786543,4039138813
96,121000497,8012922451
97,273857398,2386924322
98,123415156,1890268774
99,467786543,9246435596
'''