###################################################################
#
# Author: Dan Austin
# Detectlet: FindPaychecksAfterTerminationDate.py
#
###################################################################
# Current Version: 1.0
###################################################################
# Version Notes
#
# 4/13/2008 - Version 1.0 complete
#
###################################################################

# Version number of the detectlet standard this module declares.
# NOTE(review): presumably read by the Picalo detectlet loader to decide how
# to treat this module -- confirm against the Picalo documentation.
DETECTLET_STANDARD = 1.0

from picalo import *
# XML description of the input wizard: one <page> per value to collect.
# Each <parameter> element's "variable" attribute (table, employeeCol,
# checkDateCol, checkNumCol, lastDayCol) matches a parameter name of run()
# below, and every column parameter is tied to the table chosen on the first
# page through table="table".
# NOTE(review): presumably Picalo renders these pages and passes the answers
# to run() by name -- confirm.
# NOTE(review): user-facing text defects left untouched here: the employee-ID
# page ends a question with a period ("Which column uniquely identifies each
# employee.") and the termination-date page has an unclosed parenthesis
# ("(the date they left the company, or the date they were fired.").
wizard = '''
<wizard>
  <page>
    Select the TABLE containing the payroll information for
    each terminated employee as well as the termination dates of the employee.
    The result of this table should look like something similar to
    the sample data provided with this detectlet.
    
    If you need to join two or more tables together to get
    your data into this form, close the wizard and do so.
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Select the COLUMN containing employee IDs:
    
    Your input table should contain a column containing employeeIDs.
    This information will be used to identify the employee.
    
    Which column uniquely identifies each employee.  The column may
    be called "employeeID" or something similar.
    
    <parameter type="column" table="table" variable="employeeCol"/>
  </page>
  <page>
    Select the COLUMN containing the dates of payroll checks:
    
    Your table should contain a column identifying the date of each
    payroll check.  This date will be compared with the employee
    termination date to determine if the account is still being used
    on the payroll.
    
    Which column identifies the paycheck dates?  This column may be
    called "checkDate", "payDate", or something similar.
    <parameter type="column" table="table" variable="checkDateCol"/>
  </page>
  <page>
    Select the COLUMN containing paycheck numbers:

    Your table should contain a column indicating the check number used to
    pay the employee.  This number should be unique for each payroll check.
    
    Which column identifies the check number?  This column may be called
    "checkNum", "checkID", or something similar.
    
    <parameter type="column" table="table" variable="checkNumCol"/>
  </page>
  <page>
    Select the COLUMN containing the employee's termination date:

    Your table should contain a column indicating the termination date
    of each employee (the date they left the company, or the date they
    were fired.  This date will be used to compare against every paycheck
    date.
    
    Which column identifies the employee termination date?  This column
    may be called "lastDay", "terminationDate", or something similar.
    
    <parameter type="column" table="table" variable="lastDayCol"/>
  </page>
</wizard>
'''
# Explanation of the results table; run() returns this text as the second
# element of its result tuple.
# NOTE(review): the text below misspells "occurrences" as "occurences" and has
# a stray comma after "termination date"; left untouched here because it is
# user-facing runtime text.
RESULTS_TEXT = '''\
    The displayed table identifies every paycheck an employee received after their
    termination date.  Having one or two paychecks after a termination date, is not
    necessarily an indication of fraud, but multiple occurences should be looked into
    to determine if the terminated employee is being used by someone as a ghost-employee.
'''

def run(table, employeeCol, checkDateCol, checkNumCol, lastDayCol):
  '''Employees who commit fraud against their organization often use the
     payroll system to create fake employees and cash in on the paychecks
     being received by these fake employees (also known as 'ghost-employees').
     
     One common method used by fraudsters is to use the payroll account of
     a terminated employee to capture unearned payroll funds.
     
     In order to identify this type of fraud, this detectlet compares the
     termination date with the date of every paycheck received by each
     employee. Every paycheck dated after the last date of employment is
     reported as a red flag.
     
     In order to determine if this type of fraud exists, the detectlet goes
     through the following steps:
     - Find all paychecks dated after the termination date
     - Stratify results by employee

     Parameters:
     - table:        iterable of rows; each row is indexed by the four
                     column arguments below.
     - employeeCol:  column identifying the employee.
     - checkDateCol: column holding the paycheck date.
     - checkNumCol:  column holding the paycheck number.
     - lastDayCol:   column holding the employee termination date.

     Returns a (results, RESULTS_TEXT) tuple.  results holds one record per
     flagged paycheck (employeeID, lastCheckNum, checkDate,
     lastEmployedDate) after being passed through
     Grouping.stratify_by_value on column 0 (employeeID).

     Raises AssertionError when the same column was selected for more than
     one of the four column arguments.
  '''
  
  # validate the data
  # All four selections must be pairwise distinct.  A chained comparison
  # (a != b != c != d) only compares neighbours, so every selection is
  # checked against all of the earlier ones instead.  The error is raised
  # explicitly (rather than with an assert statement) so the check also runs
  # under python -O, while callers still see an AssertionError.
  selected = [employeeCol, checkDateCol, checkNumCol, lastDayCol]
  for position, column in enumerate(selected):
    if column in selected[:position]:
      raise AssertionError("The Employee ID, Check Date, Check Number, and Termination Date columns must be different.  Please ensure you haven't selected the same column for two of these items.")
  
  # run the analysis
  results = Table([
    ( 'employeeID',            unicode ),
    ( 'lastCheckNum',          unicode ),
    ( 'checkDate',         Date ),
    ( 'lastEmployedDate',      Date ),
  ])
  
  for row in table:
    try:
      checkDate = row[checkDateCol]
      lastDay = row[lastDayCol]
      # A row missing either date cannot be classified as before/after
      # termination, so it is skipped instead of being compared with None.
      if checkDate is None or lastDay is None:
        continue
      if checkDate > lastDay:
        rec = results.append()
        rec['employeeID'] = row[employeeCol]
        rec['lastCheckNum'] = row[checkNumCol]
        rec['checkDate'] = checkDate
        rec['lastEmployedDate'] = lastDay
    except ValueError:
      # Rows whose values cannot be used are reported and left out.
      # The parenthesized single-argument form prints the same text whether
      # print is a statement or a function.
      print("not included")
      
  results = Grouping.stratify_by_value(results, 0)
  
  return results, RESULTS_TEXT

def example_input():
  '''Build the sample input table for this detectlet.

     The module-level csvdata text is wrapped in a file-like object, parsed
     with load_csv, and each of its four columns is then given its type via
     set_type.  Returns the resulting table.
  '''
  from StringIO import StringIO  # file-like wrapper so load_csv can read the text
  sample = load_csv(StringIO(csvdata))
  # (column name, type) pairs, applied in this order
  column_types = (
    ('checkDate',   Date),
    ('checkNum',    unicode),
    ('employeeID',  unicode),
    ('lastDay',     Date),
  )
  for column_name, column_type in column_types:
    sample.set_type(column_name, column_type)
  return sample

# Sample Data
# CSV text parsed by example_input().  The header row names the columns
# checkDate, checkNum, employeeID and lastDay; each following row is one
# paycheck.  Check numbers run from 101 to 152 for employees 1-5, with check
# dates from 11/1/2007 through 5/1/2008.
# In this data employee 1 (lastDay 1/1/2008) has one later check (1/16/2008),
# employees 2 (lastDay 2/11/2008) and 3 (lastDay 12/31/2007) keep receiving
# checks through 5/1/2008, and employees 4 and 5 have no check dated after
# their lastDay.
csvdata = '''\
checkDate,checkNum,employeeID,lastDay
11/1/2007,101,1,1/1/2008
11/1/2007,102,2,2/11/2008
11/1/2007,103,3,12/31/2007
11/1/2007,104,4,3/27/2008
11/1/2007,105,5,3/26/2008
11/16/2007,106,1,1/1/2008
11/16/2007,107,2,2/11/2008
11/16/2007,108,3,12/31/2007
11/16/2007,109,4,3/27/2008
11/16/2007,110,5,3/26/2008
12/1/2007,111,1,1/1/2008
12/1/2007,112,2,2/11/2008
12/1/2007,113,3,12/31/2007
12/1/2007,114,4,3/27/2008
12/1/2007,115,5,3/26/2008
12/16/2007,116,1,1/1/2008
12/16/2007,117,2,2/11/2008
12/16/2007,118,3,12/31/2007
12/16/2007,119,4,3/27/2008
12/16/2007,120,5,3/26/2008
1/1/2008,121,1,1/1/2008
1/1/2008,122,2,2/11/2008
1/1/2008,123,3,12/31/2007
1/1/2008,124,4,3/27/2008
1/1/2008,125,5,3/26/2008
1/16/2008,126,1,1/1/2008
1/16/2008,127,2,2/11/2008
1/16/2008,128,3,12/31/2007
1/16/2008,129,4,3/27/2008
1/16/2008,130,5,3/26/2008
2/1/2008,131,2,2/11/2008
2/1/2008,132,3,12/31/2007
2/1/2008,133,4,3/27/2008
2/1/2008,134,5,3/26/2008
2/16/2008,135,2,2/11/2008
2/16/2008,136,3,12/31/2007
2/16/2008,137,4,3/27/2008
2/16/2008,138,5,3/26/2008
3/1/2008,139,2,2/11/2008
3/1/2008,140,3,12/31/2007
3/1/2008,141,4,3/27/2008
3/1/2008,142,5,3/26/2008
3/16/2008,143,2,2/11/2008
3/16/2008,144,3,12/31/2007
3/16/2008,145,4,3/27/2008
3/16/2008,146,5,3/26/2008
4/1/2008,147,2,2/11/2008
4/1/2008,148,3,12/31/2007
4/16/2008,149,2,2/11/2008
4/16/2008,150,3,12/31/2007
5/1/2008,151,2,2/11/2008
5/1/2008,152,3,12/31/2007
'''