###################################################################
#
# Author: Dan Austin
# Detectlet: FindEmployeesWithoutUsedVacationOrSickDays.py
#
###################################################################
# Current Version: 1.0
###################################################################
# Version Notes
#
# 4/13/2008 - Version 1.0 complete
#
###################################################################

# Version of the detectlet standard this module declares
# (presumably read by the Picalo detectlet loader -- TODO confirm).
DETECTLET_STANDARD = 1.0

from picalo import *
# Wizard definition: one <page> per prompt.  Each <parameter> "variable" name
# matches an argument of run() (table, employeeCol, vacationTakenCol,
# sickTakenCol); the column parameters are chosen from the selected table.
wizard = '''
<wizard>
  <page>
    Select the TABLE containing the amount of time each employee has taken
    for vacation days and for sick days.  This table should look something
    like the sample data.
    
    If you need to join two or more tables together to get
    your data into this form, close the wizard and do so.
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Select the COLUMN containing employee IDs.  The column may
    be called "employeeID" or something similar.

    <parameter type="column" table="table" variable="employeeCol"/>
  </page>
  <page>
    Select the COLUMN containing the number of vacation
    days that an employee has taken.
    
    <parameter type="column" table="table" variable="vacationTakenCol"/>
  </page>
  <page>
    Select the COLUMN containing the number of sick days
    that an employee has taken.
    
    <parameter type="column" table="table" variable="sickTakenCol"/>
  </page>
</wizard>
'''
# Explanatory text that run() returns alongside its results table; it tells
# the reader how to interpret the employees that were listed.
RESULTS_TEXT = '''\
    The displayed table identifies employees who have not taken vacation days
    or sick days.  A good sign that an employee in the payroll system does not
    actually exist is that there will be no recorded vacation or sick days for
    the employee.
    
    Please note that just because an employee appears in the table, it is not a
    sure indication that fraud has occurred.  There remains a possibility that
    a legitimate employee did not use any of their sick days or vacation days.
    However, a good practice would be to investigate anyone in this list to 
    ensure that they are a legit employee.
'''

def run(table, employeeCol, vacationTakenCol, sickTakenCol):
  '''Payroll fraud is a common tactic used by fraudsters to steal money from
     an organization. This fraud commonly involves the use of fake employees
     on the payroll system, also known as 'Ghost employees'.
     
     These ghost employees may be employees whose employment was terminated,
     or completely made up employees. There are several signs that help to
     indicate when an employee is a ghost-employee versus a legitimate employee.
     
     One of those signs involves the use of vacation days or sick leave. Most
     fraudsters using ghost employees to commit payroll fraud do not account for
     the fact that these employees also have sick days and vacation days registered
     under their names. A common sign of a ghost employee is that they will have
     never taken a single vacation day or sick leave.
     
     This detectlet searches through the employee time-off records and identifies
     any employees who have failed to use any vacation or sick days.  Considering
     that many companies now require their employees to take vacations, an employee
     without a single day off will be a very likely candidate for being fraudulent.
     
     This detectlet goes through the following steps:
     - Retrieve all employees who have taken 0 vacation and sick days.
  '''
  # Parameters:
  #   table            - the table to scan; it is iterated row by row.
  #   employeeCol      - column holding the employee ID.
  #   vacationTakenCol - column holding the number of vacation days taken.
  #   sickTakenCol     - column holding the number of sick days taken.
  # Returns:
  #   (results, RESULTS_TEXT) where results is a new table with the columns
  #   employeeID, vacationDaysTaken and sickDaysTaken, one row per match.
  # Raises:
  #   AssertionError if any two of the three column arguments are equal.
  
  # validate the data
  # A chained "a != b != c" only compares neighbours, so it never checked the
  # employee column against the sick-day column; compare every pair instead.
  columnsDiffer = (
    employeeCol != vacationTakenCol
    and vacationTakenCol != sickTakenCol
    and employeeCol != sickTakenCol
  )
  assert columnsDiffer, 'The Employee, Vacation taken, and Sick days taken columns must be different.  Please ensure you haven\'t selected the same column for two of these items.'
  
  # run the analysis
  results = Table([
    ( 'employeeID',            unicode ),
    ( 'vacationDaysTaken',     number ),
    ( 'sickDaysTaken',         number ),
  ])
  
  for row in table:
    vacationTaken = row[vacationTakenCol]
    sickTaken = row[sickTakenCol]
    # Test each column on its own: a summed total of zero would also match
    # offsetting values (e.g. -2 and 2), which is not "no days taken".
    if vacationTaken == 0 and sickTaken == 0:
      rec = results.append()
      rec['employeeID'] = row[employeeCol]
      rec['vacationDaysTaken'] = vacationTaken
      rec['sickDaysTaken'] = sickTaken
  
  return results, RESULTS_TEXT

def example_input():
  '''Load the sample CSV data into a table and set its column types.

     Returns the table produced by load_csv from csvdata, with employeeID
     typed as unicode and the two day-count columns typed as number.
  '''
  from StringIO import StringIO  # file-like wrapper so load_csv can read a string
  sample = load_csv(StringIO(csvdata))
  column_types = (
    ('employeeID',        unicode),
    ('vacationDaysTaken', number),
    ('sickDaysTaken',     number),
  )
  for column_name, column_type in column_types:
    sample.set_type(column_name, column_type)
  return sample

# Sample data parsed by example_input(): a header row followed by 40 employee
# records (employeeID, vacationDaysTaken, sickDaysTaken).  Employees 4, 25 and
# 35 have zero in both day columns.
csvdata = '''\
employeeID,vacationDaysTaken,sickDaysTaken
1,1,0
2,4,4
3,13,1
4,0,0
5,1,5
6,10,1
7,13,1
8,11,0
9,8,2
10,1,7
11,4,1
12,3,3
13,12,4
14,1,3
15,11,4
16,4,2
17,1,0
18,1,4
19,8,5
20,9,5
21,4,7
22,7,0
23,1,6
24,4,2
25,0,0
26,4,2
27,14,2
28,5,1
29,4,3
30,13,3
31,2,4
32,2,7
33,13,4
34,12,3
35,0,0
36,10,3
37,9,7
38,14,4
39,8,6
40,7,5
'''