####################################################################################
#                                                                                  #
# Copyright (c) 2008 Aaron Hardy <http://aaronhardy.com>                           #
#                                                                                  #
# This file is part of Picalo.                                                     #
#                                                                                  #
# Picalo is free software; you can redistribute it and/or modify                   #
# it under the terms of the GNU General Public License as published by             #
# the Free Software Foundation; either version 2 of the License, or                #
# (at your option) any later version.                                              #
#                                                                                  #
# Picalo is distributed in the hope that it will be useful,                        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of                   #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                    #
# GNU General Public License for more details.                                     #
#                                                                                  #
# You should have received a copy of the GNU General Public License                #
# along with Picalo; if not, write to the Free Software                            #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA        #
#                                                                                  #
####################################################################################
#
# This detectlet detects employees with rising overtime.
#
# 07 APR 2008  First version of the wizard
#
####################################################################################

# Version marker for this detectlet file.
# NOTE(review): presumably the version of the Picalo detectlet interface this
# file is written against, read by Picalo when the detectlet is loaded -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *

# XML description of the input wizard: four pages, each holding a prompt and
# one <parameter>.  The "variable" attribute of every parameter matches the
# name of the corresponding argument of run() below (disbursement_table,
# employeeId_col, overtime_col, threshold).  The threshold page limits the
# value to the range 0-99 with a default of 0.
# NOTE(review): presumably Picalo renders these pages and passes the collected
# values to run() by name -- confirm against the Picalo detectlet docs.
wizard = '''
<wizard>
  <page>
    Select the TABLE containing payroll disbursement
    data. The table should at least contain a column referring to
    the ID of the employee receiving the disbursement as well
    as a column referring to the overtime hours or overtime pay
    for a given pay period.
    <parameter type="Table" variable="disbursement_table"/>
  </page>
  <page>
    Select the COLUMN referring to the ID of the 
    employee receiving the disbursement.
    <parameter type="Column" table="disbursement_table" variable="employeeId_col"/>
  </page>
  <page>
    Select the COLUMN that refers to overtime hours
    or overtime pay for a given pay period.
    <parameter type="Column" table="disbursement_table" variable="overtime_col"/>
  </page>
  <page>
    Specify a threshold level between 0 and 99.
    A higher level indicates a stricter analysis and fewer 
    results will likely be produced.  A lower level indicates a
    more relaxed analysis and more results will likely be
    produced.
    <parameter type="int" variable="threshold" min="0" max="99" default="0"/>
  </page>
</wizard>
'''

def run(
    disbursement_table,
    employeeId_col,
    overtime_col,
    threshold):
  '''Employees allowed to report overtime pay may take advantage
     of a lack of managerial supervision by reporting overtime
     hours that were never actually worked.  As the employee begins
     to realize that reported overtime hours go unchecked, the employee
     may start to report increasingly exaggerated overtime hours.
     
     This detectlet finds employees that have patterns of increasing
     reported overtime.  These employees and their related payroll
     disbursements should be investigated to verify that reported
     overtime matches actual overtime worked.
  '''
  # (The docstring above is kept verbatim -- it may be shown to end users.)
  #
  # Parameters:
  #   disbursement_table  Picalo Table holding the payroll disbursements.
  #   employeeId_col      Column identifying the employee; used to split the
  #                       table into one sub-table per employee.
  #   overtime_col        Column whose trend is measured for each employee.
  #                       Must differ from employeeId_col.
  #   threshold           Percentage of the steepest positive slope that a
  #                       trend has to exceed to be reported (the wizard
  #                       restricts it to 0-99).
  #
  # Returns a 2-tuple: (TableList of the per-employee tables whose slope is
  # above the cut-off, RETURN_TEXT).
  #
  # Raises AssertionError when the two columns are identical or when
  # disbursement_table is not a Table.

  # Validate the data.  The message is built from adjacent string literals:
  # a backslash continuation inside a single literal would embed the source
  # indentation of the following line into the text shown to the user.
  assert employeeId_col != overtime_col, (
      'The employee ID and overtime columns are the same. '
      'Please ensure you haven\'t selected the same column for these items.')
  assert isinstance(disbursement_table, Table), 'Please select a valid Picalo table'

  # Stratify the disbursement data by employee and measure each employee's
  # handshake slope.  Only positive slopes are kept for further analysis; the
  # steepest one is remembered because the cut-off is expressed relative to it.
  positive_trends = []
  high_handshake_slope = 0
  for employee_strat in Grouping.stratify_by_value(disbursement_table, employeeId_col):
    # The slope is read from element [0][0] of the handshake_slope result.
    handshake_slope = Trending.handshake_slope(employee_strat, overtime_col)[0][0]
    if handshake_slope > 0:
      positive_trends.append((handshake_slope, employee_strat))
      high_handshake_slope = max(high_handshake_slope, handshake_slope)

  # The evaluative threshold is a percentage of the highest handshake slope.
  # Example: Highest handshake slope = 16
  #          User-specified threshold (0-99) = 75
  #          Evaluative threshold (16*.75) = 12
  #          All trends with handshake slopes higher than 12 are reported.
  # It does not depend on the individual trend, so it is computed once.
  # NOTE(review): under Python 2 this is an integer division if the slope is
  # an int -- confirm the numeric type returned by Trending.handshake_slope.
  evaluative_threshold = high_handshake_slope * threshold / 100
  trends_above_threshold = [
      strat for slope, strat in positive_trends if slope > evaluative_threshold]

  # Return resulting trends as a table list.
  return TableList(trends_above_threshold), RETURN_TEXT

# Explanatory text returned by run() as the second element of its result
# tuple, alongside the TableList of flagged employees.  It contains <br>
# markup.
# NOTE(review): presumably rendered as HTML by Picalo -- confirm.
RETURN_TEXT = '''
As a result of the analysis, you will find a list of tables (TableList)
that includes one table for each employee portraying patterns of rising
overtime.  Please evaluate each table to determine whether the trend
seems particularly suspicious of fraudulent activity.
<br><br>
If the results seem too relaxed (you received too many results that
do not seem to portray a strong rising overtime pattern,) try running the 
detectlet again using a higher threshold level.  Likewise, if the results
seem too strict, try using a lower threshold level.
'''

def example_input():
  '''Builds the sample disbursement table from the embedded CSV_DATA.

     The CSV text is wrapped in an in-memory file object and handed to
     load_csv; the DisbursementId and EmployeeId columns are then typed
     as int and the OvertimeHours column as currency.  The TotalPayment
     column is not given an explicit type here.
  '''
  from StringIO import StringIO  # lets the CSV string stand in for a file

  column_types = (
    ('DisbursementId', int),
    ('EmployeeId', int),
    ('OvertimeHours', currency),
  )
  sample = load_csv(StringIO(CSV_DATA))
  for column_name, column_type in column_types:
    sample.set_type(column_name, column_type)
  return sample

# Sample payroll data loaded by example_input(): a header row followed by
# 100 disbursements for employee IDs 1-10.  The backslash directly after the
# opening quotes suppresses the leading newline so that the header row is the
# first line of the CSV text.
CSV_DATA = '''\
DisbursementId,EmployeeId,OvertimeHours,TotalPayment
1,1,2.00,9836.37
2,2,0.00,7942.47
3,3,1.25,5580.37
4,4,0.00,8050.14
5,5,0.00,8077.64
6,6,0.00,6246.57
7,7,0.00,9573.42
8,8,4.00,9736.82
9,9,0.00,7728.93
10,10,0.00,6081.27
11,1,0.00,9367.97
12,2,0.00,7942.47
13,3,2.00,5681.83
14,4,0.00,8050.14
15,5,0.00,8077.64
16,6,0.00,6246.57
17,7,1.25,9872.59
18,8,0.00,8851.65
19,9,0.00,7728.93
20,10,5.00,6841.43
21,1,0.00,9367.97
22,2,0.00,7942.47
23,3,5.00,6087.68
24,4,2.00,8452.65
25,5,0.00,8077.64
26,6,0.00,6246.57
27,7,1.00,9812.76
28,8,0.00,8851.65
29,9,0.00,7728.93
30,10,0.00,6081.27
31,1,8.00,11241.56
32,2,10.00,9928.09
33,3,0.00,5411.27
34,4,0.00,8050.14
35,5,0.00,8077.64
36,6,0.00,6246.57
37,7,2.00,10052.09
38,8,0.00,8851.65
39,9,0.00,7728.93
40,10,0.00,6081.27
41,1,0.00,9367.97
42,2,2.50,8438.87
43,3,0.00,5411.27
44,4,0.00,8050.14
45,5,0.00,8077.64
46,6,0.00,6246.57
47,7,2.00,10052.09
48,8,0.00,8851.65
49,9,1.25,7970.46
50,10,0.00,6081.27
51,1,0.00,9367.97
52,2,15.00,10920.90
53,3,10.00,6764.09
54,4,0.00,8050.14
55,5,0.00,8077.64
56,6,0.00,6246.57
57,7,3.00,10291.43
58,8,0.00,8851.65
59,9,0.00,7728.93
60,10,0.00,6081.27
61,1,0.00,9367.97
62,2,2.00,8339.59
63,3,0.00,5411.27
64,4,0.00,8050.14
65,5,2.00,8481.52
66,6,0.00,6246.57
67,7,4.50,10650.43
68,8,0.00,8851.65
69,9,0.00,7728.93
70,10,0.00,6081.27
71,1,0.00,9367.97
72,2,0.00,7942.47
73,3,0.00,5411.27
74,4,0.00,8050.14
75,5,0.00,8077.64
76,6,0.00,6246.57
77,7,6.00,11009.43
78,8,5.00,9958.11
79,9,0.00,7728.93
80,10,4.50,6765.41
81,1,0.00,9367.97
82,2,0.00,7942.47
83,3,3.00,5817.12
84,4,0.00,8050.14
85,5,0.00,8077.64
86,6,0.00,6246.57
87,7,8.00,11488.10
88,8,0.00,8851.65
89,9,0.00,7728.93
90,10,0.00,6081.27
91,1,12.00,12178.36
92,2,0.00,7942.47
93,3,0.00,5411.27
94,4,0.00,8050.14
95,5,3.00,8683.46
96,6,0.00,6246.57
97,7,8.00,11488.10
98,8,0.00,8851.65
99,9,0.00,7728.93
100,10,0.00,6081.27
'''