# Travis Ringger
# tringger@gmail.com
# This software and my name may be included in Picalo for all to use, free of charge
#
####################################################################################
#                                                                                  
# Copyright (c) 2008 Travis Ringger <tringgerATgmailDOTcom>               
#                                                                                  
# This detectlet is part of the Bid Rigging wizards library.
# It does not require a license and is completely open source.                                    
#                                                                                  
####################################################################################
# UPDATES
#
#  14 April, 2008  First version of the wizard
#
####################################################################################
# STATUS:     
# 
# 
#   IDEAS/QUESTIONS
#
#
####################################################################################

# Version of the detectlet standard this module declares itself against.
# NOTE(review): presumably read by the Picalo detectlet loader -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *                            # import the Picalo libraries
import sys, re, random, os, os.path, urllib     # import commonly-used Python libraries

# XML description of the wizard pages shown to the user.  Each <parameter>
# element's "variable" attribute names one of the arguments of run() below:
# table, invoice_id, vendor_id, craft_type, pay_rate and z_score.
wizard = '''
<wizard>
  <page> 
    Select the TABLE that contains the invoice number, 
    vendor id, pay rate, and craft type:
 
    An invoice will typically be created for each vendor to 
    record the type of work performed, the rate that vendor 
    will be paid, and some way to identify the vendor. 
   <parameter type="Table" variable="table"/>
  </page>
  <page>
    Select the COLUMN identifying invoices: 
    
    (Example: invoice, INVOICE_ID, invoice_number)
    <parameter type="Column" table="table" variable="invoice_id"/>
    
  </page>
  <page> 
    Select the COLUMN identifying vendors: 
    
    (Examples: Vendor_ID, Vendor, Vendor_Key, or Vendor_Foreign_Key)
    <parameter type="Column" table="table" variable="vendor_id"/>

    Select the COLUMN containing the craft type: 

    (Example: electrician, landscaping, plumbing)
    <parameter type="Column" table="table" variable="craft_type"/>
  </page>
  <page>  
    Select the COLUMN identifying the PAY RATE (in dollars per hour):
    
    (Example: rate, wage)
    <parameter type="Column" table="table" variable="pay_rate"/>
  </page>
  <page>  
    What number (a positive number, in decimal form) would you like to use as the 
    z-score to determine an excessively high wage rate?  For example, to get the 
    top 10% and bottom 10% of wage rates, use the Standard Normal Table to find 
    1.28 as the z-score.
    
    (Example: 0.50, 1.5)
    <parameter type="float" variable="z_score" default="1.5"/>
  </page>

</wizard>
'''

# Explanatory text that run() returns alongside the results table; it tells
# the user how to read the outlier list and what follow-up to consider.
RESULTS_TEXT = '''\
    The results table shows the outliers based on the specified z-score.  These
    outliers are the pay rates which are excessively higher (and lower) than the 
    average of all craft types. 
    
    The results are sorted by pay rate with the highest rates listed first.  
    The rates that appear highest in the list are those to which the greatest 
    attention should be paid.
    
    Further investigation may be required to determine whether or not each vendor
    identified in the results is actually committing fraud or not.  Special 
    circumstances or craft types could warrant higher than average pay rates.  
    
    It may be necessary to follow up with the vendor, or look up past invoices 
    with the same vendor to get an accurate picture of the vendor's history with 
    your company.  The vendor id identified in this table should help you look up
    the past invoices with this vendor. 
'''


def run(table, invoice_id, vendor_id, z_score, pay_rate, craft_type):
  '''Systems such as payroll are easily taken for granted because they operate 
     periodically and seem so straight-forward and reliable.  Contractors may target 
     a company's payroll system for those very reasons.  Imagine a company that hires
     several hundred contractors for a given project or contract.  An electrician, 
     or group of electricians, would try to get away with payroll fraud by increasing
     their hourly rate with hopes that no one would pay attention. 
     
     Systems auditors should be on the lookout for signs of this type of payroll fraud.  
  '''
  # Arguments (collected by the wizard defined in this module):
  #   table       - the table holding the invoice records
  #   invoice_id  - column identifying invoices
  #   vendor_id   - column identifying vendors
  #   z_score     - positive number used as the outlier threshold
  #   pay_rate    - column holding the pay rate
  #   craft_type  - column holding the craft type
  # Returns a (results table, RESULTS_TEXT) pair.
  # Raises AssertionError when two column selections coincide or when
  # z_score is not greater than zero.

  # validate the data
  # A chained "a != b != c" only compares neighbouring operands, so every
  # pair of column selections is compared explicitly.  z_score is a number,
  # not a column, and is therefore validated separately below.
  columns = (invoice_id, vendor_id, pay_rate, craft_type)
  for position, column in enumerate(columns):
    assert column not in columns[position + 1:], '''The values you selected for 
            Invoice ID, Vendor ID, Z-Score, Wage Rate, and/or Craft Type are not unique. Please 
            go back and select unique columns for these values from the table.'''
  assert z_score > 0, '''The Z-Score must be greater than zero.  Negative values of the Z-Score 
            are taken into consideration.  All that is required is one positive value.'''

  # Select the rows whose pay rate is an outlier for the given z-score.
  # NOTE(review): per the wizard and results text both unusually high and
  # unusually low rates are expected here -- confirm against Picalo's Simple API.
  results = Simple.select_outliers_z(table, pay_rate, z_score)
  # NOTE(review): the False flag presumably requests a descending sort, which
  # matches "highest rates listed first" in RESULTS_TEXT -- confirm.
  Simple.sort(results,False,pay_rate)
  return results, RESULTS_TEXT
  
  
# load the sample data, if the user requests it
def example_input():
  # Builds the sample table from the CSV text embedded in this module.
  # load_csv is handed a file-like object here, so the text is wrapped in
  # an in-memory StringIO buffer first.
  from StringIO import StringIO
  sample = load_csv(StringIO(csvdata))
  # Convert the Rate column to float before handing the table back.
  sample.set_type('Rate', float)
  return sample



# This is the sample data that will show if the user asks for it;
# example_input() parses it with load_csv.
# Columns: Invoice_ID, Craft_Type, Vendor_ID, Rate.
# Most crafts repeat the same rate on every row; a handful of rows
# (rates of 68, 81, 125 and 79) deviate sharply from the rest of their craft.
csvdata = '''\
Invoice_ID,Craft_Type,Vendor_ID,Rate
3000,electrician,1008,38
5000,plumber,1798,46
8000,framer,1180,32
9000,roofer,1202,44
15000,drilling,1165,50
17000,dry wall,1110,41
39000,carpet,1345,44
43000,heating & air,1109,38
48000,paint,1089,48
48000,landscape,1145,30
48000,auto repair,1023,55
54000,janitorial,1024,29
61000,excavation,1125,42
68000,snow removal,1145,68
69000,trash removal,1304,46
74000,general building maintenance,1201,35
76000,parking lot repair,1165,40
79000,window washing,1190,27
98000,electrician,1008,38
103000,plumber,1798,46
105000,framer,1180,32
114000,roofer,1202,44
131000,drilling,1165,50
137000,dry wall,1110,41
145000,carpet,1345,44
165000,heating & air,1109,38
166000,paint,1089,48
176000,landscape,1145,30
187000,auto repair,1023,55
198000,janitorial,1024,29
206000,excavation,1125,42
206000,snow removal,1145,27
206000,trash removal,1304,46
216000,general building maintenance,1201,35
232000,parking lot repair,1165,40
239000,window washing,1190,27
243000,electrician,1008,38
248000,plumber,1798,46
256000,framer,1180,32
265000,roofer,1202,44
274000,drilling,1165,50
280000,dry wall,1110,41
280000,carpet,1345,44
291000,heating & air,1109,38
302000,paint,1089,48
303000,landscape,1145,30
304000,auto repair,1023,55
305000,janitorial,1024,29
307000,excavation,1125,42
326000,snow removal,1145,27
330000,trash removal,1304,46
338000,general building maintenance,1201,35
339000,parking lot repair,1165,40
346000,window washing,1190,27
352000,electrician,1008,38
365000,plumber,1798,46
367000,framer,1180,32
374000,roofer,1202,44
380000,drilling,1165,50
386000,dry wall,1110,41
387000,carpet,1345,44
392000,heating & air,1109,38
396000,paint,1089,48
409000,landscape,1145,30
439000,auto repair,1023,55
443000,janitorial,1024,29
455000,excavation,1125,81
463000,snow removal,1145,27
465000,trash removal,1304,46
469000,general building maintenance,1201,35
476000,parking lot repair,1165,40
494000,window washing,1190,27
494000,electrician,1008,38
499000,plumber,1798,46
499000,framer,1180,32
525000,roofer,1202,44
531000,drilling,1165,50
541000,dry wall,1110,41
544000,carpet,1345,44
552000,heating & air,1109,38
558000,paint,1089,48
566000,landscape,1145,30
569000,auto repair,1023,55
575000,janitorial,1024,29
599000,excavation,1125,42
618000,snow removal,1145,27
629000,trash removal,1304,46
633000,general building maintenance,1201,35
636000,parking lot repair,1165,40
641000,window washing,1190,27
648000,electrician,1008,38
658000,plumber,1798,46
662000,framer,1180,32
665000,roofer,1202,44
674000,drilling,1165,50
680000,dry wall,1110,41
692000,carpet,1345,44
693000,heating & air,1109,38
700000,paint,1089,48
708000,landscape,1145,30
711000,auto repair,1023,55
714000,janitorial,1024,29
716000,excavation,1125,42
722000,snow removal,1145,27
726000,trash removal,1304,46
727000,general building maintenance,1201,35
732000,parking lot repair,1165,40
734000,window washing,1190,27
746000,electrician,1008,38
779000,plumber,1798,46
780000,framer,1180,32
785000,roofer,1202,44
801000,drilling,1165,50
814000,dry wall,1110,41
815000,carpet,1345,44
820000,heating & air,1109,38
825000,paint,1089,48
826000,landscape,1145,30
834000,auto repair,1023,125
838000,janitorial,1024,29
839000,excavation,1125,42
839000,snow removal,1145,27
839000,trash removal,1304,46
840000,general building maintenance,1201,35
853000,parking lot repair,1165,40
861000,window washing,1190,27
870000,electrician,1008,38
882000,plumber,1798,46
885000,framer,1180,32
891000,roofer,1202,44
897000,drilling,1165,50
908000,dry wall,1110,41
910000,carpet,1345,44
913000,heating & air,1109,38
915000,paint,1089,48
927000,landscape,1145,30
933000,auto repair,1023,55
934000,janitorial,1024,29
938000,excavation,1125,42
944000,snow removal,1145,27
948000,trash removal,1304,46
959000,general building maintenance,1201,35
962000,parking lot repair,1165,40
967000,window washing,1190,27
971000,electrician,1008,38
978000,plumber,1798,79
989000,framer,1180,32
992000,roofer,1202,44
997000,drilling,1165,50
'''



