####################################################################################
#                                                                                  
# Copyright (c) 2008 Travis Kulbeth <tkulbethATgmailDOTcom>               
#                                                                                  
# This detectlet is part of the Payroll Frauds detectlets library.
# It is open source and can be modified as needed.                                    
#                                                                                  
####################################################################################
# UPDATES:
#
#   April 2008  First version of the detectlet
#   June 2008   Updated descriptions and instructions
#
# STATUS: Reviewed and Approved. Matt Hillary. June 2008
#
# IDEAS/QUESTIONS:
#
#
####################################################################################

# Version number of the detectlet standard declared by this module.
# NOTE(review): presumably read by the Picalo detectlet loader -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *                            # import the Picalo libraries
import sys, re, random, os, os.path, urllib     # import commonly-used Python libraries

# Wizard definition (XML-style markup) with one page per input.  The
# "variable" attribute of each <parameter> matches a parameter name of run():
# employee_table and employee_ssn_col.
wizard = '''
<wizard>
  <page>
    Select the TABLE containing employee information.  The table must contain
    the employees' Social Security Numbers (SSN).

    <parameter type="Table" variable="employee_table"/>
  </page>
  <page>
    Select the COLUMN from the employee table containing the employee's SOCIAL SECURITY NUMBER.

    <parameter type="Column" table="employee_table" variable="employee_ssn_col"/>
  </page>
</wizard>
'''


# Explanatory text returned by run() together with the table of flagged
# records.  It describes the expected SSN format and value ranges and how the
# flagged records should be interpreted.
RESULTS_TEXT = '''
    The displayed table shows all of the employee records where the 
    social security number has an invalid format or contains invalid values.


    The accepted format is '000-00-0000', and the correct numerical ranges are 
    as follows (as of April 5, 2008):


    First 3 digits: 001 - 772,
    Second 2 digits: 01 - 99,
    Last 4 digits: 0001 - 9999.
    

    In some cases, an employee record with an invalid social security number 
    may indicate a false employee record. A false employee record may be the result of 
    miskeyed data entry or may be a fraudulent employee record.
    The records in this result table may be phantom employee records, 
    possibly used by other employees to obtain additional paychecks. These records should be verified.
'''

def run(employee_table, employee_ssn_col):
    '''Many employee records contain an employee's Social Security Number for tax
    and other purposes. An employee record with an invalid social security number
    may be a false employee record: the invalid SSN indicates either a miskeyed
    data entry or a fraudulent entry. This detectlet searches for all employees
    that have invalid social security numbers (both by format and value).

    The detectlet goes through the following process:

    - Compares each record's SSN with the personal social security number format
    ('000-00-0000')

    - Checks that each component of the number is within the appropriate range,
    as follows (as of April 5, 2008):
        First 3 digits: 001 - 772,
        Second 2 digits: 01 - 99,
        Last 4 digits: 0001 - 9999.

    Employees that are returned from this detectlet may be phantom employee
    records, possibly used by other employees to obtain additional paychecks or
    for other fraudulent purposes.

    Parameters:
        employee_table   -- the table holding the employee records
        employee_ssn_col -- name of the column in that table holding the SSN

    Returns a tuple of (table of flagged records, RESULTS_TEXT).
    '''
    # Assemble the filter expression around the chosen column name; a record
    # is kept when checkSSNFormat() reports its SSN as invalid.
    filter_parts = ["checkSSNFormat(record.", employee_ssn_col, ") == True"]

    # Let Picalo select the records for which the expression holds.
    flagged = Simple.select(employee_table, "".join(filter_parts))

    return flagged, RESULTS_TEXT
    
# Pattern for a valid SSN: area number 001-772, group number 01-99 and serial
# number 0001-9999.  The parts may be joined by a hyphen, a space or nothing,
# but the same separator must be used in both positions (back-reference \2).
# The negative lookaheads reject the all-zero area, group and serial numbers,
# which the digit classes alone would let through.
# The base regular expression is from www.breakingpar.com.
#alternate regex (just format): '^(\\d{3})([ \\-]?)(\\d{2})\\2(\\d{4})$'
_SSN_PATTERN = re.compile(
    r'^(?!000)([0-6]\d{2}|7[0-6]\d|77[0-2])([ \-]?)(?!00)(\d{2})\2(?!0000)(\d{4})$'
)


def checkSSNFormat(venTIN):
    '''Report whether an employee's SSN is INVALID.

    venTIN -- the SSN value to check, normally a string such as '123-45-6789'.

    Returns True when the value is not a well-formed SSN within the accepted
    ranges (so the record should be flagged) and False when it is valid.
    Values that are not text at all (for example None) are reported as
    invalid instead of raising an error.
    '''
    try:
        # No match means the value is malformed or out of range.
        return _SSN_PATTERN.search(venTIN) is None
    except TypeError:
        # Not a string, so it cannot be a well-formed SSN.
        return True

    
def example_input():
    '''Build the sample employee table from the CSV text held in csvdata.

    Returns the loaded table with its EmployeeNumber column typed as int.
    '''
    # StringIO wraps the CSV text in a file-like object that load_csv can read.
    from StringIO import StringIO
    employees = load_csv(StringIO(csvdata))
    employees.set_type('EmployeeNumber', int)
    return employees

# Sample employee data in CSV form (header row first), loaded by
# example_input().  Some SSN values do not follow the 000-00-0000 pattern
# (e.g. "09-59-9171", "343-720-0910", "87-74-451") and some have a leading
# part above 772 (e.g. "801-63-1684").
# The backslash after the opening quotes keeps the text from starting with
# an empty line.
csvdata = '''\
"EmployeeNumber","FirstName","MiddleInitial","LastName","Address","SSN"
234,"Mark","W","Smith","453 Eagle Road","661-70-0102"
235,"Stephen","C","Lehman","645 Webster Street","450-83-6762"
236,"Matthew","A","Harris","886 East Washington St.","179-87-8586"
237,"Carol","H","Harper","334 Lakeside Rd.","337-99-2560"
238,"Connie","G","Forde","632 Pine Lane","355-16-6122"
239,"John","E","Hawkins","45 Oak Ave.","436-59-4674"
240,"Daniel","T","Haynes","37 West Nottingham Avenue","522-82-6132"
241,"Joanne","U","Pearson","7453 Littlejohn Road","221-65-0898"
242,"David","K","Taylor","43 Gunshoot Street","09-59-9171"
243,"Mike","G","Shook","643 N. Arline St.","107-46-2583"
244,"Carolyn","S","Tabor","431 Bittersweet Rd.","689-58-0775"
245,"Gail","D","Cole","341 N. Harvest Road","247-02-7687"
246,"Joe","F","Cochran","1686 Spruce Street","376-00-8370"
247,"Laura","E","Cobb","4689 Sunny St.","540-26-6445"
248,"Beth","S","Butler","842 Thorton Rd.","209-36-3514"
249,"Jewel","D","McMullan","2817 S. Beech Lane","013-56-6936"
250,"Harold","G","Conery","3488 Spivey Ave.","701-96-6897"
251,"Ernie","B","Long","2002 Wolf Avenue","360-63-8649"
252,"Todd","C","Nickels","1498 Lion Road","801-63-1684"
253,"Bucky","C","O'Brian","2343 10th Street","371-16-2702"
254,"Frank","N","Fulton","2979 East Richardson Road","469-93-9060"
255,"Linda","M","Nelson","4766 Bunker Hill Street","746-49-4247"
256,"Justin","H","Miller","4327 Robin Hood St.","451-45-0945"
257,"Betty","F","Norris","1618 Sunset Rd.","265-23-4201"
258,"Billy","S","Buckner","1515 Mill Lane","343-720-0910"
259,"Bobby","A","Crosland","2622 Sonia Ave.","178-18-4486"
260,"Brian","D","Snow","640 Jolly Avenue","234-96-2318"
261,"Charles","J","Sherril","4491 Cypress Rd.","711-56-6070"
262,"Fred","K","Simmons","855 W. Manor Road","248-64-8771"
263,"James","L","Skinner","377 Alabama Street","377-70-2845"
264,"Kevin","W","Snyder","436 S. Mississippi St.","285-33-9774"
265,"Robert","C","Sneed","355 E. River Rd.","136-35-8873"
266,"William","A","Wright","3979 17th Lane","540-89-9606"
267,"Max","H","Warren","4570 N. Military Ave.","843-68-7893"
268,"Jim","G","Woods","1687 University Avenue","473-34-1433"
269,"Benjamin","E","Womack","1100 Main Rd.","409-06-5728"
270,"Jennifer","T","Wilson","73 Justin Road","127-78-3035"
271,"Jane","U","Wind","2703 Holly Hills Street","223-17-7565"
272,"Rose","K","Wise","172 Greentree St.","87-74-451"
273,"Mickey","G","Woodard","4555 Old Yorkville Rd.","336-38-2206"
274,"Annette","S","Williamson","1473 East Emerald Road","329-63-1048"
275,"George","D","White","892 Steens Street","546-29-7400"
276,"Terry","F","Washington","1285 Plum St.","582-40-9893"
277,"Debbie","E","DuFrene","3784 Dogwood Rd.","704-44-8518"
278,"Barry","S","Baker","4189 N. Gipson Lane","313-54-3845"
279,"Gertrude","D","Bailey","4842 West Point Ave.","190-75-3408"
280,"Harris","G","Ashford","70 Locksley Way Avenue","717-10-5455"
281,"Larry","B","Andrews","3885 Maid Marian Rd.","241-51-7402"
282,"Sharon","C","Anderson","4078 Reed Lane","379-38-2057"
283,"Diane","C","Douglas","4913 North Ave.","205-63-7138"
284,"Mary","N","Dickson","2220 South Avenue","612-66-3444"
285,"Randy","M","Polk","2424 East Road","750-31-3017"
286,"Timothy","H","Henderson","4540 West Point Street","771-12-1386"
287,"Wayne","F","Logan","1891 Yeates St.","291-12-4389"
288,"Richey","S","Cunnigham","534 East Columbus Road","339-54-7218"
289,"Gary","A","Davis","1378 Aztec Street","341-84-2295"
290,"Chris","D","Dallas","2564 Campus View St.","469-29-8874"
291,"Kerry","J","Dailey","3430 N. Montgomery Rd.","572-51-9535"
292,"Eloise","K","Cumberland","1515 North Jackson Lane","279-40-8365"
293,"Glenn","L","Graham","504 N. Lincoln Ave.","716-93-7646"
294,"Lucy","W","Hunt","3601 Carter Avenue","177-58-2380"
295,"Patrick","C","Williams","443 E. Bush St.","563-73-8139"
296,"Lewis","A","Scoggin","522 Starr St.","800-28-6519"
297,"Paul","D","Clemons","4473 Wood Road","231-48-1986"
298,"Sarah","G","Johnson","4723 Hartness Street","254-93-6501"
299,"Earl","E","Ivy","4038 S. Maple St.","553-62-7952"
300,"Harry","T","Jack","797 Oak Rd.","639-54-7714"
301,"Drew","U","Bryant","1933 Chestnut Lane","381-72-9536"
302,"Ally","K","Walker","3244 West Spivey Ave.","300-78-5517"
303,"Haley","G","Jamison","2136 East Wolf Avenue","495-25-4340"
304,"Lee","S","Jefferson","2483 W. Lion St.","716-10-1743"
305,"Kimberly","D","James","1635 10th Ave.","332-41-5665"
306,"Ronald","F","Brown","1095 Richardson Avenue","269-53-1740"
307,"Rhonda","E","Ballard","2821 North Bunker Hill Street","526-49-1636"
308,"Jusy","S","Green","1943 Robin Hood Road","264-85-4184"
309,"Jessie","D","Glover","3480 Sunset Street","582-08-5585"
310,"Cliff","G","Glaspe","2807 E. Mill St.","544-24-5044"
311,"Frances","B","Franklin","4337 Sonia Road","208-85-9638"
312,"Michael","C","Ford","2257 Jolly Street","500-58-6327"
313,"Lisa","C","Grace","4602 Cypress St.","150-62-7101"
314,"Ruth","N","Gregg","1678 S. Manor Rd.","233-49-6777"
315,"Malcom","M","Griffin","1291 Alabama Lane","662-25-9771"
316,"Keith","H","Nicholson","4423 N. Mississippi Ave.","225-93-1108"
317,"Jeff","F","Moore","778 River Avenue","231-61-8162"
318,"Zac","S","Wallace","2979 River St.","181-36-4160"
319,"Eric","A","McCann","2651 Lakeside Rd.","563-48-7335"
320,"Trey","D","McPhail","2153 North Pine Lane","571-45-4508"
321,"Doris","J","Hazzard","1642 Oak Ave.","649-64-3035"
322,"Elliot","K","Makamson","4983 East Nottingham Avenue","363-98-6871"
323,"Ashley","L","McGwire","932 Jeff Peay Avenue","078-85-8823"
'''
    
