# Grant Gordon <grantjgordonATgmailDOTcom>
#
# Version 1.0
#
# Note: This kind of detectlet by nature doesn't require a lot of data, so I prepared
#   what I would consider a reasonable number of Bidders and Evaluators in a real 
#   world situation.

####################################################################################
#                                                                                  
# Copyright (c) 2008 by Grant Gordon                        
#                                 
# This Detectlet is part of the Bid Rigging Library
#                                             
# Running this detectlet with the example data will yield the following results:
#   +----------+---------------+-------------+------------------+--------------+
#   | BidderID | BidderSurname | EvaluatorID | EvaluatorSurname | PercentMatch |
#   +----------+---------------+-------------+------------------+--------------+
#   |      236 | Harris        |         257 | Norris           |          0.5 |
#   |      240 | Haynes        |         263 | Hayne            |         0.75 |
#   +----------+---------------+-------------+------------------+--------------+    
####################################################################################

# NOTE(review): presumably the version of the Picalo detectlet standard this
# module is written against -- confirm against the Picalo documentation.
DETECTLET_STANDARD = 1.0

from picalo import *

# XML wizard definition with three pages: the first asks for the bidder and
# evaluator tables, the second for the bidder surname and ID columns, and the
# third for the evaluator surname and ID columns.  The "variable" attributes
# carry the same names as the parameters of run() below.
# NOTE(review): presumably Picalo renders this to prompt the user and passes
# the collected values to run() -- confirm against the Picalo documentation.
wizard = '''
<wizard>
  <page>
    Select the TABLE containing Bidder records:
    <parameter type="Table" variable="tbl_bidders"/>
    Select the TABLE containing Evaluation Committee Member records:
    <parameter type="Table" variable="tbl_evals"/>
  </page>
  <page>
    Select the COLUMN containing Bidder's surnames:
    <parameter type="Column" table="tbl_bidders" variable="clm_bid_name"/>
    Select the COLUMN that contains Bidder's IDs:    
    <parameter type="Column" table="tbl_bidders" variable="clm_bid_id"/>
  </page>
    <page>
    Select the COLUMN containing Evaluation Committee Member's surnames:
    <parameter type="Column" table="tbl_evals" variable="clm_eval_sur"/>
    Select the COLUMN containing Evaluation Committee Member's IDs:    
    <parameter type="Column" table="tbl_evals" variable="clm_eval_id"/>
  </page>
</wizard>
'''

def run(tbl_bidders, tbl_evals, clm_bid_name, clm_eval_sur, clm_bid_id, clm_eval_id):
    '''This detectlet looks for bidders with surnames similar to those on the
        bid evaluation committee.  Such potential family ties could signal
        a conflict of interest in the decision making process.

        Parameters:
          tbl_bidders  -- table of bidder records
          tbl_evals    -- table of evaluation committee member records
          clm_bid_name -- name of the bidder surname column in tbl_bidders
          clm_eval_sur -- name of the evaluator surname column in tbl_evals
          clm_bid_id   -- name of the bidder ID column in tbl_bidders
          clm_eval_id  -- name of the evaluator ID column in tbl_evals

        Returns a (results table, results text) tuple.  The results table has
        one row per bidder/evaluator pair whose surnames scored at or above
        the match threshold, with the columns BidderID, BidderSurname,
        EvaluatorID, EvaluatorSurname and PercentMatch.  The results text is
        the same explanatory message whether or not any rows were found.
    '''
    
    RESULTS_TEXT = '''One or more of the Bidder's surnames seem to match an Evaluation Committee Member's surname.
                    The table below shows the Bidder's ID and surname, the Evaluation Committee Member's ID and Surname, and the percent match that was found.
                    These results warrant further investigation.
                    If no results show in the table, no bidder surnames seem to match evaluation committee member surnames.  
                '''

    # Minimum fuzzy-match score for a surname pair to be reported
    MATCH_THRESHOLD = .5

    tbl_results = Table([
        ('BidderID',int),
        ('BidderSurname',unicode),
        ('EvaluatorID',int),
        ('EvaluatorSurname',unicode),
        ('PercentMatch',float),
        ])

    # Work on slices of the input tables rather than the tables themselves
    # (intended to avoid overwriting the caller's data)
    tbl_wrk_bid = tbl_bidders[:]
    tbl_wrk_eval = tbl_evals[:]

    # Surname columns of both tables
    lst_bidder_names = tbl_wrk_bid[clm_bid_name]
    lst_eval_names = tbl_wrk_eval[clm_eval_sur]

    # Fuzzy match each bidder surname against every committee surname
    for b, b_name in enumerate(lst_bidder_names):
        # The bidder record is the same for every evaluator, so fetch it once
        bid_rec = tbl_wrk_bid[b]
        for e, e_name in enumerate(lst_eval_names):
            result = Simple.fuzzymatch(b_name, e_name)

            # Report the pair when the score reaches the threshold
            if result >= MATCH_THRESHOLD:
                eval_rec = tbl_wrk_eval[e]
                tbl_results.append(bid_rec[clm_bid_id], bid_rec[clm_bid_name], eval_rec[clm_eval_id], eval_rec[clm_eval_sur], result)
    return tbl_results, RESULTS_TEXT
    
#-----------------------------------------------------------------------------------#
#           CSV Test Data
#-----------------------------------------------------------------------------------#
# Sample bidder records read by example_input(): a header row followed by
# eight bidders (IDs 234-241).
csvdata_bidders = '''\
BidderID,FirstName,MiddleInitial,LastName,Address
234,Mark,W,Smith,453 Eagle Road
235,Stephen,C,Lehman,645 Webster Street
236,Matthew,A,Harris,886 East Washington St.
237,Carol,H,Harper,334 Lakeside Rd.
238,Connie,G,Forde,632 Pine Lane
239,John,E,Hawkins,45 Oak Ave.
240,Daniel,T,Haynes,37 West Nottingham Avenue
241,Joanne,U,Pearson,7453 Littlejohn Road
'''

# Sample evaluation committee member records read by example_input(): a
# header row followed by eighteen members (IDs 249-266).
csvdata_evals = '''\
EvalID,FirstName,MiddleInitial,LastName,Address
249,Jewel,D,McMullan,2817 S. Beech Lane
250,Harold,G,Conery,3488 Spivey Ave.
251,Ernie,B,Long,2002 Wolf Avenue
252,Todd,C,Nickels,1498 Lion Road
253,Bucky,C,O'Brian,2343 10th Street
254,Frank,N,Fulton,2979 East Richardson Road
255,Linda,M,Nelson,4766 Bunker Hill Street
256,Justin,H,Miller,4327 Robin Hood St.
257,Betty,F,Norris,1618 Sunset Rd.
258,Billy,S,Buckner,1515 Mill Lane
259,Bobby,A,Crosland,2622 Sonia Ave.
260,Brian,D,Snow,640 Jolly Avenue
261,Charles,J,Sherril,4491 Cypress Rd.
262,Fred,K,Simmons,855 W. Manor Road
263,James,L,Hayne,377 Alabama Street
264,Kevin,W,Snyder,436 S. Mississippi St.
265,Robert,C,Sneed,355 E. River Rd.
266,William,A,Wright,3979 17th Lane
'''

def example_input():
    '''Builds the example bidder and evaluator tables from the CSV test data
       so the user can see what their input should look like.

       Returns a (bidder table, evaluator table) tuple.  In both tables the
       ID column is typed as float and the remaining columns as unicode.
    '''
    import StringIO  # lets load_csv read the CSV text as if it were a file

    text_columns = ('FirstName', 'MiddleInitial', 'LastName', 'Address')

    def _typed_table(csv_text, id_column):
        # Load one CSV fixture, then type the ID column followed by the text columns
        table = load_csv(StringIO.StringIO(csv_text))
        table.set_type(id_column, float)
        for column_name in text_columns:
            table.set_type(column_name, unicode)
        return table

    bidders = _typed_table(csvdata_bidders, 'BidderID')
    evaluators = _typed_table(csvdata_evals, 'EvalID')
    return bidders, evaluators

#-----------------------------------------------------------------------------------#
#           Testing
#-----------------------------------------------------------------------------------#
if __name__ == "__main__":
    # Run the detectlet against the bundled example data and show the matches
    example_bidders, example_evaluators = example_input()
    matches, _results_text = run(
        example_bidders,
        example_evaluators,
        'LastName',
        'LastName',
        'BidderID',
        'EvalID',
    )
    matches.view()