####################################################################################
#                                                                                  
# Copyright (c) 2006 Jeffrey Richardson <jeff_richardsonATbyuDOTnet>               
#                                                                                  
# This detectlet is part of the Bid Rigging detectlets library.                    
# It requires a license and is not open source.                                    
#                                                                                  
####################################################################################
# UPDATES
# 13 MAY 2006  First version of the detectlet
#
####################################################################################
#
# STATUS: Polish/Review
# Also, scope needs to be defined: list of purchases and/or summary?  
# 
#  Example data type casting issue
#
# IDEAS/QUESTIONS
#
#  I could make these detectlets more robust by returning all of the columns that the input table gives.  The detectlet could then place the
#  important fields in the front of the table.
#   -I could make the results table more robust by using the original column names from the source table
#
#  Employee information?
#  
# 
####################################################################################


from picalo import *
DETECTLET_STANDARD = 1.0
# XML definition of the wizard pages shown to the user.  Each <parameter>
# element's "variable" attribute matches the name of a parameter of run().
wizard = '''
<wizard>
  <page>
    Please select which table contains the bidding data.
    It should look similar to the example input data (see
    the previous page), and it should have columns for
    the project, bid, vendor, and amount.
    
    If you need to join two or more tables together to get
    your data into this form, close the wizard and do so.
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Your input table should contain the records for many
    projects (these could also be called contracts). This 
    detectlet will stratify the table into one table for 
    each project, then run the analysis on each project table.
    
    Which column should the table be stratified by?  This column
    should uniquely identify each PROJECT/CONTRACT.  It might be 
    called "id", "project id", "contract id", or something similar.
    <parameter type="Column" table="table" variable="project_col"/>
  </page>
  <page>
    Each project will have some bids (one or two or several).  Which column
    specifies the BID ID?  This column should contain data
    similar to a "bid id" or "bid num" or "bid key".
    <parameter type="Column" table="table" variable="bid_col"/>
  </page>
  <page>
    Each bid will be made by a VENDOR or third party company.  Which
    column identifies the bidder?  This column might be named 
    "vendor_id", "bidder_id", "vendor", or "bidder".
    <parameter type="Column" table="table" variable="vendor_col"/>
  </page>
  <page>
    Which column contains the total bid amount?  This is
    the total price of the entire bid and not the price of individual 
    line items.  It might be called "amount", "total", "bid total",
    "total amount", or "bid amount".
    <parameter type="Column" table="table" variable="amount_col"/>
  </page>
  <page>
    Which column identifies the winning bid? It may be a simple Yes/No or
    True/False or represented by a checkmark (like in Access).  It might 
    be called "winner", "awarded", or "winning_bid".
    <parameter type="Column" table="table" variable="winner_col"/>
  </page>
</wizard>
'''
# Explanatory text that run() returns alongside the results table.
RESULTS_TEXT = '''\
    The displayed table shows all of the bids that won despite a lower bid.
    The table indicates the project (by ID), the bid 
    (by ID), the vendor (by ID) and the amount of the bid along with the
    difference in price between the lowest bid and the winning bid.
    
    Vendors that are consistently showing up on this table should be investigated.
    If the difference between the lowest bid amount and winning bid amount is 
    very large, severe abuse may be taking place.
    
    If a specific employee is approving these bids, he or she may be directing
    work towards specific vendors.
'''

def _is_winner(value):
  '''Returns True when a winner-column value marks the winning bid.

     A value counts as a winner flag when it compares equal to True (a
     boolean True, or the number 1) or when its string form, ignoring case,
     is one of "true", "1", "yes" or "t".
  '''
  # "== True" (rather than "is True") is deliberate: it also accepts numeric 1.
  return value == True or str(value).lower() in ("true", "1", "yes", "t")


def run(table, project_col, bid_col, vendor_col, amount_col, winner_col):
  '''Most organizations require multiple bids to increase competition between
     suppliers.  Bidding keeps prices low and increases quality.  Generally,
     contracts are awarded to the bid with the lowest price.  Occasionally, 
     a company may decide to award the contract to a vendor other than the 
     lowest bidder (because of other considerations).  
     
     Purchasers (who work for the organization) may want to direct work towards
     a specific bidder.  They may do this if they are getting kickbacks from a 
     certain vendor, have family or friends they want to funnel work to, or for 
     many other reasons.  When this happens, the company suffers economic losses
     by over paying for services or goods.  Furthermore, bidders that are sure 
     of winning are less inclined to assure quality.
     
     This detectlet searches for contracts that were awarded to vendors that did
     not give the lowest bid.  These contracts should be investigated and verified
     for their integrity.
     
     The detectlet goes through the following process:
     - Stratifies the file into a table for each project.  
     - Identifies the winning contract.
     - Identifies the lowest contract.
     - Calculates the difference between the winning bid and the lowest bid.
     - Returns the bids that won while not being the lowest bid
       
     Companies with a significant number of non-lowest winning bids are suspect.
     
     The example input table contains the following five columns:
     - Project: The project id
     - Vendor: The vendor id - the ID of the company bidding.
     - Bid: The bid unique id
     - Amount: The total amount of each bid.
     - Winner: Identifies the winning bid
     
     In the example dataset several bids were won by Vendor #9 even though
     its bids were not the lowest bid.

     Parameters:
     - table:       the table of bids to analyze (it is sorted by amount_col
                    as part of the analysis).
     - project_col: the column identifying the project/contract.
     - bid_col:     the column identifying the bid.
     - vendor_col:  the column identifying the bidding vendor.
     - amount_col:  the column holding the total bid amount.
     - winner_col:  the column flagging the winning bid.

     Returns a (results, RESULTS_TEXT) pair.  The results table has the columns
     project_id, bid_id, vendor, amount and difference, with one record for
     each project whose winning bid was not its lowest bid.

     Raises AssertionError if the same column was selected for two of the
     five column parameters.
  '''
  
  # validate the data
  # A chained comparison (a != b != c ...) only compares neighbouring pairs, so
  # every pair of column selections is compared explicitly.
  columns = [project_col, bid_col, vendor_col, amount_col, winner_col]
  for index, column in enumerate(columns):
    for other in columns[index + 1:]:
      assert column != other, 'The Project, Bid, Vendor, Amount, and Winner columns must be different.  Please ensure you haven\'t selected the same column for two of these items.'
  
  print("running...")
  # run the analysis
  results = Table([
    ( 'project_id', unicode ),
    ( 'bid_id',     unicode ),
    ( 'vendor',     unicode ),
    ( 'amount',     number   ),
    ( 'difference', number   ),
  ])
  # Sort the table by lowest amount to highest amount, so that the first bid of
  # each project table is that project's lowest bid.
  Simple.sort(table, True, amount_col)
  projects = Grouping.stratify_by_value(table, project_col)
  for project in projects:
    lowest_bid = project[0]
    # if the first bid (lowest) is the winner then continue to the next contract
    if _is_winner(lowest_bid[winner_col]):
      continue
    # Start from the lowest bid; if no other bid is flagged as the winner the
    # two stay equal and nothing is recorded for this project.
    winning_bid = lowest_bid
    for counter, bid in enumerate(project):
      if counter == 0: # skip the first bid (already been checked)
        continue
      if _is_winner(bid[winner_col]):
        winning_bid = bid
    if lowest_bid != winning_bid: # if the lowest bid isn't the winning bid then record it
      rec = results.append()
      rec['project_id'] = winning_bid[project_col]
      rec['bid_id'] = winning_bid[bid_col]
      rec['vendor'] = winning_bid[vendor_col]
      rec['amount'] = winning_bid[amount_col]
      # how much more the winning bid cost than the lowest bid, to the cent
      rec['difference'] = round(float(winning_bid[amount_col]) - float(lowest_bid[amount_col]),2)
      
  return results,RESULTS_TEXT

  
def example_input():
  '''Builds the example input table from the CSV text stored in csvdata.

     The CSV text is loaded through an in-memory file object and each column
     is then given its type: the three id columns become int, the bid amount
     becomes number and the winner flag becomes boolean.
  '''
  from StringIO import StringIO  # lets load_csv read the text as if it were a file
  example = load_csv(StringIO(csvdata))
  column_types = (
    ('bid_id',           int),
    ('project_id',       int),
    ('vendor_id',        int),
    ('bid_total_amount', number),
    ('winner',           boolean),
  )
  for column_name, column_type in column_types:
    example.set_type(column_name, column_type)
  return example
# This next part is not required, but it's easier to put the example data
# directly in this file so I don't have to worry about directories.
#
# Example bidding data in CSV form, loaded by example_input().  The first
# line is the header row naming the five columns:
#   bid_id, project_id, vendor_id, bid_total_amount, winner
# Every following line is one bid; the winner column holds True or False.
csvdata = '''\
bid_id,project_id,vendor_id,bid_total_amount,winner
1,1,2,81710.7,False
2,1,3,73616.2,True
3,1,8,78459.6,False
4,2,4,80154.2,True
5,2,10,84468.5,False
6,2,13,84499.9,False
7,3,3,79047.6,False
8,3,9,88832.6,True
9,3,6,80644.6,False
10,4,13,74844.8,False
11,4,1,77039.7,False
12,4,14,74475.7,True
13,5,13,88636.9,True
14,5,14,91261.7,False
15,5,1,92006.5,False
18,6,13,74107.6,True
19,7,5,60250.7,True
20,7,15,64108.6,False
21,7,7,64087.9,False
22,8,2,57121.4,True
23,8,6,57733.0,False
24,8,3,57941.8,False
25,9,14,75812.8,True
26,9,10,77389.5,False
30,10,11,80934.1,True
31,11,8,64544.8,False
32,11,3,67933.3,False
33,11,6,63895.0,True
34,12,1,76622.9,False
35,12,4,77277.5,False
36,12,10,72907.8,True
37,13,9,105374.9,True
38,13,3,102772.8,False
39,13,8,104931.2,False
40,14,8,83470.3,False
41,14,6,85833.0,True
42,14,9,85366.0,False
43,15,7,111531.9,False
44,15,5,111454.7,True
47,16,9,90398.3,True
48,16,8,93963.1,False
49,17,7,69701.3,True
50,17,12,72258.4,False
51,17,15,77042.6,False
52,18,6,65980.7,False
53,18,2,62398.9,False
54,18,9,66761.1,True
55,19,1,53137.9,False
56,19,14,52581.8,False
57,19,4,51033.2,True
58,20,10,94882.4,True
59,20,14,96371.4,False
60,20,1,94926.1,False
61,21,14,82492.3,False
62,21,1,74023.4,True
66,22,15,94699.5,True
67,23,13,97517.0,False
68,23,4,93742.7,True
69,23,1,97177.3,False
70,24,11,98032.0,False
71,24,7,95292.9,True
72,24,15,95506.6,False
73,25,12,98206.1,False
74,25,7,99439.2,False
75,25,15,96456.8,True
76,26,5,79598.0,False
77,26,11,78413.6,False
78,26,15,76724.4,True
79,27,6,67505.6,True
80,27,3,68713.5,False
81,27,2,72646.5,False
82,28,11,72019.1,True
85,29,1,104470.7,True
88,30,5,69785.3,False
89,30,15,66948.1,True
90,30,11,68492.1,False
91,31,8,83345.3,True
92,31,3,83426.9,False
95,32,12,84316.8,True
96,32,15,90266.1,False
97,33,13,79932.4,False
98,33,14,73360.4,True
99,33,10,78470.1,False
'''


