####################################################################################
#                                                                                  
# Copyright (c) 2006 Jeffrey Richardson <jeff_richardsonATbyuDOTnet>               
#                                                                                  
# This detectlet is part of the Bid Rigging wizards library.
# It requires a license and is not open source.                                    
#                                                                                  
####################################################################################
# UPDATES
# 10 Jul 2006    Updated script layout (changes to facilitate version and bug tracking)
# 11 MAR 2006  First version of the wizard
#
####################################################################################
# STATUS: Create Example Data
#       note: update the syntax for the type casting of the tables (to accommodate the recent API syntax changes)
# 
# 
#   IDEAS/QUESTIONS
#
#
####################################################################################

# Version of the detectlet standard that this script declares.
# NOTE(review): presumably read by the Picalo detectlet loader to decide how
# to interpret this module -- confirm against the loader.
DETECTLET_STANDARD = 1.0

from picalo import *

wizard = '''
<wizard>
  <page>
    Select the TABLE that contains the information about
    the bids.  A project/contract will contain several bids.
    This table should contain one record for each
    bid.  It should contain a column a Unique Bid Number/ID, 
    a Bid Date, Vendor ID, and a column that identifies bid
    winners.
   <parameter type="Table" variable="table"/>
  </page>
  <page>
    Which column identifies the PROJECT/CONTRACT? 
    
    (Example: Contract_ID, Project_ID, Bid_Group)
    <parameter type="Column" table="table" variable="project_id"/>
    
    Which column identifies the BID's ID NUMBER? 
    
    (Examples: Bid_ID, Bid_Num, or Bid_Key)
    <parameter type="Column" table="table" variable="bid_id"/>
  </page>
  <page> 
    Which column identifies VENDOR? 
    
    (Examples: Vendor_ID, Vendor, Vendor_Key, or Vendor_Foriegn_Key)
    <parameter type="Column" table="table" variable="vendor_id"/>

    Which column contains the DATE the bid was submitted?
    
    (Example: Date, Bid_Date, Date_Submitted)
    <parameter type="Column" table="table" variable="date"/>
  </page>
  <page>  
    Which column identifies the WINNING bids?
    
    (Example: Winner, Contract_Accepted, Winning_Bid)
    <parameter type="Column" table="table" variable="winner"/>
  </page>
</wizard>
'''

# Explanation of the result columns; run() returns this text together with
# the results table.  The quoted column names must match the column names of
# the results table built in run().
RESULTS_TEXT = '''\
    The displayed table shows each vendor and a summary of their bidding
    history.  
    
    The first column identifies each vendor by their id.  
    
    The second column, "Bid_Count", reports the number of bids submitted by 
    the vendor.  
    
    The column "Last_Bid_Count" contains a count of the 
    number of times that the vendor submitted the last bid (by date).  
    
    "Percent_Last_Bids" is the percent of times that the vendor's bid was 
    the last to be received (Last_Bid_Count/Bid_Count).  
    
    "Win_Count" is the amount of times that the vendor won a bid.  
    
    "Percent_Wins" is the percent of times that the vendor won (Win_Count/Bid_Count).  
    
    "Win_and_Last_Bid_Count" contains the amount of times that the vendor's
    bid was both the last bid to be received and the winner.  
    
    "Percent_Wins_and_Last_Bids" is the percent of how many times that the
    vendor both won a bid while being the last bidder (Win_and_Last_Bid_Count/
    Bid_Count).
'''


def _percent_of(count, total):
  '''Returns count as a percentage of total, or 0 when total is zero.'''
  if not total:
    return 0
  return (float(count) / total) * 100


def run(table, project_id, bid_id, vendor_id, date, winner):
  '''Most organizations require multiple bids to increase competition between
     suppliers.  Purchasers may want to circumvent this control when they
     are getting kickbacks from a certain vendor, have family or friends they
     want to funnel work to, or for many other reasons.  Sometimes, when 
     purchasers want a specific vendor to win, they give that vendor insider
     information.  Generally, this information is about the other bids.  When a
     vendor knows the bid amounts of other vendors, they are able to under bid
     and win contracts.  
     
     The symptoms associated with this type of fraud include vendors who 
     consistently win and are the last bid to be received.  
     
     This wizard analyzes the bids and summarizes each vendor's bidding
     history by computing the total number of bids and the percent of times
     that the vendor wins and is the last bid to be received.
     
     The wizard goes through the following process:
     - Stratify the file into a table for each project.
     - Identify the Winning Bid and the Last Bid to be received.
     - Summarize how many times each vendor:
        1) Is the Winner
        2) Is the last bidder (by date)
        3) Is both the Winner and the last bidder.
     - Compute percentages of the data.
     - Sort the table by the percentage of both winner and last bidder.
     
     Vendors who are consistently the last bidder and winner may be receiving
     insider information.     
  '''
  # Arguments (same names as the "variable" attributes in the wizard XML):
  #   table      - the bids table, one record per bid
  #   project_id - column identifying the project/contract of a bid
  #   bid_id     - column identifying the bid; collected by the wizard but
  #                not used by the analysis below
  #   vendor_id  - column identifying the vendor that submitted the bid
  #   date       - column holding the date the bid was submitted
  #   winner     - column flagging the winning bids
  # Returns a (results, RESULTS_TEXT) tuple: one results row per vendor,
  # sorted by Percent_Wins_and_Last_Bids with the sort's ascending flag False.

  # TODO: validate the input, e.g. check that project_id, vendor_id, date and
  # winner are all different columns.

  results = Table([
    ( 'vendor_id',                  unicode ),
    ( 'Bid_Count',                  int     ),
    ( 'Last_Bid_Count',             int     ),
    ( 'Percent_Last_Bids',          number  ),
    ( 'Win_Count',                  int     ),
    ( 'Percent_Wins',               number  ),
    ( 'Win_and_Last_Bid_Count',     int     ),
    ( 'Percent_Wins_and_Last_Bids', number  ),
  ])
  projects = Grouping.stratify_by_value(table, project_id)
  vendors_bids = Grouping.stratify_by_value(table, vendor_id) # TODO More efficient way to get the distinct Vendors

  # Pass 1: visit each project once and tally, per vendor, how often that
  # vendor was the winner, the last bidder, or both.
  win_counts = {}
  last_bid_counts = {}
  win_and_last_counts = {}
  for project in projects:
    # The sorts are called with ascending=False, so row 0 is the row with
    # the highest value of the sort column.
    # Ordering by the winner flag first means that, if Simple.sort is stable,
    # bids sharing the latest date list the winning bid first.
    Simple.sort(project, False, winner)
    Simple.sort(project, False, date)
    last_vendor = project[0][vendor_id]
    # NOTE(review): row 0 is credited as the winner even if no bid in the
    # project carries a winner flag -- confirm that every project has
    # exactly one flagged winner, or add a check on the flag value.
    Simple.sort(project, False, winner)
    winning_vendor = project[0][vendor_id]

    win_counts[winning_vendor] = win_counts.get(winning_vendor, 0) + 1
    last_bid_counts[last_vendor] = last_bid_counts.get(last_vendor, 0) + 1
    if winning_vendor == last_vendor:
      win_and_last_counts[winning_vendor] = win_and_last_counts.get(winning_vendor, 0) + 1

  # Pass 2: one results row per vendor, combining the tallies with the
  # vendor's total number of bids.
  vendor_total = len(vendors_bids)
  for vendor_counter, ven_bids in enumerate(vendors_bids):
    show_progress('Analyzing...', float(vendor_counter) / vendor_total)
    vendor = ven_bids[0][vendor_id]
    bid_count = len(ven_bids)
    last_bid_count = last_bid_counts.get(vendor, 0)
    win_count = win_counts.get(vendor, 0)
    win_and_last_count = win_and_last_counts.get(vendor, 0)

    rec = results.append()
    rec['vendor_id'] = vendor
    rec['Bid_Count'] = bid_count
    rec['Last_Bid_Count'] = last_bid_count
    rec['Win_Count'] = win_count
    rec['Win_and_Last_Bid_Count'] = win_and_last_count
    rec['Percent_Last_Bids'] = _percent_of(last_bid_count, bid_count)
    rec['Percent_Wins'] = _percent_of(win_count, bid_count)
    rec['Percent_Wins_and_Last_Bids'] = _percent_of(win_and_last_count, bid_count)

  Simple.sort(results, False, "Percent_Wins_and_Last_Bids")
  return results, RESULTS_TEXT

def example_input():
  '''Returns a small example bids table with the columns the wizard asks for.

     The table is loaded from the csvdata text defined below in this file:
     one record per bid, with Project_ID, Bid_ID, Vendor_ID, Bid_Date and
     Winner columns.
  '''
  import StringIO  # to emulate a file for load_csv
  table = load_csv(StringIO.StringIO(csvdata))
  table.set_type('Project_ID', int)
  table.set_type('Bid_ID', int)
  # Winner is 1 for the winning bid of a project and 0 otherwise, so the
  # winning bid has the highest value in that column.
  table.set_type('Winner', int)
  # Bid_Date is left as loaded: the dates are written as yyyy-mm-dd, which
  # orders chronologically even when compared as plain text.
  return table



# This next part is not required, but it's easier to put the example data
# directly in this file so I don't have to worry about directories.
#
# Four projects with three bids each and exactly one winner per project.
# VendorC always submits the last bid and wins three of the four projects,
# which is the pattern this detectlet is meant to surface.
csvdata = '''\
Project_ID,Bid_ID,Vendor_ID,Bid_Date,Winner
1,101,VendorA,2006-01-03,0
1,102,VendorB,2006-01-05,0
1,103,VendorC,2006-01-09,1
2,201,VendorA,2006-02-01,1
2,202,VendorB,2006-02-04,0
2,203,VendorC,2006-02-06,0
3,301,VendorA,2006-03-02,0
3,302,VendorB,2006-03-03,0
3,303,VendorC,2006-03-10,1
4,401,VendorB,2006-04-01,0
4,402,VendorA,2006-04-05,0
4,403,VendorC,2006-04-11,1
'''

