####################################################################################
#                             
# Copyright (c) 2006 Jeffrey Richardson <jeff_richardsonATbyuDOTnet>               
#                                                                                  					  
# This detectlet is part of the Bid Rigging wizards library.
# It requires a license and is not open source.                                    		       
#                                                                                  
####################################################################################
# UPDATES
# 10 Jul 2006    Updated script layout (changes to facilitate version and bug tracking)
# 11 MAR 2006  First version of the wizard
#
####################################################################################
# STATUS: PRODUCTION
# 
#   IDEAS/QUESTIONS/NOTES:
#
#
####################################################################################
# NOTE(review): presumably the version of the detectlet script layout this
# file follows (see the "Updated script layout" entry in the header) --
# confirm how Picalo consumes this value.
DETECTLET_STANDARD = 1.0

from picalo import *

# Wizard definition: an XML document kept in a string.  Each <page> holds the
# prompt text for one wizard screen plus one or more <parameter> elements.
# The "variable" attributes (table, contract_id_col, bid_id_col,
# vendor_id_col, amount_col, winner_col) use the same names as the parameters
# of run() below; the Column parameters refer back to the "table" variable.
# NOTE(review): presumably Picalo renders these pages and passes the collected
# values to run() by name -- confirm against the detectlet loader.
wizard = '''
<wizard>
  <page>
    Please select which TABLE contains the bidding data.
    It should look similar to the example input data (see
    the previous page), and it should have columns for
    the bid unique id, contract id, bidder, and total amount. 
    (NOTE: this table should not contain the bid details/bid 
    line items.  Rather, it should contain the summation of
    these items.)
    
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Your input table should contain a record for each bid for a
    contract/project.  
    
    Which column identifies the PROJECT/CONTRACT to which the bid pertains?
    
    (Example: Contract_ID, Project_ID, Bid_Group)
    <parameter type="Column" table="table" variable="contract_id_col"/>
  </page>
  <page>
    Which column identifies the BID's ID NUMBER?  This id should 
    uniquely identify each bid.
    
    (Examples: Bid_ID, Bid_Num, or Bid_Key)
    <parameter type="Column" table="table" variable="bid_id_col"/>
    
    Which column identifies VENDOR? 
    
    (Examples: Vendor_ID, Vendor, Vendor_Key, or Vendor_Foriegn_Key)
    <parameter type="Column" table="table" variable="vendor_id_col"/>
    
  </page>
  <page>
    Which column contains the TOTAL dollar AMOUNT of the bid?
    
    (Examples: Total, Bid_Total, Dollar_Total, Bid_Amount)
    <parameter type="Column" table="table" variable="amount_col"/>
    
    Which column identifies the WINNING bids?
    
    (Example: Winner, Contract_Accepted, Winning_Bid)
    <parameter type="Column" table="table" variable="winner_col"/>
  </page>
</wizard>
'''

# Explanation returned by run() together with the results table.  The quoted
# names and the formulas must match the columns that run() creates and the
# percentages it computes (every percentage is relative to Bids_Count).
RESULTS_TEXT = '''\
    The displayed table lists every vendor that has submitted at least one
    bid.  The vendor is identified in the "Vendor" column.  The next column,
    "Bids_Count", displays the number of bids submitted by the vendor.
    "Lowest_count" contains the number of times that the vendor submitted the
    lowest bid.  "Percent_lowest" is the percent of times that the vendor's bid
    was the lowest (Lowest_count/Bids_Count).  "Wins_counts" is the number of
    times that the vendor won a contract.  "Percent_wins" contains the percent
    of wins to submitted bids (Wins_counts/Bids_Count).
    "Lowest_and_Winner_count" is the count of how many times the vendor's bid
    was both the lowest and the winner.  "Percentage_win_and_lowest" is the
    percentage of times the vendor's bids were both the lowest and the winner
    (Lowest_and_Winner_count/Bids_Count).
'''


def _is_winner(value):
  '''Returns True when a winner-column value marks a winning bid.

     The text 'True' is what the analysis has always recognized.  A real
     boolean True is accepted as well, because example_input() types its
     Winner column as boolean and such a value need not compare equal to the
     string 'True'.
  '''
  return value in ('True', True)


def _percent(part, whole):
  '''Returns part/whole as a percentage, or 0 when whole is zero.'''
  if not whole:
    return 0
  return (float(part) / float(whole)) * 100


def run(table, contract_id_col, vendor_id_col, bid_id_col, amount_col, winner_col):
  '''Most organizations require multiple bids to increase competition between
     suppliers.  Purchasers may want to circumvent this control when they
     are getting kickbacks from a certain vendor, have family or friends they
     want to funnel work to, or for many other reasons.
     
     Generally, one would expect to see a variety of different vendors 
     winning bids.  Normally, price is the largest factor in determining
     bid winners.  A specific vendor would not likely win every bid.  However,
     if a vendor was winning bids where it was not the lowest bid, or if the 
     vendor was winning every bid, it could be due to fraud.  
     
     This wizard analyzes bids and summarizes each vendor's bid positions--
     number of total bids, percent of lowest bids, percent of wins to total bids,
     percent of times where the bid was both the lowest and the winner.
     
     The wizard goes through the following process:
     - Sorts the bids by contract and then by amount.
     - Stratifies the file into a table for each project.  
     - Stratifies the file into a table for each vendor containing all of its bids
     - Calculates the number of times the vendor won, was the lowest, and how many bids it made
     - Calculates a percentage for each.
     - Sorts the results by the lowest-and-winner percentage.

     Parameters:
     - table:            table holding one record per bid.
     - contract_id_col:  name of the column identifying the contract/project.
     - vendor_id_col:    name of the column identifying the vendor.
     - bid_id_col:       name of the bid id column; collected by the wizard
                         but not used by this analysis.
     - amount_col:       name of the column holding the bid total.
     - winner_col:       name of the column flagging winning bids.

     Returns a (results table, RESULTS_TEXT) tuple; the results table has one
     record per vendor.
  '''
  # validate the data
  # TODO: reject runs where the same column was selected for two of the
  # contract, vendor, bid id, amount and winner roles.

  # run the analysis
  results = Table([
    ( 'Vendor',                    unicode),
    ( 'Bids_Count',                int),
    ( 'Lowest_count',              int),
    ( 'Percent_lowest',            number),
    ( 'Wins_counts',               int),
    ( 'Percent_wins',              number),
    ( 'Lowest_and_Winner_count',   int),
    ( 'Percentage_win_and_lowest', number),
  ])

  # Order the bids by contract and then by amount so that the first record of
  # every contract group is treated as that contract's lowest bid.  The return
  # value is not used, so this relies on the sort reordering `table` itself.
  Simple.sort(table, True, contract_id_col, amount_col)
  contract_bids = Grouping.stratify_by_value(table, contract_id_col)
  vendors_bids = Grouping.stratify_by_value(table, vendor_id_col)

  # The lowest bid of each contract does not depend on the vendor being
  # summarized, so read it once instead of twice per vendor.
  lowest_bids = [
    (contract[0][vendor_id_col], _is_winner(contract[0][winner_col]))
    for contract in contract_bids
  ]

  # Filter expression evaluated by Simple.select against each record; it
  # mirrors _is_winner(), which is not visible inside the expression.
  winner_expression = "record['" + winner_col + "'] in ('True', True)"

  vendor_total = len(vendors_bids)
  for vendor_counter, ven_bids in enumerate(vendors_bids):
    show_progress('Analyzing...', float(vendor_counter) / vendor_total)

    vendor = ven_bids[0][vendor_id_col]
    bids_count = len(ven_bids)

    # one pass: how often this vendor placed the lowest bid, and how often
    # that lowest bid was also flagged as the winner
    count_of_lowest = 0
    count_of_lowest_and_winner = 0
    for lowest_vendor, lowest_won in lowest_bids:
      if lowest_vendor == vendor:
        count_of_lowest += 1
        if lowest_won:
          count_of_lowest_and_winner += 1

    wins_count = len(Simple.select(ven_bids, winner_expression))

    rec = results.append()
    rec['Vendor'] = vendor
    rec['Bids_Count'] = bids_count
    rec['Lowest_count'] = count_of_lowest
    rec['Wins_counts'] = wins_count
    rec['Lowest_and_Winner_count'] = count_of_lowest_and_winner

    # every percentage is relative to the vendor's number of bids;
    # _percent() guards against a zero denominator
    rec['Percent_wins'] = _percent(wins_count, bids_count)
    rec['Percent_lowest'] = _percent(count_of_lowest, bids_count)
    rec['Percentage_win_and_lowest'] = _percent(count_of_lowest_and_winner, bids_count)

  # NOTE(review): False is presumably the "ascending" flag, which would put the
  # highest lowest-and-winner percentage first -- confirm against Simple.sort.
  Simple.sort(results, False, 'Percentage_win_and_lowest')
  return results, RESULTS_TEXT

  
def example_input():
  '''Builds the sample table from the CSV text stored in csvdata.

     The text is wrapped in a file-like object, loaded with load_csv, and the
     Contract, Winner and Amount columns are given the types int, boolean and
     number before the table is returned.
  '''
  from StringIO import StringIO  # file-like wrapper so load_csv can read the string
  sample = load_csv(StringIO(csvdata))
  column_types = (
    ('Contract', int),
    ('Winner', boolean),
    ('Amount', number),
  )
  for column_name, column_type in column_types:
    sample.set_type(column_name, column_type)
  return sample



# Example data read by example_input().  It is not required, but keeping it
# directly in this file avoids having to locate a separate data file.
# Columns: Contract, Winner, Bidder, Item, Amount.  Every row belongs to
# contract 1 and only BidderA's rows carry Winner=True.
# NOTE(review): each row is a bid line item (see the Item column), whereas the
# wizard text asks for a table of bid totals without line items, and there is
# no bid id column -- confirm this sample matches what run() expects.
csvdata = '''\
Contract,Winner,Bidder,Item,Amount
1,True,BidderA,1.1,9908.01
1,True,BidderA,1.2,4147.38
1,True,BidderA,1.3,2675.48
1,True,BidderA,2.1,7612.40
1,True,BidderA,2.2,4444.31
1,True,BidderA,3.1,9712.15
1,True,BidderA,3.2,3784.11
1,False,BidderB,1.1,11024.03
1,False,BidderB,1.2,4147.38
1,False,BidderB,1.3,2675.48
1,False,BidderB,2.1,7710.04
1,False,BidderB,2.2,4444.31
1,False,BidderB,3.1,9712.15
1,False,BidderB,3.2,3784.11
1,False,BidderC,1.1,13682.51
1,False,BidderC,1.2,4147.38
1,False,BidderC,1.3,2675.48
1,False,BidderC,2.1,9610.04
1,False,BidderC,2.2,4444.31
1,False,BidderC,3.1,9712.15
1,False,BidderC,3.2,3784.11
'''

