####################################################################################
#                                                                                  
# Copyright (c) 2006 Jeffrey Richardson <jeff_richardsonATbyuDOTnet>               
#                                                                                  
# This detectlet is part of the Bid Rigging wizards library.
# It requires a license and is not open source.                                    
#                                                                                  
####################################################################################
# UPDATES:
# 10 Jul 2006    Updated script layout (changes to facilitate version and bug tracking)
# 11 MAR 2006  First version of the wizard
#
####################################################################################
#   STATUS:  Needs Example Data, Testing
#
# IDEAS/QUESTIONS
#  
####################################################################################

# Version of the detectlet standard that this script declares.
# NOTE(review): presumably read by the Picalo detectlet loader -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *

# XML definition of the wizard pages.  Each <parameter> element names, via its
# "variable" attribute, one of the arguments of run() below: two tables
# (bids_table, details_table) and two columns from each of them.
wizard = '''
<wizard>
  <page>
    In order to perform this analysis, this wizard
    requires two tables.  The first table should contain
    the general information about each bid (e.g. Bid Number,
    Bidding Vendor, Date Received, Bid Total).  
    
    The second table should contain the detailed information 
    about the bids--the line items.  Each bid from the first table
    should have several corresponding records in this table--each 
    containing a sub-bid for every individual item in the bid.
    This table should have columns like Bid Number, Line Item
    Number, Item Bid Amount, and a Unique ID Number.
  </page>
  <page>
    Please select the FIRST TABLE which contains the general bid
    information.  It should look similar to the example input data
    (e.g. Bid Number, Bidding Vendor, Date Received, Bid Total).  
    This table should not contain the bid details/bid 
    line items.
    
    (Example Names: bid, bids, vendor_bids)
    <parameter type="Table" variable="bids_table"/>
  </page>
  <page>
    This first table should contain a record for each bid.  
    
    Which column identifies the BID's ID NUMBER? 
    
    (Examples: Bid_ID, Bid_Num, or Bid_Key)
    <parameter type="Column" table="bids_table" variable="bids_table_bid_id"/>
    
    Which column contains the TOTAL dollar AMOUNT of the bid?
    
    (Examples: Total, Bid_Total, Dollar_Total, Bid_Amount)
    <parameter type="Column" table="bids_table" variable="bids_table_total"/>
  </page>
  <page>
    Please select the SECOND table which contains the detail/line item
    information. (Note: sometimes these are called 'details' and other times
    they are called 'line items'. Both names mean the same thing and
    may be used interchangeably.)  It should contain a record for every 
    line item of each bid.  There should be several records for each 
    one record in the first table.  Column names should include: 
    Bid Unique ID, line_item number, and line item amount.
    
    (Example Names: bid_line_items, bid_details, bid_line_item_details)
    <parameter type="Table" variable="details_table"/>
  </page>
  <page>
    Second Table: Details/Line Items Table
    
    Which column contains the BID ID (not the line item ID)?
    
    (Examples: Bid_ID, Bid_Num, or Bid_Foreign_Key, Bid_FK)
    <parameter type="Column" table="details_table" variable="details_table_bid_id"/>
    
    Which column contains the AMOUNT of each individual bid item?
    
    (Example: amount, detail_amount, line_item_amount)
    <parameter type="Column" table="details_table" variable="details_table_amount"/>
  </page>
</wizard>
'''

# Explanation of the results table; run() returns this text together with
# the table it builds.
RESULTS_TEXT = '''\
    The displayed table shows each bid with the recorded Total Amount
    and the Actual Total Amount (based on the details).  The last 
    column is the absolute difference between the two.  The bids that
    have the largest difference appear at the top of the table as these
    are the most material.  Differences may occur due to fraud; however, some
    differences result from normal business operations such as change orders.
    Some differences can occur because of human error.
'''


def run(bids_table, bids_table_bid_id, bids_table_total, details_table, details_table_bid_id, details_table_amount):
  '''Most organizations require multiple bids to increase competition between
     suppliers.  Purchasers may want to circumvent this control when they
     are getting kickbacks from a certain vendor, have family or friends they
     want to funnel work to, or for many other reasons.
     
     When companies send out a request for bid, they send a detailed form that
     has several line items that are required for a specific bid.  With the 
     same form being sent to all bidding vendors, each bid is easily compared.
     These line items are then added together to get a grand total.  This 
     total is the primary factor in selecting the winning bid.  Sometimes, 
     either because of error or because of fraud, the number input as the
     total amount is not the same number as the added total of all of the 
     line items. 
     
     This wizard analyzes bids and verifies that each bid's total equals 
     the total amount as calculated by adding up all of the line item amounts.
     
     The wizard goes through the following process:
     - Adds all of the line item detail amounts for each bid.  
     - Compares the newly calculated amount with the existing total amount.
     - Calculates the difference between these two amounts.
     - Sorts the results by the difference.
       
     Projects with a significant difference between the actual total and the 
     recorded total may be fraudulent.
     
  '''
  # Parameters (these match the "variable" names declared in the wizard XML):
  #   bids_table            table with one record per bid
  #   bids_table_bid_id     name of the bid id column in bids_table
  #   bids_table_total      name of the recorded total column in bids_table
  #   details_table         table with one record per bid line item
  #   details_table_bid_id  name of the bid id column in details_table
  #   details_table_amount  name of the line item amount column in details_table
  #
  # Returns a (results, RESULTS_TEXT) tuple, where results has the columns
  # Bid_id, Recorded_total, Actual_total and Difference.
  #
  # Raises IndexError when a bid id found in details_table has no record in
  # bids_table.
  # NOTE(review): the docstring above presumably doubles as the description
  # shown to the user, so the parameter notes are kept in comments -- confirm.

  # validate the data
  #TODO: check that the two columns selected within each table are different
  #      (e.g. the bid id column and the total column of the bids table).
  
  # Index the recorded totals once, keyed by the string form of the bid id.
  # Ids are compared as strings (so 1 matches '1'), and when several bid
  # records share an id the first one in table order is the one used.
  # A plain dictionary lookup avoids pasting the id into an expression
  # string, which breaks for ids containing quotes or backslashes, and avoids
  # re-scanning bids_table for every bid.
  recorded_totals = {}
  for bid_record in bids_table:
    bid_key = str(bid_record[bids_table_bid_id])
    if bid_key not in recorded_totals:
      recorded_totals[bid_key] = bid_record[bids_table_total]

  # run the analysis
  results = Table([
    ( 'Bid_id',         unicode ),
    ( 'Recorded_total', number ),
    ( 'Actual_total',   number ),
    ( 'Difference',     number ),
  ])
  # one group of line item records per distinct bid id
  bids = Grouping.stratify_by_value(details_table, details_table_bid_id)
  for bid_counter, bid in enumerate(bids):
    show_progress('Analyzing...', float(bid_counter) / len(bids))

    # add up the line items of this bid
    recalculated_total = 0.0
    for bid_detail in bid:
      recalculated_total = recalculated_total + bid_detail[details_table_amount]

    # every record in the group shares the bid id, so take it from the first
    bid_id = bid[0][details_table_bid_id]
    bid_key = str(bid_id)
    if bid_key not in recorded_totals:
      raise IndexError('Bid id %r appears in the line items table but has no record in the bids table.' % (bid_key,))

    rec = results.append()
    rec['Bid_id'] = bid_id
    rec['Recorded_total'] = recorded_totals[bid_key]
    rec['Actual_total'] = recalculated_total
    # read the recorded total back from the result record so the difference
    # is computed on the value as stored in the 'number' column
    rec['Difference'] = round(abs(recalculated_total - rec['Recorded_total']),2)
  Simple.sort(results,False,"Difference")
  return results, RESULTS_TEXT

def example_input():
  '''Returns the sample table parsed from the CSV text embedded in this file.
  
     The text in csvdata is wrapped in a file-like object and handed to
     load_csv; the 'Contract' column is then typed as int and the 'Amount'
     column as number.
     
     NOTE(review): this is a single line item style table, while run() takes
     a bids table and a details table; the file header still lists
     "Needs Example Data".
  '''
  from StringIO import StringIO  # lets load_csv read from a string
  csv_stream = StringIO(csvdata)
  sample = load_csv(csv_stream)
  sample.set_type('Contract', int)
  sample.set_type('Amount', number)
  return sample



# Example data, embedded directly in this file (rather than in a separate
# file) so that no directory handling is needed.  This part is not required.
# It is CSV text with a header row (Contract, Bidder, Item, Amount) and is
# parsed by example_input() above.
csvdata = '''\
Contract,Bidder,Item,Amount
1,BidderA,1.1,9908.01
1,BidderA,1.2,4147.38
1,BidderA,1.3,2675.48
1,BidderA,2.1,7612.40
1,BidderA,2.2,4444.31
1,BidderA,3.1,9712.15
1,BidderA,3.2,3784.11
1,BidderB,1.1,11024.03
1,BidderB,1.2,4147.38
1,BidderB,1.3,2675.48
1,BidderB,2.1,7710.04
1,BidderB,2.2,4444.31
1,BidderB,3.1,9712.15
1,BidderB,3.2,3784.11
1,BidderC,1.1,13682.51
1,BidderC,1.2,4147.38
1,BidderC,1.3,2675.48
1,BidderC,2.1,9610.04
1,BidderC,2.2,4444.31
1,BidderC,3.1,9712.15
1,BidderC,3.2,3784.11
'''

