####################################################################################
#                                                                                  
# Copyright (c) 2008 Travis Ringger <tringgerATgmailDOTcom>               
#                                                                                  
# This detectlet is part of the Bid Rigging wizards library.
# It does not require a license and is completely open source.                                    
#                                                                                  
####################################################################################
# UPDATES:
#
#  April 22, 2008  First version of the wizard
#  June 13, 2008   Updated the descriptions and added code for the progress analyzer
#
# STATUS: Reviewed and Approved, Matt Hillary, June 2008
# 
# 
# IDEAS/QUESTIONS:
#
#
####################################################################################

# NOTE(review): presumably the version of the Picalo detectlet interface this
# module is written against -- confirm against the Picalo documentation.
DETECTLET_STANDARD = 1.0

from picalo import *                            # import the Picalo libraries
import sys, re, random, os, os.path, urllib     # import commonly-used Python libraries

# XML description of the wizard pages shown to the user.  Each <parameter>
# "variable" attribute names the matching argument of run() below
# (table, contract, item, bid, z_score).
wizard = '''
<wizard>
  <page> 
    A procurement record will typically be created for each vendor  
    that keeps track of the contract, the item id's, and the bid amount.
    Select the TABLE that contains the contract id, item id, and bid amount.
   <parameter type="Table" variable="table"/>
  </page>
  <page>
    Which column identifies the CONTRACT? 
    
    (Example: contract, CONTRACT_ID)
    <parameter type="Column" table="table" variable="contract"/>

    Which column identifies ITEM ID?
    
    (Examples: UNSPSC, item_id, item_desc)
    <parameter type="Column" table="table" variable="item"/>    
  </page>
  <page> 

    Which column contains the BID AMOUNT? 

    (Example: bid, amt, bid_amount)
    <parameter type="Column" table="table" variable="bid"/>
  </page>
  <page>  
    What number (a positive number, in decimal form) would you like to use as the 
    z-score to determine an excessively high bid amount?  For example, to get the 
    top 10% and bottom 10% of bid amounts, use the Standard Normal Table to find 
    1.28 as the z-score.
    
    (Example: 0.50, 1.5)
    <parameter type="float" variable="z_score" default="1.5"/>
  </page>
</wizard>
'''

# Explanation text that run() returns to the caller together with the
# results table.
RESULTS_TEXT = '''\
    The results table shows the contract ID, item ID, and the bid amount.
    The table is sorted by item ID.
    
    Analyze the table by looking at each item id in the list. For each item, look at 
    the bid amounts for each contract. Notice the variance in the bid amounts. If 
    the bid amounts differ by a large degree, you may want to investigate. 
'''


def run(table, contract, item, bid, z_score):
    '''This Detectlet uses a z-score analysis to detect the difference in prices 
    of similar products on different contracts. Several similar items appear on 
    different contracts. The items from each contract can be compared to each other 
    to see if prices per item vary greatly from contract to contract. 
     
    Items with large variances are clearly reasons for concern. A z-score 
    is used to find those variances across several different contracts. 

    Parameters:
      table     -- the table holding the bid records.
      contract  -- the column identifying the contract.
      item      -- the column identifying the item; the table is stratified
                   by this column.
      bid       -- the column holding the bid amount.
      z_score   -- the z-score threshold; must be greater than zero.

    Returns a (results, RESULTS_TEXT) tuple, where results is the combination
    of every item group that contains more than one record.

    Raises AssertionError when the selected columns are not distinct or the
    z-score is not positive.
    '''

    # validate the data
    # Every pair of columns is compared explicitly: a chained
    # "contract != item != bid" only tests neighbouring operands, so
    # contract == bid would slip through.  The bid/z_score comparison from
    # the original check is retained so nothing previously rejected is
    # now accepted.
    columns_unique = (contract != item and contract != bid and item != bid
                      and bid != z_score)
    assert columns_unique, '''The values you selected for 
            contract, item, bid, and/or z-score are not unique. Please 
            go back and select unique columns for these values from the table.'''
    assert z_score > 0, '''The z-score must be greater than zero.  Negative values of the z-Score 
            are taken into consideration.  All that is required is one positive value.'''

    groups = Grouping.stratify_by_value(table, item)
    total = len(groups)
    kept = []
    for index, group in enumerate(groups):
        # NOTE(review): the outlier table computed here is never used when
        # building the results below.  The call is kept so this update does
        # not change what run() executes; confirm whether the results were
        # meant to be built from these outliers instead of the whole group.
        Simple.select_outliers_z(group, bid, z_score)
        show_progress('Analyzing...', float(index + 1) / total)

        # an item with a single record has nothing to be compared against
        if len(group) > 1:
            kept.append(group)

    results = TableArray(kept).combine()

    return results, RESULTS_TEXT
  
  
# load the sample data, if the user requests it
def example_input():
    '''Builds the sample table from the csvdata text and returns it.

    The CSV text is wrapped in a file-like object for load_csv, then the
    Contract_ID and Item_ID columns are typed as int and the Bid_Amount
    column as number.
    '''
    from StringIO import StringIO  # file-like wrapper so load_csv can read a string
    sample = load_csv(StringIO(csvdata))
    column_types = (
        ('Contract_ID', int),
        ('Item_ID', int),
        ('Bid_Amount', number),
    )
    for column_name, column_type in column_types:
        sample.set_type(column_name, column_type)
    return sample



# Sample data that example_input() loads when the user asks for an example.
# CSV text with a header row naming the three columns (Contract_ID, Item_ID,
# Bid_Amount) followed by one bid record per line.
csvdata = '''\
Contract_ID,Item_ID,Bid_Amount
1,10000003,5000
1,10000001,485
1,10000005,3500
1,10000009,2000
1,10000008,1000
1,10000007,950
1,10000006,7050
1,10000099,5500
1,11000003,7600
1,10000052,5100
1,10000011,1600
1,10000039,560
1,10000026,14500
1,10000022,1300
2,10000004,900
2,10000005,8900
2,10000003,4500
2,10000010,15000
2,10000015,11500
2,10000018,500
3,10000015,1200
3,10000025,14700
3,10000038,550
3,10000001,1500
3,10000002,5000
3,10000003,5500
3,10000019,1200
3,10000022,1200
3,10000026,14700
3,10000039,550
3,10000011,1500
3,10000052,5000
3,11000003,7500
3,10000099,5500
4,10000015,1200
4,10000026,14700
4,10000038,2000
4,10000001,1500
4,10000002,5000
4,10000003,5500
4,10000019,1200
4,10000023,1200
4,10000026,14700
4,10000040,550
4,10000011,3500
4,10000052,5000
4,11000003,7500
4,10000099,5500
'''



