###################################################################
#
# Author: Dan Austin
#
###################################################################
# Current Version: 1.0
###################################################################
# Version Notes
#
# 4/6/2008 - Version 1.0 complete
#
###################################################################

# Version of the detectlet standard that this module declares.
# NOTE(review): presumably read by the Picalo detectlet loader to check
# compatibility -- confirm against the Picalo documentation.
DETECTLET_STANDARD = 1.0

from picalo import *
# XML description of the wizard pages shown to the user.  Each <parameter>
# element's "variable" attribute matches one argument of run(), in order:
# table, productCol, vendorCol, priceCol, allowPercent.
wizard = '''
<wizard>
  <page>
    Select the table containing bidding data.

    This table should have columns for products, vendors, and amounts.
    
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Select the column containing product IDs:

    Your input table should contain a record of products.
    This detectlet will stratify the input table into tables
    arranged by product types.
    
    For example, if the products were scissors, paper, pencils,
    and toner, the detectlet would stratify the input table into
    four separate groups, scissors, paper, pencils, and toner.
    This stratification will allow the detectlet to compare the
    pricing on similar products.
    
    <parameter type="column" table="table" variable="productCol"/>
  </page>
  <page>
    Select the column containing vendor IDs:

    Your table should contain a column indicating the vendor through whom
    each product was purchased.  This information will allow the
    detectlet to display possible corrupt vendors.
  
    <parameter type="column" table="table" variable="vendorCol"/>
  </page>
  <page>
    Select the column containing product prices:

    Your table should contain a column indicating the purchase price
    of each product.  The detectlet will use this price to determine
    which products have prices outside of the acceptable range.
    
    <parameter type="column" table="table" variable="priceCol"/>
  </page>
  <page>    
    Enter the allowable percentage over the mean (do not include the % sign) into the 
    box below:

    In order to identify outliers, please specify an allowable percentage
    over the mean price.  For example, if the average price for a specific product
    was $100, and the allowable percentage was 10%, the detectlet would flag
    any product costing more than $110 (10% over $100).
    
    With the sample data, using 15% will reveal that vendor 5 has several products
    outside of that range.
    
    <parameter type="int" min="0" variable="allowPercent"/>
  </page>
</wizard>
'''
# Explanatory text returned by run() alongside the results table.
RESULTS_TEXT = '''\
    The displayed table shows all of the products that have prices outside
    of the acceptable range. The table indicates the product (by ID), the
    vendor (by ID) and the purchase price of the product.
    
    Vendors that consistently show up on this table should be investigated.
    
    If a specific employee is approving these bids, he or she may be receiving
    kick-backs or be working with a specific vendor to guarantee them bids.
'''

def run(table, productCol, vendorCol, priceCol, allowPercent):
  '''Most organizations require multiple bids to increase competition between
     suppliers.  Having this type of competition keeps prices low and 
     increases quality.  However, for small projects, multiple bids are not necessary.  Organizations will fix
     a limit or threshold as to how much a project can cost before multiple
     bids are required.  
     
     Purchasers (who work for the organization) may want to 
     circumvent this control when they are getting kickbacks from a certain 
     vendor, have family or friends they want to funnel work to, or for 
     many other reasons.  The simplest way to circumvent this control is to
     simply ignore it.
     
     This detectlet compares products of the same category and looks for any
     extreme pricing differences between them.
     
     This detectlet goes through the following process:
     - Stratifies the table by products
     - Calculates an average cost in each product category
     - Calculates the highest price allowed before a product will be flagged
         by multiplying the mean price with the over-the-mean-allowable percentage
     - Compares each product price with the highest price allowed
     - Displays the results
     
     Companies consistently showing up with prices outside of what would be
     expected may be working with an employee to prevent proper bidding.
  '''
  # Parameters:
  #   table        - the input table holding one record per purchased product
  #   productCol   - column of `table` used to group records by product
  #   vendorCol    - column of `table` identifying the vendor of each record
  #   priceCol     - column of `table` holding the purchase price
  #   allowPercent - allowed percentage above the group's mean price
  #                  (e.g. 10 flags anything more than 10% over the mean)
  # Returns:
  #   (results, RESULTS_TEXT): a table with productID / vendorID /
  #   productPrice columns containing the flagged records, plus the
  #   explanatory text.
  # Raises:
  #   AssertionError if any two of the three column arguments are the same.

  # validate the data
  # All three pairs must be compared explicitly: a chained `a != b != c`
  # never compares `a` with `c`.  The error is raised explicitly (rather than
  # with an assert statement) so the check still runs under `python -O`.
  if productCol == vendorCol or vendorCol == priceCol or productCol == priceCol:
    raise AssertionError(
      'The Product, Vendor, and Purchase Price columns must be different.  '
      "Please ensure you haven't selected the same column for two of these items."
    )

  # run the analysis
  results = Table([
    ( 'productID',        unicode ),
    ( 'vendorID',         unicode ),
    ( 'productPrice',     currency ),
  ])
  products = Grouping.stratify_by_value(table, productCol)
  for product in products:
    totalPrice = 0.00
    count = 0.00  # kept as a float so the mean below uses true division

    # Calculates the average price for a product
    for lineitem in product:
      totalPrice = totalPrice + lineitem[priceCol]
      count = count + 1

    # An empty group has no mean (and nothing to flag); skip it rather than
    # dividing by zero.
    if count == 0:
      continue

    mean = totalPrice / count

    # Calculates the maximum allowable price
    maxPrice = mean * (1.0 + allowPercent / 100.0)

    # Finds all products whose price is above the maximum allowable price
    for lineitem in product:
      if lineitem[priceCol] > maxPrice:
        rec = results.append()
        rec['productID'] = lineitem[productCol]
        rec['vendorID'] = lineitem[vendorCol]
        rec['productPrice'] = lineitem[priceCol]

  # Sorts the results by vendor.  The results table always names its vendor
  # column 'vendorID', whatever the vendor column is called in the input table.
  Simple.sort(results, True, 'vendorID')

  return results, RESULTS_TEXT

  
def example_input():
  '''Builds the sample input table from the module-level csvdata text.

     The CSV text is loaded through a file-like wrapper and the three
     columns are then typed: productID and vendorID as unicode,
     productPrice as currency.
  '''
  import StringIO  # load_csv is handed a file-like object wrapping the text

  # column name -> type, applied in this order
  column_types = (
    ('productID',    unicode),
    ('vendorID',     unicode),
    ('productPrice', currency),
  )

  sample = load_csv(StringIO.StringIO(csvdata))
  for column_name, column_type in column_types:
    sample.set_type(column_name, column_type)
  return sample

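# Sample data used by example_input(): CSV text with a header row followed by
# one productID,vendorID,productPrice record per line.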
csvdata = '''\
productID,vendorID,productPrice
4,1,15123.07
3,2,1032.35
5,5,75.22
3,1,1070.76
3,1,1357.52
2,1,104.81
4,4,15056.95
1,1,29.39
4,5,15945.79
1,5,28.79
3,4,1370.74
1,2,29.17
5,5,93.18
1,1,22.7
1,3,25.2
1,5,62.18
1,3,30.24
5,5,33.14
4,1,16467.18
5,2,22.07
3,3,1476.54
4,2,17206.98
2,1,174.03
2,5,198.12
4,2,16894.13
5,4,22.2
1,4,28.83
1,3,20.99
5,3,18.75
5,3,20.34
2,4,153.07
5,5,33.36
3,1,1127.39
1,3,26.2
3,2,1065.16
1,4,24.95
3,5,1562.31
2,4,151.62
2,2,162.22
4,1,17175.62
2,4,120.37
5,1,23.04
2,2,111.24
2,2,134.27
1,4,20.38
3,2,1005.39
1,5,45.34
5,2,23.66
4,3,16304.33
3,4,1027.73
3,3,1419.05
3,5,1815.4
5,3,20.15
3,5,1556.94
4,1,15003.4
3,4,1433.53
2,5,208.12
5,3,20.91
5,3,19.64
2,2,157.95
3,2,1151.31
1,4,27.25
1,3,29.39
4,3,15274.52
1,3,23.36
3,4,1429.85
3,5,1318.59
2,5,170.95
5,2,23.52
1,4,27.97
4,4,15818.25
3,3,1268.68
5,1,22.22
3,4,1000.76
1,2,21.2
4,1,15850.96
5,3,18.65
4,5,17061.27
1,1,28.6
5,5,42.27
1,1,21.6
5,1,22.94
4,4,16530.51
1,4,21.42
4,3,16862.86
5,3,21.15
5,3,20.74
1,2,22.39
4,4,17435.99
1,1,25.37
4,4,15715.06
3,1,1026.27
2,4,132.07
4,2,15775.86
1,2,21.34
5,3,20
1,4,22.19
4,2,16552.7
3,1,1417.9
'''