####################################################################################
#                                                                                  #
# Copyright (c) 2008 Aaron Hardy <http://aaronhardy.com>                           #
#                                                                                  #
# This file is part of Picalo.                                                     #
#                                                                                  #
# Picalo is free software; you can redistribute it and/or modify                   #
# it under the terms of the GNU General Public License as published by             #
# the Free Software Foundation; either version 2 of the License, or                #
# (at your option) any later version.                                              #
#                                                                                  #
# Picalo is distributed in the hope that it will be useful,                        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of                   #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                    #
# GNU General Public License for more details.                                     #
#                                                                                  #
# You should have received a copy of the GNU General Public License                #
# along with Picalo; if not, write to the Free Software                            #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA        #
#                                                                                  #
####################################################################################
#
# This detectlet detects significant returns on a given product.
#
# 09 APR 2008  First version of the wizard
#
####################################################################################

# NOTE(review): presumably the version of the Picalo detectlet interface this
# module is written against; it is not read anywhere else in this file -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *

# XML description of the wizard pages for this detectlet.  Each <parameter>
# element's "variable" attribute carries the same name as one of run()'s
# parameters: two Table parameters, four Column parameters (each tied to its
# table through the "table" attribute) and one int parameter limited to 1..100
# with a default of 15.
wizard = '''
<wizard>
  <page>
    Select the TABLE containing product PURCHASE data:

    It should at least have columns for the product identifier 
    (e.g., PartNumber, ProductCode) and the quantity PURCHASED.
    <parameter type="Table" variable="purchases_table"/>
  </page>
  <page>
    Select the COLUMN in the purchases table referring to 
    the UNIQUE IDENTIFIER of the various products (e.g., PartNumber, ProductCode):

    <parameter type="Column" table="purchases_table" variable="purchases_product_col"/>
  
    Select the COLUMN in the purchases table referring to 
    the QUANTITY PURCHASED:
    
    <parameter type="Column" table="purchases_table" variable="purchases_quantity_col"/>
  </page>
  <page>
    Select the TABLE containing product RETURN data:
    
    It should at least have columns for the product identifier 
    (e.g., PartNumber, ProductCode) and the quantity RETURNED.
    <parameter type="Table" variable="returns_table"/>
  </page>
  <page>
    Select the COLUMN in the returns table referring to 
    the UNIQUE IDENTIFIER of the various products (e.g., PartNumber, ProductCode):

    <parameter type="Column" table="returns_table" variable="returns_product_col"/>
  
    Select the COLUMN in the returns table referring to 
    the QUANTITY RETURNED:
    
    <parameter type="Column" table="returns_table" variable="returns_quantity_col"/>
  </page>
  <page>
    Specify a return percentage threshold between 1 and 100.
    Any product having a return rate higher than the specified return
    percentage threshold will be reported in the results.  A high
    threshold will likely produce fewer results than a low threshold.
    
    Example: If 1000 units of WidgetA were purchased, 300 units of WidgetA were returned, 
    and your percentage threshold was 25, WidgetA would be displayed
    in the detectlet results. Thirty percent of WidgetA purchases resulted
    in returns.  If only 200 units of WidgetA were returned, WidgetA
    would not be displayed in the detectlet results because only 20 percent
    of WidgetA purchases resulted in returns.
    <parameter type="int" variable="percentage_threshold" min="1" max="100" default="15"/>
  </page>
</wizard>
'''

def run(
    purchases_table,
    purchases_product_col,
    purchases_quantity_col,
    returns_table,
    returns_product_col,
    returns_quantity_col,
    percentage_threshold):
  '''Significant returns on purchased products can be one of the many
     indicators of fraud.  While not a strong indicator, it
     can help pinpoint problem areas.
     
     To use this detectlet, you will need both a "purchases" table and a
     "returns" table.
  '''
  # NOTE(review): the docstring above is kept word for word in case Picalo
  # displays it to users as the detectlet description -- confirm before editing.
  #
  # Parameters (same names as the "variable" attributes in the wizard XML):
  #   purchases_table         Picalo Table holding the purchase records.
  #   purchases_product_col   Name of the product identifier column in purchases_table.
  #   purchases_quantity_col  Name of the numeric quantity-purchased column.
  #   returns_table           Picalo Table holding the return records.
  #   returns_product_col     Name of the product identifier column in returns_table.
  #   returns_quantity_col    Name of the numeric quantity-returned column.
  #   percentage_threshold    Integer percentage; products whose returned quantity
  #                           exceeds this share of the purchased quantity are reported.
  #
  # Returns the tuple (results table, RETURN_TEXT).
  # Raises AssertionError carrying a user-readable message when an input is invalid.

  import types

  numeric_types = (types.IntType, types.FloatType, types.LongType)

  # Validate the data.  Only the two columns picked from the SAME table have to
  # differ: the purchases and returns tables may legitimately use identical
  # column names (the bundled example data uses PartNumber/Quantity in both),
  # so names are never compared across tables.
  assert purchases_product_col != purchases_quantity_col, \
      'The product and quantity columns of the purchases table must be different.  ' \
      'Please ensure you haven\'t selected the same column for two items.'
  assert returns_product_col != returns_quantity_col, \
      'The product and quantity columns of the returns table must be different.  ' \
      'Please ensure you haven\'t selected the same column for two items.'
  assert isinstance(purchases_table, Table), 'Please select a valid Picalo table'
  assert isinstance(returns_table, Table), 'Please select a valid Picalo table'
  assert purchases_table.column(purchases_quantity_col).get_type() in numeric_types, \
      'The quantity purchased column must be a number type.'
  assert returns_table.column(returns_quantity_col).get_type() in numeric_types, \
      'The quantity returned column must be a number type.'
  assert isinstance(percentage_threshold, types.IntType), \
      'The percentage threshold must be an integer.'

  # Summarize products by quantity purchased.
  purchases_summed = Grouping.summarize_by_value(purchases_table, purchases_product_col,
      TotalPurchased="sum(group['" + purchases_quantity_col + "'])")

  # Summarize products by quantity returned.
  returns_summed = Grouping.summarize_by_value(returns_table, returns_product_col,
      TotalReturned="sum(group['" + returns_quantity_col + "'])")

  # Join the summarized tables where the product's return percentage
  # is greater than the user-defined percentage threshold.
  # The test is written as  purchased * threshold < returned * 100  rather than
  # purchased * threshold / 100 < returned : the latter truncates under
  # Python 2 integer division when the purchased total is an int, which
  # misclassifies products whose returned total is a float.
  join_condition = (
      "record1." + purchases_product_col + " == record2." + returns_product_col +
      " and record1.TotalPurchased * " + str(percentage_threshold) +
      " < record2.TotalReturned * 100")
  results = Simple.join(purchases_summed, returns_summed, join_condition)

  # Set the data types for the "TotalPurchased" and "TotalReturned" columns
  # (positions 1 and 3 of the joined table) to numbers so we can evaluate
  # them in a calculated column.
  two_decimals = '"%0.2f" % value'
  for column_index in (1, 3):
    results.set_type(column_index, number)
    results.set_format(column_index, two_decimals)

  # Create a calculated column to show the user the exact percentage
  # returned for each product on the results table.
  results.insert_calculated(4, "PercentReturned", "record['TotalReturned'] / record['TotalPurchased'] * 100")
  results.set_type(4, number)
  results.set_format(4, two_decimals)

  # Return the results table together with the explanatory text.
  return results, RETURN_TEXT

# Explanation of the results table; run() returns it alongside the table.
RETURN_TEXT = '''
The results table contains products that have a return percentage
(quantity returned / quantity purchased) greater than the
specified percentage threshold.  The quantity purchased, quantity
returned, and return percentage are also reported.  These
returns should be investigated further to verify they
were legitimate.
'''

def example_input():
  '''Builds the two sample tables for this detectlet from the CSV text
     embedded in this module and returns them as
     [purchases_table, returns_table].
  '''
  from StringIO import StringIO  # wraps the CSV text in a file-like object for load_csv

  # One entry per table, purchases first: the CSV text followed by the
  # (column name, type) pairs to apply, in order, after loading.
  table_specs = (
    (CSV_PURCHASES_DATA,
     (('PONumber', int), ('PartNumber', int), ('Quantity', int), ('Price', currency))),
    (CSV_RETURNS_DATA,
     (('ReturnID', int), ('PartNumber', int), ('Quantity', int), ('Price', currency))),
  )

  tables = []
  for csv_text, column_types in table_specs:
    table = load_csv(StringIO(csv_text))
    for column_name, column_type in column_types:
      table.set_type(column_name, column_type)
    tables.append(table)
  return tables

# Sample purchase records as CSV text, loaded by example_input().
# Columns: PONumber, PartNumber, Quantity, Price.  The backslash right after
# the opening quotes keeps the header row on the first line of the string, and
# the closing quotes sit on the last record's line, so there is no leading
# blank line and no trailing newline.
CSV_PURCHASES_DATA = '''\
PONumber,PartNumber,Quantity,Price
9044,9763,230,15.50
9045,4231,230,8.61
9046,9221,280,4.95
9047,1234,240,4.32
9048,9763,330,15.50
9049,3425,110,2.73
9050,1501,210,15.54
9050,1502,840,19.53
9050,1503,440,21.63
9051,7310,310,43.05
9052,3425,410,2.40
9053,1501,320,12.74
9053,1502,1200,20.58
9053,1503,640,21.63
9054,1448,680,6.83
9054,1501,320,15.40
9054,1502,1320,19.74
9055,9671,420,9.10
9056,4231,280,8.86
9057,6780,1560,12.00
9057,6781,1550,12.84
9058,1234,410,4.51
9059,6780,1220,12.60
9059,6781,1240,13.08
9060,3425,100,2.28
9061,6780,1100,12.84
9061,6781,1050,12.00
9062,9763,460,18.60
9063,1448,240,7.28
9063,1470,640,3.01
9063,1501,220,14.70
9064,1501,280,13.44
9064,1502,1050,19.32
9064,1503,44,21.21
9065,9671,230,10.90
9066,9671,390,10.90
9067,3425,110,2.45
9068,1501,220,14.98
9068,1502,800,19.53
9068,1503,480,20.79
9069,9763,850,18.60
9070,3425,420,2.68
9071,1234,200,4.78
9072,9671,310,13.10
9073,1234,320,4.65
9074,1234,210,4.55
9075,1470,1200,3.14
9075,1503,680,22.89
9075,9483,330,4.60
9076,9763,350,22.30
9077,3425,100,2.58
9078,1448,700,6.98
9078,1470,1400,3.55
9078,1501,300,13.02
9079,6780,1120,12.36
9079,6781,1110,10.92
9080,4231,360,7.54
9081,6780,1580,12.12
9081,9221,1510,4.85
9082,9763,290,22.30
9083,6780,1310,12.60
9083,6781,1320,10.92
9084,7310,310,40.18
9085,9671,410,13.10
9086,6780,960,12.36
9086,6781,950,12.00
9087,7310,100,38.13
9088,1234,210,5.11
9089,1234,480,4.88
9090,1501,340,14.98
9090,1502,1320,19.53
9090,1503,680,23.31
9091,6780,1240,12.00
9091,6781,1250,11.52
9092,9763,220,26.70
9093,1448,400,7.65
9093,1502,1000,22.26
9093,9483,240,4.90
9094,7522,100,28.80
9095,7310,800,44.69
9096,9483,370,4.70
9097,1234,220,5.06
9098,1470,1280,3.58
9098,1503,680,21.00
9098,9483,340,5.50
9099,9763,110,26.70
9100,1234,310,4.23
9101,6780,1610,11.76
9101,6781,1500,11.16
9102,1448,440,8.18
9102,1501,240,12.88
9102,1502,840,22.47
9103,4231,380,9.18
9104,1470,1320,3.49
9104,1501,340,14.84
9104,1503,620,22.89
9105,7310,410,44.69
9106,1234,230,4.37
9107,1234,440,5.15
9108,1234,210,4.46
9109,9221,320,5.10
9110,1448,780,8.25
9110,1502,1560,19.74
9110,9483,380,5.30
9111,9483,230,4.80
9112,9671,410,15.70
9113,1234,300,4.69
9114,3425,100,2.43
9115,4231,280,9.10
9116,6780,1350,11.04
9116,6781,1390,11.64
9117,6780,1660,11.16
9117,6781,1540,11.64
9118,7522,410,31.50
9119,9671,360,15.70
9120,7310,100,44.69
9121,6780,1250,13.32
9121,6781,850,13.08
9122,9763,350,32.00
9123,1501,250,15.40
9123,1502,920,21.42
9123,1503,420,22.47
9124,9763,425,32.00
9125,1448,780,7.35
9125,1470,1560,3.52
9125,1501,370,14.98
9126,1234,240,4.69
9127,6780,1520,12.84
9127,6781,1670,13.32
9128,1234,470,4.78
9129,1367,220,5.88
9130,3425,100,2.60
9131,7522,400,29.40
9132,9763,420,38.40
9133,6781,1290,11.16
9133,9221,1230,5.50
9134,1470,1280,3.58
9134,1503,640,19.95
9134,9483,330,4.75
9135,6780,1000,11.52
9135,6781,1000,12.96
9136,9763,440,38.40
9137,1367,320,5.14
9138,9671,370,18.80
9139,7522,300,31.20
9140,3425,100,2.58
9141,4231,410,8.77
9142,1501,370,14.14
9142,1502,1440,23.31
9142,1503,720,21.84
9143,1367,220,5.25
9144,9671,280,18.80
9145,3425,410,2.63
9146,1448,740,7.88
9146,1501,400,14.84
9146,1502,1560,22.47
9147,1501,220,14.00
9147,1502,880,20.79
9147,1503,440,21.42
9149,3425,410,2.48
9150,7522,810,31.20
9151,1367,450,5.57
9152,9763,400,46.10
9153,6780,1630,12.24
9153,6781,1550,12.24
9154,6780,1300,12.48
9154,6781,1330,11.88
9155,1367,250,5.83
9156,9763,400,46.10
9157,3425,110,2.55
9158,1470,1520,3.14
9158,1503,720,20.79
9158,9483,360,4.75
9159,7522,110,30.60
9160,3425,420,2.68
9161,1503,700,21.63
9162,6780,1340,11.52
9162,6781,1520,13.32
9163,1367,210,5.94
9164,3425,100,2.38
9165,1367,490,4.98
9166,1367,200,5.09
9167,4231,300,8.53
9168,9763,330,55.30
9169,4231,220,8.69
9170,4231,280,7.54
9171,1470,1600,3.39
9171,1503,800,21.63
9171,9483,360,4.70
9172,1501,340,14.70
9173,9763,320,55.30
9174,7522,210,32.40
9175,3425,400,2.50
9176,9671,110,22.50
9177,1367,330,5.57
9178,1470,1280,2.91
9178,1501,320,15.54
9178,1503,680,22.47
9179,6781,1660,12.84
9179,9221,1530,4.55
9180,4231,360,8.77
9181,9671,220,22.50
9182,9763,530,66.40
9183,1501,360,13.86
9183,1502,1600,19.95
9183,1503,780,21.63
9184,1501,250,13.16
9185,9671,490,27.00
9186,7310,230,41.41
9187,9763,550,66.40
9188,9671,220,27.00
9189,6780,1330,12.96
9189,6781,1290,12.24
9190,1448,1350,7.43
9191,6780,1220,10.92
9191,6781,1050,12.24'''

# Sample return records as CSV text, loaded by example_input().
# Columns: ReturnID, PartNumber, Quantity, Price.  Unlike the purchases data,
# the Price values here carry a leading "$", and ReturnID 19 appears on two rows.
# NOTE(review): PartNumber 9762 (ReturnID 21) has no matching row in
# CSV_PURCHASES_DATA -- possibly deliberate test data, possibly a typo for
# 9763; confirm.
CSV_RETURNS_DATA = '''\
ReturnID,PartNumber,Quantity,Price
12,1470,100,$3.14
13,1503,680,$22.89
14,4231,114,$8.86
15,9483,57,$4.70
16,9483,40,$4.70
17,7310,620,$44.69
18,9671,390,$10.90
19,1502,1140,$19.74
19,1502,115,$21.42
20,9483,290,$5.30
21,9762,330,$55.30
22,1234,440,$5.15
23,9483,330,$4.75
24,1367,100,$5.94
25,9483,100,$4.75
26,1367,50,$4.98
27,9483,241,$4.70
28,9671,490,$27.00
29,9763,320,$55.30
'''