####################################################################################
#                                                                                  #
# Copyright (c) 2008 Aaron Hardy <http://aaronhardy.com>                           #
#                                                                                  #
# This file is part of Picalo.                                                     #
#                                                                                  #
# Picalo is free software; you can redistribute it and/or modify                   #
# it under the terms of the GNU General Public License as published by             #
# the Free Software Foundation; either version 2 of the License, or                #
# (at your option) any later version.                                              #
#                                                                                  #
# Picalo is distributed in the hope that it will be useful,                        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of                   #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                    #
# GNU General Public License for more details.                                     #
#                                                                                  #
# You should have received a copy of the GNU General Public License                #
# along with Picalo; if not, write to the Free Software                            #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA        #
#                                                                                  #
####################################################################################
#
# This detectlet detects multiple bids submitted on the same day for the same project.
#
# 05 APR 2008  First version of the wizard
#
####################################################################################

# Version number of the detectlet standard this file declares.
# NOTE(review): presumably read by Picalo when it loads the detectlet to pick
# the matching plugin interface -- confirm against the Picalo documentation.
DETECTLET_STANDARD = 1.0

from picalo import *

# XML definition of the wizard pages that collect this detectlet's inputs.
# Each <parameter> element's "variable" attribute (bid_table, bidDate_col,
# projectId_col) has the same name as one of run()'s arguments; the two Column
# parameters are tied to the table chosen on the first page through
# table="bid_table".
wizard = '''
<wizard>
  <page>
    Select the TABLE containing bid data:
    
    It should at least have columns for the bid date and the
    unique identifier of the project/contract to which each bid is related.
    <parameter type="Table" variable="bid_table"/>
  </page>
  <page>
    Select the COLUMN referring to the DATE each bid was submitted:
    <parameter type="Column" table="bid_table" variable="bidDate_col"/>
    
    Select the COLUMN referring to the UNIQUE IDENTIFIER of the 
    project/contract to which each bid is related:
    <parameter type="Column" table="bid_table" variable="projectId_col"/>
  </page>
</wizard>
'''

def run(
    bid_table,
    bidDate_col,
    projectId_col):
  '''Purchasers (who work for the organization) may want to direct work toward
     a specific bidder.  They may do this if they are getting kickbacks from a
     certain vendor, have family or friends to whom they want to funnel work, or
     for many other reasons.  When this happens, the company suffers economic losses
     by over-paying for services or goods.  Furthermore, bidders that are sure
     of winning are less inclined to assure quality.

     Most organizations will give a period of several months for vendors to bid
     on a project or contract.  If a relatively small number of vendors submit bids,
     it's unlikely there will be more than one bid on a single day.
     Fraudulent activity may not be completely uncovered by this detectlet but
     red flags will be raised which can be used in conjunction with other research
     to pinpoint problem areas.
  '''
  # Contract (parameter names match the variables declared in the wizard XML):
  #   bid_table     -- Picalo Table holding one record per bid.
  #   bidDate_col   -- name of the column with each bid's submission date;
  #                    the column must be typed Date or DateTime.
  #   projectId_col -- name of the column identifying the project/contract
  #                    each bid belongs to; must differ from bidDate_col.
  # Returns a (results_table, RETURN_TEXT) tuple where results_table has the
  # same schema as bid_table and contains every bid that shares a one-day
  # date group with at least one other bid for the same project.
  # Raises AssertionError when any of the validations below fails.
  # NOTE(review): the docstring above presumably doubles as the user-facing
  # description of the detectlet, so the technical contract is kept in
  # comments rather than in the docstring -- confirm.

  # Validate the data.  The message is built from adjacent string literals
  # rather than backslash continuations inside one literal, so the source
  # indentation does not leak into the text shown to the user.
  assert bidDate_col != projectId_col, (
      'The bid date and project ID columns must be different.  '
      'Please ensure you haven\'t selected the same column for these two items.')
  assert isinstance(bid_table, Table), 'Please select a valid Picalo table'
  assert bid_table.column(bidDate_col).get_type() in (Date, DateTime), 'The column to group by must be DateTime or Date type.'

  # Create a results table with a schema identical to that
  # of the user-provided bid table (an empty slice keeps the columns
  # but none of the records).
  results_table = bid_table[0:0]

  # Stratify bid data by project
  project_strats = Grouping.stratify_by_value(bid_table, projectId_col)
  for project_strat in project_strats:
    # Stratify each project's bid data by date, using groups of 1 day.
    date_strats = Grouping.stratify_by_date(project_strat, bidDate_col, 1)
    for date_strat in date_strats:
      # If there is more than one bid on a single day for a given project,
      # add the day's bid records to the results table.
      if len(date_strat) > 1:
        results_table.extend(date_strat)

  # Return results
  return results_table, RETURN_TEXT

# Explanatory text that run() returns as the second element of its result
# tuple, alongside the results table.
RETURN_TEXT = '''
The results table contains bids which were submitted the same day as
another bid for a given project.  These bids should be investigated
further and used in conjunction with other research data to identify
fraudulent vendors.
'''

def example_input():
  '''Builds the sample bid table for this detectlet.

     The embedded CSV_DATA text is loaded through load_csv and each column
     is then given its type.  Returns the resulting table.
  '''
  from StringIO import StringIO  # lets load_csv read the CSV text like a file

  # Column name -> type, applied in the order the columns appear in the CSV.
  column_types = (
    ('bid_id', int),
    ('project_id', int),
    ('vendor_id', int),
    ('bid_date', Date),
    ('bid_total_amount', currency),
    ('winner', boolean),
  )

  sample = load_csv(StringIO(CSV_DATA))
  for column_name, column_type in column_types:
    sample.set_type(column_name, column_type)
  return sample

# Sample bid data loaded by example_input().  The first row is the header
# (bid_id, project_id, vendor_id, bid_date, bid_total_amount, winner); dates
# are written month/day/year.  Two projects contain same-day bids that the
# detectlet is meant to flag: project 4 (bids 11 and 12 on 01/17/2008) and
# project 25 (bids 74 and 75 on 02/27/2008).  The backslash after the opening
# quotes keeps the data from starting with an empty line.
CSV_DATA = '''\
bid_id,project_id,vendor_id,bid_date,bid_total_amount,winner
1,1,2,01/02/2008,$81710.70,False
2,1,3,01/06/2008,$73616.20,True
3,1,8,01/12/2008,$78459.60,False
4,2,4,01/08/2008,$80154.20,True
5,2,10,01/11/2008,$84468.50,False
6,2,13,01/14/2008,$84499.90,False
7,3,3,01/02/2008,$79047.60,False
8,3,9,01/09/2008,$88832.60,True
9,3,6,01/10/2008,$80644.60,False
10,4,13,01/02/2008,$74844.80,False
11,4,1,01/17/2008,$77039.70,False
12,4,14,01/17/2008,$74475.70,True
13,5,13,01/02/2008,$92006.50,False
14,5,14,01/18/2008,$91261.70,False
15,5,1,01/21/2008,$88636.90,True
18,6,13,01/11/2008,$74107.60,True
19,7,5,01/02/2008,$60250.70,True
20,7,15,01/17/2008,$64108.60,False
21,7,7,01/19/2008,$64087.90,False
22,8,2,01/07/2008,$57121.40,True
23,8,6,01/14/2008,$57733.00,False
24,8,3,01/22/2008,$57941.80,False
25,9,14,01/22/2008,$75812.80,True
26,9,10,01/23/2008,$77389.50,False
30,10,11,01/10/2008,$80934.10,True
31,11,8,01/11/2008,$64544.80,False
32,11,3,01/12/2008,$67933.30,False
33,11,6,02/03/2008,$63895.00,True
34,12,1,01/02/2008,$76622.90,False
35,12,4,02/01/2008,$77277.50,False
36,12,10,02/05/2008,$72907.80,True
37,13,9,01/21/2008,$105374.90,True
38,13,3,01/22/2008,$102772.80,False
39,13,8,01/27/2008,$104931.20,False
40,14,8,02/01/2008,$85366.00,False
41,14,6,02/03/2008,$85833.00,False
42,14,9,02/14/2008,$83470.30,True
43,15,7,02/03/2008,$111531.90,False
44,15,5,02/04/2008,$111454.70,True
47,16,9,01/12/2008,$90398.30,True
48,16,8,02/01/2008,$93963.10,False
49,17,7,02/12/2008,$69701.30,True
50,17,12,01/31/2008,$72258.40,False
51,17,15,02/14/2008,$77042.60,False
52,18,6,01/14/2008,$65980.70,False
53,18,2,01/18/2008,$62398.90,False
54,18,9,02/14/2008,$66761.10,True
55,19,1,02/05/2008,$53137.90,False
56,19,14,02/02/2008,$52581.80,False
57,19,4,02/20/2008,$51033.20,True
58,20,10,01/11/2008,$94882.40,True
59,20,14,01/27/2008,$96371.40,False
60,20,1,02/02/2008,$94926.10,False
61,21,14,02/07/2008,$82492.30,False
62,21,1,02/10/2008,$74023.40,True
66,22,15,02/18/2008,$94699.50,True
67,23,13,02/02/2008,$97517.00,False
68,23,4,02/03/2008,$97177.30,False
69,23,1,02/26/2008,$93742.70,True
70,24,11,01/27/2008,$98032.00,False
71,24,7,02/15/2008,$95292.90,True
72,24,15,02/18/2008,$95506.60,False
73,25,12,02/19/2008,$98206.10,False
74,25,7,02/27/2008,$99439.20,False
75,25,15,02/27/2008,$96456.80,True
76,26,5,02/21/2008,$79598.00,False
77,26,11,03/01/2008,$78413.60,False
78,26,15,03/02/2008,$76724.40,True
79,27,6,02/12/2008,$67505.60,True
80,27,3,03/01/2008,$68713.50,False
81,27,2,03/02/2008,$72646.50,False
82,28,11,02/21/2008,$72019.10,True
85,29,1,02/21/2008,$104470.70,True
88,30,5,03/01/2008,$69785.30,False
89,30,15,03/02/2008,$66948.10,True
90,30,11,03/08/2008,$68492.10,False
91,31,8,03/08/2008,$83345.30,False
92,31,3,03/15/2008,$83213.90,True
95,32,12,03/01/2008,$84316.80,True
96,32,15,03/04/2008,$90266.10,False
97,33,13,02/04/2008,$79932.40,False
98,33,14,03/13/2008,$73360.40,True
99,33,10,03/15/2008,$78470.10,False'''