####################################################################################
#                                                                                  
# Copyright (c) 2006 Jeffrey Richardson <jeff_richardsonATbyuDOTnet>               
#                                                                                  
# This detectlet is part of the Phantom Vendors detectlets library.
# It requires a license and is not open source.                                    
#                                                                                  
####################################################################################
# UPDATES:
#
# June 2006  First version of the detectlet
#
####################################################################################
#
#   STATUS: Test
#           
#     Traceback (most recent call last):
#     File "C:\Program Files\Picalo\picalo\tools\Detectlets\__init__.py", line 350, in validateCurrentPage
#     NameError: global name 'TableList' is not defined
# 
#
# IDEAS/QUESTIONS
#  - I decided not to require the vendor ID because it doesn't do anything for this detectlet.  If the user needs it, then he/she can do a join.
#   I decided against it because this detectlet will use the payables table, and it is possible that a company may be paid while not assigned
#   an ID number.  I want this detectlet to be able to run despite this.  I could add it and then do a try/except around the id (for nulls).
#
#
####################################################################################

# Version of the detectlet standard this module declares.
# NOTE(review): presumably read by the Picalo detectlet loader -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *

# XML description of the input wizard for this detectlet.
#   Page 1 asks for the table (variable "table").
#   Page 2 asks for the vendor-name column ("vendor_name_col") and the
#   address column ("address_col"), both chosen from that table.
# The variable names match the parameter names of run() below.
wizard = '''
<wizard>
  <page>
    Please select the table that contains the vendors that have
    been paid.  This table must contain the address of the company
    (city, state, and zip codes are not required).  This table should
    come from the payables table or from the cash disbursements table.
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Plese select the column that identifies the NAME of the companies.
    It may be called "vendor_name", "vendor", "name", "company", or
    "company_name".
    <parameter type="Column" table="table" variable="vendor_name_col"/>
  
    Please select the column that contains the ADDRESS of each company.  
    It will probably be called "address", "street_address", or "address1"
    <parameter type="Column" table="table" variable="address_col"/>
  </page>
</wizard>
'''

# Explanatory text that run() returns together with the results table.
RESULTS_TEXT = '''\
    The displayed table shows all of the vendors that the detectlet identified 
    as having P.O. Box addresses.  Some of the companies may not have P.O. Boxes 
    (this may happen with certain street names). A simple user review will
    quickly identify those vendors.
    
    Companies that have P.O. Box addresses should be verified for legitimacy.  This 
    can be done be verifying the arrival of goods from the company or the performance 
    of services.  Furthermore, calling the company is a standard auditing technique.
'''


def run(table, address_col, vendor_name_col):
  '''Most companies have real street addresses and do not use post office boxes.
     Shell companies that are used to defraud other companies generally never have
     real street mail addresses.  Instead, they use mail drop addresses (or post
     office boxes).  Using a P.O. Box allows the fraud perpetrator to distance
     himself from the fraud.

    Legitimate companies may use P.O. Boxes for various reasons, so the use of a
    P.O. Box is not at all incriminating.  However, companies that use mail drop
    addresses should be verified for legitimacy.

    This detectlet searches for all vendors that have a post office box as an
    address.

    The detectlet goes through the following process:
     - Stratifies the table of vendors so as to eliminate duplicates
     - Searches the addresses of every vendor for key terms that indicate a P.O. Box.

    Vendors that are returned from this detectlet may be phantom vendors.

    Parameters:
     - table:           the table of paid vendors to search.
     - address_col:     name of the column holding the vendor address.
     - vendor_name_col: name of the column holding the vendor name; the table
                        is stratified on this column.

    Returns a (results, RESULTS_TEXT) tuple, where results is a new table with
    the columns "name" and "address", one row per flagged vendor.

    Only the first record of each vendor group is examined.  Vendors whose
    address in that record is None are skipped rather than raising an error.
  '''

  # run the analysis
  results = Table([
    ( 'name', unicode ),
    ( 'address', unicode ),
  ])

  # Lower-case phrases that indicate a post office box.  Every entry must be
  # followed by a comma: adjacent string literals are silently concatenated,
  # which previously fused 'post office. box' and 'box#' into one useless term.
  terms = [
    u'p.o. box',
    u'p. o. box',
    u'po box',
    u'p o box',
    u'post office box',
    u'p office box',
    u'post o box',
    u'p. office box',
    u'post office. box',
    u'box#',
    u'box #',
    u'post office',
    u'box num',
    u'pob',
    u'p.o.b.',
    u'p. o. b.',
  ]

  # One group per distinct vendor name, so each vendor is reported at most once.
  vendors = Grouping.stratify_by_value(table, vendor_name_col)
  for vendor in vendors:
    first = vendor[0]
    address = first[address_col]
    if address is None:
      # No address recorded for this vendor; nothing to search.
      continue

    # Compare case-insensitively; unicode() also tolerates non-string values.
    lowered = unicode(address).lower()
    for term in terms:
      if term in lowered:
        rec = results.append()
        rec["name"] = first[vendor_name_col]
        rec["address"] = address
        break  # one match is enough to flag the vendor

  return results, RESULTS_TEXT

  
def example_input():
  '''Build the sample vendor table for this detectlet.

     Parses the module-level csvdata text with load_csv, sets the type of
     the vendor_id column to int, and returns the resulting table.
  '''
  from StringIO import StringIO  # file-like wrapper so load_csv can read the text
  sample = load_csv(StringIO(csvdata))
  sample.set_type('vendor_id', int)
  return sample

# Sample vendor records (CSV with a header row) loaded by example_input().
# Three of the rows -- Contractors Edge, Talon and HNB Electric -- carry
# post-office-box style addresses; the rest are street addresses.
csvdata = '''\
vendor_id,vendor_name,address,city,state,zip,phone_number
10,Janson,912 Adams,Baltimore,MD,21206,555-555-2277
11,FlowFree,870 Mill,Norfolk/Virginia Beach,VA,23459,555-555-2626
2,Contractors Edge,Post Office Box Number 32,Greensboro,NC,27401,555-555-2578
14,Gilmore Tools,552 12th,Providence,RI,02901,555-555-2838
7,WWW,312 Walnut,Tampa,FL,33687,555-555-1210
6,Master Vent,667 Locust,Des Moines,IA,50265,555-555-2644
13,Zinc,735 Spring,Wichita,KS,67212,555-555-2518
5,DM,816 Central,Little Rock,AR,72117,555-555-2705
9,Talon,P.O. Box #3205,Oklahoma City,OK,73199,555-555-1626
4,Walco,510 6th,Ft. Worth,TX,75016,555-555-2340
12,Lava,61 Green,Dallas,TX,75016,555-555-2502
8,Barley,846 Hillcrest,Houston,TX,77001,555-555-1777
1,Jekel,182 East,Trenton,NJ,08620,555-555-2229
15,HNB Electric,P. Office Box 123,San Diego,CA,92155,555-555-2424
3,Neat things,378 Highland,Sacramento,CA,95829,555-555-2772
'''

