####################################################################################
#                                                                                  
# Copyright (c) 2006 Jeffrey Richardson <jeff_richardsonATbyuDOTnet>               
#                                                                                  
# This detectlet is part of the Phantom Vendors detectlets library.                    
# It requires a license and is not open source.                                    
#                                                                                  
####################################################################################
# UPDATES:
# 20 Jul 2006   Began work on sample data
# 31 MAY 2006  First version of the detectlet
#
####################################################################################
#
#   STATUS: Create sample data
#           
# 
#
# IDEAS/QUESTIONS
#   Should this be based on some sort of fuzzy match?  (If so, we could display the matching score.)
#       -I'd say no because you want to see all companies that are close but not quite the same
# 
#
####################################################################################

# Version of the detectlet format this file is written against.
# NOTE(review): presumably read by the Picalo detectlet loader -- confirm.
DETECTLET_STANDARD = 1.0

from picalo import *

# XML definition of the wizard that collects the arguments of run().
# Each <parameter> element's "variable" attribute matches a parameter name of
# run(); "Column" parameters name the "Table" parameter they are chosen from.
wizard = '''
<wizard>
  <page>
    Please select which table contains the names of the 
    QUALIFIED (APPROVED) VENDORS. It should look similar 
    to the example input data (see the previous page). 
    
    If you need to join two or more tables together to get
    your data into this form, close the wizard and do so.
    <parameter type="Table" variable="qual_vendor_table"/>
  </page>
  <page>
    Your qualified vendor table should contain the names
    of the approved vendors.  Please select the column that
    contains the approved VENDOR NAMEs.  It may be called
    "vendor_name", "vendor", "name", "qual_vendor", or
    "qual_vendor_name".
    <parameter type="Column" table="qual_vendor_table" variable="qual_vendor_name_col"/>
  </page>
  <page>
    Please select the table that contains all of the  payment
    information.  It must contain information about each payment
    (e.g. Payment ID, Vendor Name, and Amount).  
    
    Please select the PAYMENT TABLE. This table could also be called
     a cash disbursements table or a payables table.  If you need to join 
    two or more tables together to get your data into this 
    form, close the wizard and do so. 
    <parameter type="Table" variable="table"/>
  </page>
  <page>
    Each payment record will be identified by a UNIQUE ID number.  This
    number is sometimes called a Key.  Which column identifies the
    payment ID?  This column might be named "pmt_id", "ID", "pmt_key",
    or "payment_unique_id".
    <parameter type="Column" table="table" variable="pmt_id_col"/>

    Which column contains the name of the VENDOR to whom the payment was
    made?  This column needs to contain the name and not the id.
    It might be called "vendor", "vendor_name", "company_name",
    "payee", or "payee_name".
    <parameter type="Column" table="table" variable="table_vendor_name_col"/>
  </page>
  <page>
    Which column contains the AMOUNT paid? This column needs to contain the
    total amount paid. It might be called "amount", "total", "total_amount",
    "amount_paid", or "total_paid".
    <parameter type="Column" table="table" variable="amount_col"/>
  </page>
</wizard>
'''

# Explanatory text returned by run() alongside the results table.
RESULTS_TEXT = '''\
    The displayed table shows all of the vendors who were paid but are not
    on the approved vendor list and how much each vendor was paid.
    
    Payments could legitimately be made to companies that are not on the approved
    vendor list.  The auditor will have to verify all approved circumstances.
    
    If a non-approved vendor is regularly being paid, further investigation 
    should be pursued.  A Benford's Analysis may help to find fabricated numbers.
    Products from this vendor may not be arriving or the services may not be being 
    performed.
    
    If a specific employee is approving these payments, he or she may be directing
    work towards phantom vendors.
'''


def run(table, table_vendor_name_col, pmt_id_col, amount_col, qual_vendor_table, qual_vendor_name_col):
  '''In order to ensure that work is only done by real vendors, companies
    will often create lists of approved vendors.  Approved vendors are vendors that have been
    verified and trusted.  This control helps to prevent phantom vendors from
    defrauding the company.
    
    A phantom vendor is a fake/shell company that is used to defraud companies.
    These shell companies will invoice other companies despite not having provided
    any goods or services.  Because companies have many vendors with whom they do
    business, the phantom vendor's invoices get lost in the stack and paid (while 
    never verified).  
  
    This detectlet searches for vendors that have been paid while not being
    on the approved vendor list.
    
    The detectlet goes through the following process:
     - Stratifies the data by vendor_name
     - Verifies the vendor against the qualified vendor list
     - Summarizes the payment information for non-approved vendors
       
    Vendors that are returned from this detectlet may be phantom vendors.       

    Parameters:
     - table:                 the payment table
     - table_vendor_name_col: column of table holding the payee (vendor) name
     - pmt_id_col:            column of table holding the payment id
                              (collected by the wizard; not used in the summary)
     - amount_col:            column of table holding the amount paid
     - qual_vendor_table:     the approved (qualified) vendor table
     - qual_vendor_name_col:  column of qual_vendor_table holding the vendor name

    Returns a (results, RESULTS_TEXT) tuple, where results has one record per
    paid vendor whose name is not in the approved list (exact match), with the
    columns vendor, count_of_pmts, total_amount and avg_amount.
  '''
   
  results = Table([
    ( 'vendor',        unicode ),
    ( "count_of_pmts", int     ),
    ( 'total_amount',  number   ),
    ( "avg_amount",    number   ),
  ])

  # Collect the approved names from the VALUES of the qualified vendor table.
  # (Testing against qual_vendor_name_col itself would only compare the vendor
  # with the column's name.)  Built once so each lookup in the loop is cheap.
  approved_names = set([ qual_rec[qual_vendor_name_col] for qual_rec in qual_vendor_table ])

  # one sub-table of payments per distinct vendor name
  vendors_pmts = Grouping.stratify_by_value(table, table_vendor_name_col)
  for vendor_pmts in vendors_pmts:
    # every record in the group shares the same vendor name
    vendor_name = vendor_pmts[0][table_vendor_name_col]
    if vendor_name in approved_names:
      continue

    # summarize over ALL of the vendor's payments, not just the first record
    amounts = [ pmt[amount_col] for pmt in vendor_pmts ]
    rec = results.append()
    rec['vendor'] = vendor_name
    rec["count_of_pmts"] = len(vendor_pmts)
    rec['total_amount'] = sum(amounts)
    rec["avg_amount"] = mean(amounts)
      
  return results, RESULTS_TEXT


def example_input():
  '''Builds the example input tables for this detectlet from the CSV sample
     data defined at the bottom of this file.

     Returns a TableList holding the paid-vendors (payment) table followed by
     the approved-vendors table.
  '''
  import StringIO  # to emulate a file for load_csv
  table_PaidVendors = load_csv(StringIO.StringIO(csvdata_PaidVendors))
  # The sample payments have an "amount" column; type it so it can be summed.
  # NOTE(review): the CSV header is written with a space after each comma --
  # confirm load_csv yields the column name 'amount' rather than ' amount'.
  table_PaidVendors.set_type('amount', number)
  table_ApprovedVendors = load_csv(StringIO.StringIO(csvdata_ApprovedVendors))
  tableList = TableList( table_PaidVendors, table_ApprovedVendors )
  return tableList

# TODO: Use vendor table data (10 Approved vendors and 12 Paid Vendors)
# Sample payment data loaded by example_input().
# Header columns: payment_id, vendor_name, amount.
# NOTE(review): the sample is incomplete -- only the rows for
# "Annais & Sephora Services" carry an amount; row 1 has an empty amount field
# and every other row has just two fields.  "Annais & Sephora Services" is the
# one payee here that does not appear in csvdata_ApprovedVendors.
csvdata_PaidVendors = '''\
payment_id, vendor_name, amount
1,Daniel Pet Grooming,
2,Joshua Demolition Company
3,Mana Foods
4,Annais & Sephora Services,5000.00
5,Isaiah Encryption and Security Solutions
6,Nehemiah Restoration and Construction Services
7,Annais & Sephora Services,5000.00
8,Samson Hair Salon
9,Zarahemla United
10,ABC Company of America
11,Samson Hair Salon
12,Caleb Risk Assesment
13,Nehemiah Restoration and Construction Services
14,Annais & Sephora Services,5000.00
15,Zarahemla United
16,Acme Unlimited
17,Isaiah Encryption and Security Solutions
18,Caleb Risk Assesment
19,Daniel Pet Grooming
20,ABC Company of America
21,Annais & Sephora Services,5000.00
22,Mana Foods
23,Zarahemla United
24,Samson Hair Salon
25,ABC Company of America
'''
# Sample approved (qualified) vendor list loaded by example_input().
# Single column: vendor_name.  "Aaron's School of Public Speaking" is approved
# but has no payment in csvdata_PaidVendors.
csvdata_ApprovedVendors = '''\
vendor_name
ABC Company of America
Acme Unlimited
Zarahemla United
Daniel Pet Grooming
Samson Hair Salon
Joshua Demolition Company
Caleb Risk Assesment
Aaron's School of Public Speaking
Mana Foods
Isaiah Encryption and Security Solutions
Nehemiah Restoration and Construction Services
'''
