-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransaction_parse.py
executable file
·142 lines (112 loc) · 7.07 KB
/
transaction_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import pandas as pd
import os
import glob
import time
#!/usr/bin/env python3
''' Export transactions with detail from Givegbutter. Include all fields. Parser will drop any uncessary fields.
Create a mapper file for any tickets that were paid for someone other than the rider as csv with the following columns:
1. Name Team member (must match)
2. email address from Team member (Will replace the incorrect email address)
'''
# Load the CSV file into a DataFrame
file_path = 'transactions.csv'
df = pd.read_csv(file_path)
# Change any column data that matches '2024 Ride for Missing Children - MV New and Returning Riders' to '2024 Ride for Missing Children - MV New / Returning Riders'
# This was due to testing tickets before going live. Don't do this next year. Also use shorter ticket names.
df['Description'] = df['Description'].replace('2024 Ride for Missing Children - MV New and Returning Riders', '2024 Ride for Missing Children - MV New / Returning Riders')
# Filter out the rows where Subtype is 'ticket'
tickets_df = df[df['Subtype'] == 'ticket']
# Columns to be dropped
columns_to_drop = ["Campaign", "Campaign slug", "Team", "Reference #"]
# Drop the specified columns
tickets_df = tickets_df.drop(columns=columns_to_drop, errors='ignore')
# Load the name_mapping CSV file
df_mapping = pd.read_csv('name_mapping.csv')
# Split the 'Team member' column into first and last names
tickets_df[['First name from Team member', 'Last name']] = tickets_df['Team member'].str.split(' ', n=1, expand=True)
# Convert to lowercase
tickets_df['First name from Team member'] = tickets_df['First name from Team member'].str.lower()
tickets_df['First name'] = tickets_df['First name'].str.lower()
# Find mismatches between 'First name from Team member' and 'First name'
mismatches = tickets_df['First name from Team member'] != tickets_df['First name']
# Strip leading/trailing spaces from column names
df_mapping.columns = df_mapping.columns.str.strip()
# For each mismatch, find the correct email in the mapping DataFrame
for i in tickets_df[mismatches].index:
team_member = tickets_df.loc[i, 'Team member']
mapping_entry = df_mapping[df_mapping['Team member'] == team_member].reset_index(drop=True)
# If a match is found in the mapping DataFrame, update the 'Email' column in tickets_df
if not mapping_entry.empty:
correct_email = mapping_entry.loc[0, 'Email']
tickets_df.loc[i, 'Email'] = correct_email
# Columns to be dropped
columns_to_drop = ["First name", "Last name", "Country", "Status", "Fund ID", "Fund code", "Fund name", "Dedication type", "Dedication name", "Company", "Dedication recipient name", "Dedication recipient email",
"Method", "Credit card last four", "Credit card expiration date", "Discount code", "Method subtype", "Amount", "Fee", "Fee covered", "Donated", "Payout", "Currency",
"Recurring plan ID", "Frequency", "Check number", "Check deposited (UTC)", "Payment captured (UTC)", "Refund date (UTC)", "Dispute status", "Acknowledged", "Anonymous hide name",
"Anonymous hide amount", "Public name", "Public message", "Donor local timezone", "UTM source", "UTM medium", "UTM campaign", "UTM term", "UTM content", "Referrer", "Widget ID", "Match name", "Match amount", "External ID",
"Household ID", "Household name", "Subtype", "Quantity", "Price", "Discount", "Total", "First name from Team member" ]
# Drop the specified columns
tickets_df = tickets_df.drop(columns=columns_to_drop, errors='ignore')
# Mapping of original descriptions to new sheet names provided by the user
description_to_sheet_map = {
'2024 Ride for Missing Children - MV New / Returning Riders': 'NewAndReturning',
'2024 Ride for Missing Children - MV Reciprocal Riders': 'Reciprocal',
'2024 Ride for Missing Children - MV Volunteer': 'Volunteer'
}
# Function to auto-adjust columns' width
def auto_adjust_columns_width(df, writer, sheet_name):
for column in df:
# Find the length of the longest entry in the column
column_length = max(df[column].astype(str).map(len).max(), len(column))
# Find the column index (0-indexed)
col_idx = df.columns.get_loc(column)
# Adjust the column width at the column index
writer.sheets[sheet_name].set_column(col_idx, col_idx, column_length)
# Assuming the directory structure and file naming convention
output_dir = 'Rider_Volunteer_CSVs'
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# Function to save DataFrame to CSV and compare with the previous version
def save_and_compare_df(df, description):
# Filename convention: Description_CurrentTime.csv
filename = f"{description.replace(' ', '_')}_{int(time.time())}.csv"
filepath = os.path.join(output_dir, filename)
# Save current DataFrame to CSV
df.to_csv(filepath, index=False)
# Find previous file for the same description
previous_files = glob.glob(os.path.join(output_dir, f"{description.replace(' ', '_')}*.csv"))
previous_files = [f for f in previous_files if f != filepath] # Exclude current file
if previous_files:
# Assuming there's only one previous file for simplicity
previous_file = max(previous_files, key=os.path.getctime) # Get the most recent file
previous_df = pd.read_csv(previous_file)
# Compare DataFrames to find new rows in the current DataFrame
# This simplistic comparison assumes you're only looking for new rows added
comparison_df = pd.concat([df, previous_df, previous_df]).drop_duplicates(keep=False)
if not comparison_df.empty:
changes_filename = f"changes_{filename}"
changes_filepath = os.path.join(output_dir, changes_filename)
comparison_df.to_csv(changes_filepath, index=False)
print(f"Changes saved to {changes_filepath}")
else:
print("No changes detected.")
else:
print(f"No previous files found for {description}. Current file saved as {filename}.")
for original_desc, new_sheet_name in description_to_sheet_map.items():
# Filter the DataFrame based on the Description
subset_df = tickets_df[tickets_df['Description'] == original_desc]
# Save and compare the subset DataFrame
save_and_compare_df(subset_df, new_sheet_name)
# Get the current epoch time
current_time = int(time.time())
# Create a new Excel writer object with the current epoch time in the filename
mapped_excel_path = f'Rider_Volunteer_MasterList_{current_time}.xlsx'
with pd.ExcelWriter(mapped_excel_path, engine='xlsxwriter') as mapped_writer:
# Write each subset of data to a separate sheet based on the new mapping
for original_desc, new_sheet_name in description_to_sheet_map.items():
# Find the subset of dataframe for each original description
subset_df = tickets_df[tickets_df['Description'] == original_desc]
# Write to the corresponding new sheet name
subset_df.to_excel(mapped_writer, sheet_name=new_sheet_name, index=False)
# Auto-adjust columns' width
auto_adjust_columns_width(subset_df, mapped_writer, new_sheet_name)