diff --git a/csv/supplier_data_two_commas_added_in_excel.csv b/csv/supplier_data_two_commas_added_in_excel.csv new file mode 100644 index 0000000..07a3318 --- /dev/null +++ b/csv/supplier_data_two_commas_added_in_excel.csv @@ -0,0 +1,13 @@ +Supplier Name,Invoice Number,Part Number,Cost,Purchase Date +Supplier X,001-1001,2341,$500.00 ,1/20/14 +Supplier X,001-1001,2341,$500.00 ,1/20/14 +Supplier X,001-1001,5467,$750.00 ,1/20/14 +Supplier X,001-1001,5467,$750.00 ,1/20/14 +Supplier Y,50-9501,7009,$250.00 ,1/30/14 +Supplier Y,50-9501,7009,$250.00 ,1/30/14 +Supplier Y,50-9505,6650,$125.00 ,2002-03-14 +Supplier Y,50-9505,6650,$125.00 ,2002-03-14 +Supplier Z,920-4803,3321,$615.00 ,2002-03-14 +Supplier Z,920-4804,3321,$615.00 ,2002-10-14 +Supplier Z,920-4805,3321,"$6,015.00 ",2/17/14 +Supplier Z,920-4806,3321,"$1,006,015.00 ",2/24/14 diff --git a/excel/pandas_sum_average_multiple_workbooks.py b/excel/pandas_sum_average_multiple_workbooks.py index 4ba13f5..7d2a68e 100755 --- a/excel/pandas_sum_average_multiple_workbooks.py +++ b/excel/pandas_sum_average_multiple_workbooks.py @@ -17,9 +17,9 @@ worksheets_data_frame = None workbook_data_frame = None for worksheet_name, data in all_worksheets.items(): - total_sales = pd.DataFrame([float(str(value).strip('$').replace(',','')) for value in data.ix[:, 'Sale Amount']]).sum() + total_sales = pd.DataFrame([float(str(value).strip('$').replace(',','')) for value in data.ix[:, 'Sale Amount']]).sum().iloc[0] number_of_sales = len(data.loc[:, 'Sale Amount']) - average_sales = pd.DataFrame(total_sales / number_of_sales) + average_sales = total_sales / number_of_sales workbook_total_sales.append(total_sales) workbook_number_of_sales.append(number_of_sales) @@ -29,18 +29,18 @@ 'worksheet_total': total_sales, 'worksheet_average': average_sales} - worksheet_data_frames.append(pd.DataFrame(data, columns=['workbook', 'worksheet', 'worksheet_total', 'worksheet_average'])) + worksheet_data_frames.append(pd.DataFrame(data, columns=['workbook', 'worksheet', 'worksheet_total', 'worksheet_average'], index=[0])) worksheets_data_frame = pd.concat(worksheet_data_frames, axis=0, ignore_index=True) - workbook_total = pd.DataFrame(workbook_total_sales).sum() + workbook_total = pd.DataFrame(workbook_total_sales).sum().iloc[0] workbook_total_number_of_sales = pd.DataFrame(workbook_number_of_sales).sum() - workbook_average = pd.DataFrame(workbook_total / workbook_total_number_of_sales) + workbook_average = workbook_total / workbook_total_number_of_sales workbook_stats = {'workbook': os.path.basename(workbook), 'workbook_total': workbook_total, 'workbook_average': workbook_average} - workbook_stats = pd.DataFrame(workbook_stats, columns=['workbook', 'workbook_total', 'workbook_average']) + workbook_stats = pd.DataFrame(workbook_stats, columns=['workbook', 'workbook_total', 'workbook_average'], index=[0]) workbook_data_frame = pd.merge(worksheets_data_frame, workbook_stats, on='workbook', how='left') data_frames.append(workbook_data_frame) @@ -48,4 +48,4 @@ writer = pd.ExcelWriter(output_file) all_data_concatenated.to_excel(writer, sheet_name='sums_and_averages', index=False) -writer.save() \ No newline at end of file +writer.save() diff --git a/plots/ggplot_plots.py b/plots/ggplot_plots.py index 97ab926..d63a426 100755 --- a/plots/ggplot_plots.py +++ b/plots/ggplot_plots.py @@ -29,4 +29,4 @@ theme_gray() print(plt3) -ggsave(plt3, "ggplot_plots.png") \ No newline at end of file +plt3.save("ggplot_plots.png")