-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck.py
218 lines (181 loc) · 8.69 KB
/
check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env python3
from bescheinigungs_check import BescheinigungsCheck
import openpyxl
import datetime
import argparse
import logging
from openpyxl.utils.exceptions import InvalidFileException
# Check of EfZ-Nachweis via DPSG NaMi 2.2 web interface
# data from a Microsoft Excel spreadsheet is processed and updated
def get_cols_from_cli_args(cli_args):
# dependent on the specific file structure the data is spread across different columns;
# we need to define the column number mapping here;
# we need first name, surname, EfZ number and birthday to start the query
# status and update timestamp are used to store the response in the same document
# TODO: somehow apply auto-detection from the given input file (nice to have)
col_firstname = cli_args.columns[0]
col_surname = cli_args.columns[1]
col_birthdate = cli_args.columns[2]
col_fznumber = cli_args.columns[3]
col_status = None
if len(cli_args.columns) > 4:
col_status = cli_args.columns[4]
col_update_timestamp = None
if len(cli_args.columns) > 5:
col_update_timestamp = cli_args.columns[5]
return col_firstname, col_surname, col_fznumber, col_birthdate, col_status, col_update_timestamp
def get_query_input_from_sheet_row(cli_args, sheet, row_idx):
# get column numbers for required input data; ignore columns for optional output data (status and timestamp)
col_firstname, col_surname, col_fznumber, col_birthdate, _, _ = get_cols_from_cli_args(cli_args)
birthdate = sheet.cell(row=row_idx, column=col_birthdate).value
if birthdate:
if type(birthdate) == datetime.datetime:
birthdate = birthdate.strftime("%d.%m.%Y")
else:
birthdate = ""
fznumber = sheet.cell(row=row_idx, column=col_fznumber).value
if not fznumber:
fznumber = ""
firstname = sheet.cell(row=row_idx, column=col_firstname).value
if not firstname:
firstname = ""
surname = sheet.cell(row=row_idx, column=col_surname).value
if not surname:
surname = ""
request_data = {
'fzNummer': fznumber,
'vorname': firstname,
'nachname': surname,
'geburtsdatum': birthdate
}
return request_data
def process_excel_file(cli_args):
filename = cli_args.filename
# TODO: we could also make this CSV input format compatible
wb = openpyxl.load_workbook(filename)
sheet = wb.active
log.debug(f"Processing file '{filename}'")
start_row = cli_args.start_row # skip the header row as it usually contains the headings
log.debug(f"Start row: {start_row}")
if start_row < 1:
exit(f"Start row number may not be < 1, given: {start_row}.")
log.info(f"Number of columns in workbook sheet: {sheet.max_column}")
log.debug(f"Number of rows in workbook sheet to be processed: {sheet.max_row}")
if any(col_idx > sheet.max_column for col_idx in cli_args.columns):
log.error("Given column number exceeds number of available columns.")
exit()
col_firstname, col_surname, col_fznumber, col_birthdate, col_status, col_update_timestamp = \
get_cols_from_cli_args(cli_args)
log.debug(f"First name is taken from column #{col_firstname}, surname from #{col_surname}, "
f"EfZ ID from #{col_fznumber} and date of birth from #{col_birthdate}.")
could_or_will = "will"
if cli_args.dry_run:
could_or_will = "could"
if col_status:
log.info(f"Status {could_or_will} be stored in column #{col_status}.")
else:
log.info("Won't store status (no valid column number given).")
if col_update_timestamp:
log.info(f"Timestamp {could_or_will} be stored in column #{col_update_timestamp}.")
else:
log.info("Won't store timestamp (no valid column number given).")
store_workbook = not cli_args.dry_run and ((col_status is not None) or (col_update_timestamp is not None))
log.debug(f"Need to store workbook after processing: {store_workbook}")
# print header for result visualization
if not cli_args.dont_print:
output_format = f"{' '*22}{'ID':<4} {'Vorname':>20} {'Nachname':>20} Status"
print(output_format)
print('-'*100)
for row_idx in range(start_row, sheet.max_row + 1):
req_data = get_query_input_from_sheet_row(cli_args, sheet, row_idx=row_idx)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
efz_status = 'Prüfung fehlerhaft'
try:
check = BescheinigungsCheck(req_data['vorname'], req_data['nachname'], req_data['geburtsdatum'],
req_data['fzNummer'])
status = check.get_check_status()
if status:
efz_status = 'Gültig'
else:
efz_status = 'Ungültig'
except ValueError:
pass
# visualize result (along with some input data) on the command line
if not cli_args.dont_print:
output_format = f"[{timestamp}] #{row_idx-start_row+1:<4} {req_data.get('vorname'):>20} " \
f"{req_data.get('nachname'):>20} {efz_status}"
print(output_format)
# write result back to the workbook
if store_workbook and col_status:
sheet.cell(row=row_idx, column=col_status).value = efz_status
if store_workbook and col_update_timestamp:
sheet.cell(row=row_idx, column=col_update_timestamp).value = timestamp
# feed back to Excel
if store_workbook:
wb.save(filename)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='%(prog)s checks EfZ-Nachweise in DPSG NaMi 2.2 using Excel and '
'HTTP requests')
parser.add_argument(dest='filename',
help='path to Excel input/output file',
default="logic.json")
parser.add_argument('-c', '--columns',
type=int,
nargs='+',
help='column numbers for the Excel workbook in the order '
'Vorname (prename), '
'Nachname (surname), '
'Geburtsdatum (date of birth), '
'EfZ ID (EfZ-Nummer), '
'Status and Timestamp '
'where the first column has number 1 (not 0)',
default=[1, 2, 3, 4, 5, 6]) # default falls back to listed order from the help text above
parser.add_argument('-n', '--dry-run',
action='store_true',
help='Send the HTTP requests to the server but '
'don''t write back to the Excel file.',
default=False)
parser.add_argument('-dp', '--dont-print',
action='store_true',
help='Don''t print the data on the standard output.',
default=False)
parser.add_argument('-sr', '--start-row',
type=int,
help='Start row (to skip parsing of header lines); the first row has number 1 (not 0).',
default=2)
parser.add_argument('-v', '--verbose',
action='count',
help='log level (-v: INFO, -vv: DEBUG)',
default=0)
args = parser.parse_args()
# Create and configure logger object
log = logging.getLogger(__name__)
if args.verbose > 0:
log.setLevel(logging.INFO)
if args.verbose > 1:
log.setLevel(logging.DEBUG)
else:
log.setLevel(logging.WARNING)
# pre-check validity of given column numbers
num_cols = len(args.columns)
if num_cols < 4 or num_cols > 6:
log.error(f"Number of columns must be 4..6, but {num_cols} given: {args.columns}.")
exit()
if not all(col_idx > 0 for col_idx in args.columns):
log.error(f"Only column numbers greater or equal 1 allowed, but given: {args.columns}.")
exit()
if len(set(args.columns)) < num_cols:
# check for duplicates (convert to set and check length against length of list)
log.error("Cannot use the same column number for multiple input fields.")
exit()
if num_cols == 4:
log.info("Only 4 column numbers given. Won't write back status and timestamp to Excel file (read-only).")
try:
process_excel_file(args)
except FileNotFoundError:
log.error(f"Could not find file '{args.filename}'.")
exit()
except InvalidFileException:
log.error(f"File format of file '{args.filename}' is currently not supported "
"(supported formats are .xlsx, .xlsm, .xltx and .xltm).")
exit()