Skip to content

Commit

Permalink
WIP: more linting work
Browse files Browse the repository at this point in the history
  • Loading branch information
ross-spencer committed Jun 16, 2024
1 parent 6b78912 commit 5ac4172
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 77 deletions.
4 changes: 2 additions & 2 deletions src/anz_rosetta_csv/anz_rosetta_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def main():

if len(sys.argv) == 1:
parser.print_help()
sys.exit(0)
sys.exit()

args = parser.parse_args()
if args.args:
Expand All @@ -89,7 +89,7 @@ def main():
configfile=args.cfg,
provenance=args.pro,
)
res = csvgen.export2rosettacsv()
res = csvgen.export_to_rosetta_csv()
print(res)
sys.exit()

Expand Down
142 changes: 71 additions & 71 deletions src/anz_rosetta_csv/rosetta_csv_generator.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,34 @@
"""Archives New Zealand Rosetta CSV Generator."""

# pylint: disable=R1710,R0902,R0913,R0912

import configparser as ConfigParser
import logging
import sys

try:
import json_table_schema
from droid_csv_handler_class import *
from droid_csv_handler_class import DroidCSVHandler, GenericCSVHandler
from import_sheet_generator import ImportSheetGenerator
from provenance_csv_handler_class import ProvenanceCSVHandler
from rosetta_csv_sections_class import RosettaCSVSections
except ModuleNotFoundError:
try:
from src.anz_rosetta_csv.droid_csv_handler_class import *
from src.anz_rosetta_csv.droid_csv_handler_class import (
DroidCSVHandler,
GenericCSVHandler,
)
from src.anz_rosetta_csv.import_sheet_generator import ImportSheetGenerator
from src.anz_rosetta_csv.json_table_schema import json_table_schema
from src.anz_rosetta_csv.provenance_csv_handler_class import (
ProvenanceCSVHandler,
)
from src.anz_rosetta_csv.rosetta_csv_sections_class import RosettaCSVSections
except ModuleNotFoundError:
from anz_rosetta_csv.droid_csv_handler_class import *
from anz_rosetta_csv.droid_csv_handler_class import (
DroidCSVHandler,
GenericCSVHandler,
)
from anz_rosetta_csv.import_sheet_generator import ImportSheetGenerator
from anz_rosetta_csv.json_table_schema import json_table_schema
from anz_rosetta_csv.provenance_csv_handler_class import ProvenanceCSVHandler
Expand Down Expand Up @@ -74,6 +81,13 @@ def __init__(
logger.error("a configuration file hasn't been provided")
sys.exit(1)

self.subseriesmask = None
self.rnumber = None
self.droidlist = None
self.exportlist = None
self.provlist = None
self.duplicates = None

logging.info("reading app config from '%s'", configfile)
self.config = ConfigParser.RawConfigParser()
self.config.read(configfile)
Expand All @@ -83,7 +97,7 @@ def __init__(

# NOTE: A bit of a hack, compare with import schema work and refactor
self.rosettaschema = rosettaschema
self.readRosettaSchema()
self.read_rosetta_schema()

# Grab Rosetta Sections
rs = RosettaCSVSections(configfile)
Expand Down Expand Up @@ -118,7 +132,7 @@ def add_csv_value(self, value):
field = f'"{value}"'
return field

def readRosettaSchema(self):
def read_rosetta_schema(self):
"""Read the Rosetta Schema File."""
importschemajson = None
with open(self.rosettaschema, "r", encoding="utf-8") as rosetta_schema:
Expand All @@ -128,18 +142,18 @@ def readRosettaSchema(self):
importschemadict = importschema.as_dict()
importschemaheader = importschema.as_csv_header()

self.rosettacsvheader = (
importschemaheader + "\n"
) # TODO: Add newline in JSON Handler class?
self.rosettacsvheader = importschemaheader + "\n"
self.rosettacsvdict = importschemadict["fields"]

def createcolumns(self, columno):
def createcolumns(self, column_number):
"""Create a number of empty columns in Rosetta CSV."""
columns = ""
for column in range(columno):
columns = columns + '"",'
for _ in range(column_number):
columns = f'{columns}"",'
return columns

def normalize_spaces(self, filename):
"""Normalize spaces in a filename."""
if filename.find("  ") != -1:
filename = filename.replace("  ", " ")
return self.normalize_spaces(filename)
Expand All @@ -157,10 +171,7 @@ def compare_filenames_as_titles(self, droidrow, listcontroltitle):
def get_droid_value(
self, checksum, lc_title, lc_sub_series, rosetta_field, droid_field, path_mask
):
"""
# TODO: Potentially index droidlist by MD5 or SHA-256 in future...
# NOTE: itemtitle is title from Archway List Control...
"""
"""Retrieve a row from a DROID sheet."""
returnfield = ""
for drow in self.droidlist:
addtorow = False
Expand Down Expand Up @@ -209,34 +220,28 @@ def get_droid_value(
return returnfield

def csvstringoutput(self, csvlist):
# String output...
csvrows = self.rosettacsvheader

# TODO: Understand how to get this in rosettacsvsectionclass
# NOTE: Possibly put all basic RosettaCSV stuff in rosettacsvsectionclass?
# Static ROW in CSV Ingest Sheet
SIPROW = ['"",'] * len(self.rosettacsvdict)
SIPROW[0] = '"SIP",'
"""Output CSV as a string."""

# SIP Title...
csvrows = self.rosettacsvheader
sip_row = ['"",'] * len(self.rosettacsvdict)
sip_row[0] = '"SIP",'
sip_title = '"CSV Load",'
if self.config.has_option("rosetta mapping", "SIP Title"):
SIPROW[1] = '"' + self.config.get("rosetta mapping", "SIP Title") + '",'
else:
SIPROW[1] = '"CSV Load",'

csvrows = csvrows + "".join(SIPROW).rstrip(",") + "\n"

sip_title = f'"{self.config.get("rosetta mapping", "SIP Title")}",'
sip_row[1] = sip_title
sip_row = "".join(sip_row).rstrip(",")
csvrows = f"{csvrows}{sip_row}\n"
for sectionrows in csvlist:
rowdata = ""
for sectionrow in sectionrows:
for fielddata in sectionrow:
rowdata = rowdata + fielddata + ","
rowdata = rowdata.rstrip(",") + "\n"
rowdata = f"{rowdata}{fielddata},"
rowdata = f'{rowdata.rstrip(",")}\n'
csvrows = csvrows + rowdata

# this is the best i can think of because ExLibris have named two fields with the same
# title in CSV which doesn't help us when we're trying to use unique names for populating rows
# replaces SIP Title with Title (DC)
# This is the best I can think of because ExLibris have named two fields
# with the same title in the CSV, which doesn't help us when we're trying
# to use unique names for populating rows. Replaces SIP Title with Title (DC).
csvrows = csvrows.replace(
'"Object Type","SIP Title"', '"Object Type","Title (DC)"'
)
Expand All @@ -247,24 +252,21 @@ def csvstringoutput(self, csvlist):
return csvrows

def handleprovenanceexceptions(
self, PROVENANCE_FIELD, sectionrow, field, csvindex, rnumber
self, provenance_field, sectionrow, field, csvindex, rnumber
):
"""Read a provenance CSV file for exceptions on specific rows."""
ignorefield = False
if self.prov is True:
for p in self.provlist:
if p["RECORDNUMBER"] == rnumber:
# These values overwrite the defaults from DROID list...
# Double-check comparison to ensure we're inputting the right values...
# TODO: field == 'MD5' get from config...
if (PROVENANCE_FIELD == "CHECKSUM" and field == self.provhash) or (
PROVENANCE_FIELD == "ORIGINALNAME"
and field == "File Original Name"
):
if p[PROVENANCE_FIELD].lower().strip() != "ignore":
ignorefield = True
sectionrow[csvindex] = self.add_csv_value(
p[PROVENANCE_FIELD]
)
if not self.prov:
return False
for p_row in self.provlist:
if p_row["RECORDNUMBER"] != rnumber:
continue
if (provenance_field == "CHECKSUM" and field == self.provhash) or (
provenance_field == "ORIGINALNAME" and field == "File Original Name"
):
if p_row[provenance_field].lower().strip() != "ignore":
ignorefield = True
sectionrow[csvindex] = self.add_csv_value(p_row[provenance_field])
return ignorefield

def __setpathmask__(self):
Expand Down Expand Up @@ -306,12 +308,12 @@ def populaterows(self, field, listcontrolitem, sectionrow, csvindex, rnumber):
)

# if ignorefield is still false, check our checksum field as well...
if ignorefield is False:
if not ignorefield:
ignorefield = self.handleprovenanceexceptions(
"CHECKSUM", sectionrow, field, csvindex, rnumber
)

if ignorefield is False:
if not ignorefield:
sectionrow[csvindex] = self.add_csv_value(
self.get_droid_value(
checksum=listcontrolitem["Missing Comment"],
Expand Down Expand Up @@ -355,8 +357,8 @@ def create_rosetta_csv(self):
else:
logger.info("subseries mask is not set in config")

CSVINDEXSTARTPOS = 2
csvindex = CSVINDEXSTARTPOS
csv_index_start = 2
csvindex = csv_index_start

self.rnumber = 0
fields = []
Expand Down Expand Up @@ -389,21 +391,19 @@ def create_rosetta_csv(self):
field, item, sectionrow, csvindex, self.rnumber
)
else:
# we have a misalignment between cfg and json...
# TODO: Output a more useful error message?
sys.exit(
"CSV configuration and schema file do not match. Look for missing fields in either. Failed on: "
+ str(field)
+ " "
+ str(self.rosettacsvdict[csvindex]["name"])
logger.error(
"field in config: '%s' is not aligned with JSON schema '%s'",
field,
self.rosettacsvdict[csvindex]["name"],
)
sys.exit(1)

# increment csvindex along the x-axis...
csvindex += 1

itemrow.append(sectionrow)
fields.append(itemrow)
csvindex = CSVINDEXSTARTPOS
csvindex = csv_index_start

return self.csvstringoutput(fields)

Expand All @@ -430,30 +430,30 @@ def listduplicates(self):
dupe.append(cs)
return set(dupe)

def readExportCSV(self):
def read_export_csv(self):
"""Read a list control CSV."""
if self.exportsheet is not False:
csvhandler = GenericCSVHandler()
exportlist = csvhandler.csv_as_list(self.exportsheet)
return exportlist

def readDROIDCSV(self):
def read_droid_csv(self):
"""Read a DROID CSV."""
if self.droidcsv is not False:
droidcsvhandler = DroidCSVHandler()
droidlist = droidcsvhandler.read_droid_csv(self.droidcsv)
droidlist = droidcsvhandler.remove_folders(droidlist)
return droidcsvhandler.remove_container_contents(droidlist)

def export2rosettacsv(self):
def export_to_rosetta_csv(self):
"""Convert a list control and droid sheet to a Rosetta CSV."""
if self.droidcsv is not False and self.exportsheet is not False:
self.droidlist = self.readDROIDCSV()
self.exportlist = self.readExportCSV()
# self.readRosettaSchema() #NOTE: Moved to constructor... TODO: Refactor

self.droidlist = self.read_droid_csv()
self.exportlist = self.read_export_csv()
if self.prov is True:
provhandler = ProvenanceCSVHandler()
self.provlist = provhandler.read_provenance_csv(self.provfile)
if self.provlist is None:
self.prov = False

self.duplicates = self.listduplicates()
return self.create_rosetta_csv()
4 changes: 2 additions & 2 deletions tests/test_import_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ def test_csv_generation(tmp_path):
config_file,
prov_file,
)
res = csvgen.export2rosettacsv()
res = csvgen.export_to_rosetta_csv()
assert res.strip() == result.strip()


Expand Down Expand Up @@ -654,5 +654,5 @@ def test_duplicates(tmp_path):
config_file,
"",
)
res = csvgen.export2rosettacsv()
res = csvgen.export_to_rosetta_csv()
assert res.strip() == dupe_result.strip()
4 changes: 2 additions & 2 deletions tests/test_original_generator_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_normalize_spaces(mocker):
"""Test spaces are normalized correctly."""

placeholder_config = io.StringIO("")
mocker.patch.object(RosettaCSVGenerator, "readRosettaSchema")
mocker.patch.object(RosettaCSVGenerator, "read_rosetta_schema")

rosetta_csv_gen = RosettaCSVGenerator(False, False, False, placeholder_config)

Expand Down Expand Up @@ -76,7 +76,7 @@ def test_normalize_spaces(mocker):
def test_compare_filenames_as_titles(mocker):
"""Test that filenames are compared correctly."""
placeholder_config = io.StringIO("")
mocker.patch.object(RosettaCSVGenerator, "readRosettaSchema")
mocker.patch.object(RosettaCSVGenerator, "read_rosetta_schema")
rosetta_csv_gen = RosettaCSVGenerator(False, False, False, placeholder_config)

# Standard true comparison, expected result
Expand Down

0 comments on commit 5ac4172

Please sign in to comment.