Skip to content

Commit

Permalink
Merge pull request #1373 from sul-dlss/t1365-append-new-tag
Browse files Browse the repository at this point in the history
Refactors adding tags so that new tags are appended.
  • Loading branch information
jgreben authored Oct 29, 2024
2 parents 820af57 + 587420b commit 7efa254
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 70 deletions.
92 changes: 61 additions & 31 deletions libsys_airflow/plugins/shared/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,27 +61,6 @@ def put_folio_records(self, marc_instance_tags: dict, instance_id: str) -> bool:
)
return True

# Removed side of this diff hunk: the earlier implementation, superseded by the
# method of the same name that appears further down in this diff.
# NOTE(review): indentation is lost in this rendering, so the exact nesting of
# the loops below cannot be confirmed from here.
def __marc_json_with_new_tags__(self, marc_json: dict, marc_instance_tags: dict):
# Round-trip the dict through a JSON string to hand it to pymarc's JSON reader.
reader = pymarc.reader.JSONReader(json.dumps(marc_json))

for tag_name, indicator_subfields in marc_instance_tags.items():
logger.info(f"Constructing MARC tag {tag_name}")
for indsf in indicator_subfields:
# Each entry supplies 'ind1', 'ind2' and a list of {code: value} subfield dicts.
new_tag = pymarc.Field(
tag=tag_name, indicators=[indsf['ind1'], indsf['ind2']]  # type: ignore
)
for sfs in indsf['subfields']:
for sf_code, sf_val in sfs.items():
new_tag.add_subfield(sf_code, sf_val)
for record in reader:
existing_tags = record.get_fields(tag_name)
# Only add the field when __tag_is_unique__ reports no duplicate.
if self.__tag_is_unique__(existing_tags, new_tag):
record.add_ordered_field(new_tag)

record_json = record.as_json()
logger.info(f"Constructing MARC record: {record_json}")
return record_json

def __get_srs_record__(self, instance_uuid: str) -> Union[dict, None]:
source_storage_result = self.folio_client.folio_get(
f"/source-storage/source-records?instanceId={instance_uuid}"
Expand All @@ -106,16 +85,67 @@ def __instance_info__(self, instance_uuid: str) -> tuple:
hrid = instance["hrid"]
return version, hrid

def __marc_json_with_new_tags__(self, marc_json: dict, marc_instances_tags: dict):
    """Return the MARC record as a JSON string with new, unique fields added.

    Args:
        marc_json: The record in MARC-in-JSON dict form.
        marc_instances_tags: Maps a tag name (e.g. "979") to a list of rows,
            each a dict with 'ind1', 'ind2' and 'subfields' (a list of
            single-entry {code: value} dicts).

    Returns:
        The JSON string produced by ``record.as_json()`` after the new
        fields have been added.
    """
    reader = pymarc.reader.JSONReader(json.dumps(marc_json))
    # Parse the record once, up front. Re-iterating the reader inside the
    # per-tag loop meant fields added for one tag were not carried over to the
    # record used for the next tag, and an empty tag mapping left `record`
    # unbound.
    record = next(iter(reader))

    for tag_name, indicator_subfields in marc_instances_tags.items():
        logger.info(f"Constructing MARC tag {tag_name}")
        # List of pymarc.Field, or empty if the record has none for this tag.
        existing_tags = record.get_fields(tag_name)
        if existing_tags:
            logger.info(
                f"Record has existing {tag_name}'s. New fields will be evaluated for uniqueness."
            )
        else:
            logger.info(
                f"Record does not have existing {tag_name}'s. New fields will be added."
            )
        # indicator_subfields:
        # [{'ind1': ' ', 'ind2': ' ', 'subfields': [{'f': 'STEINMETZ'}, ...]},
        #  {'ind1': ' ', 'ind2': ' ', 'subfields': [{'f': 'WHITEHEAD'}, ...]}]
        new_tags = []
        for row in indicator_subfields:
            new_field = self.__construct_new_field__(row, tag_name)
            if self.__tag_is_unique__(existing_tags, new_field):
                logger.info(f"New field {new_field.tag} is unique tag.")
                new_tags.append(new_field)
            else:
                logger.info(f"New field {new_field.tag} is not unique")

        # Add after the uniqueness pass so every row is compared against the
        # fields the record had before this call touched the tag.
        for new_tag in new_tags:
            record.add_ordered_field(new_tag)

    record_json = record.as_json()
    logger.info(f"Constructing MARC record: {record_json}")
    return record_json

def __construct_new_field__(
    self, indicator_subfields: dict, tag_name: str
) -> pymarc.Field:
    """Build a pymarc.Field for ``tag_name`` from one indicator/subfield row.

    ``indicator_subfields`` must provide 'ind1', 'ind2' and 'subfields'
    (an iterable of {code: value} dicts).
    """
    first_indicator = indicator_subfields['ind1']
    second_indicator = indicator_subfields['ind2']
    field = pymarc.Field(
        tag=tag_name, indicators=[first_indicator, second_indicator]  # type: ignore
    )
    for subfield_map in indicator_subfields['subfields']:
        self.__construct_new_subfields__(field, subfield_map)
    return field

def __construct_new_subfields__(self, field: pymarc.Field, subfields: dict):
    """Add every code/value pair in ``subfields`` to ``field`` and return the field."""
    for code in subfields:
        field.add_subfield(code, subfields[code])
    return field

def __tag_is_unique__(self, fields: list, new_field: pymarc.Field) -> bool:
    """Report whether ``new_field`` duplicates none of the existing ``fields``.

    Fields are compared by ``Field.value()`` only (judging by the log
    messages asserted in the tests, this appears to be the subfield values
    joined by spaces), so indicators and subfield codes are not considered.

    Args:
        fields: Existing pymarc.Field objects for the same tag; may be empty.
        new_field: The candidate field.

    Returns:
        False as soon as any existing field has the same value, else True.
    """
    new_field_value = new_field.value()
    # Scan every existing field. Returning True from inside the loop meant
    # only the first existing field was ever compared, so a duplicate of a
    # later field slipped through.
    for existing_field in fields:
        if existing_field.value() == new_field_value:
            logger.info(f"Skip adding duplicated {new_field_value} field")
            return False
    if fields:
        # Only logged when there was something to compare against, matching
        # when the message was emitted before.
        logger.info(f"{new_field_value} tag is unique")
    return True
9 changes: 8 additions & 1 deletion tests/digital_bookplates/test_add_marc_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ def test_put_folio_records_unique_tag(mock_folio_add_marc_tags, caplog):
)
assert put_record_result is True
assert "Skip adding duplicated 979 field" not in caplog.text
assert (
"ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The The Donald P. Abbott Fund for Marine Invertebrates tag is unique"
in caplog.text
)


def test_put_folio_records_duplicate_tag(mock_folio_add_marc_tags, caplog):
Expand All @@ -159,4 +163,7 @@ def test_put_folio_records_duplicate_tag(mock_folio_add_marc_tags, caplog):
marc_instance_tags, "242c6000-8485-5fcd-9b5e-adb60788ca59"
)
assert put_record_result is True
assert "Skip adding duplicated 979 field" in caplog.text
assert (
"Skip adding duplicated ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The The Donald P. Abbott Fund for Marine Invertebrates field"
in caplog.text
)
148 changes: 110 additions & 38 deletions tests/shared/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import httpx
import pymarc
import pytest

from jsonpath_ng.ext import parse
Expand Down Expand Up @@ -51,13 +52,29 @@ def marc_json():
}


@pytest.fixture
def marc_979():
    """A 979 field (ABBOTT) with blank indicators, shaped like an entry of marc_json['fields']."""
    subfields = [
        {code: value}
        for code, value in (
            ("f", "ABBOTT"),
            ("b", "druid:ws066yy0421"),
            ("c", "ws066yy0421_00_0001.jp2"),
            ("d", "The Donald P. Abbott Fund for Marine Invertebrates"),
        )
    ]
    return {"979": {"ind1": " ", "ind2": " ", "subfields": subfields}}


@pytest.fixture
def marc_instance_tags():
return {
"979": [
{
"ind1": "",
"ind2": "",
"ind1": " ",
"ind2": " ",
"subfields": [
{"f": "ABBOTT"},
{"b": "druid:ws066yy0421"},
Expand All @@ -69,6 +86,34 @@ def marc_instance_tags():
}


@pytest.fixture
def marc_instance_two_tags():
    """Two new 979 rows (STEINMETZ and WHITEHEAD), keyed by tag name."""
    steinmetz_row = {
        "ind1": " ",
        "ind2": " ",
        "subfields": [
            {"f": "STEINMETZ"},
            {"b": "druid:nc092rd1979"},
            {"c": "nc092rd1979_00_0001.jp2"},
            {"d": "Verna Pace Steinmetz Endowed Book Fund in History"},
        ],
    }
    whitehead_row = {
        "ind1": " ",
        "ind2": " ",
        "subfields": [
            {"f": "WHITEHEAD"},
            {"b": "druid:ph944pq1002"},
            {"c": "ph944pq1002_00_0001.jp2"},
            {"d": "Barry Whitehead Memorial Book Fund"},
        ],
    }
    return {"979": [steinmetz_row, whitehead_row]}


@pytest.fixture
def mock_folio_add_marc_tags(mocker):
mock_httpx = mocker.MagicMock()
Expand All @@ -87,50 +132,15 @@ def mock_folio_add_marc_tags(mocker):
def mock_httpx_client():
    """Return an httpx.Client whose mock transport handler always returns None."""

    def mock_response(request):
        # No request is matched yet; stale commented-out routing code was removed.
        return None

    return httpx.Client(transport=httpx.MockTransport(mock_response))


def mock_folio_client(mocker):
# following is not used in test but leaving here for testing more parts of utils.py
# def __srs_response__(path: str):
# output = {}
# instance_uuid = path.split("instanceId=")[-1]

# match instance_uuid:
# case "06660d4f-982d-54e8-b34c-532c268868e1":
# output = {
# "sourceRecords": [
# {
# "recordId": "e60b77d3-3a76-59e2-88f7-3d1a045af3b1",
# "parsedRecord": {"content": marc_json},
# }
# ]
# }

# return output

def mock_folio_get(*args, **kwargs):
output = {}
# following is not used in test but leaving here for testing more parts of utils.py
# if args[0].startswith("/source-storage/source-records"):
# output = __srs_response__(args[0])
# if args[0].startswith("/inventory/instances/"):
# for instance_uuid in [
# "64a5a15b-d89e-4bdd-bbd6-fcd215b367e4",
# "242c6000-8485-5fcd-9b5e-adb60788ca59",
# ]:
# if args[0].endswith(instance_uuid):
# output = {"_version": "1", "hrid": "a123456"}

return output

mock = mocker
Expand All @@ -141,7 +151,7 @@ def mock_folio_get(*args, **kwargs):


def test__marc_json_with_new_tags__(
mock_folio_add_marc_tags, marc_json, marc_instance_tags
mock_folio_add_marc_tags, marc_json, marc_instance_tags, caplog
):
add_marc_tag = utils.FolioAddMarcTags()
marc_json_with_new_tags = add_marc_tag.__marc_json_with_new_tags__(
Expand All @@ -151,3 +161,65 @@ def test__marc_json_with_new_tags__(
tag_979_exp = parse("$.fields[?(@['979'])]")
tag_979 = tag_979_exp.find(new_record_dict)[0].value
assert len(tag_979["979"]["subfields"]) == 4
assert "New field 979 is unique tag." in caplog.text


def test__marc_json_with_two_new_tags__(
    mock_folio_add_marc_tags, marc_json, marc_instance_two_tags, caplog
):
    """Both new 979 rows are added when the record has no 979 field yet."""
    tagger = utils.FolioAddMarcTags()
    updated_json = tagger.__marc_json_with_new_tags__(
        marc_json, marc_instance_two_tags
    )
    matches = parse("$.fields[?(@['979'])]").find(json.loads(updated_json))
    assert len(matches) == 2
    expected_log = "Record does not have existing 979's. New fields will be added."
    assert expected_log in caplog.text


def test__marc_json_existing_tags__(
    mock_folio_add_marc_tags, marc_json, marc_979, marc_instance_tags, caplog
):
    """A new 979 that duplicates an existing one is skipped, leaving a single 979."""
    marc_json["fields"].append(marc_979)
    tagger = utils.FolioAddMarcTags()
    updated_json = tagger.__marc_json_with_new_tags__(marc_json, marc_instance_tags)
    matches = parse("$.fields[?(@['979'])]").find(json.loads(updated_json))
    assert len(matches) == 1
    expected_messages = (
        "Record has existing 979's. New fields will be evaluated for uniqueness.",
        "Skip adding duplicated ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The Donald P. Abbott Fund for Marine Invertebrates field",
        "New field 979 is not unique",
    )
    for message in expected_messages:
        assert message in caplog.text


def test__tag_is_unique__(mock_folio_add_marc_tags, marc_json):
    """A candidate 979 is reported unique against the fixture record's 979 fields."""
    tagger = utils.FolioAddMarcTags()
    records = list(pymarc.reader.JSONReader(json.dumps(marc_json)))
    # Use the last record read, as a loop that rebinds on each record would.
    existing_tags = records[-1].get_fields("979")

    subfield_pairs = (
        ('f', 'ABBOTT'),
        ('b', 'druid:ws066yy0421'),
        ('c', 'ws066yy0421_00_0001.jp2'),
        ('d', 'The The Donald P. Abbott Fund for Marine Invertebrates'),
    )
    candidate = pymarc.Field(
        tag="979",
        indicators=[" ", " "],
        subfields=[
            pymarc.Subfield(code=code, value=value)
            for code, value in subfield_pairs
        ],
    )
    assert tagger.__tag_is_unique__(existing_tags, candidate) is True

0 comments on commit 7efa254

Please sign in to comment.