From 587420b5a3e2abbedd96250705ad1bd25af75495 Mon Sep 17 00:00:00 2001 From: Shelley Doljack Date: Mon, 28 Oct 2024 22:11:14 -0600 Subject: [PATCH] Refactors adding tags so that new tags are appended. --- libsys_airflow/plugins/shared/utils.py | 92 +++++++---- .../digital_bookplates/test_add_marc_tags.py | 9 +- tests/shared/test_utils.py | 148 +++++++++++++----- 3 files changed, 179 insertions(+), 70 deletions(-) diff --git a/libsys_airflow/plugins/shared/utils.py b/libsys_airflow/plugins/shared/utils.py index a90d0af0..71c9e4db 100644 --- a/libsys_airflow/plugins/shared/utils.py +++ b/libsys_airflow/plugins/shared/utils.py @@ -61,27 +61,6 @@ def put_folio_records(self, marc_instance_tags: dict, instance_id: str) -> bool: ) return True - def __marc_json_with_new_tags__(self, marc_json: dict, marc_instance_tags: dict): - reader = pymarc.reader.JSONReader(json.dumps(marc_json)) - - for tag_name, indicator_subfields in marc_instance_tags.items(): - logger.info(f"Constructing MARC tag {tag_name}") - for indsf in indicator_subfields: - new_tag = pymarc.Field( - tag=tag_name, indicators=[indsf['ind1'], indsf['ind2']] # type: ignore - ) - for sfs in indsf['subfields']: - for sf_code, sf_val in sfs.items(): - new_tag.add_subfield(sf_code, sf_val) - for record in reader: - existing_tags = record.get_fields(tag_name) - if self.__tag_is_unique__(existing_tags, new_tag): - record.add_ordered_field(new_tag) - - record_json = record.as_json() - logger.info(f"Constructing MARC record: {record_json}") - return record_json - def __get_srs_record__(self, instance_uuid: str) -> Union[dict, None]: source_storage_result = self.folio_client.folio_get( f"/source-storage/source-records?instanceId={instance_uuid}" @@ -106,16 +85,67 @@ def __instance_info__(self, instance_uuid: str) -> tuple: hrid = instance["hrid"] return version, hrid + def __marc_json_with_new_tags__(self, marc_json: dict, marc_instances_tags: dict): + reader = pymarc.reader.JSONReader(json.dumps(marc_json)) + + for tag_name, indicator_subfields in marc_instances_tags.items(): + logger.info(f"Constructing MARC tag {tag_name}") + for record in reader: + existing_tags = record.get_fields( + tag_name + ) # returns list of pymarc.Field or empty if record doesn't have any + if existing_tags: + logger.info( + f"Record has existing {tag_name}'s. New fields will be evaluated for uniqueness." + ) + else: + logger.info( + f"Record does not have existing {tag_name}'s. New fields will be added." + ) + # indicator_subfields: + # [{'ind1': ' ', 'ind2': ' ', 'subfields': [{'f': 'STEINMETZ'}, ...]}, + # {'ind1': ' ', 'ind2': ' ', 'subfields': [{'f': 'WHITEHEAD'}, ...]}] + new_tags = [] + for row in indicator_subfields: + new_field = self.__construct_new_field__(row, tag_name) + if self.__tag_is_unique__(existing_tags, new_field): + logger.info(f"New field {new_field.tag} is unique tag.") + new_tags.append(new_field) + else: + logger.info(f"New field {new_field.tag} is not unique") + + for x in new_tags: + record.add_ordered_field(x) + + record_json = record.as_json() + logger.info(f"Constructing MARC record: {record_json}") + return record_json + + def __construct_new_field__( + self, indicator_subfields: dict, tag_name: str + ) -> pymarc.Field: + new_field = pymarc.Field( + tag=tag_name, indicators=[indicator_subfields['ind1'], indicator_subfields['ind2']] # type: ignore + ) + for subfields in indicator_subfields['subfields']: + self.__construct_new_subfields__(new_field, subfields) + + return new_field + + def __construct_new_subfields__(self, field: pymarc.Field, subfields: dict): + for sf_code, sf_val in subfields.items(): + field.add_subfield(sf_code, sf_val) + + return field + def __tag_is_unique__(self, fields: list, new_field: pymarc.Field) -> bool: for existing_fields in fields: - for esubfield in existing_fields: - for nsubfield in new_field: - if ( - nsubfield.code == esubfield.code - and nsubfield.value == esubfield.value - ): - logger.info(f"Skip adding duplicated {new_field.tag} field") - return False - else: - return True + new_field_value = new_field.value() + existing_field_value = existing_fields.value() + if new_field_value == existing_field_value: + logger.info(f"Skip adding duplicated {new_field_value} field") + return False + else: + logger.info(f"{new_field_value} tag is unique") + return True return True diff --git a/tests/digital_bookplates/test_add_marc_tags.py b/tests/digital_bookplates/test_add_marc_tags.py index 0d2bd3ce..00663025 100644 --- a/tests/digital_bookplates/test_add_marc_tags.py +++ b/tests/digital_bookplates/test_add_marc_tags.py @@ -151,6 +151,10 @@ def test_put_folio_records_unique_tag(mock_folio_add_marc_tags, caplog): ) assert put_record_result is True assert "Skip adding duplicated 979 field" not in caplog.text + assert ( + "ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The The Donald P. Abbott Fund for Marine Invertebrates tag is unique" + in caplog.text + ) def test_put_folio_records_duplicate_tag(mock_folio_add_marc_tags, caplog): @@ -159,4 +163,7 @@ def test_put_folio_records_duplicate_tag(mock_folio_add_marc_tags, caplog): marc_instance_tags, "242c6000-8485-5fcd-9b5e-adb60788ca59" ) assert put_record_result is True - assert "Skip adding duplicated 979 field" in caplog.text + assert ( + "Skip adding duplicated ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The The Donald P. Abbott Fund for Marine Invertebrates field" + in caplog.text + ) diff --git a/tests/shared/test_utils.py b/tests/shared/test_utils.py index 3ea642f9..a4a77916 100644 --- a/tests/shared/test_utils.py +++ b/tests/shared/test_utils.py @@ -1,5 +1,6 @@ import json import httpx +import pymarc import pytest from jsonpath_ng.ext import parse @@ -51,13 +52,29 @@ def marc_json(): } +@pytest.fixture +def marc_979(): + return { + "979": { + "ind1": " ", + "ind2": " ", + "subfields": [ + {"f": "ABBOTT"}, + {"b": "druid:ws066yy0421"}, + {"c": "ws066yy0421_00_0001.jp2"}, + {"d": "The Donald P. Abbott Fund for Marine Invertebrates"}, + ], + } + } + + @pytest.fixture def marc_instance_tags(): return { "979": [ { - "ind1": "", - "ind2": "", + "ind1": " ", + "ind2": " ", "subfields": [ {"f": "ABBOTT"}, {"b": "druid:ws066yy0421"}, @@ -69,6 +86,34 @@ def marc_instance_tags(): } +@pytest.fixture +def marc_instance_two_tags(): + return { + "979": [ + { + "ind1": " ", + "ind2": " ", + "subfields": [ + {"f": "STEINMETZ"}, + {"b": "druid:nc092rd1979"}, + {"c": "nc092rd1979_00_0001.jp2"}, + {"d": "Verna Pace Steinmetz Endowed Book Fund in History"}, + ], + }, + { + "ind1": " ", + "ind2": " ", + "subfields": [ + {"f": "WHITEHEAD"}, + {"b": "druid:ph944pq1002"}, + {"c": "ph944pq1002_00_0001.jp2"}, + {"d": "Barry Whitehead Memorial Book Fund"}, + ], + }, + ] + } + + @pytest.fixture def mock_folio_add_marc_tags(mocker): mock_httpx = mocker.MagicMock() @@ -87,50 +132,15 @@ def mock_folio_add_marc_tags(mocker): def mock_httpx_client(): def mock_response(request): response = None - # following is not used in test but leaving here for testing more parts of utils.py - # match request.method: - - # case 'PUT': - # if request.url.path.startswith('/change-manager/parsedRecords'): - # response = httpx.Response(status_code=202) - return response return httpx.Client(transport=httpx.MockTransport(mock_response)) def mock_folio_client(mocker): - # following is not used in test but leaving here for testing more parts of utils.py - # def __srs_response__(path: str): - # output = {} - # instance_uuid = path.split("instanceId=")[-1] - - # match instance_uuid: - # case "06660d4f-982d-54e8-b34c-532c268868e1": - # output = { - # "sourceRecords": [ - # { - # "recordId": "e60b77d3-3a76-59e2-88f7-3d1a045af3b1", - # "parsedRecord": {"content": marc_json}, - # } - # ] - # } - - # return output def mock_folio_get(*args, **kwargs): output = {} - # following is not used in test but leaving here for testing more parts of utils.py - # if args[0].startswith("/source-storage/source-records"): - # output = __srs_response__(args[0]) - # if args[0].startswith("/inventory/instances/"): - # for instance_uuid in [ - # "64a5a15b-d89e-4bdd-bbd6-fcd215b367e4", - # "242c6000-8485-5fcd-9b5e-adb60788ca59", - # ]: - # if args[0].endswith(instance_uuid): - # output = {"_version": "1", "hrid": "a123456"} - return output mock = mocker @@ -141,7 +151,7 @@ def mock_folio_get(*args, **kwargs): def test__marc_json_with_new_tags__( - mock_folio_add_marc_tags, marc_json, marc_instance_tags + mock_folio_add_marc_tags, marc_json, marc_instance_tags, caplog ): add_marc_tag = utils.FolioAddMarcTags() marc_json_with_new_tags = add_marc_tag.__marc_json_with_new_tags__( @@ -151,3 +161,65 @@ def test__marc_json_with_new_tags__( tag_979_exp = parse("$.fields[?(@['979'])]") tag_979 = tag_979_exp.find(new_record_dict)[0].value assert len(tag_979["979"]["subfields"]) == 4 + assert "New field 979 is unique tag." in caplog.text + + +def test__marc_json_with_two_new_tags__( + mock_folio_add_marc_tags, marc_json, marc_instance_two_tags, caplog +): + add_marc_tag = utils.FolioAddMarcTags() + marc_json_with_new_tags = add_marc_tag.__marc_json_with_new_tags__( + marc_json, marc_instance_two_tags + ) + new_record_dict = json.loads(marc_json_with_new_tags) + tag_979_exp = parse("$.fields[?(@['979'])]") + new_979_tags = tag_979_exp.find(new_record_dict) + assert len(new_979_tags) == 2 + assert ( + "Record does not have existing 979's. New fields will be added." in caplog.text + ) + + +def test__marc_json_existing_tags__( + mock_folio_add_marc_tags, marc_json, marc_979, marc_instance_tags, caplog +): + add_marc_tag = utils.FolioAddMarcTags() + marc_json["fields"].append(marc_979) + marc_json_with_new_tags = add_marc_tag.__marc_json_with_new_tags__( + marc_json, marc_instance_tags + ) + new_record_dict = json.loads(marc_json_with_new_tags) + tag_979_exp = parse("$.fields[?(@['979'])]") + new_979_tags = tag_979_exp.find(new_record_dict) + assert len(new_979_tags) == 1 + assert ( + "Record has existing 979's. New fields will be evaluated for uniqueness." + in caplog.text + ) + assert ( + "Skip adding duplicated ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The Donald P. Abbott Fund for Marine Invertebrates field" + in caplog.text + ) + assert "New field 979 is not unique" in caplog.text + + +def test__tag_is_unique__(mock_folio_add_marc_tags, marc_json): + add_marc_tag = utils.FolioAddMarcTags() + reader = pymarc.reader.JSONReader(json.dumps(marc_json)) + for record in reader: + existing_tags = record.get_fields("979") + + new_field = pymarc.Field( + tag="979", + indicators=[" ", " "], + subfields=[ + pymarc.Subfield(code='f', value='ABBOTT'), + pymarc.Subfield(code='b', value='druid:ws066yy0421'), + pymarc.Subfield(code='c', value='ws066yy0421_00_0001.jp2'), + pymarc.Subfield( + code='d', + value='The The Donald P. Abbott Fund for Marine Invertebrates', + ), + ], + ) + assert add_marc_tag.__tag_is_unique__(existing_tags, new_field) is True