Skip to content

Commit 7efa254

Browse files
authored
Merge pull request #1373 from sul-dlss/t1365-append-new-tag
Refactors adding tags so that new tags are appended.
2 parents 820af57 + 587420b commit 7efa254

File tree

3 files changed

+179
-70
lines changed

3 files changed

+179
-70
lines changed

libsys_airflow/plugins/shared/utils.py

Lines changed: 61 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -61,27 +61,6 @@ def put_folio_records(self, marc_instance_tags: dict, instance_id: str) -> bool:
6161
)
6262
return True
6363

64-
def __marc_json_with_new_tags__(self, marc_json: dict, marc_instance_tags: dict):
65-
reader = pymarc.reader.JSONReader(json.dumps(marc_json))
66-
67-
for tag_name, indicator_subfields in marc_instance_tags.items():
68-
logger.info(f"Constructing MARC tag {tag_name}")
69-
for indsf in indicator_subfields:
70-
new_tag = pymarc.Field(
71-
tag=tag_name, indicators=[indsf['ind1'], indsf['ind2']] # type: ignore
72-
)
73-
for sfs in indsf['subfields']:
74-
for sf_code, sf_val in sfs.items():
75-
new_tag.add_subfield(sf_code, sf_val)
76-
for record in reader:
77-
existing_tags = record.get_fields(tag_name)
78-
if self.__tag_is_unique__(existing_tags, new_tag):
79-
record.add_ordered_field(new_tag)
80-
81-
record_json = record.as_json()
82-
logger.info(f"Constructing MARC record: {record_json}")
83-
return record_json
84-
8564
def __get_srs_record__(self, instance_uuid: str) -> Union[dict, None]:
8665
source_storage_result = self.folio_client.folio_get(
8766
f"/source-storage/source-records?instanceId={instance_uuid}"
@@ -106,16 +85,67 @@ def __instance_info__(self, instance_uuid: str) -> tuple:
10685
hrid = instance["hrid"]
10786
return version, hrid
10887

88+
def __marc_json_with_new_tags__(self, marc_json: dict, marc_instances_tags: dict):
    """Return the MARC record as JSON with new, unique fields added.

    The record is parsed from ``marc_json`` once and every tag in
    ``marc_instances_tags`` is applied to that same record object, so fields
    added for one tag are still present when the next tag is processed.

    Args:
        marc_json: MARC-in-JSON data (presumably a single record; verify
            against callers).
        marc_instances_tags: Maps a tag name to a list of rows, e.g.
            ``{"979": [{"ind1": " ", "ind2": " ",
            "subfields": [{"f": "STEINMETZ"}, ...]}, ...]}``.

    Returns:
        The updated record serialized with ``record.as_json()``.

    Raises:
        ValueError: If no MARC record can be read from ``marc_json``.
    """
    records = list(pymarc.reader.JSONReader(json.dumps(marc_json)))
    if not records:
        raise ValueError("No MARC record found in marc_json")
    # If several records were read, the last one is updated and returned.
    record = records[-1]

    for tag_name, indicator_subfields in marc_instances_tags.items():
        logger.info(f"Constructing MARC tag {tag_name}")
        # List of pymarc.Field, empty when the record has none for this tag.
        existing_tags = record.get_fields(tag_name)
        if existing_tags:
            logger.info(
                f"Record has existing {tag_name}'s. New fields will be evaluated for uniqueness."
            )
        else:
            logger.info(
                f"Record does not have existing {tag_name}'s. New fields will be added."
            )

        new_tags = []
        for row in indicator_subfields:
            new_field = self.__construct_new_field__(row, tag_name)
            # Compare against fields already on the record and against rows
            # accepted earlier in this batch, so repeated rows are added once.
            if self.__tag_is_unique__(existing_tags + new_tags, new_field):
                logger.info(f"New field {new_field.tag} is unique tag.")
                new_tags.append(new_field)
            else:
                logger.info(f"New field {new_field.tag} is not unique")

        for unique_field in new_tags:
            record.add_ordered_field(unique_field)

    record_json = record.as_json()
    logger.info(f"Constructing MARC record: {record_json}")
    return record_json
123+
124+
def __construct_new_field__(
    self, indicator_subfields: dict, tag_name: str
) -> pymarc.Field:
    """Build a ``pymarc.Field`` for ``tag_name`` from one indicator/subfield row.

    Args:
        indicator_subfields: Row providing ``ind1``, ``ind2`` and
            ``subfields`` (an iterable of ``{code: value}`` dicts).
        tag_name: MARC tag assigned to the new field.

    Returns:
        The populated field.
    """
    first_indicator = indicator_subfields['ind1']
    second_indicator = indicator_subfields['ind2']
    field = pymarc.Field(
        tag=tag_name, indicators=[first_indicator, second_indicator]  # type: ignore
    )
    for code_value_map in indicator_subfields['subfields']:
        self.__construct_new_subfields__(field, code_value_map)
    return field
134+
135+
def __construct_new_subfields__(self, field: pymarc.Field, subfields: dict):
    """Add each code/value pair in ``subfields`` to ``field`` and return the field."""
    for code in subfields:
        field.add_subfield(code, subfields[code])
    return field
140+
109141
def __tag_is_unique__(self, fields: list, new_field: pymarc.Field) -> bool:
    """Report whether ``new_field`` differs from every field in ``fields``.

    Fields are compared by the string returned from ``value()``. Every entry
    in ``fields`` is checked before the new field is declared unique; a
    verdict is not reached after looking at the first entry only.

    Args:
        fields: Existing fields to compare against; may be empty.
        new_field: Candidate field.

    Returns:
        ``False`` if any existing field has the same value, ``True`` otherwise.
    """
    new_field_value = new_field.value()
    for existing_field in fields:
        if existing_field.value() == new_field_value:
            logger.info(f"Skip adding duplicated {new_field_value} field")
            return False
    logger.info(f"{new_field_value} tag is unique")
    return True

tests/digital_bookplates/test_add_marc_tags.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ def test_put_folio_records_unique_tag(mock_folio_add_marc_tags, caplog):
151151
)
152152
assert put_record_result is True
153153
assert "Skip adding duplicated 979 field" not in caplog.text
154+
assert (
155+
"ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The The Donald P. Abbott Fund for Marine Invertebrates tag is unique"
156+
in caplog.text
157+
)
154158

155159

156160
def test_put_folio_records_duplicate_tag(mock_folio_add_marc_tags, caplog):
@@ -159,4 +163,7 @@ def test_put_folio_records_duplicate_tag(mock_folio_add_marc_tags, caplog):
159163
marc_instance_tags, "242c6000-8485-5fcd-9b5e-adb60788ca59"
160164
)
161165
assert put_record_result is True
162-
assert "Skip adding duplicated 979 field" in caplog.text
166+
assert (
167+
"Skip adding duplicated ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The The Donald P. Abbott Fund for Marine Invertebrates field"
168+
in caplog.text
169+
)

tests/shared/test_utils.py

Lines changed: 110 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import httpx
3+
import pymarc
34
import pytest
45

56
from jsonpath_ng.ext import parse
@@ -51,13 +52,29 @@ def marc_json():
5152
}
5253

5354

55+
@pytest.fixture
def marc_979():
    """Provide one 979 field in MARC-in-JSON form with blank indicators."""
    abbott_subfields = [
        {"f": "ABBOTT"},
        {"b": "druid:ws066yy0421"},
        {"c": "ws066yy0421_00_0001.jp2"},
        {"d": "The Donald P. Abbott Fund for Marine Invertebrates"},
    ]
    return {"979": {"ind1": " ", "ind2": " ", "subfields": abbott_subfields}}
69+
70+
5471
@pytest.fixture
5572
def marc_instance_tags():
5673
return {
5774
"979": [
5875
{
59-
"ind1": "",
60-
"ind2": "",
76+
"ind1": " ",
77+
"ind2": " ",
6178
"subfields": [
6279
{"f": "ABBOTT"},
6380
{"b": "druid:ws066yy0421"},
@@ -69,6 +86,34 @@ def marc_instance_tags():
6986
}
7087

7188

89+
@pytest.fixture
def marc_instance_two_tags():
    """Provide two distinct 979 rows (STEINMETZ and WHITEHEAD) keyed by tag."""
    bookplates = (
        (
            "STEINMETZ",
            "druid:nc092rd1979",
            "nc092rd1979_00_0001.jp2",
            "Verna Pace Steinmetz Endowed Book Fund in History",
        ),
        (
            "WHITEHEAD",
            "druid:ph944pq1002",
            "ph944pq1002_00_0001.jp2",
            "Barry Whitehead Memorial Book Fund",
        ),
    )
    return {
        "979": [
            {
                "ind1": " ",
                "ind2": " ",
                "subfields": [{"f": fund}, {"b": druid}, {"c": image}, {"d": title}],
            }
            for fund, druid, image, title in bookplates
        ]
    }
115+
116+
72117
@pytest.fixture
73118
def mock_folio_add_marc_tags(mocker):
74119
mock_httpx = mocker.MagicMock()
@@ -87,50 +132,15 @@ def mock_folio_add_marc_tags(mocker):
87132
def mock_httpx_client():
88133
def mock_response(request):
89134
response = None
90-
# following is not used in test but leaving here for testing more parts of utils.py
91-
# match request.method:
92-
93-
# case 'PUT':
94-
# if request.url.path.startswith('/change-manager/parsedRecords'):
95-
# response = httpx.Response(status_code=202)
96-
97135
return response
98136

99137
return httpx.Client(transport=httpx.MockTransport(mock_response))
100138

101139

102140
def mock_folio_client(mocker):
103-
# following is not used in test but leaving here for testing more parts of utils.py
104-
# def __srs_response__(path: str):
105-
# output = {}
106-
# instance_uuid = path.split("instanceId=")[-1]
107-
108-
# match instance_uuid:
109-
# case "06660d4f-982d-54e8-b34c-532c268868e1":
110-
# output = {
111-
# "sourceRecords": [
112-
# {
113-
# "recordId": "e60b77d3-3a76-59e2-88f7-3d1a045af3b1",
114-
# "parsedRecord": {"content": marc_json},
115-
# }
116-
# ]
117-
# }
118-
119-
# return output
120141

121142
def mock_folio_get(*args, **kwargs):
122143
output = {}
123-
# following is not used in test but leaving here for testing more parts of utils.py
124-
# if args[0].startswith("/source-storage/source-records"):
125-
# output = __srs_response__(args[0])
126-
# if args[0].startswith("/inventory/instances/"):
127-
# for instance_uuid in [
128-
# "64a5a15b-d89e-4bdd-bbd6-fcd215b367e4",
129-
# "242c6000-8485-5fcd-9b5e-adb60788ca59",
130-
# ]:
131-
# if args[0].endswith(instance_uuid):
132-
# output = {"_version": "1", "hrid": "a123456"}
133-
134144
return output
135145

136146
mock = mocker
@@ -141,7 +151,7 @@ def mock_folio_get(*args, **kwargs):
141151

142152

143153
def test__marc_json_with_new_tags__(
144-
mock_folio_add_marc_tags, marc_json, marc_instance_tags
154+
mock_folio_add_marc_tags, marc_json, marc_instance_tags, caplog
145155
):
146156
add_marc_tag = utils.FolioAddMarcTags()
147157
marc_json_with_new_tags = add_marc_tag.__marc_json_with_new_tags__(
@@ -151,3 +161,65 @@ def test__marc_json_with_new_tags__(
151161
tag_979_exp = parse("$.fields[?(@['979'])]")
152162
tag_979 = tag_979_exp.find(new_record_dict)[0].value
153163
assert len(tag_979["979"]["subfields"]) == 4
164+
assert "New field 979 is unique tag." in caplog.text
165+
166+
167+
def test__marc_json_with_two_new_tags__(
    mock_folio_add_marc_tags, marc_json, marc_instance_two_tags, caplog
):
    """Both 979 rows are added when the record is logged as having no 979 fields."""
    tagger = utils.FolioAddMarcTags()
    updated_json = tagger.__marc_json_with_new_tags__(
        marc_json, marc_instance_two_tags
    )
    updated_record = json.loads(updated_json)
    matches_979 = parse("$.fields[?(@['979'])]").find(updated_record)
    assert len(matches_979) == 2
    expected_log = "Record does not have existing 979's. New fields will be added."
    assert expected_log in caplog.text
181+
182+
183+
def test__marc_json_existing_tags__(
    mock_folio_add_marc_tags, marc_json, marc_979, marc_instance_tags, caplog
):
    """A 979 row identical to one already on the record is not added again."""
    tagger = utils.FolioAddMarcTags()
    # Seed the record with the 979 field before asking for the same row to be added.
    marc_json["fields"].append(marc_979)
    updated_json = tagger.__marc_json_with_new_tags__(marc_json, marc_instance_tags)
    updated_record = json.loads(updated_json)
    matches_979 = parse("$.fields[?(@['979'])]").find(updated_record)
    assert len(matches_979) == 1

    expected_messages = (
        "Record has existing 979's. New fields will be evaluated for uniqueness.",
        "Skip adding duplicated ABBOTT druid:ws066yy0421 ws066yy0421_00_0001.jp2 The Donald P. Abbott Fund for Marine Invertebrates field",
        "New field 979 is not unique",
    )
    for message in expected_messages:
        assert message in caplog.text
204+
205+
206+
def test__tag_is_unique__(mock_folio_add_marc_tags, marc_json):
    """A new 979 field is reported unique against the 979 fields read from marc_json."""
    tagger = utils.FolioAddMarcTags()
    reader = pymarc.reader.JSONReader(json.dumps(marc_json))
    for marc_record in reader:
        existing_tags = marc_record.get_fields("979")

    code_value_pairs = (
        ('f', 'ABBOTT'),
        ('b', 'druid:ws066yy0421'),
        ('c', 'ws066yy0421_00_0001.jp2'),
        ('d', 'The The Donald P. Abbott Fund for Marine Invertebrates'),
    )
    candidate = pymarc.Field(
        tag="979",
        indicators=[" ", " "],
        subfields=[
            pymarc.Subfield(code=code, value=value)
            for code, value in code_value_pairs
        ],
    )
    assert tagger.__tag_is_unique__(existing_tags, candidate) is True

0 commit comments

Comments
 (0)