Skip to content

Commit

Permalink
Merge pull request #1409 from sul-dlss/t1194-multiple-oclc-report
Browse files (browse the repository at this point in the history)
Generate Multiple OCLC Numbers Report with Link in Email
  • Loading branch information
jermnelson authored Nov 7, 2024
2 parents dd9f90a + 03145c8 commit 89e5dc4
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 111 deletions.
22 changes: 10 additions & 12 deletions libsys_airflow/dags/data_exports/oclc_selections.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from airflow import DAG

from airflow.decorators import task
from airflow.decorators import task, task_group
from airflow.models.param import Param
from airflow.operators.python import BranchPythonOperator
from airflow.operators.empty import EmptyOperator
Expand Down Expand Up @@ -32,6 +32,8 @@
generate_multiple_oclc_identifiers_email,
)

from libsys_airflow.plugins.data_exports.oclc_reports import multiple_oclc_numbers_task

logger = logging.getLogger(__name__)

default_args = {
Expand Down Expand Up @@ -129,15 +131,10 @@ def divide_updates_records_by_library(**kwargs):
updates_records = kwargs.get("updates_records", [])
return divide_into_oclc_libraries(marc_file_list=updates_records)

@task
def aggregate_email_multiple_records(**kwargs):
ti = kwargs["ti"]
new_multiple_records = ti.xcom_pull(task_ids='divide_new_records_by_library')
deletes_multiple_records = ti.xcom_pull(
task_ids='divide_delete_records_by_library'
)
all_multiple_records = new_multiple_records + deletes_multiple_records
generate_multiple_oclc_identifiers_email(all_multiple_records)
@task_group(group_id="multiple-oclc-numbers-group")
def multiple_oclc_numbers_group(**kwargs):
kwargs["reports"] = multiple_oclc_numbers_task(**kwargs)
generate_multiple_oclc_identifiers_email(**kwargs)

@task
def remove_original_marc_files(**kwargs):
Expand All @@ -162,6 +159,8 @@ def remove_original_marc_files(**kwargs):

finish_division = EmptyOperator(task_id="finish_division")

multiple_oclc_numbers = multiple_oclc_numbers_group()

remove_original_marc = remove_original_marc_files(marc_file_list=fetch_marc_records)

finish_processing_marc = EmptyOperator(
Expand All @@ -172,7 +171,6 @@ def remove_original_marc_files(**kwargs):
check_record_ids >> fetch_folio_record_ids >> filter_out_updates_ids >> save_ids_to_file
check_record_ids >> save_ids_to_file >> fetch_marc_records


(
fetch_marc_records
>> [
Expand All @@ -185,6 +183,6 @@ def remove_original_marc_files(**kwargs):

(
finish_division
>> [aggregate_email_multiple_records(), remove_original_marc, archive_csv]
>> [multiple_oclc_numbers, remove_original_marc, archive_csv]
>> finish_processing_marc
)
65 changes: 19 additions & 46 deletions libsys_airflow/plugins/data_exports/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,6 @@
logger = logging.getLogger(__name__)


def _oclc_identifiers(multiple_codes: list, folio_url: str):
template = Template(
"""<h2>Multiple OCLC Identifiers</h2>
<p>These Instances contain multiple OCLC identifiers and need
manual remediation to be uploaded to OCLC</p>
<ul>
{% for row in multiple_codes %}
<li>
<a href="{{folio_url}}/inventory/viewsource/{{row[0]}}">MARC view of Instance {{row[0]}}</a>
with OCLC Identifiers {% for code in row[2] %}{{ code }}{% if not loop.list %}, {% endif %}{% endfor %}.
</li>
{% endfor %}
</ul>
"""
)

return template.render(folio_url=folio_url, multiple_codes=multiple_codes)


def _cohort_emails():
return {
"business": Variable.get("OCLC_EMAIL_BUS"),
Expand Down Expand Up @@ -175,44 +156,36 @@ def generate_oclc_new_marc_errors_email(error_reports: dict):
)


def generate_multiple_oclc_identifiers_email(multiple_codes: list):
@task
def generate_multiple_oclc_identifiers_email(**kwargs):
"""
Generates an email for review by staff when multiple OCLC numbers
exist for a record
"""
if len(multiple_codes) < 1:
reports = kwargs["reports"]

if len(reports) < 1:
logger.info("No multiple OCLC Identifiers")
return
logger.info(
f"Generating Email of Multiple OCLC Identifiers for {len(multiple_codes)}"
)
folio_url = Variable.get("FOLIO_URL")
devs_email = Variable.get("EMAIL_DEVS")

cohort_emails = _cohort_emails()

html_content = _oclc_identifiers(multiple_codes, folio_url)
for library, report in reports.items():

if is_production():
send_email_with_server_name(
to=[
devs_email,
cohort_emails["business"],
cohort_emails["hoover"],
cohort_emails["lane"],
cohort_emails["law"],
cohort_emails["sul"],
],
subject="Review Instances with Multiple OCLC Indentifiers",
html_content=html_content,
to_emails, subject_line = _match_oclc_library(
library=library,
to_emails=[Variable.get("EMAIL_DEVS")],
cohort_emails=cohort_emails,
subject_line="Review Instances with Multiple OCLC Identifiers",
)
else:
folio_url = folio_url.replace("https://", "").replace(".stanford.edu", "")

if not is_production():
to_emails.pop(0) # Should only send report to libsys devs

send_email_with_server_name(
to=[
devs_email,
],
subject=f"{folio_url} - Review Instances with Multiple OCLC Indentifiers",
html_content=html_content,
to=to_emails,
subject=subject_line,
html_content=_oclc_report_html(report, library),
)


Expand Down
94 changes: 41 additions & 53 deletions tests/data_exports/test_data_exports_emails.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

@pytest.fixture
def mock_folio_variables(monkeypatch):
def mock_get(key):
def mock_get(key, *args):
value = None
match key:
case "FOLIO_URL":
Expand Down Expand Up @@ -55,46 +55,40 @@ def mock_dag_run(mocker):


def test_multiple_oclc_email(mocker, mock_folio_variables):
mock_send_email = mocker.patch(
"libsys_airflow.plugins.data_exports.email.send_email_with_server_name"
)

generate_multiple_oclc_identifiers_email(
[
(
"ae0b6949-6219-51cd-9a61-7794c2081fe7",
"STF",
["(OCoLC-M)21184692", "(OCoLC-I)272673749"],
),
(
"0221724f-2bca-497b-8d42-6786295e7173",
"HIN",
["(OCoLC-M)99087632", "(OCoLC-I)889220055"],
),
]
mock_send_email = mocker.MagicMock()

mocker.patch.multiple(
"libsys_airflow.plugins.shared.utils",
send_email=mock_send_email,
is_production=lambda: False,
)
assert mock_send_email.called

html_body = BeautifulSoup(
mock_send_email.call_args[1]['html_content'], 'html.parser'
mocker.patch(
"libsys_airflow.plugins.data_exports.email.is_production",
return_value=False,
)

list_items = html_body.find_all("li")
generate_multiple_oclc_identifiers_email.function(
reports={
"STF": "/opt/airflow/data-export-files/oclc/reports/STF/multiple_oclc_numbers/2024-11-05T23:26:11.316254.html",
"HIN": "/opt/airflow/data-export-files/oclc/reports/HIN/multiple_oclc_numbers/2024-11-05T23:26:12.316254.html",
"S7Z": "/opt/airflow/data-export-files/oclc/reports/S7Z/multiple_oclc_numbers/2024-11-05T23:26:12.316254.html",
}
)
assert mock_send_email.call_count == 3

assert (
list_items[0]
.find("a")
.get("href")
.endswith("/inventory/viewsource/ae0b6949-6219-51cd-9a61-7794c2081fe7")
sul_html_body = BeautifulSoup(
mock_send_email.call_args_list[0][1]['html_content'], 'html.parser'
)

assert "(OCoLC-M)21184692" in list_items[0].text
sul_report_link = sul_html_body.find("a")

assert (
list_items[1]
.find("a")
.get("href")
.endswith("/inventory/viewsource/0221724f-2bca-497b-8d42-6786295e7173")
assert sul_report_link.text == "2024-11-05T23:26:11.316254.html"

assert mock_send_email.call_args_list[1][1]["to"] == ['[email protected]']
assert mock_send_email.call_args_list[1][1]["subject"].endswith(
"Multiple OCLC Identifiers Hoover"
)


Expand All @@ -103,43 +97,37 @@ def test_no_multiple_oclc_code_email(mocker, mock_folio_variables, caplog):
"libsys_airflow.plugins.data_exports.email.send_email_with_server_name"
)

generate_multiple_oclc_identifiers_email([])
generate_multiple_oclc_identifiers_email.function(reports={})

assert "No multiple OCLC Identifiers" in caplog.text


def test_nonprod_oclc_email(mocker, mock_folio_variables):
def test_prod_oclc_email(mocker, mock_folio_variables):
mock_send_email = mocker.patch(
"libsys_airflow.plugins.data_exports.email.send_email_with_server_name"
)

mocker.patch(
"libsys_airflow.plugins.data_exports.transmission_tasks.is_production",
return_value=False,
"libsys_airflow.plugins.data_exports.email.is_production",
return_value=True,
)

generate_multiple_oclc_identifiers_email(
[
(
"ae0b6949-6219-51cd-9a61-7794c2081fe7",
"STF",
["(OCoLC-M)21184692", "(OCoLC-I)272673749"],
),
(
"0221724f-2bca-497b-8d42-6786295e7173",
"HIN",
["(OCoLC-M)99087632", "(OCoLC-I)889220055"],
),
]
generate_multiple_oclc_identifiers_email.function(
reports={
"CASUM": "/opt/airflow/data-export-files/oclc/reports/CASUM/multiple_oclc_numbers/2024-11-05T23:26:12.316254.html",
"HIN": "/opt/airflow/data-export-files/oclc/reports/HIN/multiple_oclc_numbers/2024-11-05T23:26:12.316254.html",
"RCJ": "/opt/airflow/data-export-files/oclc/reports/RCJ/multiple_oclc_numbers/2024-11-05T23:26:12.316254.html",
}
)
assert mock_send_email.called

assert (
mock_send_email.call_args[1]["subject"]
== "folio-test - Review Instances with Multiple OCLC Indentifiers"
mock_send_email.call_args_list[1][1]["subject"]
== "Review Instances with Multiple OCLC Identifiers Hoover"
)
assert mock_send_email.call_args[1]['to'] == [
'[email protected]',
assert mock_send_email.call_args_list[1][1]['to'] == [
"[email protected]",
"[email protected]",
]


Expand Down

0 comments on commit 89e5dc4

Please sign in to comment.