Skip to content

Commit e1d78b5

Browse files
authored
Merge pull request #357 from catalyst-cooperative/add-failures-to-slackbot
MVP improvements to automated archive runs
2 parents b562c0d + 140473c commit e1d78b5

File tree

3 files changed

+108
-52
lines changed

3 files changed

+108
-52
lines changed

.github/ISSUE_TEMPLATE/monthly-archive-update.md

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
---
22
name: Monthly archive update
33
about: Template for publishing monthly archives.
4-
title: Publish archives for the month of MONTH
4+
title: Publish {{ date | date('MMMM Do YYYY') }} archives
55
labels: automation, zenodo
6-
assignees: ''
6+
assignees: e-belfer
77

88
---
99

10+
# Summary of results:
11+
See the job run logs and results [here]({{ env.RUN_URL }}).
12+
1013
# Review and publish archives
1114

1215
For each of the following archives, find the run status in the GitHub archiver run. If validation tests pass, manually review the archive and publish. If no changes are detected, delete the draft. If changes are detected, manually review the archive following the guidelines in step 3 of `README.md`, then publish the new version. Then check the box here to confirm publication status, adding a note on the status (e.g., "v1 published", "no changes detected, draft deleted"):
@@ -50,8 +53,5 @@ If the validation failure is blocking (e.g., file format incorrect, whole datase
5053
For each run that failed for another reason (e.g., underlying data changes, code failures), create an issue describing the failure and take the necessary steps to resolve it.
5154

5255
```[tasklist]
53-
- [ ]
56+
- [ ] dataset
5457
```
55-
56-
# Relevant logs
57-
[Link to logs from GHA run]( PLEASE FIND THE ACTUAL LINK AND FILL IN HERE )

.github/workflows/run-archiver.yml

+38-25
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,23 @@ name: run-archiver
33

44
on:
55
workflow_dispatch:
6+
inputs:
7+
small_runner:
8+
description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").'
9+
# We can't pass env variables to the workflow_dispatch, so we manually list all small datasets here.
10+
default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"'
11+
required: true
12+
type: string
13+
large_runner:
14+
description: "Kick off large runners (for epacems)?"
15+
required: true
16+
default: false
17+
type: boolean
18+
create_github_issue:
19+
description: "Create a Github issue from this run?"
20+
default: false
21+
required: true
22+
type: boolean
623
schedule:
724
- cron: "21 8 1 * *" # 8:21 AM UTC, first of every month
825

@@ -13,28 +30,8 @@ jobs:
1330
shell: bash -l {0}
1431
strategy:
1532
matrix:
16-
dataset:
17-
- eia176
18-
- eia191
19-
- eia757a
20-
- eia860
21-
- eia861
22-
- eia860m
23-
- eia923
24-
- eia930
25-
- eiaaeo
26-
- eiawater
27-
- eia_bulk_elec
28-
- epacamd_eia
29-
- ferc1
30-
- ferc2
31-
- ferc6
32-
- ferc60
33-
- ferc714
34-
- mshamines
35-
- nrelatb
36-
- phmsagas
37-
33+
# Note that we can't pass global env variables to the matrix, so we manually reproduce the list of datasets here.
34+
dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' )) }}
3835
fail-fast: false
3936
runs-on: ubuntu-latest
4037
steps:
@@ -78,20 +75,20 @@ jobs:
7875
path: ${{ matrix.dataset }}_run_summary.json
7976

8077
archive-run-large:
78+
if: ${{ github.event_name == 'schedule' || inputs.large_runner }}
8179
defaults:
8280
run:
8381
shell: bash -l {0}
8482
strategy:
8583
matrix:
86-
dataset:
87-
- epacems
84+
# Note that we can't pass global env variables to the matrix, so we manually list the datasets here.
85+
dataset: ${{ fromJSON(format('[{0}]', '"epacems"' )) }}
8886
fail-fast: false
8987
runs-on:
9088
group: large-runner-group
9189
labels: ubuntu-22.04-4core
9290
steps:
9391
- uses: actions/checkout@v4
94-
9592
- name: Install Conda environment using mamba
9693
uses: mamba-org/setup-micromamba@v1
9794
with:
@@ -160,3 +157,19 @@ jobs:
160157
payload: ${{ steps.all_summaries.outputs.SLACK_PAYLOAD }}
161158
env:
162159
SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }}
160+
161+
make-github-issue:
162+
if: ${{ always() && (github.event_name == 'schedule' || inputs.create_github_issue == true) }}
163+
runs-on: ubuntu-latest
164+
needs:
165+
- archive-run-small
166+
- archive-run-large
167+
steps:
168+
- uses: actions/checkout@v3
169+
- name: Create an issue
170+
uses: JasonEtco/create-an-issue@v2
171+
env:
172+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
173+
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
174+
with:
175+
filename: .github/ISSUE_TEMPLATE/monthly-archive-update.md

scripts/make_slack_notification_message.py

+64-21
Original file line numberDiff line numberDiff line change
@@ -29,41 +29,81 @@ def _parse_args():
2929
return parser.parse_args()
3030

3131

32+
def _format_message(
    url: str, name: str, content: str, max_len: int = 3000
) -> list[dict]:
    """Wrap one dataset's report in a single Slack ``section`` block.

    The block text is the dataset name, bolded and hyperlinked to ``url``,
    followed on the next line by ``content``.

    Args:
        url: Link target for the dataset name.
        name: Dataset name shown as the link label.
        content: Report body placed under the linked name.
        max_len: Maximum number of characters kept from the combined text;
            anything beyond it is cut off.
            NOTE(review): 3000 presumably mirrors Slack's limit on section
            text -- confirm against the Block Kit reference.

    Returns:
        A one-element list holding the ``section`` block.
    """
    linked_title = f"<{url}|*{name}*>"
    # Truncation is applied to the whole string (title included), not just
    # to ``content``.
    body = f"{linked_title}\n{content}"[:max_len]
    section = {
        "type": "section",
        "text": {"type": "mrkdwn", "text": body},
    }
    return [section]
42+
43+
44+
def _format_failures(summary: dict) -> list[dict]:
    """Build the Slack block listing a dataset's blocking validation failures.

    A validation test is reported only when it did not succeed *and* is
    flagged ``required_for_run_success``.

    Args:
        summary: One run summary. Reads ``dataset_name``, ``record_url`` and
            ``validation_tests``; each test entry must provide ``name``,
            ``success``, ``required_for_run_success`` and ``notes`` (an
            iterable of strings).

    Returns:
        The single-element block list produced by ``_format_message``, or
        ``None`` when no required test failed.
    """
    name = summary["dataset_name"]
    url = summary["record_url"]

    # Collect one "<test name>. <note>. <note>" entry per failing required
    # test. Entries are accumulated (not overwritten) so that every failure
    # is reported, not just the last one encountered.
    test_failures = [
        ". ".join([validation_test["name"], *validation_test["notes"]])
        for validation_test in summary["validation_tests"]
        if not validation_test["success"]
        and validation_test["required_for_run_success"]
    ]
    if not test_failures:
        return None

    # Render as a fenced JSON list so Slack shows it as a code block.
    failures = f"```\n{json.dumps(test_failures, indent=2)}\n```"
    return _format_message(url=url, name=name, content=failures)
63+
64+
65+
def _format_summary(summary: dict) -> list[dict]:
    """Build the Slack block describing a dataset's file changes.

    Args:
        summary: One run summary. Reads ``dataset_name``, ``record_url``,
            ``validation_tests`` (each with ``success``) and ``file_changes``
            (each with ``diff_type`` and ``name``).

    Returns:
        The block list produced by ``_format_message``. Its content is either
        a fenced JSON mapping of diff type to changed file names, or the text
        "No changes." when ``file_changes`` is empty. Returns ``None`` when
        any validation test did not succeed.
    """
    dataset = summary["dataset_name"]
    record_link = summary["record_url"]

    # File changes are not reported for a dataset with any failed test.
    # NOTE(review): this checks every test, while _format_failures only
    # reports tests flagged required_for_run_success -- a dataset whose only
    # failures are non-required appears to get no block at all. Confirm that
    # this is intended.
    every_test_passed = all(
        check["success"] for check in summary["validation_tests"]
    )
    if not every_test_passed:
        return None

    file_changes = summary["file_changes"]
    if not file_changes:
        return _format_message(url=record_link, name=dataset, content="No changes.")

    # Group changed file names under their diff type, keeping first-seen order.
    names_by_diff_type = {}
    for change in file_changes:
        names_by_diff_type.setdefault(change["diff_type"], []).append(
            change["name"]
        )
    report = "```\n" + json.dumps(names_by_diff_type, indent=2) + "\n```"
    return _format_message(url=record_link, name=dataset, content=report)
80+
81+
3282
def main(summary_files: list[Path]) -> None:
3383
"""Format summary files for Slack perusal."""
3484
summaries = []
3585
for summary_file in summary_files:
3686
with summary_file.open() as f:
3787
summaries.extend(json.loads(f.read()))
3888

39-
def format_summary(summary: dict) -> list[dict]:
40-
name = summary["dataset_name"]
41-
url = summary["record_url"]
42-
if file_changes := summary["file_changes"]:
43-
abridged_changes = defaultdict(list)
44-
for change in file_changes:
45-
abridged_changes[change["diff_type"]].append(change["name"])
46-
changes = f"```\n{json.dumps(abridged_changes, indent=2)}\n```"
47-
else:
48-
changes = "No changes."
49-
50-
max_len = 3000
51-
text = f"<{url}|*{name}*>\n{changes}"[:max_len]
52-
return [
53-
{
54-
"type": "section",
55-
"text": {"type": "mrkdwn", "text": text},
56-
},
57-
]
89+
failed_blocks = list(
90+
itertools.chain.from_iterable(
91+
_format_failures(s) for s in summaries if _format_failures(s) is not None
92+
)
93+
)
5894

5995
unchanged_blocks = list(
6096
itertools.chain.from_iterable(
61-
format_summary(s) for s in summaries if not s["file_changes"]
97+
_format_summary(s)
98+
for s in summaries
99+
if (not s["file_changes"]) and (_format_summary(s) is not None)
62100
)
63101
)
64102
changed_blocks = list(
65103
itertools.chain.from_iterable(
66-
format_summary(s) for s in summaries if s["file_changes"]
104+
_format_summary(s)
105+
for s in summaries
106+
if (s["file_changes"]) and (_format_summary(s) is not None)
67107
)
68108
)
69109

@@ -73,6 +113,8 @@ def header_block(text: str) -> dict:
73113
def section_block(text: str) -> dict:
74114
return {"type": "section", "text": {"type": "mrkdwn", "text": text}}
75115

116+
if failed_blocks:
117+
failed_blocks = [section_block("*Validation Failures*")] + failed_blocks
76118
if changed_blocks:
77119
changed_blocks = [section_block("*Changed*")] + changed_blocks
78120
if unchanged_blocks:
@@ -84,6 +126,7 @@ def section_block(text: str) -> dict:
84126
"attachments": [
85127
{
86128
"blocks": [header_block("Archiver Run Outcomes")]
129+
+ failed_blocks
87130
+ changed_blocks
88131
+ unchanged_blocks,
89132
}

0 commit comments

Comments
 (0)