Skip to content

Commit 4a0ae38

Browse files
authored
Add last verified script (#3154)
- Add a script that inserts Created On, Last Updated On, Verified On after the title in the tutorials - The .json with the tutorials audit data that is used for last verified is stored in the "last-reviewed-data-json" branch and downloaded via a new Makefile command - Modified the GH tutorilas-build workflow to fetch all log history.
1 parent 9242c60 commit 4a0ae38

File tree

3 files changed

+175
-0
lines changed

3 files changed

+175
-0
lines changed

.github/workflows/build-tutorials.yml

+4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ jobs:
4444
4545
- name: Checkout Tutorials
4646
uses: actions/checkout@v3
47+
with:
48+
fetch-depth: 0
4749

4850
- name: Setup Linux
4951
uses: pytorch/pytorch/.github/actions/setup-linux@main
@@ -115,6 +117,8 @@ jobs:
115117
116118
- name: Checkout Tutorials
117119
uses: actions/checkout@v3
120+
with:
121+
fetch-depth: 0
118122

119123
- name: Setup Linux
120124
uses: pytorch/pytorch/.github/actions/setup-linux@main

.jenkins/insert_last_verified.py

+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import json
2+
import os
3+
import subprocess
4+
import sys
5+
from datetime import datetime
6+
7+
from bs4 import BeautifulSoup
8+
9+
10+
json_file_path = "tutorials-review-data.json"
11+
12+
# paths to skip from the post-processing script
13+
paths_to_skip = [
14+
"beginner/examples_autograd/two_layer_net_custom_function", # not present in the repo
15+
"beginner/examples_nn/two_layer_net_module", # not present in the repo
16+
"beginner/examples_tensor/two_layer_net_numpy", # not present in the repo
17+
"beginner/examples_tensor/two_layer_net_tensor", # not present in the repo
18+
"beginner/examples_autograd/two_layer_net_autograd", # not present in the repo
19+
"beginner/examples_nn/two_layer_net_optim", # not present in the repo
20+
"beginner/examples_nn/two_layer_net_nn", # not present in the repo
21+
"intermediate/coding_ddpg", # not present in the repo - will delete the carryover
22+
]
23+
# Mapping of source directories to build directories
24+
source_to_build_mapping = {
25+
"beginner": "beginner_source",
26+
"recipes": "recipes_source",
27+
"distributed": "distributed",
28+
"intermediate": "intermediate_source",
29+
"prototype": "prototype_source",
30+
"advanced": "advanced_source",
31+
"": "", # root dir for index.rst
32+
}
33+
34+
def get_git_log_date(file_path, git_log_args):
35+
try:
36+
result = subprocess.run(
37+
["git", "log"] + git_log_args + ["--", file_path],
38+
capture_output=True,
39+
text=True,
40+
check=True,
41+
)
42+
if result.stdout:
43+
date_str = result.stdout.splitlines()[0]
44+
return datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %z")
45+
except subprocess.CalledProcessError:
46+
pass
47+
raise ValueError(f"Could not find date for {file_path}")
48+
49+
def get_creation_date(file_path):
50+
return get_git_log_date(file_path, ["--diff-filter=A", "--format=%aD"]).strftime("%b %d, %Y")
51+
52+
53+
def get_last_updated_date(file_path):
54+
return get_git_log_date(file_path, ["-1", "--format=%aD"]).strftime("%b %d, %Y")
55+
56+
# Try to find the source file with the given base path and the extensions .rst and .py
57+
def find_source_file(base_path):
58+
for ext in [".rst", ".py"]:
59+
source_file_path = base_path + ext
60+
if os.path.exists(source_file_path):
61+
return source_file_path
62+
return None
63+
64+
65+
# Function to process a JSON file and insert the "Last Verified" information into the HTML files
66+
def process_json_file(build_dir , json_file_path):
67+
with open(json_file_path, "r", encoding="utf-8") as json_file:
68+
json_data = json.load(json_file)
69+
70+
for entry in json_data:
71+
path = entry["Path"]
72+
last_verified = entry["Last Verified"]
73+
status = entry.get("Status", "")
74+
if path in paths_to_skip:
75+
print(f"Skipping path: {path}")
76+
continue
77+
if status in ["needs update", "not verified"]:
78+
formatted_last_verified = "Not Verified"
79+
elif last_verified:
80+
try:
81+
last_verified_date = datetime.strptime(last_verified, "%Y-%m-%d")
82+
formatted_last_verified = last_verified_date.strftime("%b %d, %Y")
83+
except ValueError:
84+
formatted_last_verified = "Unknown"
85+
else:
86+
formatted_last_verified = "Not Verified"
87+
if status == "deprecated":
88+
formatted_last_verified += "Deprecated"
89+
90+
for build_subdir, source_subdir in source_to_build_mapping.items():
91+
if path.startswith(build_subdir):
92+
html_file_path = os.path.join(build_dir, path + ".html")
93+
base_source_path = os.path.join(
94+
source_subdir, path[len(build_subdir) + 1 :]
95+
)
96+
source_file_path = find_source_file(base_source_path)
97+
break
98+
else:
99+
print(f"Warning: No mapping found for path {path}")
100+
continue
101+
102+
if not os.path.exists(html_file_path):
103+
print(
104+
f"Warning: HTML file not found for path {html_file_path}."
105+
"If this is a new tutorial, please add it to the audit JSON file and set the Verified status and todays's date."
106+
)
107+
continue
108+
109+
if not source_file_path:
110+
print(f"Warning: Source file not found for path {base_source_path}.")
111+
continue
112+
113+
created_on = get_creation_date(source_file_path)
114+
last_updated = get_last_updated_date(source_file_path)
115+
116+
with open(html_file_path, "r", encoding="utf-8") as file:
117+
soup = BeautifulSoup(file, "html.parser")
118+
# Check if the <p> tag with class "date-info-last-verified" already exists
119+
existing_date_info = soup.find("p", {"class": "date-info-last-verified"})
120+
if existing_date_info:
121+
print(
122+
f"Warning: <p> tag with class 'date-info-last-verified' already exists in {html_file_path}"
123+
)
124+
continue
125+
126+
h1_tag = soup.find("h1") # Find the h1 tag to insert the dates
127+
if h1_tag:
128+
date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
129+
date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
130+
# Add the "Created On", "Last Updated", and "Last Verified" information
131+
date_info_tag.string = (
132+
f"Created On: {created_on} | "
133+
f"Last Updated: {last_updated} | "
134+
f"Last Verified: {formatted_last_verified}"
135+
)
136+
# Insert the new tag after the <h1> tag
137+
h1_tag.insert_after(date_info_tag)
138+
# Save back to the HTML.
139+
with open(html_file_path, "w", encoding="utf-8") as file:
140+
file.write(str(soup))
141+
else:
142+
print(f"Warning: <h1> tag not found in {html_file_path}")
143+
144+
145+
def main():
146+
if len(sys.argv) < 2:
147+
print("Error: Build directory not provided. Exiting.")
148+
exit(1)
149+
build_dir = sys.argv[1]
150+
print(f"Build directory: {build_dir}")
151+
process_json_file(build_dir , json_file_path)
152+
print(
153+
"Finished processing JSON file. Please check the output for any warnings. "
154+
"Pages like `nlp/index.html` are generated only during the full `make docs` "
155+
"or `make html` build. Warnings about these files when you run `make html-noplot` "
156+
"can be ignored."
157+
)
158+
159+
if __name__ == "__main__":
160+
main()

Makefile

+11
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,29 @@ download:
8686
wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip -P $(DATADIR)
8787
unzip -o $(DATADIR)/PennFudanPed.zip -d intermediate_source/data/
8888

89+
download-last-reviewed-json:
90+
@echo "Downloading tutorials-review-data.json..."
91+
curl -o tutorials-review-data.json https://raw.githubusercontent.com/pytorch/tutorials/refs/heads/last-reviewed-data-json/tutorials-review-data.json
92+
@echo "Finished downloading tutorials-review-data.json."
8993
docs:
9094
make download
95+
make download-last-reviewed-json
9196
make html
97+
@python .jenkins/insert_last_verified.py $(BUILDDIR)/html
9298
rm -rf docs
9399
cp -r $(BUILDDIR)/html docs
94100
touch docs/.nojekyll
101+
rm -rf tutorials-review-data.json
95102

96103
html-noplot:
97104
$(SPHINXBUILD) -D plot_gallery=0 -b html $(SPHINXOPTS) "$(SOURCEDIR)" "$(BUILDDIR)/html"
98105
# bash .jenkins/remove_invisible_code_block_batch.sh "$(BUILDDIR)/html"
99106
@echo
107+
make download-last-reviewed-json
100108
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
109+
@echo "Running post-processing script to insert 'Last Verified' dates..."
110+
@python .jenkins/insert_last_verified.py $(BUILDDIR)/html
111+
rm -rf tutorials-review-data.json
101112

102113
clean-cache:
103114
make clean

0 commit comments

Comments
 (0)