-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathupdate_search.py
67 lines (52 loc) · 2.1 KB
/
update_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import os
import re
def extract_articles_from_md(file_path):
    """Extract article titles and links from a markdown file.

    The category is the text of the first level-1 heading, i.e. the first
    line that starts with a single ``#`` followed by whitespace. When the
    file has no such heading, the file name without its extension is used.
    Every list item of the form ``- [title](url)`` becomes one entry.

    Args:
        file_path: Path of the markdown file to scan.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``category`` and
        ``content`` (empty when the file has no matching list items), or
        ``None`` when the file could not be read. The read error is printed
        in that case.
    """
    # Only the read can realistically fail, so keep the guarded region small.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"Error processing {file_path}: {str(e)}")
        return None

    # Anchor at line start so "## Sub" or a mid-line "# " is not mistaken for
    # the h1; "$" (rather than "\n") also accepts a heading on the last line.
    heading = re.search(r"^#[ \t]+(.+?)[ \t]*$", content, re.MULTILINE)
    if heading:
        category = heading.group(1)
    else:
        # splitext drops only the final extension; str.replace would also
        # remove ".md" occurring in the middle of the name.
        category = os.path.splitext(os.path.basename(file_path))[0]

    # Keep both the link text and the URL of each "- [title](url)" item.
    article_matches = re.findall(r"- \[(.*?)\]\((.*?)\)", content)

    # One search entry per article.
    return [
        {
            "title": title,
            "url": url,
            "category": category,
            "content": f"{title} - {category} article about {title.lower()}",
        }
        for title, url in article_matches
    ]
def generate_search_json(pages_dir="pages", output_path="search.json"):
    """Generate the search index from all markdown files in a directory.

    Each ``*.md`` file directly inside ``pages_dir`` is passed to
    ``extract_articles_from_md``; the collected entries are sorted by title
    and written to ``output_path`` as indented, non-ASCII-preserving JSON.

    Args:
        pages_dir: Directory containing the markdown pages. If it is not an
            existing directory, a warning is printed and an empty index is
            written.
        output_path: Path of the JSON file to write.
    """
    search_data = []

    # isdir (not exists): a regular file named like pages_dir would make
    # os.listdir raise.
    if os.path.isdir(pages_dir):
        # os.listdir returns names in arbitrary order; sorting them makes the
        # relative order of equally-titled entries reproducible, because the
        # title sort below is stable.
        for filename in sorted(os.listdir(pages_dir)):
            if not filename.endswith(".md"):
                continue
            file_path = os.path.join(pages_dir, filename)
            articles = extract_articles_from_md(file_path)
            # None (unreadable file) and [] (no articles) are both skipped.
            if articles:
                search_data.extend(articles)
    else:
        print(f"Warning: pages directory '{pages_dir}' not found; writing an empty index")

    # Sort entries by title.
    search_data.sort(key=lambda x: x["title"])

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(search_data, f, indent=2, ensure_ascii=False)

    print(f"Successfully generated {output_path} with {len(search_data)} articles")
# Script entry point: rebuild the search index only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    generate_search_json()