Skip to content

Commit f5f1413

Browse files
authored
Merge pull request #186 from saharsh-agrawal/caching
Caching done correctly (at least locally)
2 parents 610b6b2 + 323a127 commit f5f1413

File tree

8 files changed

+152
-69
lines changed

8 files changed

+152
-69
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ venv
1313

1414
*.pdf
1515
Academic_Cal-j/**
16-
final.json
16+
cache/**
1717

1818
# auto login
1919
erpcreds.py

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,6 @@ RUN chmod +x ./postinstall.sh
2020

2121
COPY . .
2222

23-
RUN python download-calendar.py
23+
# RUN python download-calendar.py
2424

2525
CMD [ "./postinstall.sh", "gunicorn", "--bind", "0.0.0.0:8000", "wsgi:app" ]

app.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,17 @@ def download_ics():
217217

218218
# Create an in-memory file-like object for the ics content
219219
ics_file = io.BytesIO()
220-
ics_file.write(ics_content.encode("utf-8"))
220+
# generate_ics returns bytes already; write directly
221+
if isinstance(ics_content, str):
222+
ics_content = ics_content.encode("utf-8")
223+
ics_file.write(ics_content)
221224
ics_file.seek(0)
222225

223226
return send_file(
224227
ics_file,
225228
as_attachment=True,
226229
mimetype="text/calendar",
227-
download_name=f"${roll_number}-timetable.ics",
230+
download_name=f"{roll_number}-timetable.ics",
228231
)
229232
except Exception as e:
230233
return jsonify({"status": "error", "message": str(e)}), 500
@@ -260,7 +263,9 @@ def image_parser():
260263

261264
# Create an in-memory file-like object for the ics content
262265
ics_file = io.BytesIO()
263-
ics_file.write(ics_content.encode("utf-8"))
266+
if isinstance(ics_content, str):
267+
ics_content = ics_content.encode("utf-8")
268+
ics_file.write(ics_content)
264269
ics_file.seek(0)
265270

266271
return send_file(

timetable/generate_ics.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from datetime import timedelta
55
from timetable import Course
66
from utils import academic_calander_handler, dates, build_event_duration, generate_india_time
7+
from utils.holidays_handler import get_holidays
78

89

910
def generate_ics(courses: list[Course], output_filename, is_web=False):
@@ -60,7 +61,7 @@ def first_occurrence_of_day(day_name: str):
6061
cal.add_component(event)
6162

6263
# add holidays (as all-day events in Asia/Kolkata)
63-
for holiday in dates.holidays:
64+
for holiday in get_holidays():
6465
event = Event()
6566
event.add("summary", "INSTITUTE HOLIDAY : " + holiday[0])
6667
hdt = holiday[1]

timetable/google_calendar.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
from utils import (
1313
END_TERM_BEGIN,
1414
SEM_BEGIN,
15-
holidays,
1615
dates,
1716
generate_india_time,
1817
)
18+
from utils.holidays_handler import get_holidays
1919
from utils import academic_calander_handler
2020
from timetable import Course
2121

@@ -132,7 +132,7 @@ def first_occurrence_of_day(day_name: str):
132132
batch.execute() ## execute batch of timetable
133133

134134
# add holidays to calendar as all-day events (Asia/Kolkata midnight)
135-
for holiday in holidays:
135+
for holiday in get_holidays():
136136
hdt = holiday[1]
137137
start_str = generate_india_time(hdt.year, hdt.month, hdt.day, 0, 0).strftime("%Y-%m-%dT00:00:00")
138138
end_dt = generate_india_time(hdt.year, hdt.month, hdt.day, 0, 0) + timedelta(days=1)
@@ -148,6 +148,7 @@ def first_occurrence_of_day(day_name: str):
148148
},
149149
}
150150
service.events().insert(calendarId=calendar_id, body=holiday_event).execute()
151+
print("Added holidays")
151152

152153
# add academic calendar entries as all-day events
153154
for entry in academic_calander_handler.get_academic_calendar(is_web):
@@ -159,6 +160,7 @@ def first_occurrence_of_day(day_name: str):
159160
"end": {"dateTime": end_str, "timeZone": "Asia/Kolkata"},
160161
}
161162
service.events().insert(calendarId=calendar_id, body=event).execute()
163+
print("Added academic calendar entries")
162164

163165
print("\nAll events added successfully!\n")
164166

utils/academic_calander_handler.py

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import tempfile
21
from datetime import datetime, timedelta
32
import glob
43
import camelot
@@ -11,6 +10,7 @@
1110
import re
1211

1312
JSON_FOLDER_NAME = 'Academic_Cal-j'
13+
CACHE_DIR = 'cache'
1414

1515

1616
@dataclass
@@ -37,6 +37,18 @@ def get_latest_calendar_name():
3737
return filename
3838

3939

40+
def ensure_cache_dir():
    """Make sure the directory named by CACHE_DIR exists, creating it if needed."""
    if os.path.isdir(CACHE_DIR):
        # Already present: nothing to do.
        return
    os.makedirs(CACHE_DIR, exist_ok=True)
43+
44+
45+
def get_cache_path():
    """Return the JSON cache path for the latest academic calendar.

    The cache file takes the latest calendar's file name with its extension
    replaced by ``.json`` and lives inside CACHE_DIR. The cache directory is
    created as a side effect.
    """
    stem, _extension = os.path.splitext(get_latest_calendar_name())
    ensure_cache_dir()
    return os.path.join(CACHE_DIR, f"{stem}.json")
50+
51+
4052
def is_file_present(file):
4153
if (os.path.exists(cwd() + '/' + file) or
4254
os.path.exists(cwd() + '/' + file + '/')
@@ -77,7 +89,7 @@ def get_latest_calendar(is_web=False):
7789

7890
try:
7991
with open(filename, "wb") as file:
80-
response = requests.get(url)
92+
response = requests.get(url, timeout=15)
8193
response.raise_for_status()
8294
file.write(response.content)
8395
except Exception as e:
@@ -129,7 +141,7 @@ def export_json():
129141
def get_json_files():
130142
folder_path = cwd() + '/' + JSON_FOLDER_NAME
131143
if (is_file_present(JSON_FOLDER_NAME)):
132-
files = glob.glob(folder_path + '/*.json', include_hidden=True)
144+
files = glob.glob(folder_path + '/*.json')
133145
return files
134146
else:
135147
return []
@@ -141,27 +153,37 @@ def merge_json():
141153
with open(file) as f:
142154
data = json.load(f)
143155
merged_data.extend(data)
144-
145-
with open('final.json', "w") as f:
146-
json.dump(merged_data, f, indent=4)
147-
148156
return merged_data
149157

158+
def cleanup_artifacts():
    """Remove intermediate artifacts: extracted JSON folder, zip, and PDF.

    Every removal is best-effort: a failure for one artifact is ignored and
    the remaining artifacts are still attempted.
    """
    # Each target is resolved lazily inside the try block, so an error raised
    # while computing a name is ignored exactly like an error while deleting.
    artifact_resolvers = (
        lambda: JSON_FOLDER_NAME,
        lambda: JSON_FOLDER_NAME + '.zip',
        lambda: get_latest_calendar_name(),
    )
    for resolve_artifact in artifact_resolvers:
        try:
            delete_file(resolve_artifact())
        except Exception:
            continue
150172

151-
def clean_temp_files():
152-
base = tempfile.gettempdir()
153-
for filename in os.listdir(base):
154-
if not filename.startswith('tmp') or len(filename) != 11:
155-
continue
156-
fullpath = os.path.join(base, filename)
173+
def get_academic_calendar(is_web = False) -> list[DataEntry]:
174+
# Try cache first
175+
cache_path = get_cache_path()
176+
if os.path.isfile(cache_path):
157177
try:
158-
shutil.rmtree(fullpath)
159-
except Exception as E:
160-
print(E)
161-
continue
178+
with open(cache_path, 'r', encoding='utf-8') as f:
179+
cached = json.load(f)
180+
entries = [DataEntry(start_date=datetime.fromisoformat(x['start_date']),
181+
end_date=datetime.fromisoformat(x['end_date']),
182+
event=x['event']) for x in cached]
183+
return entries
184+
except Exception:
185+
pass
162186

163-
164-
def get_academic_calendar(is_web = False) -> list[DataEntry]:
165187
get_latest_calendar(is_web)
166188
export_json()
167189

@@ -223,13 +245,20 @@ def get_academic_calendar(is_web = False) -> list[DataEntry]:
223245
if (len(annual_convocation) == 2 and ("annual" in annual_convocation or "convocation" in annual_convocation)):
224246
break
225247

226-
## This has to be done to remove temporary files created by camelot. These files are not automatically
227-
## deleted until program exits
228-
## This is not ideal, and might be dangerous (and invisible) if other programs are creating similar directories often
229-
## Nothing else can be done without modifying `camelot`.
248+
# Cache for subsequent runs
230249
try:
231-
clean_temp_files()
232-
except Exception as E:
233-
print(E)
250+
ensure_cache_dir()
251+
with open(cache_path, 'w', encoding='utf-8') as f:
252+
json.dump([
253+
{
254+
'start_date': e.start_date.isoformat(),
255+
'end_date': e.end_date.isoformat(),
256+
'event': e.event,
257+
} for e in main_dates
258+
], f, indent=2)
259+
except Exception:
260+
pass
261+
# After caching, remove intermediate artifacts
262+
cleanup_artifacts()
234263

235264
return main_dates

utils/dates.py

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
from __future__ import print_function
2-
import requests
32
from datetime import datetime, timedelta, date
4-
from bs4 import BeautifulSoup as bs
5-
from utils import build_event
63
import sys
4+
from utils import build_event
5+
from utils.holidays_handler import get_holidays
76

87

98
SEM_BEGIN = build_event.generate_india_time(2025, 7, 16, 0, 0)
@@ -14,36 +13,6 @@
1413
AUT_BREAK_END = build_event.generate_india_time(2025, 10, 5, 0, 0)
1514

1615

17-
### getting holidays
18-
def get_holidays() -> tuple[list[tuple[str, datetime]], dict[str, list[datetime]]]:
19-
"""
20-
scrapes holiday list from IITKGP website
21-
returns: list of holidays as occasions and datetime objects
22-
"""
23-
url = "https://www.iitkgp.ac.in/holidays"
24-
result = requests.get(url).text
25-
doc = bs(result, "html.parser")
26-
tbody = doc.tbody
27-
trs = tbody.contents
28-
holidays = []
29-
for i in range(3, len(trs) - 7, 2):
30-
cnt = 0
31-
for tr in trs[i]:
32-
cnt = cnt + 1
33-
if cnt == 2:
34-
occasion = tr.string
35-
if cnt == 4:
36-
datetime_str = tr.string
37-
d = (int)(datetime_str[:2])
38-
m = (int)(datetime_str[3:5])
39-
y = (int)(datetime_str[6:])
40-
hol_date = build_event.generate_india_time(y, m, d, 0, 0)
41-
holidays.append([occasion, hol_date])
42-
43-
holidays.sort(key=lambda x: x[1])
44-
return holidays
45-
46-
4716
def daterange(start_dt: datetime, end_dt: datetime):
4817
"""
4918
Yield all dates d such that start_dt.date() <= d.date() <= end_dt.date().
@@ -64,6 +33,9 @@ def get_class_off_dates_in_semester() -> list[datetime]:
6433
- Entire autumn break range [AUT_BREAK_BEGIN, AUT_BREAK_END]
6534
Returns tz-aware datetimes at 00:00 Asia/Kolkata for each day off.
6635
"""
36+
# Fetch holidays
37+
holidays = get_holidays()
38+
6739
# Build from the scraped holidays and fixed ranges
6840
off = set()
6941
# Holidays that fall within the semester window (exclude artificial boundary markers)
@@ -108,8 +80,6 @@ def get_class_off_dates_in_semester() -> list[datetime]:
10880
print("Note: SEM_BEGIN < MID_TERM_BEGIN < MID_TERM_END < END_TERM_BEGIN")
10981
sys.exit(1)
11082

111-
holidays = get_holidays()
112-
11383
def next_weekday(current_day: datetime, weekday: str) -> datetime:
11484
days = {
11585
"Monday": 0,

utils/holidays_handler.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import json
5+
from datetime import datetime
6+
from typing import List, Tuple
7+
8+
import requests
9+
from bs4 import BeautifulSoup as bs
10+
11+
from utils.build_event import generate_india_time
12+
13+
14+
CACHE_DIR = 'cache'
15+
16+
17+
def ensure_cache_dir():
    """Create the CACHE_DIR directory when it is not already present."""
    cache_dir_missing = not os.path.isdir(CACHE_DIR)
    if cache_dir_missing:
        os.makedirs(CACHE_DIR, exist_ok=True)
20+
21+
22+
def holidays_cache_path() -> str:
    """Return the path of the holiday cache file for the current calendar year.

    The cache directory is created first, so the returned path can be opened
    for writing right away.
    """
    ensure_cache_dir()
    year = datetime.today().year
    cache_filename = f"holidays_{year}.json"
    return os.path.join(CACHE_DIR, cache_filename)
26+
27+
28+
def _read_holiday_cache(cache_path: str) -> List[Tuple[str, datetime]]:
    """Return the holidays stored at *cache_path*, or ``[]`` if it is unusable."""
    try:
        with open(cache_path, 'r', encoding='utf-8') as f:
            raw = json.load(f)
        return [(r[0], generate_india_time(*r[1])) for r in raw]
    except (OSError, ValueError, TypeError, IndexError, KeyError):
        # Missing, unreadable, corrupt or wrongly shaped cache: treat it as a
        # cache miss so the caller falls back to scraping.
        return []


def _scrape_holidays() -> List[Tuple[str, datetime]]:
    """Scrape the institute holiday table and return it sorted by date."""
    url = "https://www.iitkgp.ac.in/holidays"
    response = requests.get(url, timeout=15)
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()

    doc = bs(response.text, "html.parser")
    tbody = doc.tbody
    if tbody is None:
        raise ValueError(f"No holiday table found at {url}")
    trs = tbody.contents
    holidays: List[Tuple[str, datetime]] = []

    # NOTE(review): the start offset, end offset and step are tied to the
    # current page layout (presumably skipping header/footer rows and the
    # whitespace nodes between rows) -- confirm against the live page.
    for i in range(3, len(trs) - 7, 2):
        cnt = 0
        occasion = None
        for tr in trs[i]:
            cnt += 1
            if cnt == 2:
                occasion = tr.string
            if cnt == 4:
                datetime_str = tr.string
                if occasion is None or datetime_str is None:
                    # A row without a plain-text occasion or date cannot be
                    # turned into a usable (occasion, date) pair; skip it
                    # rather than emit a None that consumers would choke on.
                    continue
                try:
                    dt = datetime.strptime(datetime_str.strip(), "%d.%m.%Y")
                except ValueError:
                    # Not a dd.mm.yyyy date (e.g. a heading row): skip it.
                    continue
                hol_date = generate_india_time(dt.year, dt.month, dt.day, 0, 0)
                holidays.append((occasion, hol_date))

    holidays.sort(key=lambda x: x[1])
    return holidays


def _write_holiday_cache(cache_path: str, holidays: List[Tuple[str, datetime]]) -> None:
    """Best-effort write of *holidays* to *cache_path*; empty lists are not cached."""
    if not holidays:
        # Caching an empty scrape would pin "no holidays" for the whole year.
        return
    # Build the payload before opening the file so a serialization problem
    # cannot leave a truncated cache file behind.
    # Stored as [[occasion, [year, month, day, hour, minute]], ...]
    payload = [[h[0], [h[1].year, h[1].month, h[1].day, 0, 0]] for h in holidays]
    try:
        with open(cache_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2)
    except OSError:
        # The cache is only an optimisation; an unwritable cache directory
        # must not prevent returning the freshly scraped holidays.
        pass


def get_holidays(refresh: bool = False) -> List[Tuple[str, datetime]]:
    """
    Fetch IITKGP institute holidays with caching.

    Args:
        refresh: When True, ignore any cached file and scrape the website again.

    Returns:
        A list of tuples ``(occasion, date)`` sorted by date, where ``date`` is
        built with ``generate_india_time(year, month, day, 0, 0)``.

    Raises:
        requests.RequestException: The holiday page could not be fetched or
            returned an HTTP error status.
        ValueError: The fetched page contains no table body to parse.
    """
    cache_path = holidays_cache_path()

    if not refresh:
        cached = _read_holiday_cache(cache_path)
        if cached:
            return cached

    holidays = _scrape_holidays()
    _write_holiday_cache(cache_path, holidays)
    return holidays

0 commit comments

Comments
 (0)