1- import tempfile
21from datetime import datetime , timedelta
32import glob
43import camelot
1110import re
1211
1312JSON_FOLDER_NAME = 'Academic_Cal-j'
13+ CACHE_DIR = 'cache'
1414
1515
1616@dataclass
@@ -37,6 +37,18 @@ def get_latest_calendar_name():
3737 return filename
3838
3939
def ensure_cache_dir():
    """Make sure the directory named by ``CACHE_DIR`` exists."""
    if os.path.isdir(CACHE_DIR):
        # Already present; nothing to create.
        return
    os.makedirs(CACHE_DIR, exist_ok=True)
43+
44+
def get_cache_path():
    """Return the path of the JSON cache file for the latest calendar.

    The file name is the latest calendar's filename with its final
    extension swapped for ``.json``, placed inside ``CACHE_DIR``. The cache
    directory is created (if missing) before the path is returned.
    """
    stem, _extension = os.path.splitext(get_latest_calendar_name())
    ensure_cache_dir()
    cache_name = f"{stem}.json"
    return os.path.join(CACHE_DIR, cache_name)
50+
51+
4052def is_file_present (file ):
4153 if (os .path .exists (cwd () + '/' + file ) or
4254 os .path .exists (cwd () + '/' + file + '/' )
@@ -77,7 +89,7 @@ def get_latest_calendar(is_web=False):
7789
7890 try :
7991 with open (filename , "wb" ) as file :
80- response = requests .get (url )
92+ response = requests .get (url , timeout = 15 )
8193 response .raise_for_status ()
8294 file .write (response .content )
8395 except Exception as e :
@@ -129,7 +141,7 @@ def export_json():
def get_json_files():
    """Return the ``*.json`` paths found in the JSON export folder.

    The folder is ``JSON_FOLDER_NAME`` under the directory returned by
    ``cwd()``. An empty list is returned when ``is_file_present`` reports
    that the folder does not exist.
    """
    json_pattern = cwd() + '/' + JSON_FOLDER_NAME + '/*.json'
    if not is_file_present(JSON_FOLDER_NAME):
        return []
    return glob.glob(json_pattern)
@@ -141,27 +153,37 @@ def merge_json():
141153 with open (file ) as f :
142154 data = json .load (f )
143155 merged_data .extend (data )
144-
145- with open ('final.json' , "w" ) as f :
146- json .dump (merged_data , f , indent = 4 )
147-
148156 return merged_data
149157
def cleanup_artifacts():
    """Remove intermediate artifacts: extracted JSON folder, zip, and PDF.

    Cleanup is best-effort: every target is attempted independently and a
    failure on one never prevents the others from being tried, nor does it
    propagate to the caller. Failures are recorded at debug level instead
    of being discarded silently, so they can be surfaced when diagnosing
    leftover files.
    """
    import logging

    log = logging.getLogger(__name__)

    # Targets are resolved lazily, inside the try block, so that a failure
    # while working out a name (e.g. get_latest_calendar_name() raising) is
    # contained exactly like a failed deletion.
    target_resolvers = (
        lambda: JSON_FOLDER_NAME,
        lambda: JSON_FOLDER_NAME + '.zip',
        get_latest_calendar_name,
    )
    for resolve_target in target_resolvers:
        try:
            delete_file(resolve_target())
        except Exception:
            # Deliberately broad: a missing or locked artifact must not
            # break the caller, but the reason is kept for debugging.
            log.debug("Could not remove intermediate artifact", exc_info=True)
150172
151- def clean_temp_files ():
152- base = tempfile .gettempdir ()
153- for filename in os .listdir (base ):
154- if not filename .startswith ('tmp' ) or len (filename ) != 11 :
155- continue
156- fullpath = os .path .join (base , filename )
173+ def get_academic_calendar (is_web = False ) -> list [DataEntry ]:
174+ # Try cache first
175+ cache_path = get_cache_path ()
176+ if os .path .isfile (cache_path ):
157177 try :
158- shutil .rmtree (fullpath )
159- except Exception as E :
160- print (E )
161- continue
178+ with open (cache_path , 'r' , encoding = 'utf-8' ) as f :
179+ cached = json .load (f )
180+ entries = [DataEntry (start_date = datetime .fromisoformat (x ['start_date' ]),
181+ end_date = datetime .fromisoformat (x ['end_date' ]),
182+ event = x ['event' ]) for x in cached ]
183+ return entries
184+ except Exception :
185+ pass
162186
163-
164- def get_academic_calendar (is_web = False ) -> list [DataEntry ]:
165187 get_latest_calendar (is_web )
166188 export_json ()
167189
@@ -223,13 +245,20 @@ def get_academic_calendar(is_web = False) -> list[DataEntry]:
223245 if (len (annual_convocation ) == 2 and ("annual" in annual_convocation or "convocation" in annual_convocation )):
224246 break
225247
226- ## This has to be done to remove temporary files created by camelot. These files are not automatically
227- ## deleted until program exits
228- ## This is not ideal, and might be dangerous (and invisible) if other programs are creating similar directories often
229- ## Nothing else can be done without modifying `camelot`.
248+ # Cache for subsequent runs
230249 try :
231- clean_temp_files ()
232- except Exception as E :
233- print (E )
250+ ensure_cache_dir ()
251+ with open (cache_path , 'w' , encoding = 'utf-8' ) as f :
252+ json .dump ([
253+ {
254+ 'start_date' : e .start_date .isoformat (),
255+ 'end_date' : e .end_date .isoformat (),
256+ 'event' : e .event ,
257+ } for e in main_dates
258+ ], f , indent = 2 )
259+ except Exception :
260+ pass
261+ # After caching, remove intermediate artifacts
262+ cleanup_artifacts ()
234263
235264 return main_dates
0 commit comments