Skip to content

Commit a6e8af0

Browse files
authored
Merge pull request #176 from Devansh-bit/master
fix: Add temporary file cleanup to solve resource leakage
2 parents faae60a + 9126ba5 commit a6e8af0

File tree

2 files changed

+80
-42
lines changed

2 files changed

+80
-42
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,13 @@ Please read [CONTRIBUTING.md](https://github.com/metakgp/gyft/blob/master/CONTRI
144144

145145
## Maintainer(s)
146146

147+
- [Devansh Gupta](https://github.com/Devansh-bit)
148+
149+
## Past Maintainer(s)
150+
147151
- [Ashwin Prasanth](https://github.com/ashwinpra)
152+
- [Siddharth Kannan](https://github.com/icyflame)
153+
- [Nishant Nikhil](https://github.com/nishnik)
148154

149155
<!-- MARKDOWN LINKS & IMAGES -->
150156

utils/academic_calander_handler.py

Lines changed: 74 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import tempfile
12
from datetime import datetime, timedelta
23
import glob
34
import camelot
@@ -9,44 +10,48 @@
910
from dataclasses import dataclass, field
1011
import re
1112

12-
1313
JSON_FOLDER_NAME = 'Academic_Cal-j'
1414

15+
1516
@dataclass
class DataEntry:
    """One calendar event together with its start and end timestamps.

    The date defaults are built with ``default_factory`` so that each
    instance receives the time of its own creation. A plain
    ``datetime.today()`` default would be evaluated only once, when the
    class body runs, and that single frozen value would then be shared by
    every instance created afterwards.
    """

    # start of the event; defaults to the moment the entry is created
    start_date: datetime = field(default_factory=datetime.today)
    # end of the event; defaults to the moment the entry is created
    end_date: datetime = field(default_factory=datetime.today)
    # free-text description of the event
    event: str = ""
2021

21-
#get the current working directory
22+
23+
# get the current working directory
2224
def cwd():
    """Return the process's current working directory as a string."""
    working_dir = os.getcwd()
    return working_dir
2426

27+
2528
def get_latest_calendar_name():
    """Build the file name of the academic-calendar PDF for the current session.

    Dates before July are counted towards the session that began in the
    previous calendar year. The name has the form
    ``AcademicCalendar<start year>-<two-digit following year>.pdf``,
    e.g. ``AcademicCalendar2024-25.pdf``.
    """
    today = datetime.today()
    # January-June still belong to the session that started last year
    session_start = today.year - 1 if today.month < 7 else today.year
    session_end = (session_start % 100) + 1
    return f"AcademicCalendar{session_start}-{session_end}.pdf"
3538

39+
3640
def is_file_present(file):
    """Report whether `file` exists inside the current working directory.

    `file` is appended to the working directory with a '/' separator and the
    resulting path is probed twice: once as-is and once with a trailing '/'.

    Returns:
        True if either probe exists, False otherwise.
    """
    candidate = os.getcwd() + '/' + file
    return os.path.exists(candidate) or os.path.exists(candidate + '/')
4246

47+
4348
def delete_file(file):
44-
if(is_file_present(file)):
49+
if (is_file_present(file)):
4550
try:
46-
print("DELETING file ",file)
47-
if(os.path.isdir(file)):
51+
print("DELETING file ", file)
52+
if (os.path.isdir(file)):
4853
shutil.rmtree(cwd() + '/' + file)
49-
elif(os.path.isfile(file)):
54+
elif (os.path.isfile(file)):
5055
os.remove(file)
5156
else:
5257
raise Exception("filename not valid")
@@ -57,25 +62,26 @@ def delete_file(file):
5762
else:
5863
print(file, "File not present..")
5964

60-
#fetch the latest academic calendar from the iitkgp website
65+
66+
# fetch the latest academic calendar from the iitkgp website
6167
def get_latest_calendar():
    """Download the latest academic-calendar PDF into the working directory.

    The file name comes from `get_latest_calendar_name()` and is requested
    from the institute's public PDF folder. A previously downloaded copy with
    the same name is deleted first.

    The response is validated before anything is written, so an HTTP error
    page is never stored under the PDF's name and reported as a success.

    Returns:
        True if the PDF was downloaded and is present on disk, False if the
        server answered with an error status.

    Raises:
        requests.RequestException: if the request itself fails (connection
            error, timeout, ...).
    """
    filename = get_latest_calendar_name()
    url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename

    # delete any old academic calendar pdf if it exists
    if is_file_present(filename):
        delete_file(filename)

    # a timeout keeps a stalled connection from blocking the program forever
    response = requests.get(url, timeout=30)
    if not response.ok:
        print("Could not download", url, "- HTTP status", response.status_code)
        return False

    # only create the file once there is real content to put in it
    with open(filename, "wb") as file:
        file.write(response.content)

    return is_file_present(filename)
77-
78-
def upzip_and_delete_zip(zip_file_name,result_folder_name):
82+
83+
84+
def upzip_and_delete_zip(zip_file_name, result_folder_name):
7985
with ZipFile(zip_file_name) as zip:
8086
try:
8187
zip.extractall(result_folder_name)
@@ -87,49 +93,67 @@ def upzip_and_delete_zip(zip_file_name,result_folder_name):
8793
delete_file(zip_file_name)
8894
return True
8995

96+
9097
def export_json():
    """Extract every table of the calendar PDF into JSON files.

    The tables are exported as a compressed archive named after
    JSON_FOLDER_NAME and then unpacked into a folder of the same name. A
    folder left over from an earlier run is deleted before exporting.

    Returns:
        True once the archive has been handed to `upzip_and_delete_zip`,
        False if the export raised an exception.
    """
    pdf_name = get_latest_calendar_name()

    ## [NOTE]
    ## ignore the read_pdf not found warning
    ## per the original author, camelot's backend names are mismatched, so
    ## "ghostscript" actually selects pdfium and vice versa; if camelot fixes
    ## this, the backend name here needs to be changed back
    ##
    ## per the original author, this call creates temporary files using
    ## `tempfile.mkdtemp()` in /tmp and does not clean them up until the
    ## program exits
    extracted = camelot.read_pdf(pdf_name, pages="all", backend="ghostscript")

    print("Checking for pre-existing folder")
    delete_file(JSON_FOLDER_NAME)

    export_target = JSON_FOLDER_NAME + '.json'
    try:
        extracted.export(export_target, f='json', compress=True)
    except Exception as E:
        print(E)
        return False

    archive_name = JSON_FOLDER_NAME + '.zip'
    upzip_and_delete_zip(archive_name, JSON_FOLDER_NAME)
    return True
110119

120+
111121
def get_json_files():
    """List the `.json` files inside the JSON_FOLDER_NAME folder.

    The folder is looked up relative to the current working directory.

    Returns:
        The paths matched by the glob (hidden files included), or an empty
        list when the folder does not exist.
    """
    if not is_file_present(JSON_FOLDER_NAME):
        return []

    json_dir = cwd() + '/' + JSON_FOLDER_NAME
    return glob.glob(json_dir + '/*.json', include_hidden=True)
118128

129+
119130
def merge_json():
    """Concatenate the contents of every exported JSON file.

    Each file returned by `get_json_files()` is loaded and its items are
    appended to one list, which is also written to `final.json` in the
    working directory.

    Returns:
        The combined list.
    """
    combined = []
    for json_path in get_json_files():
        with open(json_path) as source:
            combined.extend(json.load(source))

    with open('final.json', "w") as sink:
        json.dump(combined, sink, indent=4)

    return combined
130141

131-
def get_academic_calendar() -> list[DataEntry]:
132142

143+
def clean_temp_files(base=None):
    """Delete leftover `tempfile.mkdtemp()`-style directories.

    Every entry of `base` whose name starts with 'tmp' and is exactly 11
    characters long is treated as a stale temporary directory and removed
    recursively.

    WARNING: the match is by name only. Directories created by other
    programs that follow the same naming scheme are removed as well.

    Args:
        base: directory to scan. Defaults to the system temporary directory
            reported by `tempfile.gettempdir()`.

    Failures to delete an individual directory are printed and skipped so
    that one locked or foreign-owned directory does not stop the sweep.
    """
    if base is None:
        base = tempfile.gettempdir()

    for filename in os.listdir(base):
        # mkdtemp's default names are 'tmp' followed by 8 random characters
        if not filename.startswith('tmp') or len(filename) != 11:
            continue
        fullpath = os.path.join(base, filename)
        # rmtree only works on real directories; plain files and symlinks
        # that happen to match the pattern are left untouched
        if os.path.islink(fullpath) or not os.path.isdir(fullpath):
            continue
        try:
            shutil.rmtree(fullpath)
        except OSError as E:
            print(E)
154+
155+
156+
def get_academic_calendar() -> list[DataEntry]:
133157
get_latest_calendar()
134158
export_json()
135159

@@ -168,28 +192,36 @@ def get_academic_calendar() -> list[DataEntry]:
168192
date_regex = re.compile(r'\d{2}.\d{2}.\d{4}')
169193
maxLen = 1
170194
for date in all_dates:
171-
if(len(date) > 4 and date['4'] != ''):
195+
if (len(date) > 4 and date['4'] != ''):
172196
entry = DataEntry()
173-
if(len(date['1']) > 3):
174-
entry.event += date['1'].replace('\n','')
175-
entry.event += date['2'].replace('\n','')
197+
if (len(date['1']) > 3):
198+
entry.event += date['1'].replace('\n', '')
199+
entry.event += date['2'].replace('\n', '')
176200

177-
d =date['3'].replace('\n',' ').replace('(AN)','') + date['4'].replace('\n',' ').replace('(AN)','')
201+
d = date['3'].replace('\n', ' ').replace('(AN)', '') + date['4'].replace('\n', ' ').replace('(AN)', '')
178202
d = date_regex.findall(d)
179-
if(maxLen < len(d)):
203+
if (maxLen < len(d)):
180204
maxLen = len(d)
181-
if(len(d) == 1):
182-
entry.start_date = datetime.strptime(d[0],"%d.%m.%Y")
183-
entry.end_date = ( entry.start_date + timedelta(1) )
184-
elif(len(d) == 2):
185-
entry.start_date = datetime.strptime(d[0],"%d.%m.%Y")
186-
entry.end_date = datetime.strptime(d[1],"%d.%m.%Y")
205+
if (len(d) == 1):
206+
entry.start_date = datetime.strptime(d[0], "%d.%m.%Y")
207+
entry.end_date = (entry.start_date + timedelta(1))
208+
elif (len(d) == 2):
209+
entry.start_date = datetime.strptime(d[0], "%d.%m.%Y")
210+
entry.end_date = datetime.strptime(d[1], "%d.%m.%Y")
187211
main_dates.append(entry)
188212
annual_convocation = str(date['1']).strip().lower().split(" ")
189213
## KGP hai .. cannot trust, they can even mess up the spellings of annual convocation
190214
## this can just reduce the amount of places this will fail
191-
if(len(annual_convocation) == 2 and ("annual" in annual_convocation or "convocation" in annual_convocation)):
215+
if (len(annual_convocation) == 2 and ("annual" in annual_convocation or "convocation" in annual_convocation)):
192216
break
193217

194-
return main_dates
218+
## This has to be done to remove temporary files created by camelot. These files are not automatically
219+
## deleted until program exits
220+
## This is not ideal, and might be dangerous (and invisible) if other programs are creating similar directories often
221+
## Nothing else can be done without modifying `camelot`.
222+
try:
223+
clean_temp_files()
224+
except Exception as E:
225+
print(E)
195226

227+
return main_dates

0 commit comments

Comments
 (0)