@@ -68,11 +68,9 @@ async def parse_sections(session,
68
68
index = None ,
69
69
keep_section_order = False ,
70
70
keep_file_order = False ):
71
- if "aria-labelledby" in section .attrs :
72
- section_title_id = str (section ["aria-labelledby" ])
73
- section_name = str (section .find ("h3" , id = section_title_id ).string ).strip ()
74
- else :
75
- section_name = str (section ["aria-label" ]).strip ()
71
+
72
+ title = section .find ("h3" , id = re .compile ("sectionid-([0-9]+)-title" ), recursive = True )
73
+ section_name = str (title .text ).strip ()
76
74
77
75
if keep_section_order :
78
76
section_name = f"[{ index + 1 :02} ] { section_name } "
@@ -175,28 +173,23 @@ async def parse_module(session,
175
173
process_external_links ,
176
174
keep_file_order ,
177
175
password_mapper ):
178
- mtype = module ["class" ][1 ]
176
+ mtype = module ["class" ][2 ]
179
177
module_id = int (re .search ("module-([0-9]+)" , module ["id" ])[1 ])
180
178
if mtype == MTYPE_FILE :
181
- instance = module .find ("div" , class_ = "activityinstance " )
179
+ link = module .find ("a " )
182
180
try :
183
- file_name = str (instance . a .span .contents [0 ])
181
+ file_name = str (link .span .contents [0 ])
184
182
except AttributeError :
185
183
return
186
184
last_updated = last_updated_dict [module_id ]
187
185
188
- with_extension = False
189
- if "pdf-24" in instance .a .img ["src" ]:
190
- file_name += ".pdf"
191
- with_extension = True
192
-
193
186
if keep_file_order :
194
187
file_name = f"[{ module_idx + 1 :02} ] { file_name } "
195
188
196
- url = instance . a ["href" ] + "&redirect=1"
189
+ url = link ["href" ] + "&redirect=1"
197
190
await queue .put ({"path" : safe_path_join (base_path , file_name ),
198
191
"url" : url ,
199
- "with_extension" : with_extension ,
192
+ "with_extension" : False ,
200
193
"checksum" : last_updated })
201
194
202
195
elif mtype == MTYPE_DIRECTORY :
@@ -207,9 +200,9 @@ async def parse_module(session,
207
200
if not process_external_links :
208
201
return
209
202
210
- instance = module .find ("div" , class_ = "activityinstance " )
211
- url = instance . a ["href" ] + "&redirect=1"
212
- name = str (instance . a .span .contents [0 ])
203
+ link = module .find ("a " )
204
+ url = link ["href" ] + "&redirect=1"
205
+ name = str (link .span .contents [0 ])
213
206
214
207
if keep_file_order :
215
208
name = f"[{ module_idx + 1 :02} ] { name } "
@@ -226,13 +219,12 @@ async def parse_module(session,
226
219
password_mapper = password_mapper )
227
220
228
221
elif mtype == MTYPE_ASSIGN :
229
- instance = module .find ("div" , class_ = "activityinstance" )
230
- link = instance .a
222
+ link = module .find ("a" )
231
223
if link is None :
232
224
return
233
- href = instance . a ["href" ]
225
+ href = link ["href" ]
234
226
last_updated = last_updated_dict [module_id ]
235
- name = str (instance . a .span .contents [0 ])
227
+ name = str (link .span .contents [0 ])
236
228
237
229
assign_file_tree_soup_soup = await call_function_or_cache (get_assign_files_tree ,
238
230
last_updated ,
@@ -291,11 +283,11 @@ async def parse_folder(session, queue, download_settings, module, base_path, las
291
283
await parse_folder_tree (queue , folder_tree .ul , base_path , last_updated )
292
284
return
293
285
294
- instance = module .find ("div" , class_ = "activityinstance " )
295
- folder_name = str (instance . a .span .contents [0 ])
286
+ link = module .find ("a " )
287
+ folder_name = str (link .span .contents [0 ])
296
288
folder_path = safe_path_join (base_path , folder_name )
297
289
298
- href = instance . a ["href" ]
290
+ href = link ["href" ]
299
291
300
292
folder_soup = await call_function_or_cache (get_filemanager , last_updated , session , href )
301
293
0 commit comments