@@ -1108,148 +1108,104 @@ def check_broken_links(self, user_id, course_key_string, language):
1108
1108
"""
1109
1109
Checks for broken links in a course. Store the results in a file.
1110
1110
"""
1111
- courselike_key = CourseKey .from_string (course_key_string )
1112
-
1113
- try :
1114
- user = User .objects .get (pk = user_id )
1115
- except User .DoesNotExist :
1116
- with translation_language (language ):
1117
- self .status .fail (UserErrors .UNKNOWN_USER_ID .format (user_id ))
1118
- return
1119
- if not has_course_author_access (user , courselike_key ):
1120
- with translation_language (language ):
1121
- self .status .fail (UserErrors .PERMISSION_DENIED )
1122
- return
1123
-
1124
- courselike_block = modulestore ().get_course (courselike_key )
1125
-
1126
- try :
1127
- self .status .set_state ('Scanning' )
1128
- links_file = _create_broken_link_json (courselike_block , courselike_key , {}, self .status )
1129
- artifact = UserTaskArtifact (status = self .status , name = 'BrokenLinks' )
1130
- artifact .file .save (name = os .path .basename (links_file .name ), content = File (links_file ))
1131
- artifact .save ()
1132
- # catch all exceptions so we can record useful error messages
1133
- except Exception as exception : # pylint: disable=broad-except
1134
- LOGGER .exception ('Error checking links for course %s' , courselike_key , exc_info = True )
1135
- if self .status .state != UserTaskStatus .FAILED :
1136
- self .status .fail ({'raw_error_msg' : str (exception )})
1137
- return
1138
-
1139
-
1140
- def _create_broken_link_json (course_block , course_key , context , status = None ):
1141
- """
1142
- Generates a json file for broken links, or returns None if there was an error.
1143
- Updates the context with any error information if applicable.
1144
- Note that because the celery queue does not have credentials, some broken links will
1145
- need to be checked client side.
1111
+ def validate_user ():
1112
+ """Validate if the user exists. Otherwise log error. """
1113
+ try :
1114
+ return User .objects .get (pk = user_id )
1115
+ except User .DoesNotExist as exc :
1116
+ with translation_language (language ):
1117
+ self .status .fail (UserErrors .UNKNOWN_USER_ID .format (user_id ))
1118
+ return
1146
1119
1147
- File format:
1148
- [
1149
- [<blockId>, <broken link>],
1150
- ...,
1151
- ]
1152
- """
1153
- name = course_block .url_name
1154
- links_file = NamedTemporaryFile (prefix = name + '.' , suffix = '.json' )
1120
+ def get_urls (content ):
1121
+ """Returns all urls after href and src in content."""
1122
+ regex = r'\s+(?:href|src)=["\']([^"\']*)["\']'
1123
+ urls = re .findall (regex , content )
1124
+ return urls
1155
1125
1156
- try :
1157
- if status :
1158
- status .set_state ('Verifying' )
1159
- status .increment_completed_steps ()
1160
- LOGGER .debug ('json file being generated at %s' , links_file .name )
1126
+ def convert_to_standard_url (url , course_key ):
1127
+ """
1128
+ Returns standard urls when given studio urls.
1129
+ Example urls:
1130
+ /assets/courseware/v1/506da5d6f866e8f0be44c5df8b6e6b2a/asset-v1:edX+DemoX+Demo_Course+type@asset+block/getting-started_x250.png
1131
+ /static/getting-started_x250.png
1132
+ /container/block-v1:edX+DemoX+Demo_Course+type@vertical+block@2152d4a4aadc4cb0af5256394a3d1fc7
1133
+ """
1134
+ if not url .startswith ('http://' ) and not url .startswith ('https://' ):
1135
+ if url .startswith ('/static/' ):
1136
+ processed_url = replace_static_urls (f'\" { url } \" ' , course_id = course_key )[1 :- 1 ]
1137
+ return 'http://' + settings .CMS_BASE + processed_url
1138
+ elif url .startswith ('/' ):
1139
+ return 'http://' + settings .CMS_BASE + url
1140
+ else :
1141
+ return 'http://' + settings .CMS_BASE + '/container/' + url
1142
+
1143
+ def verify_url (url ):
1144
+ """Returns true if url request returns 200"""
1145
+ try :
1146
+ response = requests .get (url , timeout = 5 )
1147
+ return response .status_code == 200
1148
+ except requests .exceptions .RequestException as e :
1149
+ return False
1161
1150
1162
- verticals = modulestore ().get_items (
1163
- course_key ,
1164
- qualifiers = {'category' : 'vertical' },
1165
- )
1151
+ def scan_course (course_key ):
1152
+ """
1153
+ Scans the course and returns broken link tuples.
1154
+ [<block_id>, <broken_link>]
1155
+ """
1156
+ broken_links = []
1157
+ verticals = modulestore ().get_items (course_key , qualifiers = {'category' : 'vertical' })
1166
1158
blocks = []
1159
+
1167
1160
for vertical in verticals :
1168
1161
blocks .extend (vertical .get_children ())
1169
- data = []
1162
+
1170
1163
for block in blocks :
1171
1164
usage_key = block .usage_key
1172
1165
block_info = get_block_info (block )
1173
1166
block_data = block_info ['data' ]
1174
- urls = _get_urls (block_data )
1167
+ urls = get_urls (block_data )
1175
1168
1176
1169
for url in urls :
1177
1170
if url == '#' :
1178
1171
break
1179
- processed_url = _process_url (url , course_key )
1180
- if not _verify_url (processed_url ):
1181
- data .append ([str (usage_key ), url ])
1182
-
1183
- with open (links_file .name , 'w' ) as file :
1184
- json .dump (data , file , indent = 4 )
1185
-
1186
- except SerializationError as exc :
1187
- LOGGER .exception ('There was an error link checking %s' , course_key , exc_info = True )
1188
- parent = None
1189
- try :
1190
- failed_item = modulestore ().get_item (exc .location )
1191
- parent_loc = modulestore ().get_parent_location (failed_item .location )
1192
-
1193
- if parent_loc is not None :
1194
- parent = modulestore ().get_item (parent_loc )
1195
- except : # pylint: disable=bare-except
1196
- # if we have a nested exception, then we'll show the more generic error message
1197
- pass
1198
-
1199
- context .update ({
1200
- 'in_err' : True ,
1201
- 'raw_err_msg' : str (exc ),
1202
- 'edit_unit_url' : reverse_usage_url ("container_handler" , parent .location ) if parent else "" ,
1203
- })
1204
- if status :
1205
- status .fail (json .dumps ({'raw_error_msg' : context ['raw_err_msg' ],
1206
- 'edit_unit_url' : context ['edit_unit_url' ]}))
1207
- raise
1208
- except Exception as exc :
1209
- LOGGER .exception ('There was an error link checking %s' , course_key , exc_info = True )
1210
- context .update ({
1211
- 'in_err' : True ,
1212
- 'edit_unit_url' : None ,
1213
- 'raw_err_msg' : str (exc )})
1214
- if status :
1215
- status .fail (json .dumps ({'raw_error_msg' : context ['raw_err_msg' ]}))
1216
- raise
1217
-
1218
- return links_file
1172
+ standardized_url = convert_to_standard_url (url , course_key )
1173
+ if not verify_url (standardized_url ):
1174
+ broken_links .append ([str (usage_key ), url ])
1219
1175
1176
+ return broken_links
1177
+
1178
+ user = validate_user ()
1179
+ courselike_key = CourseKey .from_string (course_key_string )
1220
1180
1221
- def _get_urls (content ):
1222
- """
1223
- Return all urls after hrefs and src in content
1224
- """
1225
- urls = re .findall (r'\s+(?:href|src)=["\']([^"\']*)["\']' , content )
1226
- return urls
1181
+ try :
1182
+ self .status .set_state ('Preparing' )
1183
+ file_name = str (courselike_key )
1184
+ links_file = NamedTemporaryFile (prefix = file_name + '.' , suffix = '.json' )
1227
1185
1186
+ try :
1187
+ if self .status :
1188
+ self .status .set_state ('Scanning' )
1189
+ self .status .increment_completed_steps ()
1190
+ LOGGER .debug ('json file being generated at %s' , links_file .name )
1228
1191
1229
- def _process_url (url , course_key ):
1230
- """
1231
- Returns processed url
1232
- Some example urls that refer to places in studio:
1233
- - /assets/courseware/v1/506da5d6f866e8f0be44c5df8b6e6b2a/asset-v1:edX+DemoX+Demo_Course+type@asset+block/getting-started_x250.png
1234
- - /static/getting-started_x250.png
1235
- - /container/block-v1:edX+DemoX+Demo_Course+type@vertical+block@2152d4a4aadc4cb0af5256394a3d1fc7
1236
- """
1237
- if not url .startswith ('http://' ) and not url .startswith ('https://' ):
1238
- if url .startswith ('/static/' ):
1239
- processed_url = replace_static_urls (f'\" { url } \" ' , course_id = course_key )[1 :- 1 ]
1240
- return 'http://' + settings .CMS_BASE + processed_url
1241
- elif url .startswith ('/' ):
1242
- return 'http://' + settings .CMS_BASE + url
1243
- else :
1244
- return 'http://' + settings .CMS_BASE + '/container/' + url
1192
+ data = scan_course (courselike_key )
1245
1193
1194
+ with open (links_file .name , 'w' ) as file :
1195
+ json .dump (data , file , indent = 4 )
1196
+ except Exception as exc :
1197
+ LOGGER .exception ('There was an error link checking %s' , courselike_key , exc_info = True )
1198
+ if self .status :
1199
+ self .status .fail (json .dumps ({'raw_error_msg' : str (exc )}))
1200
+ raise
1246
1201
1247
- def _verify_url (url ):
1248
- """
1249
- Returns true if url request returns 200
1250
- """
1251
- try :
1252
- response = requests .get (url , timeout = 5 )
1253
- return response .status_code == 200
1254
- except requests .exceptions .RequestException as e :
1255
- return False
1202
+ artifact = UserTaskArtifact (status = self .status , name = 'BrokenLinks' )
1203
+ artifact .file .save (name = os .path .basename (links_file .name ), content = File (links_file ))
1204
+ artifact .save ()
1205
+
1206
+ # catch all exceptions so we can record useful error messages
1207
+ except Exception as exception : # pylint: disable=broad-except
1208
+ LOGGER .exception ('Error checking links for course %s' , courselike_key , exc_info = True )
1209
+ if self .status .state != UserTaskStatus .FAILED :
1210
+ self .status .fail ({'raw_error_msg' : str (exception )})
1211
+ return
0 commit comments