Skip to content

Commit 8c5d85d

Browse files
committed
feat: tasks code readability
1 parent 2383edc commit 8c5d85d

File tree

2 files changed

+83
-127
lines changed

2 files changed

+83
-127
lines changed

cms/djangoapps/contentstore/tasks.py

+81-125
Original file line numberDiff line numberDiff line change
@@ -1108,148 +1108,104 @@ def check_broken_links(self, user_id, course_key_string, language):
11081108
"""
11091109
Checks for broken links in a course. Store the results in a file.
11101110
"""
1111-
courselike_key = CourseKey.from_string(course_key_string)
1112-
1113-
try:
1114-
user = User.objects.get(pk=user_id)
1115-
except User.DoesNotExist:
1116-
with translation_language(language):
1117-
self.status.fail(UserErrors.UNKNOWN_USER_ID.format(user_id))
1118-
return
1119-
if not has_course_author_access(user, courselike_key):
1120-
with translation_language(language):
1121-
self.status.fail(UserErrors.PERMISSION_DENIED)
1122-
return
1123-
1124-
courselike_block = modulestore().get_course(courselike_key)
1125-
1126-
try:
1127-
self.status.set_state('Scanning')
1128-
links_file = _create_broken_link_json(courselike_block, courselike_key, {}, self.status)
1129-
artifact = UserTaskArtifact(status=self.status, name='BrokenLinks')
1130-
artifact.file.save(name=os.path.basename(links_file.name), content=File(links_file))
1131-
artifact.save()
1132-
# catch all exceptions so we can record useful error messages
1133-
except Exception as exception: # pylint: disable=broad-except
1134-
LOGGER.exception('Error checking links for course %s', courselike_key, exc_info=True)
1135-
if self.status.state != UserTaskStatus.FAILED:
1136-
self.status.fail({'raw_error_msg': str(exception)})
1137-
return
1138-
1139-
1140-
def _create_broken_link_json(course_block, course_key, context, status=None):
1141-
"""
1142-
Generates a json file for broken links, or returns None if there was an error.
1143-
Updates the context with any error information if applicable.
1144-
Note that because the celery queue does not have credentials, some broken links will
1145-
need to be checked client side.
1111+
def validate_user():
1112+
"""Validate if the user exists. Otherwise log error. """
1113+
try:
1114+
return User.objects.get(pk=user_id)
1115+
except User.DoesNotExist as exc:
1116+
with translation_language(language):
1117+
self.status.fail(UserErrors.UNKNOWN_USER_ID.format(user_id))
1118+
return
11461119

1147-
File format:
1148-
[
1149-
[<blockId>, <broken link>],
1150-
...,
1151-
]
1152-
"""
1153-
name = course_block.url_name
1154-
links_file = NamedTemporaryFile(prefix=name + '.', suffix='.json')
1120+
def get_urls(content):
1121+
"""Returns all urls after href and src in content."""
1122+
regex = r'\s+(?:href|src)=["\']([^"\']*)["\']'
1123+
urls = re.findall(regex, content)
1124+
return urls
11551125

1156-
try:
1157-
if status:
1158-
status.set_state('Verifying')
1159-
status.increment_completed_steps()
1160-
LOGGER.debug('json file being generated at %s', links_file.name)
1126+
def convert_to_standard_url(url, course_key):
1127+
"""
1128+
Returns standard urls when given studio urls.
1129+
Example urls:
1130+
/assets/courseware/v1/506da5d6f866e8f0be44c5df8b6e6b2a/asset-v1:edX+DemoX+Demo_Course+type@asset+block/getting-started_x250.png
1131+
/static/getting-started_x250.png
1132+
/container/block-v1:edX+DemoX+Demo_Course+type@vertical+block@2152d4a4aadc4cb0af5256394a3d1fc7
1133+
"""
1134+
if not url.startswith('http://') and not url.startswith('https://'):
1135+
if url.startswith('/static/'):
1136+
processed_url = replace_static_urls(f'\"{url}\"', course_id=course_key)[1:-1]
1137+
return 'http://' + settings.CMS_BASE + processed_url
1138+
elif url.startswith('/'):
1139+
return 'http://' + settings.CMS_BASE + url
1140+
else:
1141+
return 'http://' + settings.CMS_BASE + '/container/' + url
1142+
1143+
def verify_url(url):
1144+
"""Returns true if url request returns 200"""
1145+
try:
1146+
response = requests.get(url, timeout=5)
1147+
return response.status_code == 200
1148+
except requests.exceptions.RequestException as e:
1149+
return False
11611150

1162-
verticals = modulestore().get_items(
1163-
course_key,
1164-
qualifiers={'category': 'vertical'},
1165-
)
1151+
def scan_course(course_key):
1152+
"""
1153+
Scans the course and returns broken link tuples.
1154+
[<block_id>, <broken_link>]
1155+
"""
1156+
broken_links = []
1157+
verticals = modulestore().get_items(course_key, qualifiers={'category': 'vertical'})
11661158
blocks = []
1159+
11671160
for vertical in verticals:
11681161
blocks.extend(vertical.get_children())
1169-
data = []
1162+
11701163
for block in blocks:
11711164
usage_key = block.usage_key
11721165
block_info = get_block_info(block)
11731166
block_data = block_info['data']
1174-
urls = _get_urls(block_data)
1167+
urls = get_urls(block_data)
11751168

11761169
for url in urls:
11771170
if url == '#':
11781171
break
1179-
processed_url = _process_url(url, course_key)
1180-
if not _verify_url(processed_url):
1181-
data.append([str(usage_key), url])
1182-
1183-
with open(links_file.name, 'w') as file:
1184-
json.dump(data, file, indent=4)
1185-
1186-
except SerializationError as exc:
1187-
LOGGER.exception('There was an error link checking %s', course_key, exc_info=True)
1188-
parent = None
1189-
try:
1190-
failed_item = modulestore().get_item(exc.location)
1191-
parent_loc = modulestore().get_parent_location(failed_item.location)
1192-
1193-
if parent_loc is not None:
1194-
parent = modulestore().get_item(parent_loc)
1195-
except: # pylint: disable=bare-except
1196-
# if we have a nested exception, then we'll show the more generic error message
1197-
pass
1198-
1199-
context.update({
1200-
'in_err': True,
1201-
'raw_err_msg': str(exc),
1202-
'edit_unit_url': reverse_usage_url("container_handler", parent.location) if parent else "",
1203-
})
1204-
if status:
1205-
status.fail(json.dumps({'raw_error_msg': context['raw_err_msg'],
1206-
'edit_unit_url': context['edit_unit_url']}))
1207-
raise
1208-
except Exception as exc:
1209-
LOGGER.exception('There was an error link checking %s', course_key, exc_info=True)
1210-
context.update({
1211-
'in_err': True,
1212-
'edit_unit_url': None,
1213-
'raw_err_msg': str(exc)})
1214-
if status:
1215-
status.fail(json.dumps({'raw_error_msg': context['raw_err_msg']}))
1216-
raise
1217-
1218-
return links_file
1172+
standardized_url = convert_to_standard_url(url, course_key)
1173+
if not verify_url(standardized_url):
1174+
broken_links.append([str(usage_key), url])
12191175

1176+
return broken_links
1177+
1178+
user = validate_user()
1179+
courselike_key = CourseKey.from_string(course_key_string)
12201180

1221-
def _get_urls(content):
1222-
"""
1223-
Return all urls after hrefs and src in content
1224-
"""
1225-
urls = re.findall(r'\s+(?:href|src)=["\']([^"\']*)["\']', content)
1226-
return urls
1181+
try:
1182+
self.status.set_state('Preparing')
1183+
file_name = str(courselike_key)
1184+
links_file = NamedTemporaryFile(prefix=file_name + '.', suffix='.json')
12271185

1186+
try:
1187+
if self.status:
1188+
self.status.set_state('Scanning')
1189+
self.status.increment_completed_steps()
1190+
LOGGER.debug('json file being generated at %s', links_file.name)
12281191

1229-
def _process_url(url, course_key):
1230-
"""
1231-
Returns processed url
1232-
Some example urls that refer to places in studio:
1233-
- /assets/courseware/v1/506da5d6f866e8f0be44c5df8b6e6b2a/asset-v1:edX+DemoX+Demo_Course+type@asset+block/getting-started_x250.png
1234-
- /static/getting-started_x250.png
1235-
- /container/block-v1:edX+DemoX+Demo_Course+type@vertical+block@2152d4a4aadc4cb0af5256394a3d1fc7
1236-
"""
1237-
if not url.startswith('http://') and not url.startswith('https://'):
1238-
if url.startswith('/static/'):
1239-
processed_url = replace_static_urls(f'\"{url}\"', course_id=course_key)[1:-1]
1240-
return 'http://' + settings.CMS_BASE + processed_url
1241-
elif url.startswith('/'):
1242-
return 'http://' + settings.CMS_BASE + url
1243-
else:
1244-
return 'http://' + settings.CMS_BASE + '/container/' + url
1192+
data = scan_course(courselike_key)
12451193

1194+
with open(links_file.name, 'w') as file:
1195+
json.dump(data, file, indent=4)
1196+
except Exception as exc:
1197+
LOGGER.exception('There was an error link checking %s', courselike_key, exc_info=True)
1198+
if self.status:
1199+
self.status.fail(json.dumps({'raw_error_msg': str(exc)}))
1200+
raise
12461201

1247-
def _verify_url(url):
1248-
"""
1249-
Returns true if url request returns 200
1250-
"""
1251-
try:
1252-
response = requests.get(url, timeout=5)
1253-
return response.status_code == 200
1254-
except requests.exceptions.RequestException as e:
1255-
return False
1202+
artifact = UserTaskArtifact(status=self.status, name='BrokenLinks')
1203+
artifact.file.save(name=os.path.basename(links_file.name), content=File(links_file))
1204+
artifact.save()
1205+
1206+
# catch all exceptions so we can record useful error messages
1207+
except Exception as exception: # pylint: disable=broad-except
1208+
LOGGER.exception('Error checking links for course %s', courselike_key, exc_info=True)
1209+
if self.status.state != UserTaskStatus.FAILED:
1210+
self.status.fail({'raw_error_msg': str(exception)})
1211+
return

cms/djangoapps/contentstore/views/course_optimizer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ def link_check_status_handler(request, course_key_string):
9393
9494
-X : Link check unsuccessful due to some error with X as stage [0-3]
9595
0 : No status info found (task not yet created)
96-
1 : Scanning
97-
2 : Verifying
96+
1 : Preparing
97+
2 : Scanning
9898
3 : Success
9999
100100
If the link check was successful, an output result is also returned.

0 commit comments

Comments
 (0)