-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix some issues to work on my ceiba #16
base: master
Are you sure you want to change the base?
Changes from 3 commits
6f9f504
4728d90
5383f8c
58f5bf5
915b6b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,8 @@ | |
import pycurl | ||
import urllib.parse | ||
|
||
from pathvalidate import sanitize_filepath | ||
|
||
class Error(Exception): | ||
def __str__(self): | ||
return self.message | ||
|
@@ -237,6 +239,7 @@ def download_file(self, path, retry, dcb, ecb): | |
|
||
def download_link(self, path, node, retry, dcb, ecb): | ||
disk_path_object = pathlib.Path(path.lstrip('/')) | ||
disk_path_object = pathlib.Path(sanitize_filepath(str(disk_path_object))) | ||
disk_path = str(disk_path_object) | ||
if self.vfs.is_internal_link(node): | ||
link_target_path = str(pathlib.PurePath(node.read_link())) | ||
|
@@ -283,7 +286,8 @@ def download_link(self, path, node, retry, dcb, ecb): | |
|
||
def download_regular(self, path, node, retry, dcb, ecb): | ||
disk_path_object = pathlib.Path(path.lstrip('/')) | ||
|
||
disk_path_object = pathlib.Path(sanitize_filepath(str(disk_path_object))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 如同我在 #15 (comment) 所說,我覺得這個可以移入 |
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 請刪除行尾空白。 |
||
def ccb(*args): | ||
return dcb(path, *args) | ||
|
||
|
@@ -355,6 +359,7 @@ def disk_path_object_open(mode): | |
|
||
def download_directory(self, path, node, retry, dcb, ecb): | ||
disk_path_object = pathlib.Path(path.lstrip('/')) | ||
disk_path_object = pathlib.Path(sanitize_filepath(str(disk_path_object))) | ||
if disk_path_object.is_dir(): | ||
self.logger.info('跳過已經存在的資料夾 {}' \ | ||
.format(str(disk_path_object))) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,6 +90,7 @@ class Config: | |
'attr_course_grades_show': '成績公布', | ||
'value_course_grades_show_n': '不公布', | ||
'value_course_grades_show_p': '公布個人', | ||
'value_course_grades_show_a': '公布全班', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 這個 commit 只增加設定值而無實際使用,是不是應該把「公布全班」相關的修改移來這個 commit,或是把這個 commit 併入下個 commit?理想上我們會希望每個 commit 都是一個完整、有意義、可以測試的修改。 |
||
'attr_course_grades_is_changed': 'is_changed', | ||
'dir_course_homeworks': '作業區', | ||
'file_course_homeworks_homework': '作業內容.json', | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -353,6 +353,8 @@ def _create_course_list_map(self): | |
course_list_page = self.vfs.request.web('/student/index.php') | ||
course_list_rows_all = course_list_page.xpath('//table[1]/tr') | ||
course_list_rows = course_list_rows_all[1:] | ||
# Add 旁聽 courses | ||
course_list_rows += course_list_page.xpath('//table[2]/tr')[1:] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 如同我在 #13 (comment) 所說,這樣的修改對不曾旁聽過任何課程的學生會有問題。 |
||
course_list_header_row = course_list_rows_all[0] | ||
|
||
assert len(course_list_header_row) == 8 | ||
|
@@ -641,144 +643,160 @@ def fetch(self): | |
assert len(student_page.xpath('//table')) > 0 | ||
|
||
student_rows = student_page.xpath('//div[@id="sect_cont"]/table/tr') | ||
assert len(student_rows) == 12 | ||
# NTNU and NTUST students may have less rows | ||
#assert len(student_rows) == 12 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 改成 `<=`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ryucc Added There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 我其實有點好奇你遇到的是什麼狀況,不知道能不能貼個截圖(去除個資)當作參考,同時也讓之後看到這個 pull request 的人能更了解這個問題?我目前看過的 ntnu_* 和 ntust_* 帳號都和普通的臺大帳號一樣有 12 列,不知道是不是只有特定帳號有這個情形。 |
||
|
||
student_file = JSONFile(self.vfs, self) | ||
student_filename = '{}.json'.format(self._account, sn) | ||
self.add(student_filename, student_file) | ||
|
||
# 身份 | ||
student_role = row_get_value(student_rows[0], | ||
['身份', 'Role'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_role'], student_role, student_path) | ||
if len(student_rows) > 0: | ||
student_role = row_get_value(student_rows[0], | ||
['身份', 'Role'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_role'], student_role, student_path) | ||
|
||
# 照片 | ||
student_photo_element = row_get_value(student_rows[1], | ||
['照片', 'Photo'], {}, free_form=True, return_object=True) | ||
if len(student_photo_element) > 0: | ||
assert len(student_photo_element) == 1 | ||
assert student_photo_element[0].tag == 'img' | ||
assert student_photo_element[0].get('src') | ||
student_photo = student_photo_element[0].get('src') \ | ||
.rsplit('/', maxsplit=1)[1] | ||
student_photo_path = url_to_path_and_args( | ||
student_photo_element[0].get('src'), no_query_string=True)[0] | ||
self.add(student_photo, DownloadFile(self.vfs, self, | ||
student_photo_path)) | ||
else: | ||
student_photo = '' | ||
student_file.add(s['attr_students_photo'], student_photo, student_path) | ||
if len(student_rows) > 1: | ||
student_photo_element = row_get_value(student_rows[1], | ||
['照片', 'Photo'], {}, free_form=True, return_object=True) | ||
if len(student_photo_element) > 0: | ||
assert len(student_photo_element) == 1 | ||
assert student_photo_element[0].tag == 'img' | ||
assert student_photo_element[0].get('src') | ||
student_photo = student_photo_element[0].get('src') \ | ||
.rsplit('/', maxsplit=1)[1] | ||
student_photo_path = url_to_path_and_args( | ||
student_photo_element[0].get('src'), no_query_string=True)[0] | ||
self.add(student_photo, DownloadFile(self.vfs, self, | ||
student_photo_path)) | ||
else: | ||
student_photo = '' | ||
student_file.add(s['attr_students_photo'], student_photo, student_path) | ||
|
||
# 姓名 | ||
student_name = row_get_value(student_rows[2], | ||
['姓名', 'Name'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_name'], student_name, student_path) | ||
if len(student_rows) > 2: | ||
student_name = row_get_value(student_rows[2], | ||
['姓名', 'Name'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_name'], student_name, student_path) | ||
|
||
# 英文姓名 | ||
student_english_name = row_get_value(student_rows[3], | ||
['英文姓名', 'English Name'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_english_name'], | ||
student_english_name, student_path) | ||
if len(student_rows) > 3: | ||
student_english_name = row_get_value(student_rows[3], | ||
['英文姓名', 'English Name'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_english_name'], | ||
student_english_name, student_path) | ||
|
||
# 匿名代號 | ||
student_screen_name = row_get_value(student_rows[4], | ||
['匿名代號', 'Screen Name'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_screen_name'], | ||
student_screen_name, student_path) | ||
if len(student_rows) > 4: | ||
student_screen_name = row_get_value(student_rows[4], | ||
['匿名代號', 'Screen Name'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_screen_name'], | ||
student_screen_name, student_path) | ||
|
||
# 學校系級 | ||
student_school_year = row_get_value(student_rows[5], | ||
['系級', 'Major & Year', '學校系級', 'School & Dept'], | ||
{}, free_form=True).strip() | ||
student_file.add(s['attr_students_school_year'], | ||
student_school_year, student_path) | ||
if len(student_rows) > 5: | ||
student_school_year = row_get_value(student_rows[5], | ||
['系級', 'Major & Year', '學校系級', 'School & Dept'], | ||
{}, free_form=True).strip() | ||
student_file.add(s['attr_students_school_year'], | ||
student_school_year, student_path) | ||
|
||
# 個人首頁網址 | ||
student_homepage_url_element = row_get_value(student_rows[6], | ||
['個人首頁網址', 'Homepage URL'], {}, free_form=True, return_object=True) | ||
assert len(student_homepage_url_element) == 1 | ||
assert student_homepage_url_element[0].tag == 'a' | ||
assert student_homepage_url_element[0].get('href') | ||
student_homepage_url = element_get_text(student_homepage_url_element[0]) | ||
assert student_homepage_url_element[0].get('href') == \ | ||
if len(student_rows) > 6: | ||
student_homepage_url_element = row_get_value(student_rows[6], | ||
['個人首頁網址', 'Homepage URL'], {}, free_form=True, return_object=True) | ||
assert len(student_homepage_url_element) == 1 | ||
assert student_homepage_url_element[0].tag == 'a' | ||
assert student_homepage_url_element[0].get('href') | ||
student_homepage_url = element_get_text(student_homepage_url_element[0]) | ||
# Not sure what's this assertion for, but the program works fine without this assertion | ||
""" | ||
assert student_homepage_url_element[0].get('href') == \ | ||
student_homepage_url or \ | ||
student_homepage_url_element[0].get('href') == \ | ||
'http://' + student_homepage_url | ||
student_file.add(s['attr_students_homepage_url'], | ||
student_homepage_url, student_path) | ||
""" | ||
student_file.add(s['attr_students_homepage_url'], | ||
student_homepage_url, student_path) | ||
|
||
# 電子郵件 | ||
student_email_address_element = row_get_value(student_rows[7], | ||
['電子郵件', 'Email Address'], {}, free_form=True, return_object=True) | ||
assert len(student_email_address_element) == 1 | ||
assert student_email_address_element[0].tag == 'a' | ||
assert student_email_address_element[0].get('href') | ||
student_email_address = element_get_text(student_email_address_element[0]) | ||
if len(student_email_address_element[0]) == 0: | ||
if student_email_address.find('"') < 0: | ||
assert student_email_address_element[0].get('href') == \ | ||
'mailto:' + student_email_address | ||
else: | ||
self.vfs.logger.warning('學號 {} 的個人頁面電子郵件欄位有多餘的標籤' \ | ||
.format(self._account)) | ||
self.vfs.logger.warning('這很有可能是 CEIBA 沒有跳脫特殊字元所造成') | ||
student_email_address_href = student_email_address_element[0].get('href') | ||
assert student_email_address_href.startswith('mailto:') | ||
if student_email_address_href.find('<') >= 7 and \ | ||
student_email_address_href.find('>') >= 7: | ||
student_email_address = student_email_address_href[7:] | ||
if len(student_rows) > 7: | ||
student_email_address_element = row_get_value(student_rows[7], | ||
['電子郵件', 'Email Address'], {}, free_form=True, return_object=True) | ||
assert len(student_email_address_element) == 1 | ||
assert student_email_address_element[0].tag == 'a' | ||
assert student_email_address_element[0].get('href') | ||
student_email_address = element_get_text(student_email_address_element[0]) | ||
if len(student_email_address_element[0]) == 0: | ||
if student_email_address.find('"') < 0: | ||
assert student_email_address_element[0].get('href') == \ | ||
'mailto:' + student_email_address | ||
else: | ||
assert student_email_address.find('"') >= 0 | ||
student_file.add(s['attr_students_email_address'], | ||
student_email_address, student_path) | ||
self.vfs.logger.warning('學號 {} 的個人頁面電子郵件欄位有多餘的標籤' \ | ||
.format(self._account)) | ||
self.vfs.logger.warning('這很有可能是 CEIBA 沒有跳脫特殊字元所造成') | ||
student_email_address_href = student_email_address_element[0].get('href') | ||
assert student_email_address_href.startswith('mailto:') | ||
if student_email_address_href.find('<') >= 7 and \ | ||
student_email_address_href.find('>') >= 7: | ||
student_email_address = student_email_address_href[7:] | ||
else: | ||
assert student_email_address.find('"') >= 0 | ||
student_file.add(s['attr_students_email_address'], | ||
student_email_address, student_path) | ||
|
||
# 常用電子郵件 | ||
student_frequently_used_email_element = row_get_value(student_rows[8], | ||
['常用電子郵件', 'Frequently Used Email'], | ||
{}, free_form=True, return_object=True) | ||
assert len(student_frequently_used_email_element) == 1 | ||
assert student_frequently_used_email_element[0].tag == 'a' | ||
assert student_frequently_used_email_element[0].get('href') | ||
student_frequently_used_email = element_get_text( | ||
student_frequently_used_email_element[0]) | ||
student_frequently_used_email_from_href = \ | ||
student_frequently_used_email_element[0].get('href') | ||
|
||
# CEIBA 不會跳脫 < 和 > 符號,如果使用者填寫的電子郵件地址包含這個符號 | ||
# 會使透過 .text 拿到的資料不正確 | ||
if student_frequently_used_email_from_href.find('<') >= 0 and \ | ||
student_frequently_used_email_from_href.find('>') >= 0: | ||
assert student_frequently_used_email_from_href.startswith('mailto:') | ||
student_frequently_used_email = \ | ||
student_frequently_used_email_from_href[7:] | ||
else: | ||
assert student_frequently_used_email_from_href == \ | ||
'mailto:' + student_frequently_used_email | ||
if len(student_rows) > 8: | ||
student_frequently_used_email_element = row_get_value(student_rows[8], | ||
['常用電子郵件', 'Frequently Used Email'], | ||
{}, free_form=True, return_object=True) | ||
assert len(student_frequently_used_email_element) == 1 | ||
assert student_frequently_used_email_element[0].tag == 'a' | ||
assert student_frequently_used_email_element[0].get('href') | ||
student_frequently_used_email = element_get_text( | ||
student_frequently_used_email_element[0]) | ||
student_frequently_used_email_from_href = \ | ||
student_frequently_used_email_element[0].get('href') | ||
|
||
# CEIBA 不會跳脫 < 和 > 符號,如果使用者填寫的電子郵件地址包含這個符號 | ||
# 會使透過 .text 拿到的資料不正確 | ||
if student_frequently_used_email_from_href.find('<') >= 0 and \ | ||
student_frequently_used_email_from_href.find('>') >= 0: | ||
assert student_frequently_used_email_from_href.startswith('mailto:') | ||
student_frequently_used_email = \ | ||
student_frequently_used_email_from_href[7:] | ||
else: | ||
assert student_frequently_used_email_from_href == \ | ||
'mailto:' + student_frequently_used_email | ||
|
||
student_file.add(s['attr_students_frequently_used_email'], | ||
student_frequently_used_email, student_path) | ||
student_file.add(s['attr_students_frequently_used_email'], | ||
student_frequently_used_email, student_path) | ||
|
||
# 聯絡電話 | ||
student_phone = row_get_value(student_rows[9], | ||
['聯絡電話', 'Phone'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_phone'], student_phone, student_path) | ||
if len(student_rows) > 9: | ||
student_phone = row_get_value(student_rows[9], | ||
['聯絡電話', 'Phone'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_phone'], student_phone, student_path) | ||
|
||
# 聯絡地址 | ||
student_address = row_get_value(student_rows[10], | ||
['聯絡地址', 'Address'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_address'], | ||
student_address, student_path) | ||
if len(student_rows) > 10: | ||
student_address = row_get_value(student_rows[10], | ||
['聯絡地址', 'Address'], {}, free_form=True).strip() | ||
student_file.add(s['attr_students_address'], | ||
student_address, student_path) | ||
|
||
# 更多的個人資訊 | ||
student_more_personal_information_element = row_get_value(student_rows[11], | ||
['更多的個人資訊', 'More Personal Information'], | ||
{}, free_form=True, return_object=True) | ||
|
||
# 使用者可以自己在這個欄位塞各種標籤…… | ||
student_more_personal_information = ''.join( | ||
student_more_personal_information_element.itertext()) | ||
student_file.add(s['attr_students_more_personal_information'], | ||
student_more_personal_information, student_path) | ||
if len(student_rows) > 11: | ||
student_more_personal_information_element = row_get_value(student_rows[11], | ||
['更多的個人資訊', 'More Personal Information'], | ||
{}, free_form=True, return_object=True) | ||
|
||
# 使用者可以自己在這個欄位塞各種標籤…… | ||
student_more_personal_information = ''.join( | ||
student_more_personal_information_element.itertext()) | ||
student_file.add(s['attr_students_more_personal_information'], | ||
student_more_personal_information, student_path) | ||
|
||
student_file.finish() | ||
self.ready = True | ||
|
@@ -795,7 +813,8 @@ def fetch(self): | |
assert set(result.keys()) == set(result_keys) | ||
|
||
days = '一二三四五六日' | ||
slots = '01234@56789XABCD' # 節次 (possible time slots: See https://nol.ntu.edu.tw/nol/guest/index.php for more information) | ||
#slots = '01234@56789XABCD' # 節次 (possible time slots: See https://nol.ntu.edu.tw/nol/guest/index.php for more information) | ||
slots = '01234@56789XABCD ' # For me, 暑期實習 has slot with space character | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 除非舊版的程式碼未來很可能會恢復使用,不然請不要用註解的方式刪除或修改程式碼。要刪就直接刪掉,要改也直接改掉,想看舊版的人可以自己去翻 git log。以這裡來說,你可以把舊的註解移到上面一行,再加上你這裡寫的新註解。註解中的「For me」看起來不太必要,其他人可能不看 git log 大概也不知道這裡的「me」是誰。 |
||
courses = dict() | ||
|
||
class Course(dict): | ||
|
@@ -2064,6 +2083,10 @@ def fetch(self): | |
grade_row_show = 'N' | ||
elif grade_row[7].text in ['公布個人', 'Individual']: | ||
grade_row_show = 'P' | ||
elif len(grade_row[7].xpath('./a')) == 1 and \ | ||
grade_row[7].xpath('./a')[0].text in ['公布全班', 'Everyone']: | ||
# TODO: download everyone's grade from this link | ||
grade_row_show = 'A' | ||
else: | ||
assert False | ||
|
||
|
@@ -2081,7 +2104,7 @@ def fetch(self): | |
else: | ||
assert set(grade.keys()) - set(optional_sub_keys) == set(sub_keys) | ||
assert grade['grade_isranking'] in ['0', '1'] | ||
assert grade['show'] in ['N', 'P'] | ||
assert grade['show'] in ['N', 'P', 'A'] | ||
assert grade['is_changed'] in ['0', '1'] | ||
|
||
grade_item_filename += ' {:08}'.format(int(grade['main_sn'])) | ||
|
@@ -2240,6 +2263,8 @@ def fetch(self): | |
show = s['value_course_grades_show_n'] | ||
elif grade_row_show == 'P': | ||
show = s['value_course_grades_show_p'] | ||
elif grade_row_show == 'A': | ||
show = s['value_course_grades_show_a'] | ||
else: | ||
assert False | ||
|
||
|
@@ -3121,7 +3146,8 @@ def __init__(self, vfs, parent, cell): | |
def fetch(self): | ||
s = self.vfs.strings | ||
|
||
assert not element_get_text(self._cell).strip() | ||
# I have a course which only has it assistants' names in '課程助教' column with pure text | ||
#assert not element_get_text(self._cell).strip() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 同上面所說,不建議用註解的方式來刪除程式。另外以這裡來說,你應該會想要把名字記下來而不是直接忽略。 |
||
for child in self._cell: | ||
assert child.tag == 'a' or child.tag == 'br' | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
請仿照其他檔案,把所有的 `from ... import ...` 排在一起,並照字母排序。由於 `pathvalidate` 不在 Python 標準函式庫中,需要額外安裝才能使用,因此請一併修改 `README.asciidoc`、`configure.ac`、`requirements.txt` 讓使用者知道這件事。