Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some issues to work on my ceiba #16

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 134 additions & 108 deletions ceiba_dl/vfs.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,8 @@ def _create_course_list_map(self):
course_list_page = self.vfs.request.web('/student/index.php')
course_list_rows_all = course_list_page.xpath('//table[1]/tr')
course_list_rows = course_list_rows_all[1:]
# Add 旁聽 courses
course_list_rows += course_list_page.xpath('//table[2]/tr')[1:]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如同我在 #13 (comment) 所說,這樣的修改對不曾旁聽過任何課程的學生會有問題。

course_list_header_row = course_list_rows_all[0]

assert len(course_list_header_row) == 8
Expand Down Expand Up @@ -641,144 +643,160 @@ def fetch(self):
assert len(student_page.xpath('//table')) > 0

student_rows = student_page.xpath('//div[@id="sect_cont"]/table/tr')
assert len(student_rows) == 12
# NTNU and NTUST students may have less rows
#assert len(student_rows) == 12
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

改成 `assert len(student_rows) <= 12`?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ryucc Added

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

我其實有點好奇你遇到的是什麼狀況,不知道能不能貼個截圖(去除個資)當作參考,同時也讓之後看到這個 pull request 的人能更了解這個問題?我目前看過的 ntnu_* 和 ntust_* 帳號都和普通的臺大帳號一樣有 12 列,不知道是不是只有特定帳號有這個情形。


student_file = JSONFile(self.vfs, self)
student_filename = '{}.json'.format(self._account, sn)
self.add(student_filename, student_file)

# 身份
student_role = row_get_value(student_rows[0],
['身份', 'Role'], {}, free_form=True).strip()
student_file.add(s['attr_students_role'], student_role, student_path)
if len(student_rows) > 0:
student_role = row_get_value(student_rows[0],
['身份', 'Role'], {}, free_form=True).strip()
student_file.add(s['attr_students_role'], student_role, student_path)

# 照片
student_photo_element = row_get_value(student_rows[1],
['照片', 'Photo'], {}, free_form=True, return_object=True)
if len(student_photo_element) > 0:
assert len(student_photo_element) == 1
assert student_photo_element[0].tag == 'img'
assert student_photo_element[0].get('src')
student_photo = student_photo_element[0].get('src') \
.rsplit('/', maxsplit=1)[1]
student_photo_path = url_to_path_and_args(
student_photo_element[0].get('src'), no_query_string=True)[0]
self.add(student_photo, DownloadFile(self.vfs, self,
student_photo_path))
else:
student_photo = ''
student_file.add(s['attr_students_photo'], student_photo, student_path)
if len(student_rows) > 1:
student_photo_element = row_get_value(student_rows[1],
['照片', 'Photo'], {}, free_form=True, return_object=True)
if len(student_photo_element) > 0:
assert len(student_photo_element) == 1
assert student_photo_element[0].tag == 'img'
assert student_photo_element[0].get('src')
student_photo = student_photo_element[0].get('src') \
.rsplit('/', maxsplit=1)[1]
student_photo_path = url_to_path_and_args(
student_photo_element[0].get('src'), no_query_string=True)[0]
self.add(student_photo, DownloadFile(self.vfs, self,
student_photo_path))
else:
student_photo = ''
student_file.add(s['attr_students_photo'], student_photo, student_path)

# 姓名
student_name = row_get_value(student_rows[2],
['姓名', 'Name'], {}, free_form=True).strip()
student_file.add(s['attr_students_name'], student_name, student_path)
if len(student_rows) > 2:
student_name = row_get_value(student_rows[2],
['姓名', 'Name'], {}, free_form=True).strip()
student_file.add(s['attr_students_name'], student_name, student_path)

# 英文姓名
student_english_name = row_get_value(student_rows[3],
['英文姓名', 'English Name'], {}, free_form=True).strip()
student_file.add(s['attr_students_english_name'],
student_english_name, student_path)
if len(student_rows) > 3:
student_english_name = row_get_value(student_rows[3],
['英文姓名', 'English Name'], {}, free_form=True).strip()
student_file.add(s['attr_students_english_name'],
student_english_name, student_path)

# 匿名代號
student_screen_name = row_get_value(student_rows[4],
['匿名代號', 'Screen Name'], {}, free_form=True).strip()
student_file.add(s['attr_students_screen_name'],
student_screen_name, student_path)
if len(student_rows) > 4:
student_screen_name = row_get_value(student_rows[4],
['匿名代號', 'Screen Name'], {}, free_form=True).strip()
student_file.add(s['attr_students_screen_name'],
student_screen_name, student_path)

# 學校系級
student_school_year = row_get_value(student_rows[5],
['系級', 'Major & Year', '學校系級', 'School & Dept'],
{}, free_form=True).strip()
student_file.add(s['attr_students_school_year'],
student_school_year, student_path)
if len(student_rows) > 5:
student_school_year = row_get_value(student_rows[5],
['系級', 'Major & Year', '學校系級', 'School & Dept'],
{}, free_form=True).strip()
student_file.add(s['attr_students_school_year'],
student_school_year, student_path)

# 個人首頁網址
student_homepage_url_element = row_get_value(student_rows[6],
['個人首頁網址', 'Homepage URL'], {}, free_form=True, return_object=True)
assert len(student_homepage_url_element) == 1
assert student_homepage_url_element[0].tag == 'a'
assert student_homepage_url_element[0].get('href')
student_homepage_url = element_get_text(student_homepage_url_element[0])
assert student_homepage_url_element[0].get('href') == \
if len(student_rows) > 6:
student_homepage_url_element = row_get_value(student_rows[6],
['個人首頁網址', 'Homepage URL'], {}, free_form=True, return_object=True)
assert len(student_homepage_url_element) == 1
assert student_homepage_url_element[0].tag == 'a'
assert student_homepage_url_element[0].get('href')
student_homepage_url = element_get_text(student_homepage_url_element[0])
# Not sure what this assertion is for, but the program works fine without it
"""
assert student_homepage_url_element[0].get('href') == \
student_homepage_url or \
student_homepage_url_element[0].get('href') == \
'http://' + student_homepage_url
student_file.add(s['attr_students_homepage_url'],
student_homepage_url, student_path)
"""
student_file.add(s['attr_students_homepage_url'],
student_homepage_url, student_path)

# 電子郵件
student_email_address_element = row_get_value(student_rows[7],
['電子郵件', 'Email Address'], {}, free_form=True, return_object=True)
assert len(student_email_address_element) == 1
assert student_email_address_element[0].tag == 'a'
assert student_email_address_element[0].get('href')
student_email_address = element_get_text(student_email_address_element[0])
if len(student_email_address_element[0]) == 0:
if student_email_address.find('"') < 0:
assert student_email_address_element[0].get('href') == \
'mailto:' + student_email_address
else:
self.vfs.logger.warning('學號 {} 的個人頁面電子郵件欄位有多餘的標籤' \
.format(self._account))
self.vfs.logger.warning('這很有可能是 CEIBA 沒有跳脫特殊字元所造成')
student_email_address_href = student_email_address_element[0].get('href')
assert student_email_address_href.startswith('mailto:')
if student_email_address_href.find('<') >= 7 and \
student_email_address_href.find('>') >= 7:
student_email_address = student_email_address_href[7:]
if len(student_rows) > 7:
student_email_address_element = row_get_value(student_rows[7],
['電子郵件', 'Email Address'], {}, free_form=True, return_object=True)
assert len(student_email_address_element) == 1
assert student_email_address_element[0].tag == 'a'
assert student_email_address_element[0].get('href')
student_email_address = element_get_text(student_email_address_element[0])
if len(student_email_address_element[0]) == 0:
if student_email_address.find('"') < 0:
assert student_email_address_element[0].get('href') == \
'mailto:' + student_email_address
else:
assert student_email_address.find('"') >= 0
student_file.add(s['attr_students_email_address'],
student_email_address, student_path)
self.vfs.logger.warning('學號 {} 的個人頁面電子郵件欄位有多餘的標籤' \
.format(self._account))
self.vfs.logger.warning('這很有可能是 CEIBA 沒有跳脫特殊字元所造成')
student_email_address_href = student_email_address_element[0].get('href')
assert student_email_address_href.startswith('mailto:')
if student_email_address_href.find('<') >= 7 and \
student_email_address_href.find('>') >= 7:
student_email_address = student_email_address_href[7:]
else:
assert student_email_address.find('"') >= 0
student_file.add(s['attr_students_email_address'],
student_email_address, student_path)

# 常用電子郵件
student_frequently_used_email_element = row_get_value(student_rows[8],
['常用電子郵件', 'Frequently Used Email'],
{}, free_form=True, return_object=True)
assert len(student_frequently_used_email_element) == 1
assert student_frequently_used_email_element[0].tag == 'a'
assert student_frequently_used_email_element[0].get('href')
student_frequently_used_email = element_get_text(
student_frequently_used_email_element[0])
student_frequently_used_email_from_href = \
student_frequently_used_email_element[0].get('href')

# CEIBA 不會跳脫 < 和 > 符號,如果使用者填寫的電子郵件地址包含這個符號
# 會使透過 .text 拿到的資料不正確
if student_frequently_used_email_from_href.find('<') >= 0 and \
student_frequently_used_email_from_href.find('>') >= 0:
assert student_frequently_used_email_from_href.startswith('mailto:')
student_frequently_used_email = \
student_frequently_used_email_from_href[7:]
else:
assert student_frequently_used_email_from_href == \
'mailto:' + student_frequently_used_email
if len(student_rows) > 8:
student_frequently_used_email_element = row_get_value(student_rows[8],
['常用電子郵件', 'Frequently Used Email'],
{}, free_form=True, return_object=True)
assert len(student_frequently_used_email_element) == 1
assert student_frequently_used_email_element[0].tag == 'a'
assert student_frequently_used_email_element[0].get('href')
student_frequently_used_email = element_get_text(
student_frequently_used_email_element[0])
student_frequently_used_email_from_href = \
student_frequently_used_email_element[0].get('href')

# CEIBA 不會跳脫 < 和 > 符號,如果使用者填寫的電子郵件地址包含這個符號
# 會使透過 .text 拿到的資料不正確
if student_frequently_used_email_from_href.find('<') >= 0 and \
student_frequently_used_email_from_href.find('>') >= 0:
assert student_frequently_used_email_from_href.startswith('mailto:')
student_frequently_used_email = \
student_frequently_used_email_from_href[7:]
else:
assert student_frequently_used_email_from_href == \
'mailto:' + student_frequently_used_email

student_file.add(s['attr_students_frequently_used_email'],
student_frequently_used_email, student_path)
student_file.add(s['attr_students_frequently_used_email'],
student_frequently_used_email, student_path)

# 聯絡電話
student_phone = row_get_value(student_rows[9],
['聯絡電話', 'Phone'], {}, free_form=True).strip()
student_file.add(s['attr_students_phone'], student_phone, student_path)
if len(student_rows) > 9:
student_phone = row_get_value(student_rows[9],
['聯絡電話', 'Phone'], {}, free_form=True).strip()
student_file.add(s['attr_students_phone'], student_phone, student_path)

# 聯絡地址
student_address = row_get_value(student_rows[10],
['聯絡地址', 'Address'], {}, free_form=True).strip()
student_file.add(s['attr_students_address'],
student_address, student_path)
if len(student_rows) > 10:
student_address = row_get_value(student_rows[10],
['聯絡地址', 'Address'], {}, free_form=True).strip()
student_file.add(s['attr_students_address'],
student_address, student_path)

# 更多的個人資訊
student_more_personal_information_element = row_get_value(student_rows[11],
['更多的個人資訊', 'More Personal Information'],
{}, free_form=True, return_object=True)

# 使用者可以自己在這個欄位塞各種標籤……
student_more_personal_information = ''.join(
student_more_personal_information_element.itertext())
student_file.add(s['attr_students_more_personal_information'],
student_more_personal_information, student_path)
if len(student_rows) > 11:
student_more_personal_information_element = row_get_value(student_rows[11],
['更多的個人資訊', 'More Personal Information'],
{}, free_form=True, return_object=True)

# 使用者可以自己在這個欄位塞各種標籤……
student_more_personal_information = ''.join(
student_more_personal_information_element.itertext())
student_file.add(s['attr_students_more_personal_information'],
student_more_personal_information, student_path)

student_file.finish()
self.ready = True
Expand All @@ -795,7 +813,8 @@ def fetch(self):
assert set(result.keys()) == set(result_keys)

days = '一二三四五六日'
slots = '01234@56789XABCD' # 節次 (possible time slots: See https://nol.ntu.edu.tw/nol/guest/index.php for more information)
#slots = '01234@56789XABCD' # 節次 (possible time slots: See https://nol.ntu.edu.tw/nol/guest/index.php for more information)
slots = '01234@56789XABCD ' # For me, 暑期實習 has slot with space character
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

除非舊版的程式碼未來很可能會恢復使用,不然請不要用註解的方式刪除或修改程式碼。要刪就直接刪掉,要改也直接改掉,想看舊版的人可以自己去翻 git log。以這裡來說,你可以把舊的註解移到上面一行,再加上你這裡寫的新註解。註解中的「For me」看起來不太必要,其他人可能不看 git log 大概也不知道這裡的「me」是誰。

courses = dict()

class Course(dict):
Expand Down Expand Up @@ -2064,6 +2083,10 @@ def fetch(self):
grade_row_show = 'N'
elif grade_row[7].text in ['公布個人', 'Individual']:
grade_row_show = 'P'
elif len(grade_row[7].xpath('./a')) == 1 and \
grade_row[7].xpath('./a')[0].text in ['公布全班', 'Everyone']:
# TODO: download everyone's grade from this link
grade_row_show = 'A'
else:
assert False

Expand All @@ -2081,7 +2104,7 @@ def fetch(self):
else:
assert set(grade.keys()) - set(optional_sub_keys) == set(sub_keys)
assert grade['grade_isranking'] in ['0', '1']
assert grade['show'] in ['N', 'P']
assert grade['show'] in ['N', 'P', 'A']
assert grade['is_changed'] in ['0', '1']

grade_item_filename += ' {:08}'.format(int(grade['main_sn']))
Expand Down Expand Up @@ -2240,6 +2263,8 @@ def fetch(self):
show = s['value_course_grades_show_n']
elif grade_row_show == 'P':
show = s['value_course_grades_show_p']
elif grade_row_show == 'A':
show = s['value_course_grades_show_a']
else:
assert False

Expand Down Expand Up @@ -3121,7 +3146,8 @@ def __init__(self, vfs, parent, cell):
def fetch(self):
s = self.vfs.strings

assert not element_get_text(self._cell).strip()
# I have a course whose '課程助教' column contains only its assistants' names as plain text
#assert not element_get_text(self._cell).strip()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

同上面所說,不建議用註解的方式來刪除程式。另外以這裡來說,你應該會想要把名字記下來而不是直接忽略。

for child in self._cell:
assert child.tag == 'a' or child.tag == 'br'

Expand Down