Skip to content

Commit 98fc3db

Browse files
Add files via upload (#960)
<!-- If this is your first pull request: sign the CLA with this GitHub app: https://cla-assistant.io/renovatebot/renovate --> <!-- Make sure the `Allow edits and access to secrets by maintainers` checkbox is checked on this pull request. --> <!-- Please read https://github.com/renovatebot/renovate/blob/main/.github/contributing.md before you create your pull request.--> ## Changes <!-- Describe what behavior is changed by this PR. --> ## Context <!-- Describe why you're making these changes if it's not already explained in a corresponding issue. --> <!-- If you're closing an existing issue with this pull request, use the keyword Closes #issue_number. --> <!-- If you're referencing an issue with this pull request, put it in a Markdown list like this: - #issue_number. --> ## Documentation (please check one with an [x]) - [ ] I have updated the documentation, or - [ ] No documentation update is required ## How I've tested my work (please select one) I have verified these changes via: - [ ] Code inspection only, or - [ ] Newly added/modified unit tests, or - [ ] No unit tests but ran on a real repository, or - [ ] Both unit tests + ran on a real repository <!-- Do you have any suggestions about this PR template? Edit it here: https://github.com/renovatebot/renovate/edit/main/.github/pull_request_template.md --> <!-- Please do not force push to your PR's branch after you have created your PR, as doing so forces us to review the whole PR again. This makes it harder for us to review your work because we don't know what has changed. --> <!-- PRs will always be squashed by us when we merge your work. Commit as many times as you need in this branch. -->
2 parents ca7caa7 + 2725623 commit 98fc3db

File tree

1 file changed

+197
-0
lines changed

1 file changed

+197
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# Downloads CAIE past papers (past exams) from pastpapers.co so that students can get every paper they need for exam preparation.
2+
3+
import requests,lxml,os
4+
from bs4 import BeautifulSoup
5+
6+
class DownloadPapers:
    """Download CAIE past papers from the pastpapers.co directory listing.

    Files are written relative to the current working directory as
    ``<qualification>/<subject>/<session>/<paper>``.  All directories are
    created with ``exist_ok=True`` so an interrupted run can be restarted
    without failing on folders that already exist.
    """

    # Root of the listing; every scraped ``href`` is appended to this prefix.
    BASE_URL = "https://pastpapers.co/cie/"
    # Seconds to wait on each HTTP request before giving up instead of
    # blocking forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # Listing pages of the four supported qualifications.
        self.websites = [
            "https://pastpapers.co/cie/?dir=A-Level",
            "https://pastpapers.co/cie/?dir=IGCSE",
            "https://pastpapers.co/cie/?dir=O-Level",
            "https://pastpapers.co/cie/?dir=Pre-U",
        ]
        # Folder names "2000" .. "2024": a folder with one of these names is
        # treated as a year that contains a further level of session folders.
        self.years = [str(year) for year in range(2000, 2025)]

    @staticmethod
    def _decode(text: str) -> str:
        """Turn the ``%20`` and ``%26`` URL escapes back into `` `` and ``&``."""
        return text.replace("%20", " ").replace("%26", "&")

    @staticmethod
    def _canonical_qualification(name: str) -> str:
        """Return *name* in the casing used in the site URLs.

        ``igcse`` becomes ``IGCSE``; everything else is title-cased
        (``o-level`` -> ``O-Level``).
        """
        return name.upper() if name.lower() == "igcse" else name.title()

    def _links(self, url: str, exclude=()) -> list:
        """Return absolute URLs of every ``a.clearfix`` link found on *url*.

        Args:
            url: Listing page to fetch.
            exclude: Collection of raw ``href`` values to leave out.
        """
        excluded = set(exclude)
        page = requests.get(url=url, timeout=self.REQUEST_TIMEOUT).text
        soup = BeautifulSoup(page, "lxml")
        return [
            f"{self.BASE_URL}{anchor['href']}"
            for anchor in soup.find_all(name="a", class_="clearfix")
            if anchor["href"] not in excluded
        ]

    def _save_paper(self, folder: str, paper_url: str, paper_name: str) -> None:
        """Download *paper_url* and store it as ``<folder>/<paper_name>``."""
        target = f"{folder}/{paper_name}"
        # Fetch before opening the file so a failed request does not leave an
        # empty file behind.
        content = requests.get(url=paper_url, timeout=self.REQUEST_TIMEOUT).content
        with open(target, "wb") as f:
            f.write(content)
        print(f"{target} was downloaded")

    def _download_sessions(self, qualification: str, subject_name: str, session_urls: list) -> None:
        """Download every paper reachable from the session pages of a subject.

        Args:
            qualification: Top-level output folder.
            subject_name: Sub-folder of *qualification* for this subject.
            session_urls: Listing pages found on the subject page.
        """
        subject_dir = f"{qualification}/{subject_name}"

        for session_url in session_urls:
            # The last link on a listing page is always dropped; presumably it
            # is not a paper -- TODO confirm against the live site.
            entries = self._links(session_url)[:-1]
            session = self._decode(session_url.split("/")[-1])

            if session not in self.years:
                # The folder holds the papers directly.  Files keep their raw
                # URL name here, matching previously downloaded trees.
                folder = f"{subject_dir}/{session}"
                os.makedirs(folder, exist_ok=True)
                for paper_url in entries:
                    print(paper_url)
                    self._save_paper(folder, paper_url, paper_url.split("/")[-1])
                continue

            # A year folder: its non-PDF entries are nested session folders.
            nested_urls = [entry.replace("%20", " ") for entry in entries if "pdf" not in entry]
            for nested_url in nested_urls:
                nested_papers = self._links(nested_url)[:-1]
                print(nested_papers)
                # Nested sessions are stored directly under the subject folder,
                # not below the year.
                nested_session = nested_url.split("/")[-1].replace("%20", " ")
                folder = f"{subject_dir}/{nested_session}"
                os.makedirs(folder, exist_ok=True)
                for paper_url in nested_papers:
                    self._save_paper(folder, paper_url, self._decode(paper_url.split("/")[-1]))

    def _download_qualification_page(self, website: str, qualification: str, exclude) -> None:
        """Download every subject listed on the qualification page *website*.

        Args:
            website: Listing page of the qualification.
            qualification: Name of the output folder for this qualification.
            exclude: ``href`` values to ignore on each subject page.
        """
        os.makedirs(qualification, exist_ok=True)

        for topic_url in self._links(website, exclude=(self.BASE_URL,)):
            subject_name = self._decode(topic_url.split("/")[-1])
            os.makedirs(f"{qualification}/{subject_name}", exist_ok=True)
            session_urls = self._links(topic_url, exclude=exclude)
            print(session_urls)
            self._download_sessions(qualification, subject_name, session_urls)

    def download_all(self) -> None:
        """Download the past papers of all qualifications in ``self.websites``."""
        for website in self.websites:
            qualification = self._decode(website.split("=")[-1])
            self._download_qualification_page(website, qualification, self.websites)

    def show_topics(self) -> None:
        """Print each qualification name followed by the list of its subject URLs."""
        for website in self.websites:
            print(website.split("=")[-1])
            print(self._links(website, exclude=(self.BASE_URL,)))

    def download_subject(self, qualification: str, subject_name: str) -> None:
        """Download all past papers of one subject.

        Args:
            qualification: Qualification name in any letter case, e.g.
                ``"igcse"`` or ``"o-level"``.
            subject_name: The end of the subject URL on pastpapers.co, copied
                exactly (still URL-encoded).  For
                ``https://pastpapers.co/cie/?dir=IGCSE/Information%20and%20Communication%20Technology%20%289-1%29%20%280983%29``
                pass ``"Information%20and%20Communication%20Technology%20%289-1%29%20%280983%29"``.
                The value is also used unchanged as the output folder name.

        Example:
            dp = DownloadPapers()
            dp.download_subject("igcse", "Information%20and%20Communication%20Technology%20%289-1%29%20%280983%29")
        """
        qualification = self._canonical_qualification(qualification)
        website = f"{self.BASE_URL}?dir={qualification}"
        link = f"{website}/{subject_name}"
        session_urls = self._links(link, exclude=(website,))

        # Getting exactly the four qualification links back is treated as a
        # sign that the requested subject page does not exist.
        if session_urls == self.websites:
            print("The qualification or the subject name is wrong")
            return

        os.makedirs(f"{qualification}/{subject_name}", exist_ok=True)
        self._download_sessions(qualification, subject_name, session_urls)

    def download_qualification(self, name: str) -> None:
        """Download every subject of a single qualification.

        Args:
            name: One of ``a-level``, ``igcse``, ``o-level`` or ``pre-u``;
                letter case is ignored.  Any other value only prints an
                error message.
        """
        if name.lower() not in ("a-level", "igcse", "o-level", "pre-u"):
            print("The qualification name is wrong")
            return

        qualification = self._canonical_qualification(name)
        website = f"{self.BASE_URL}?dir={qualification}"
        self._download_qualification_page(website, qualification, (website,))
194+
195+
if __name__ == "__main__":
    # Script entry point: build the downloader. Nothing is fetched until one
    # of its download methods is called, e.g. the sample call below.
    dp = DownloadPapers()
    # dp.download_subject("igcse", "Information%20and%20Communication%20Technology%20%289-1%29%20%280983%29")

0 commit comments

Comments
 (0)