Skip to content

Commit a50e243

Browse files
Add files via upload (#962)
<!-- If this is your first pull request: sign the CLA with this GitHub app: https://cla-assistant.io/renovatebot/renovate --> <!-- Make sure the `Allow edits and access to secrets by maintainers` checkbox is checked on this pull request. --> <!-- Please read https://github.com/renovatebot/renovate/blob/main/.github/contributing.md before you create your pull request.--> ## Changes <!-- Describe what behavior is changed by this PR. --> ## Context <!-- Describe why you're making these changes if it's not already explained in a corresponding issue. --> <!-- If you're closing an existing issue with this pull request, use the keyword Closes #issue_number. --> <!-- If you're referencing an issue with this pull request, put it in a Markdown list like this: - #issue_number. --> ## Documentation (please check one with an [x]) - [ ] I have updated the documentation, or - [ ] No documentation update is required ## How I've tested my work (please select one) I have verified these changes via: - [ ] Code inspection only, or - [ ] Newly added/modified unit tests, or - [ ] No unit tests but ran on a real repository, or - [ ] Both unit tests + ran on a real repository <!-- Do you have any suggestions about this PR template? Edit it here: https://github.com/renovatebot/renovate/edit/main/.github/pull_request_template.md --> <!-- Please do not force push to your PR's branch after you have created your PR, as doing so forces us to review the whole PR again. This makes it harder for us to review your work because we don't know what has changed. --> <!-- PRs will always be squashed by us when we merge your work. Commit as many times as you need in this branch. -->
2 parents cda3ed9 + 6e82620 commit a50e243

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Y Combinator Hacker News web scraper
2+
import requests,lxml
3+
from bs4 import BeautifulSoup
4+
5+
6+
class YCombinator:
    """Snapshot of the Hacker News front page with a few console reports.

    Constructing an instance downloads the page and fills parallel lists,
    one entry per story, all in page order:

    * ``articles``  - the ``<span class="titleline">`` element of each story
    * ``headlines`` - story titles
    * ``links``     - the ``href`` of each title anchor
    * ``points``    - score as an int (0 when the row shows no score, which
      is how job postings appear)
    * ``sublines``  - the text line shown under each title
    * ``comments``  - comment count as an int (0 when none is shown)

    ``hacker_soup`` keeps the parsed page for callers that want more.
    """

    NEWS_URL = "https://news.ycombinator.com/news"

    def __init__(self, url=NEWS_URL, timeout=10):
        """Download and parse the page.

        Args:
            url: Page to scrape; defaults to the Hacker News front page.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the program forever.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-2xx HTTP status.
        """
        response = requests.get(url=url, timeout=timeout)
        # Fail loudly on an error page instead of parsing it as if it were news.
        response.raise_for_status()
        self.hacker_soup = BeautifulSoup(response.text, "lxml")

        self.articles = []
        self.headlines = []
        self.links = []
        self.points = []
        self.sublines = []
        self.comments = []

        # Pair every title with the metadata cell below it. Reading score and
        # subline from the story's own cell keeps all lists aligned without
        # guessing from the headline wording which rows are job postings.
        # NOTE(review): assumes the page emits exactly one td.subtext per
        # titleline span, in the same order - confirm against live markup.
        title_spans = self.hacker_soup.find_all("span", class_="titleline")
        meta_cells = self.hacker_soup.find_all("td", class_="subtext")
        for title_span, meta_cell in zip(title_spans, meta_cells):
            anchor = title_span.find("a", href=True)
            if anchor is None:
                # No usable link: skip the row rather than crash on it.
                continue

            score_span = meta_cell.find("span", class_="score")
            subline_span = meta_cell.find("span", class_="subline")
            # Rows without a subline span (job postings) still carry text in
            # the cell itself, so fall back to that.
            text_source = subline_span if subline_span is not None else meta_cell
            # The page uses non-breaking spaces (\xa0); normalise them.
            subline = text_source.text.strip().replace("\u00a0", " ")

            self.articles.append(title_span)
            self.headlines.append(anchor.text)
            self.links.append(anchor["href"])
            self.points.append(
                self._leading_int(score_span.text) if score_span is not None else 0
            )
            self.sublines.append(subline)
            self.comments.append(self._comment_count(subline))

    @staticmethod
    def _leading_int(text):
        """Return the integer that *text* starts with, or 0 if there is none."""
        words = text.split()
        if words and words[0].isdecimal():
            return int(words[0])
        return 0

    @staticmethod
    def _comment_count(subline):
        """Return the comment count from the last ``|`` field of *subline*.

        Fields such as ``discuss`` or a bare age (``2 hours ago``) carry no
        comment count and yield 0.
        """
        last_field = subline.rsplit("|", 1)[-1].strip()
        if "comment" not in last_field:
            return 0
        return YCombinator._leading_int(last_field)

    def _print_story(self, index, prefix=""):
        """Print headline, link, subline and a blank line for story *index*."""
        print(f"{prefix}{self.headlines[index]}")
        print(self.links[index])
        print(self.sublines[index])
        print()

    def _print_top_by(self, values):
        """Print the first story holding the largest entry of *values*."""
        if not values:
            print("No stories found.")
            return
        self._print_story(values.index(max(values)))

    def show_all(self):
        """Print every scraped story, numbered from 1 in page order."""
        for position in range(len(self.headlines)):
            self._print_story(position, prefix=f"{position + 1}. ")

    def most_points(self):
        """Print the story with the highest score (first one on ties)."""
        self._print_top_by(self.points)

    def most_comments(self):
        """Print the story with the most comments (first one on ties)."""
        self._print_top_by(self.comments)

    def show_first(self):
        """Print the story at the top of the page."""
        if not self.headlines:
            print("No stories found.")
            return
        self._print_story(0)
76+
77+
78+
79+
80+
if __name__ == "__main__":
    # Scrape the page once, then run each report in the original order:
    # full listing, top story, highest score, most discussed.
    scraper = YCombinator()
    for report in (
        scraper.show_all,
        scraper.show_first,
        scraper.most_points,
        scraper.most_comments,
    ):
        report()

0 commit comments

Comments
 (0)