Skip to content

Commit 6e82620

Browse files
Add files via upload
Signed-off-by: Fabiana 🚀 Campanari <[email protected]>
1 parent cda3ed9 commit 6e82620

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Y Combinator Hacker News web scraper
2+
import requests,lxml
3+
from bs4 import BeautifulSoup
4+
5+
6+
class YCombinator:
    """Scrape the Hacker News front page and report on its stories.

    Constructing an instance downloads and parses the page once. The results
    are stored as parallel lists with one entry per story, in page order:

    * ``articles``  - the ``span.titleline`` tags
    * ``headlines`` - title text of each story
    * ``links``     - ``href`` of each story's title link
    * ``points``    - score of each story (0 when the row has no score,
      which is how job postings appear)
    * ``sublines``  - the text under each title (score, author, age, ...)
    * ``comments``  - comment count of each story (0 when none is shown)

    ``hacker_soup`` keeps the parsed page itself.
    """

    def __init__(self):
        """Download the front page and fill the per-story lists.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with an HTTP error status.
        """
        # A timeout keeps a stalled connection from hanging forever, and
        # raise_for_status() stops us from parsing an error page as stories.
        response = requests.get(url="https://news.ycombinator.com/news", timeout=10)
        response.raise_for_status()
        self.hacker_soup = BeautifulSoup(response.text, "lxml")

        self.articles = list(self.hacker_soup.find_all("span", class_="titleline"))
        self.headlines = [article.find("a", href=True).text for article in self.articles]
        self.links = [article.find("a", href=True)["href"] for article in self.articles]

        # Read score and subline from each story's own "subtext" cell, so a
        # row without a score (job postings) yields 0 points in place instead
        # of shifting every later entry. This replaces guessing job rows from
        # the headline text, which missed titles such as "... is hiring".
        # NOTE(review): assumes the page has exactly one td.subtext per
        # story, in the same order as the titles - confirm if markup changes.
        self.points = []
        self.sublines = []
        for cell in self.hacker_soup.find_all("td", class_="subtext"):
            score = cell.find("span", class_="score")
            subline = cell.find("span", class_="subline")
            self.points.append(0 if score is None else self._leading_int(score.text))
            if subline is None:
                # Rows without a subline span: keep the whole cell text.
                self.sublines.append(cell.text.strip("\n"))
            else:
                # The page uses non-breaking spaces (\xa0); normalise them.
                self.sublines.append(subline.text.strip().replace("\u00a0", " "))

        self.comments = [self._comment_count(subline) for subline in self.sublines]

    @staticmethod
    def _leading_int(text):
        """Return the integer that starts *text* ("128 points" -> 128), else 0."""
        tokens = text.split()
        if not tokens:
            return 0
        try:
            return int(tokens[0].replace(",", ""))
        except ValueError:
            return 0

    @staticmethod
    def _comment_count(subline):
        """Return the comment count found in the last "|" field of *subline*.

        Any last field that does not mention comments (for example "discuss"
        or "hide") counts as 0.
        """
        last_field = subline.split("|")[-1].strip()
        if "comment" in last_field:
            return YCombinator._leading_int(last_field)
        return 0

    def _print_article(self, index):
        """Print headline, link and subline of story *index*, then a blank line."""
        print(f"{self.headlines[index]}")
        print(self.links[index])
        print(self.sublines[index])
        print()

    def _print_highest(self, scores):
        """Print the first story with the largest value in *scores*."""
        if not scores:
            # Nothing was scraped; max() on an empty list would raise.
            print("No articles found.")
            return
        self._print_article(scores.index(max(scores)))

    def show_all(self):
        """Print every scraped story, numbered from 1 in page order."""
        rows = zip(self.headlines, self.links, self.sublines)
        for rank, (headline, link, subline) in enumerate(rows, start=1):
            print(f"{rank}. {headline}")
            print(link)
            print(subline)
            print()

    def most_points(self):
        """Print the story with the most points (first one wins ties)."""
        self._print_highest(self.points)

    def most_comments(self):
        """Print the story with the most comments (first one wins ties)."""
        self._print_highest(self.comments)

    def show_first(self):
        """Print the story at the top of the page."""
        if not self.headlines:
            print("No articles found.")
            return
        self._print_article(0)
76+
77+
78+
79+
80+
if __name__ == "__main__":
    # Scrape the front page once, then run each report in turn: the full
    # listing, the top story, the highest score and the most comments.
    scraper = YCombinator()
    for report in (
        scraper.show_all,
        scraper.show_first,
        scraper.most_points,
        scraper.most_comments,
    ):
        report()

0 commit comments

Comments
 (0)