# main.py -- source captured from a GitHub file view (128 lines, 117 loc, 4.64 KB).
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from bs4 import BeautifulSoup
import requests
from googlesearch import search
from collections import Counter
from heapq import nlargest
def search_for_articles(query, num_articles=4):
    """Return search-result URLs for ``query`` as a list.

    Args:
        query: Search string passed to ``googlesearch.search``.
        num_articles: Passed to ``search`` as both ``num`` and ``stop``.

    Returns:
        A list of whatever ``search`` produces (expected to be URL strings),
        or ``[]`` if the search fails. Failures are printed, not raised.
    """
    try:
        # search() may hand back a lazy iterator (the `google` package's
        # googlesearch does). Drain it here so that (a) network errors raised
        # while iterating are caught by this handler, (b) an empty result is
        # falsy for callers, and (c) callers can iterate the result twice.
        return list(search(query, num=num_articles, stop=num_articles))
    except Exception as e:  # best-effort boundary: report and carry on
        print(f"Error occurred during article search: {e}")
        return []
def get_article_text(url, timeout=10):
    """Download ``url`` and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The non-empty paragraph texts joined by single spaces, or ``''`` if
        the page cannot be fetched or parsed. Failures are printed, not
        raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures instead of parsing the error
        # page as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        pieces = (paragraph.get_text().strip() for paragraph in soup.find_all('p'))
        # Join with a space: gluing paragraphs together ("end.Next") hides
        # the sentence boundary from the tokenizers used downstream.
        return ' '.join(piece for piece in pieces if piece)
    except Exception as e:  # best-effort boundary: report and carry on
        print(f"Error occurred while fetching article text from {url}: {e}")
        return ''
def _leading_sentences(sentences, target_length):
    """Return the longest prefix of ``sentences`` within ``target_length`` words."""
    kept = []
    used = 0
    for sentence in sentences:
        size = len(sentence.split())
        if used + size > target_length:
            break
        kept.append(sentence)
        used += size
    return kept


def summarize_article(article_text, target_length=150):
    """Build an extractive summary of ``article_text``.

    Sentences shorter than 30 words are scored by the summed frequency of
    their non-stop-word tokens. The highest-scoring sentences that fit within
    the word budget are kept and emitted in their original order. If no
    sentence can be scored or none fits, the leading sentences within the
    budget are returned instead.

    Args:
        article_text: Plain text to summarize.
        target_length: Maximum number of whitespace-separated words in the
            summary.

    Returns:
        The selected sentences joined by single spaces, or ``''`` for empty
        input or on any failure. Failures are printed, not raised.
    """
    if not article_text or not article_text.strip():
        return ''
    try:
        sentences = sent_tokenize(article_text)
        stop_words = set(stopwords.words('english'))
        # word_tokenize emits punctuation as tokens; isalnum() keeps them
        # from dominating the frequency table.
        word_freq = Counter(
            token
            for token in word_tokenize(article_text.lower())
            if token.isalnum() and token not in stop_words
        )

        # Score by position rather than by sentence text so that repeated
        # sentences are not merged into one entry.
        scores = {}
        for position, sentence in enumerate(sentences):
            if len(sentence.split()) >= 30:
                continue  # overly long sentences are never picked
            # Counter returns 0 for unknown tokens, so no membership test.
            score = sum(word_freq[token] for token in word_tokenize(sentence.lower()))
            if score:
                scores[position] = score

        # Greedily take the best-scoring sentences that still fit the budget.
        chosen = []
        used = 0
        for position in sorted(scores, key=scores.get, reverse=True):
            size = len(sentences[position].split())
            if used + size <= target_length:
                chosen.append(position)
                used += size

        if chosen:
            picked = [sentences[position] for position in sorted(chosen)]
        else:
            picked = _leading_sentences(sentences, target_length)
        return ' '.join(picked)
    except Exception as e:  # best-effort boundary: report and carry on
        print(f"Error occurred while summarizing article: {e}")
        return ''
def get_keywords(article_text, num_keywords=5):
    """Return the most frequent content words of ``article_text``.

    Args:
        article_text: Plain text to analyse; it is lower-cased before
            tokenizing.
        num_keywords: Maximum number of keywords to return.

    Returns:
        Up to ``num_keywords`` tokens ordered from most to least frequent,
        or ``[]`` for empty input or on any failure. Failures are printed,
        not raised.
    """
    if not article_text:
        return []
    try:
        stop_words = set(stopwords.words('english'))
        # word_tokenize emits punctuation ("," "." "'s" ...) as tokens and
        # those are not stop words; without the isalnum() filter they tend
        # to outrank every real word.
        word_freq = Counter(
            token
            for token in word_tokenize(article_text.lower())
            if token.isalnum() and token not in stop_words
        )
        return [word for word, _ in word_freq.most_common(num_keywords)]
    except Exception as e:  # best-effort boundary: report and carry on
        print(f"Error occurred while extracting keywords: {e}")
        return []
def suggest_articles(articles, current_text, num_suggestions=4):
    """Rank ``articles`` by keyword overlap with ``current_text``.

    Each article is fetched with ``get_article_text`` and its keywords are
    compared with the keywords of ``current_text``. The score is the number
    of keywords the two have in common.

    Args:
        articles: Iterable of article URLs.
        current_text: Text whose keywords serve as the reference.
        num_suggestions: Upper bound on the number of URLs returned.

    Returns:
        The best-scoring URLs, highest score first (ties keep input order),
        or ``[]`` on any failure. Failures are printed, not raised.
    """
    try:
        reference = set(get_keywords(current_text))
        scored = []
        for url in articles:
            candidate = set(get_keywords(get_article_text(url)))
            scored.append((url, len(reference & candidate)))
        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
        return [url for url, _ in ranked[:num_suggestions]]
    except Exception as e:
        print(f"Error occurred while suggesting articles: {e}")
        return []
def main():
    """Prompt for a topic, then print summaries, source links and suggestions.

    Downloads the NLTK resources the tokenizers and stop-word list need,
    searches for articles on the entered topic, summarizes every article
    whose text could be fetched, and finally prints the articles ranked by
    keyword overlap with the combined summary. Any unexpected error is
    printed rather than raised.
    """
    try:
        nltk.download('punkt')
        # NLTK 3.8.2+ loads 'punkt_tab' instead of 'punkt' for sentence
        # tokenization; request both so either generation of NLTK works.
        nltk.download('punkt_tab')
        nltk.download('stopwords')

        query = input("Enter the topic to search for: ")
        # Materialize the results: they are iterated twice below (once to
        # summarize, once to rank) and a lazy iterator is always truthy.
        articles = list(search_for_articles(query))
        if not articles:
            print("No relevant articles found.")
            return

        summaries = []
        article_links = []
        for article in articles:
            article_text = get_article_text(article)
            if not article_text:
                continue
            article_summary = summarize_article(article_text)
            # Only list a link when it actually contributed to the summary.
            if article_summary:
                summaries.append(article_summary)
                article_links.append(article)

        if not summaries:
            print("No relevant articles found.")
            return

        summary = '\n'.join(summaries) + '\n'
        print("Summarized Article:\n", summary)
        print("Links to articles used:")
        for link in article_links:
            print(link)
        print("Suggested Articles:")
        for suggested_article in suggest_articles(articles, summary):
            print(suggested_article)
    except Exception as e:  # top-level boundary: report instead of crashing
        print(f"An error occurred: {e}")


# Run the interactive tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()