Skip to content

Commit 2edb48c

Browse files
author
Your Name
committed
[CLEANUP]
1 parent 29cde44 commit 2edb48c

File tree

6 files changed

+198
-1322
lines changed

6 files changed

+198
-1322
lines changed

agent_workspace/Medical-Summarization-Agent_state.json

Lines changed: 8 additions & 8 deletions
Large diffs are not rendered by default.

example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from medinsight.agent import MedInsightPro
22

33
# Initialize the MedInsight Pro agent
4-
agent = MedInsightPro()
4+
agent = MedInsightPro(max_articles=4)
55

66
# Run a query to summarize the latest medical research on COVID-19 treatments
77
output = agent.run("COVID-19 treatments")

medinsight/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from medinsight.agent import MedInsightPro
2+
from medinsight.pub_med import query_pubmed_with_abstract
23

3-
__all__ = ["MedInsightPro"]
4+
__all__ = ["MedInsightPro", "query_pubmed_with_abstract"]

medinsight/agent.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from swarms import OpenAIChat
99
from swarms import Agent
1010
from dotenv import load_dotenv
11+
from medinsight.pub_med import query_pubmed_with_abstract
1112

1213
load_dotenv()
1314

@@ -81,23 +82,25 @@ def __init__(
8182
semantic_scholar_api_key: str = None,
8283
system_prompt: str = med_sys_prompt,
8384
agent: Agent = agent,
85+
max_articles: int = 10,
8486
):
8587
self.pubmed_api_key = pubmed_api_key
8688
self.semantic_scholar_api_key = semantic_scholar_api_key
8789
self.system_prompt = system_prompt
8890
self.agent = agent
91+
self.max_articles = max_articles
8992

9093
# Initialize the metadata history log
9194
self.metadata_log: List[MedInsightMetadata] = []
9295

9396
# Function to access PubMed data
94-
def fetch_pubmed_data(self, query, max_results=10):
97+
def fetch_pubmed_data(self, query: str):
9598
logger.info(f"Fetching data from PubMed for query: {query}")
9699
url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
97100
params = {
98101
"db": "pubmed",
99102
"term": query,
100-
"retmax": max_results,
103+
"retmax": self.max_articles,
101104
"api_key": self.pubmed_api_key,
102105
"retmode": "json",
103106
}
@@ -116,11 +119,14 @@ def fetch_pubmed_data(self, query, max_results=10):
116119
fetch_response = requests.get(
117120
fetch_url, params=fetch_params
118121
)
119-
return fetch_response.json()
122+
123+
return json.dumps(fetch_response.json())
120124
return {}
121125

122126
# Function to access Semantic Scholar data
123-
def fetch_semantic_scholar_data(self, query, max_results=10):
127+
def fetch_semantic_scholar_data(
128+
self, query: str, max_results: int = 10
129+
):
124130
logger.info(
125131
f"Fetching data from Semantic Scholar for query: {query}"
126132
)
@@ -134,13 +140,15 @@ def fetch_semantic_scholar_data(self, query, max_results=10):
134140
def run(self, task: str):
135141
logger.info(f"Running MedInsightPro agent for task: {task}")
136142
status = "success"
137-
pubmed_data, semantic_scholar_data = {}, {}
143+
# pubmed_data, semantic_scholar_data = {}, {}
138144
combined_summary = ""
139145

140146
try:
141147
# Fetch data from PubMed
142148
if self.pubmed_api_key:
143-
pubmed_data = self.fetch_pubmed_data(task)
149+
pubmed_data = query_pubmed_with_abstract(
150+
query=task, max_articles=self.max_articles
151+
)
144152
logger.info(f"PubMed data: {pubmed_data}")
145153

146154
# Fetch data from Semantic Scholar
@@ -150,20 +158,26 @@ def run(self, task: str):
150158
)
151159

152160
# Summarize data with GPT-4
153-
combined_summary_input = f"PubMed Data: {pubmed_data}\nSemantic Scholar Data: {semantic_scholar_data}"
161+
# combined_summary_input = f"PubMed Data: {pubmed_data}\nSemantic Scholar Data: {semantic_scholar_data}"
162+
if pubmed_data:
163+
combined_summary_input = pubmed_data
164+
else:
165+
combined_summary_input = semantic_scholar_data
166+
154167
combined_summary = self.agent.run(combined_summary_input)
155168
logger.info(f"Summarization completed for task: {task}")
156169
except Exception as e:
157170
logger.error(
158171
f"Error during processing task: {task}. Error: {e}"
159172
)
160173
status = "failure"
174+
raise e
161175

162176
# Log metadata
163177
metadata = MedInsightMetadata(
164178
query=task,
165-
pubmed_results=pubmed_data,
166-
semantic_scholar_results=semantic_scholar_data,
179+
# pubmed_results=pubmed_data,
180+
# semantic_scholar_results=semantic_scholar_data,
167181
combined_summary=combined_summary,
168182
status=status,
169183
)

medinsight/pub_med.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import os
2+
from Bio import Entrez
3+
from loguru import logger
4+
from dotenv import load_dotenv
5+
from typing import Optional
6+
7+
# Load environment variables from the .env file before reading any of them.
load_dotenv()

# Address assigned to Entrez.email below; read from the environment.
ENTREZ_EMAIL = os.getenv("ENTREZ_EMAIL")

if ENTREZ_EMAIL:
    # Set email for Entrez queries.
    Entrez.email = ENTREZ_EMAIL
else:
    # An unset or empty address aborts the import of this module.
    raise EnvironmentError(
        "ENTREZ_EMAIL is not set in the environment. Please set it in your .env file."
    )

# Rotating log file for this module's log output (rotation="1 MB").
logger.add("pubmed_query.log", rotation="1 MB")
21+
22+
23+
def query_pubmed_with_abstract(
    query: str,
    max_articles: int = 10,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    journal: Optional[str] = None,
    author: Optional[str] = None,
):
    """
    Query PubMed and return the matching articles as one formatted string.

    The search term is built from ``query`` plus the optional filters, the
    matching IDs are looked up with ``Entrez.esearch`` and the article
    records are then fetched with ``Entrez.efetch``.

    Args:
        query (str): The search query.
        max_articles (int): Maximum number of articles to retrieve
            (passed to esearch as ``retmax``).
        start_date (Optional[str]): Start date for filtering (e.g., "2020/01/01").
            Only applied when ``end_date`` is given as well.
        end_date (Optional[str]): End date for filtering (e.g., "2023/12/31").
            Only applied when ``start_date`` is given as well.
        journal (Optional[str]): Filter by journal name.
        author (Optional[str]): Filter by author name.

    Returns:
        str: On success, one text section per article (Title, PMID, Authors,
        Source, Publication Date, Abstract), each followed by a line of 40
        dashes and a blank line.
        list: An empty list when the search matches nothing or when any
        exception occurs (the exception is logged, not re-raised). Both
        outcomes are falsy, so callers can test the result with ``if``.
    """
    try:
        # Build the search query with optional filters
        search_query = query
        if journal:
            search_query += f' AND "{journal}"[Journal]'
        if author:
            search_query += f" AND {author}[Author]"
        if start_date and end_date:
            search_query += f" AND ({start_date}[Date - Publication] : {end_date}[Date - Publication])"
        elif start_date or end_date:
            # A one-sided range is not applied; say so instead of silently
            # dropping the filter.
            logger.warning(
                "Both start_date and end_date are required for date filtering; ignoring the date filter."
            )

        logger.info(f"Querying PubMed with search: {search_query}")

        # Fetch search results from PubMed
        handle = Entrez.esearch(
            db="pubmed", term=search_query, retmax=max_articles
        )
        try:
            record = Entrez.read(handle)
        finally:
            # Close the handle even when parsing the response fails.
            handle.close()

        id_list = record["IdList"]
        logger.info(
            f"Found {len(id_list)} articles for query: {search_query}"
        )

        if not id_list:
            logger.warning("No articles found.")
            return []

        # Fetch article details (XML format)
        handle = Entrez.efetch(
            db="pubmed",
            id=",".join(id_list),
            rettype="xml",
            retmode="text",
        )
        try:
            articles = Entrez.read(handle)
        finally:
            # Close the handle even when parsing the response fails.
            handle.close()

        article_list = []

        # Extract information from articles; every missing field falls back
        # to "N/A" (or "Unknown Author") rather than raising.
        for article in articles["PubmedArticle"]:
            medline_citation = article.get("MedlineCitation", {})
            article_metadata = medline_citation.get("Article", {})
            journal_info = article_metadata.get("Journal", {})

            # AbstractText is joined part by part with single spaces.
            abstract_parts = article_metadata.get("Abstract", {}).get(
                "AbstractText", []
            )

            article_list.append(
                {
                    "Title": article_metadata.get("ArticleTitle", "N/A"),
                    "PMID": medline_citation.get("PMID", "N/A"),
                    "Authors": [
                        (
                            f"{entry['LastName']} {entry.get('Initials', '')}"
                            if "LastName" in entry
                            else "Unknown Author"
                        )
                        for entry in article_metadata.get("AuthorList", [])
                    ],
                    "Source": journal_info.get("Title", "N/A"),
                    "PublicationDate": journal_info.get(
                        "JournalIssue", {}
                    ).get("PubDate", "N/A"),
                    "Abstract": (
                        " ".join(str(part) for part in abstract_parts)
                        if abstract_parts
                        else "N/A"
                    ),
                }
            )

        logger.info(
            f"Successfully retrieved {len(article_list)} articles."
        )

        # Output the results as a single string: one section per article,
        # each terminated by a 40-dash separator line and a blank line.
        separator = "-" * 40 + "\n"
        return "".join(
            f"Title: {item['Title']}\n"
            f"PMID: {item['PMID']}\n"
            f"Authors: {item['Authors']}\n"
            f"Source: {item['Source']}\n"
            f"Publication Date: {item['PublicationDate']}\n"
            f"Abstract: {item['Abstract']}\n"
            f"{separator}\n"
            for item in article_list
        )
    except Exception as e:
        # Best-effort contract: log the failure with its traceback and
        # return the empty result instead of propagating.
        logger.exception(
            f"An error occurred during the PubMed query: {e}"
        )
        return []
152+
153+
154+
# # Example usage with more search features
155+
# articles = query_pubmed_with_abstract(
156+
# query="deep learning in medical imaging",
157+
# max_articles=20,
158+
# )
159+
160+
161+
# print(articles)

0 commit comments

Comments
 (0)