
Commit

[CLEANUP]
Your Name committed Sep 11, 2024
1 parent 29cde44 commit 2edb48c
Showing 6 changed files with 198 additions and 1,322 deletions.
16 changes: 8 additions & 8 deletions agent_workspace/Medical-Summarization-Agent_state.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion example.py
@@ -1,7 +1,7 @@
from medinsight.agent import MedInsightPro

# Initialize the MedInsight Pro agent
-agent = MedInsightPro()
+agent = MedInsightPro(max_articles=4)

# Run a query to summarize the latest medical research on COVID-19 treatments
output = agent.run("COVID-19 treatments")
3 changes: 2 additions & 1 deletion medinsight/__init__.py
@@ -1,3 +1,4 @@
from medinsight.agent import MedInsightPro
+from medinsight.pub_med import query_pubmed_with_abstract

__all__ = ["MedInsightPro"]
__all__ = ["MedInsightPro", "query_pubmed_with_abstract"]
32 changes: 23 additions & 9 deletions medinsight/agent.py
@@ -8,6 +8,7 @@
from swarms import OpenAIChat
from swarms import Agent
from dotenv import load_dotenv
+from medinsight.pub_med import query_pubmed_with_abstract

load_dotenv()

@@ -81,23 +82,25 @@ def __init__(
semantic_scholar_api_key: str = None,
system_prompt: str = med_sys_prompt,
agent: Agent = agent,
+max_articles: int = 10,
):
self.pubmed_api_key = pubmed_api_key
self.semantic_scholar_api_key = semantic_scholar_api_key
self.system_prompt = system_prompt
self.agent = agent
+self.max_articles = max_articles

# Initialize the metadata history log
self.metadata_log: List[MedInsightMetadata] = []

# Function to access PubMed data
-def fetch_pubmed_data(self, query, max_results=10):
+def fetch_pubmed_data(self, query: str):
logger.info(f"Fetching data from PubMed for query: {query}")
url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
params = {
"db": "pubmed",
"term": query,
"retmax": max_results,
"retmax": self.max_articles,
"api_key": self.pubmed_api_key,
"retmode": "json",
}
@@ -116,11 +119,14 @@ def fetch_pubmed_data(self, query, max_results=10):
fetch_response = requests.get(
fetch_url, params=fetch_params
)
-return fetch_response.json()

+return json.dumps(fetch_response.json())
+return {}

# Function to access Semantic Scholar data
-def fetch_semantic_scholar_data(self, query, max_results=10):
+def fetch_semantic_scholar_data(
+    self, query: str, max_results: int = 10
+):
logger.info(
f"Fetching data from Semantic Scholar for query: {query}"
)
@@ -134,13 +140,15 @@ def fetch_semantic_scholar_data(self, query, max_results=10):
def run(self, task: str):
logger.info(f"Running MedInsightPro agent for task: {task}")
status = "success"
-pubmed_data, semantic_scholar_data = {}, {}
+# pubmed_data, semantic_scholar_data = {}, {}
combined_summary = ""

try:
# Fetch data from PubMed
if self.pubmed_api_key:
-pubmed_data = self.fetch_pubmed_data(task)
+pubmed_data = query_pubmed_with_abstract(
+    query=task, max_articles=self.max_articles
+)
logger.info(f"PubMed data: {pubmed_data}")

# Fetch data from Semantic Scholar
@@ -150,20 +158,26 @@
)

# Summarize data with GPT-4
combined_summary_input = f"PubMed Data: {pubmed_data}\nSemantic Scholar Data: {semantic_scholar_data}"
# combined_summary_input = f"PubMed Data: {pubmed_data}\nSemantic Scholar Data: {semantic_scholar_data}"
if pubmed_data:
combined_summary_input = pubmed_data
else:
combined_summary_input = semantic_scholar_data

combined_summary = self.agent.run(combined_summary_input)
logger.info(f"Summarization completed for task: {task}")
except Exception as e:
logger.error(
f"Error during processing task: {task}. Error: {e}"
)
status = "failure"
raise e

# Log metadata
metadata = MedInsightMetadata(
query=task,
-pubmed_results=pubmed_data,
-semantic_scholar_results=semantic_scholar_data,
+# pubmed_results=pubmed_data,
+# semantic_scholar_results=semantic_scholar_data,
combined_summary=combined_summary,
status=status,
)
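Taken together, run() now hands the helper's merged PubMed string to the agent whenever a PubMed key is supplied, and otherwise falls back to the Semantic Scholar results. A minimal end-to-end sketch of the resulting call pattern; the environment-variable name is an assumption, and it presumes the elided tail of run() still appends each MedInsightMetadata record to metadata_log:

import os
from dotenv import load_dotenv
from medinsight.agent import MedInsightPro

load_dotenv()

# pubmed_api_key gates the PubMed branch in run(); max_articles is forwarded
# to query_pubmed_with_abstract as the article cap.
agent = MedInsightPro(
    pubmed_api_key=os.getenv("PUBMED_API_KEY"),  # assumed variable name
    max_articles=10,
)

summary = agent.run("GLP-1 receptor agonists for obesity")
print(summary)

# Each run is logged with its query, summary, and status ("success"/"failure")
last = agent.metadata_log[-1]
print(last.query, last.status)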
161 changes: 161 additions & 0 deletions medinsight/pub_med.py
@@ -0,0 +1,161 @@
import os
from Bio import Entrez
from loguru import logger
from dotenv import load_dotenv
from typing import Optional

# Load environment variables from .env file
load_dotenv()

# Check if email is set in the environment
ENTREZ_EMAIL = os.getenv("ENTREZ_EMAIL")

if not ENTREZ_EMAIL:
raise EnvironmentError(
"ENTREZ_EMAIL is not set in the environment. Please set it in your .env file."
)

Entrez.email = ENTREZ_EMAIL # Set email for Entrez queries

logger.add("pubmed_query.log", rotation="1 MB") # Rotating log file


def query_pubmed_with_abstract(
query: str,
max_articles: int = 10,
start_date: Optional[str] = None,
end_date: Optional[str] = None,
journal: Optional[str] = None,
author: Optional[str] = None,
):
"""
Query PubMed for articles and return their title, authors, abstract, etc.
Args:
query (str): The search query.
max_articles (int): Maximum number of articles to retrieve.
start_date (Optional[str]): Start date for filtering (e.g., "2020/01/01").
end_date (Optional[str]): End date for filtering (e.g., "2023/12/31").
journal (Optional[str]): Filter by journal name.
author (Optional[str]): Filter by author name.
Returns:
str: A single formatted string containing title, PMID, authors, source, publication date, and abstract for each article; an empty list is returned if no articles are found or an error occurs.
"""
try:
# Build the search query with optional filters
search_query = query
if journal:
search_query += f' AND "{journal}"[Journal]'
if author:
search_query += f" AND {author}[Author]"
if start_date and end_date:
search_query += f" AND ({start_date}[Date - Publication] : {end_date}[Date - Publication])"

logger.info(f"Querying PubMed with search: {search_query}")

# Fetch search results from PubMed
handle = Entrez.esearch(
db="pubmed", term=search_query, retmax=max_articles
)
record = Entrez.read(handle)
handle.close()

id_list = record["IdList"]
logger.info(
f"Found {len(id_list)} articles for query: {search_query}"
)

if not id_list:
logger.warning("No articles found.")
return []

# Fetch article details (XML format)
handle = Entrez.efetch(
db="pubmed",
id=",".join(id_list),
rettype="xml",
retmode="text",
)
articles = Entrez.read(handle)
handle.close()

article_list = []

# Extract information from articles
for article in articles["PubmedArticle"]:
article_data = {}
medline_citation = article.get("MedlineCitation", {})
article_metadata = medline_citation.get("Article", {})

article_data["Title"] = article_metadata.get(
"ArticleTitle", "N/A"
)
article_data["PMID"] = medline_citation.get("PMID", "N/A")
article_data["Authors"] = [
(
f"{author['LastName']} {author.get('Initials', '')}"
if "LastName" in author
else "Unknown Author"
)
for author in article_metadata.get("AuthorList", [])
]
article_data["Source"] = article_metadata.get(
"Journal", {}
).get("Title", "N/A")
article_data["PublicationDate"] = (
article_metadata.get("Journal", {})
.get("JournalIssue", {})
.get("PubDate", "N/A")
)

# Extract abstract if available
abstract = article_metadata.get("Abstract", {}).get(
"AbstractText", []
)
article_data["Abstract"] = (
" ".join(str(part) for part in abstract)
if abstract
else "N/A"
)

article_list.append(article_data)

logger.info(
f"Successfully retrieved {len(article_list)} articles."
)
# Output the results as a single string
merged_string = ""
for (
article
) in article_list: # Changed from articles to article_list
title = f"Title: {article['Title']}"
pmid = f"PMID: {article['PMID']}"
authors = f"Authors: {article['Authors']}"
source = f"Source: {article['Source']}"
publication_date = (
f"Publication Date: {article['PublicationDate']}"
)
abstract = f"Abstract: {article['Abstract']}"
merged_string += f"{title}\n{pmid}\n{authors}\n{source}\n{publication_date}\n{abstract}\n" # Concatenate to merged_string
merged_string += "-" * 40 + "\n" # Add separator
merged_string += "\n"

# print(merged_string) # Print the final merged string
return merged_string
except Exception as e:
logger.exception(
f"An error occurred during the PubMed query: {e}"
)
return []


# # Example usage with more search features
# articles = query_pubmed_with_abstract(
# query="deep learning in medical imaging",
# max_articles=20,
# )


# print(articles)
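A hedged sketch of calling the helper with the optional filters it supports; the journal, author, and date values here are purely illustrative, and ENTREZ_EMAIL must be present in .env before the module will import:

from medinsight.pub_med import query_pubmed_with_abstract

# journal, author, and the publication-date window are all optional filters
results_text = query_pubmed_with_abstract(
    query="deep learning in medical imaging",
    max_articles=5,
    start_date="2020/01/01",
    end_date="2023/12/31",
    journal="Radiology",
    author="Smith J",
)

# Returns one formatted string (title, PMID, authors, source, publication
# date, abstract per article), or [] when nothing is found.
print(results_text)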
