Skip to content

Commit 99c6d80

Browse files
committed
type specific processing
1 parent 1c17553 commit 99c6d80

9 files changed

+167
-2
lines changed

src/hierarchical.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,31 @@ def download_file(self, file_link: str):
7474

7575
@property
7676
def prompt(self):
77+
type_specific_prompts = {
78+
"Interview": "Identify and distinguish between different speakers. Include key quotes and main discussion points.",
79+
"Keynote": "Include speaker details and their expertise. Highlight key messages and main takeaways.",
80+
"Scientific Paper": "Extract key statements, contributions, main results, and important references. Focus on methodology and findings.",
81+
"Report": "Highlight numerical results, key statistics, and main takeaways. Include significant findings and conclusions.",
82+
"Book": "Include author information, main plot points, and key character descriptions. Highlight character development and relationships.",
83+
"Presentation Slides": "Determine if this is a motivational talk, results presentation, or idea/pitch. For motivational talks, focus on key messages and call-to-action. For result presentations, emphasize numerical results and achievements. For idea/pitch presentations, highlight the core idea and value proposition."
84+
}
85+
86+
# Get type-specific prompt
87+
type_prompt = ""
88+
if self.content_types is not None and hasattr(self, '_content_type'):
89+
content_type = self._content_type
90+
if content_type in type_specific_prompts:
91+
type_prompt = f"\nFor this {content_type}: {type_specific_prompts[content_type]}"
92+
7793
return (
7894
f"Create a comprehensive summary of the provided content and return the result as JSON.\n"
7995
+ (
8096
f"The type of the provided content is specified in [CONTENT TYPE].\n"
8197
if self.content_types is not None
8298
else ""
8399
)
84-
+ "The summary must be in the language specified in [[CONTENT LANGUAGE]], regardless of the source material.\n"
100+
+ type_prompt # Add the type-specific prompt
101+
+ "\nThe summary must be in the language specified in [[CONTENT LANGUAGE]], regardless of the source material.\n"
85102
+ f"Extract important facts from the text and return them in a list in JSON format as 'facts'.\n"
86103
+ f"[[IMPORTANT]] Ensure that the summary is consistent with the facts. Do not add information not contained in the text.\n"
87104
+ r'JSON schema: {"summary": "string", "facts": "array of strings"}'
@@ -246,7 +263,9 @@ def validate_type(cls, v):
246263
seed=self.seed,
247264
)
248265

249-
# add to overall usage
266+
# Store the content type for use in prompt
267+
self._content_type = res.type
268+
250269
self.add_usage(usage)
251270
return res.type
252271
else:

src/pytest.ini

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[pytest]
2+
markers =
3+
interview: mark a test as related to interview summaries
4+
keynote: mark a test as related to keynote summaries
5+
paper: mark a test as related to scientific paper summaries
6+
report: mark a test as related to report summaries
7+
book: mark a test as related to book summaries
8+
presentation: mark a test as related to presentation slides summaries

src/tests.py

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import pytest
2+
from symai import Symbol
3+
from hierarchical import HierarchicalSummary
4+
5+
# Candidate content-type labels handed to HierarchicalSummary via its
# `content_types` argument; each test below asserts that the returned
# `summary.type` equals one of these strings.
CONTENT_TYPES = [
    "Interview", "Keynote",
    "Scientific Paper", "Report",
    "Book", "Presentation Slides",
]
13+
14+
@pytest.mark.interview
@pytest.mark.parametrize(
    "file_path",
    ["../testfiles/interview_transcript.pdf"],
)
def test_interview_summary(file_path):
    """Summarise the interview fixture and check its type and speaker coverage.

    The detected type must be "Interview", and the answer returned by
    ``Symbol(...).interpret()`` for a yes/no question about speaker
    identification must contain "yes" or "true" (case-insensitive).
    """
    pipeline = HierarchicalSummary(file_link=file_path, content_types=CONTENT_TYPES)
    result, _unused = pipeline()

    # The content type reported on the result must match the fixture.
    assert result.type == "Interview"

    # Put a yes/no question about the summary text to Symbol.interpret().
    question = f"Does this summary identify different speakers and their key discussion points? Return yes or no.\n{result.summary}"
    answer = Symbol(question).interpret()
    verdict = answer.lower()
    assert any(token in verdict for token in ("yes", "true"))
29+
30+
@pytest.mark.keynote
@pytest.mark.parametrize(
    "file_path",
    ["../testfiles/keynote_presentation.pdf"],
)
def test_keynote_summary(file_path):
    """Summarise the keynote fixture and check its type and speaker details.

    The detected type must be "Keynote", and the answer returned by
    ``Symbol(...).interpret()`` for a yes/no question about speaker details
    and key messages must contain "yes" or "true" (case-insensitive).
    """
    pipeline = HierarchicalSummary(file_link=file_path, content_types=CONTENT_TYPES)
    result, _unused = pipeline()

    # The content type reported on the result must match the fixture.
    assert result.type == "Keynote"

    # Put a yes/no question about the summary text to Symbol.interpret().
    question = f"Does this summary include speaker details, their expertise, and key messages? Return yes or no.\n{result.summary}"
    answer = Symbol(question).interpret()
    verdict = answer.lower()
    assert any(token in verdict for token in ("yes", "true"))
45+
46+
@pytest.mark.paper
@pytest.mark.parametrize(
    "file_path",
    ["../testfiles/symbolicai_no_refs.pdf"],
)
def test_scientific_paper_summary(file_path):
    """Summarise the paper fixture and check its type and research coverage.

    The detected type must be "Scientific Paper", and the answer returned by
    ``Symbol(...).interpret()`` for a yes/no question about methodology and
    findings must contain "yes" or "true" (case-insensitive).
    """
    pipeline = HierarchicalSummary(file_link=file_path, content_types=CONTENT_TYPES)
    result, _unused = pipeline()

    # The content type reported on the result must match the fixture.
    assert result.type == "Scientific Paper"

    # Put a yes/no question about the summary text to Symbol.interpret().
    question = f"Does this summary include methodology details and research findings? Return yes or no.\n{result.summary}"
    answer = Symbol(question).interpret()
    verdict = answer.lower()
    assert any(token in verdict for token in ("yes", "true"))
61+
62+
@pytest.mark.report
@pytest.mark.parametrize(
    "file_path",
    ["../testfiles/google_report.pdf"],
)
def test_report_summary(file_path):
    """Summarise the report fixture and check its type and numeric coverage.

    The detected type must be "Report", and the answer returned by
    ``Symbol(...).interpret()`` for a yes/no question about numerical results
    and statistics must contain "yes" or "true" (case-insensitive).
    """
    pipeline = HierarchicalSummary(file_link=file_path, content_types=CONTENT_TYPES)
    result, _unused = pipeline()

    # The content type reported on the result must match the fixture.
    assert result.type == "Report"

    # Put a yes/no question about the summary text to Symbol.interpret().
    question = f"Does this summary include specific numerical results and statistics? Return yes or no.\n{result.summary}"
    answer = Symbol(question).interpret()
    verdict = answer.lower()
    assert any(token in verdict for token in ("yes", "true"))
77+
78+
@pytest.mark.book
@pytest.mark.parametrize(
    "file_path",
    ["../testfiles/book.pdf"],
)
def test_book_summary(file_path):
    """Summarise the book fixture and check its type and character coverage.

    The detected type must be "Book", and the answer returned by
    ``Symbol(...).interpret()`` for a yes/no question about character
    descriptions and relationships must contain "yes" or "true"
    (case-insensitive).
    """
    pipeline = HierarchicalSummary(file_link=file_path, content_types=CONTENT_TYPES)
    result, _unused = pipeline()

    # The content type reported on the result must match the fixture.
    assert result.type == "Book"

    # Put a yes/no question about the summary text to Symbol.interpret().
    question = f"Does this summary include character descriptions and their relationships? Return yes or no.\n{result.summary}"
    answer = Symbol(question).interpret()
    verdict = answer.lower()
    assert any(token in verdict for token in ("yes", "true"))
93+
94+
@pytest.mark.presentation
@pytest.mark.parametrize(
    "file_path",
    ["../testfiles/pitch_deck.pdf"],
)
def test_pitch_presentation_slides_summary(file_path):
    """Summarise the pitch-deck fixture and check its type and pitch coverage.

    The detected type must be "Presentation Slides", and the answer returned
    by ``Symbol(...).interpret()`` for a yes/no question about the core idea
    and value proposition must contain "yes" or "true" (case-insensitive).
    """
    pipeline = HierarchicalSummary(file_link=file_path, content_types=CONTENT_TYPES)
    result, _unused = pipeline()

    # The content type reported on the result must match the fixture.
    assert result.type == "Presentation Slides"

    # Put a yes/no question about the summary text to Symbol.interpret().
    question = f"Does this summary include the core idea and value proposition? Return yes or no.\n{result.summary}"
    answer = Symbol(question).interpret()
    verdict = answer.lower()
    assert any(token in verdict for token in ("yes", "true"))
109+
110+
# @pytest.mark.parametrize("file_path", ["path/to/presentation/files"])
111+
# def test_motivational_presentation_slides_summary(file_path):
112+
# summarizer = HierarchicalSummary(
113+
# file_link=file_path,
114+
# content_types=CONTENT_TYPES
115+
# )
116+
# summary, _ = summarizer()
117+
118+
# # Verify content type
119+
# assert summary.type == "Presentation Slides"
120+
121+
# # Verify key messages and call-to-action
122+
# sym = Symbol(f"Does this summary include key messages and a call-to-action? {summary.summary}")
123+
# assert "yes" in sym().lower() or "true" in sym().lower()
124+
125+
# @pytest.mark.parametrize("file_path", ["path/to/presentation/files"])
126+
# def test_results_presentation_slides_summary(file_path):
127+
# summarizer = HierarchicalSummary(
128+
# file_link=file_path,
129+
# content_types=CONTENT_TYPES
130+
# )
131+
# summary, _ = summarizer()
132+
133+
# # Verify content type
134+
# assert summary.type == "Presentation Slides"
135+
136+
# # Verify numerical results and achievements
137+
# sym = Symbol(f"Does this summary include numerical results and achievements? {summary.summary}")
138+
# assert "yes" in sym().lower() or "true" in sym().lower()

testfiles/book.pdf

585 KB
Binary file not shown.

testfiles/google_report.pdf

506 KB
Binary file not shown.

testfiles/interview_transcript.pdf

477 KB
Binary file not shown.

testfiles/keynote_presentation.pdf

11.2 MB
Binary file not shown.

testfiles/pitch_deck.pdf

129 KB
Binary file not shown.

testfiles/symbolicai_no_refs.pdf

780 KB
Binary file not shown.

0 commit comments

Comments
 (0)