Skip to content

Commit 1d969b0

Browse files
Jigsaw and copybara-github
authored and committed
Internal change
GitOrigin-RevId: df69dc309b11c001eb7d0ec4848861e60963218d
1 parent 43d602a commit 1d969b0

File tree

2 files changed

+85
-10
lines changed

2 files changed

+85
-10
lines changed

src/tasks/summarization_subtasks/overview.test.ts

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
import { removeEmptyLines } from "./overview";
15+
import { isMdListValid, removeEmptyLines } from "./overview";
1616

1717
describe("IntroTest", () => {
1818
it("should remove empty lines", async () => {
@@ -89,3 +89,42 @@ describe("IntroTest", () => {
8989
).toEqual("* Item 1\n* Item 2\n* Item 3");
9090
});
9191
});
92+
93+
// Unit tests for isMdListValid: each case feeds a markdown list plus the expected
// topic order and checks the boolean verdict.
describe("isMdListValid", () => {
  // Every case validates against the same three topics, in this order.
  const expectedTopics = ["Topic 1", "Topic 2", "Topic 3"];

  it("should return false if a line does not match the expected format", async () => {
    // The last line has no bullet and no bold topic name.
    const lastLineUnformatted = "* **Topic 1**: Summary\n* **Topic 2**: Summary\nTopic 3: Summary";
    expect(isMdListValid(lastLineUnformatted, expectedTopics)).toEqual(false);
  });

  it("should return false if some topic names don't match the expected order", async () => {
    // Topics 2 and 3 are swapped relative to expectedTopics.
    const swappedOrder = "* **Topic 1**: Summary\n* **Topic 3**: Summary\n* **Topic 2**: Summary";
    expect(isMdListValid(swappedOrder, expectedTopics)).toEqual(false);
  });

  it("should return true if all lines match the expected format and topic order", async () => {
    // The colon may sit outside the bold span...
    const colonOutsideBold = "* **Topic 1**: Summary\n* **Topic 2**: Summary\n* **Topic 3**: Summary";
    expect(isMdListValid(colonOutsideBold, expectedTopics)).toEqual(true);

    // ...or inside it.
    const colonInsideBold = "* **Topic 1:** Summary\n* **Topic 2:** Summary\n* **Topic 3:** Summary";
    expect(isMdListValid(colonInsideBold, expectedTopics)).toEqual(true);
  });
});

src/tasks/summarization_subtasks/overview.ts

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,12 @@
1818
import { SummaryStats, TopicStats } from "../../stats/summary_stats";
1919
import { SummaryContent, Summary } from "../../types";
2020
import { RecursiveSummary } from "./recursive_summarization";
21-
import { getAbstractPrompt, decimalToPercent, filterSummaryContent } from "../../sensemaker_utils";
21+
import {
22+
getAbstractPrompt,
23+
decimalToPercent,
24+
filterSummaryContent,
25+
retryCall,
26+
} from "../../sensemaker_utils";
2227

2328
function oneShotInstructions(topicNames: string[]) {
2429
return (
@@ -97,14 +102,23 @@ export class OverviewSummary extends RecursiveSummary<OverviewInput> {
97102
` </topicsSummary>`,
98103
this.additionalContext
99104
);
100-
const result = await this.model.generateText(prompt);
101-
// Check to make sure that every single topicName in topicNames is in the list, and raise an error if not
102-
for (const topicName of topicNames) {
103-
if (!result.includes(topicName)) {
104-
throw new Error(`Overview summary is missing topic name: ${topicName}`);
105-
}
106-
}
107-
return removeEmptyLines(result);
105+
return await retryCall(
106+
async function (model, prompt) {
107+
let result = await model.generateText(prompt);
108+
result = removeEmptyLines(result);
109+
if (!result) {
110+
throw new Error(`Overview summary failed to conform to markdown list format.`);
111+
} else {
112+
return result;
113+
}
114+
},
115+
(result) => isMdListValid(result, topicNames),
116+
3,
117+
"Overview summary failed to conform to markdown list format, or did not include all topic descriptions exactly as intended.",
118+
undefined,
119+
[this.model, prompt],
120+
[]
121+
);
108122
}
109123

110124
/**
@@ -171,3 +185,25 @@ function filterSectionsForOverview(topicSummary: SummaryContent): SummaryContent
171185
/**
 * Collapses a markdown list so that no blank lines remain.
 *
 * Every run of whitespace that contains at least one line break (CR or LF) is replaced
 * by a single "\n", which also drops the spaces around each break. Whitespace at the
 * very start and end of the string is then trimmed.
 *
 * @param mdList the markdown text to compact
 * @returns the text with one "\n" between consecutive non-empty lines
 */
export function removeEmptyLines(mdList: string): string {
  const singleLineBreaks = mdList.replace(/\s*[\r\n]+\s*/g, "\n");
  return singleLineBreaks.trim();
}
188+
189+
/**
 * Checks whether a markdown list has exactly one well-formed item per expected topic.
 *
 * The input is split on "\n" and is NOT normalized here, so callers should strip blank
 * lines first (for example with `removeEmptyLines`). The list is valid only when:
 *  - it has the same number of lines as `topicNames`,
 *  - line i contains `topicNames[i]`, so topics appear in the expected order, and
 *  - every line looks like `* **bold topic**: summary...` — the bullet may be `*` or `-`,
 *    the bold markers `**` or `__`, and the colon may sit inside or outside the bold span.
 *
 * Each failed check is reported with `console.log` before returning false.
 *
 * @param mdList the markdown list to validate, one item per line
 * @param topicNames the expected topic names, in the expected order
 * @returns true if every line passes the checks above, false otherwise
 */
export function isMdListValid(mdList: string, topicNames: string[]): boolean {
  // Hoisted out of the loop; same patterns as before with the redundant escapes removed.
  const boldWithAsterisks = /^[*-]\s\*\*.*:?\*\*:?\s/;
  const boldWithUnderscores = /^[*-]\s__.*:?__:?\s/;

  const lines = mdList.split("\n");

  // Without this check a list that drops trailing topics would pass, and an extra line
  // would be compared against `undefined` (which `includes` coerces to "undefined").
  if (lines.length !== topicNames.length) {
    console.log("Expected", topicNames.length, "list items but found", lines.length);
    return false;
  }

  for (const [index, line] of lines.entries()) {
    const topicName = topicNames[index];
    // Check to make sure that every single topicName in topicNames is in the list, and in the right order
    if (!line.includes(topicName)) {
      console.log("Topic name not found in list:", topicName);
      return false;
    }
    // Check to make sure that every line matches the expected format
    if (!boldWithAsterisks.test(line) && !boldWithUnderscores.test(line)) {
      console.log("Line does not match expected format:", line);
      return false;
    }
  }
  return true;
}

0 commit comments

Comments
 (0)