Skip to content

Commit 5c48684

Browse files
alyssachvasta and copybara-github
authored and committed
Internal change
GitOrigin-RevId: b310fa8c0f9e451a318abe47860c798246816203
1 parent 4c4e3b8 commit 5c48684

File tree

2 files changed

+125
-51
lines changed

2 files changed

+125
-51
lines changed

runner-cli/runner.ts

Lines changed: 10 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,22 @@
1616
// directory, and as documented in `runner_utils.ts`.
1717

1818
import { Command } from "commander";
19-
import * as fs from "fs";
20-
import { marked } from "marked";
2119
import {
2220
getCommentsFromCsv,
2321
getSummary,
24-
getTopicsFromComments,
25-
getTopicsAndSubtopics,
22+
getTopics,
23+
writeSummaryToGroundedCSV,
24+
writeSummaryToHtml,
2625
} from "./runner_utils";
27-
import { type Topic } from "../src/types";
2826

2927
async function main(): Promise<void> {
3028
// Parse command line arguments.
3129
const program = new Command();
3230
program
33-
.option("-o, --outputFile <file>", "The output file name.")
31+
.option(
32+
"-o, --outputBasename <file>",
33+
"The output basename, this will be prepended to 'summary.html' and 'summaryClaimsAndComments.csv'."
34+
)
3435
.option("-i, --inputFile <file>", "The input file name.")
3536
.option(
3637
"-a, --additionalContext <context>",
@@ -41,58 +42,17 @@ async function main(): Promise<void> {
4142
const options = program.opts();
4243

4344
const comments = await getCommentsFromCsv(options.inputFile);
44-
// check if any comments have topics before using getTopicsFromComments, otherwise, learn topics using runner_utils function
45-
let topics: Topic[];
46-
if (comments.length > 0 && comments.some((comment) => comment.topics)) {
47-
console.log("Comments already have topics. Skipping topic learning.");
48-
topics = getTopicsFromComments(comments);
49-
} else {
50-
console.log("Learning topics from comments.");
51-
topics = await getTopicsAndSubtopics(options.vertexProject, comments);
52-
}
45+
const topics = await getTopics(comments, options.vertexProject);
5346

5447
const summary = await getSummary(
5548
options.vertexProject,
5649
comments,
5750
topics,
5851
options.additionalContext
5952
);
60-
const markdownContent = summary.getText("MARKDOWN");
61-
const htmlContent = `
62-
<!DOCTYPE html>
63-
<html>
64-
<head>
65-
<title>Summary</title>
66-
<style>
67-
body {
68-
font-family: Arial, sans-serif;
69-
line-height: 1.6;
70-
max-width: 800px;
71-
margin: 0 auto;
72-
padding: 20px;
73-
}
74-
</style>
75-
${
76-
// When in DEBUG_MODE, we need to add the DataTables and jQuery libraries, and hook
77-
// into our table elements to add support for features like sorting and search.
78-
process.env.DEBUG_MODE === "true"
79-
? `
80-
<script src="https://code.jquery.com/jquery-3.7.1.js"></script>
81-
<script src="https://cdn.datatables.net/2.2.1/js/dataTables.js"></script>
82-
<link rel="stylesheet" href="https://cdn.datatables.net/2.2.1/css/dataTables.dataTables.css" />
83-
<script>$(document).ready( function () {$('table').DataTable();} )</script>
84-
`
85-
: ""
86-
}
87-
</head>
88-
<body>
89-
${marked(markdownContent)}
90-
</body>
91-
</html>`;
9253

93-
const outputPath = `${options.outputFile}.html`;
94-
fs.writeFileSync(outputPath, htmlContent);
95-
console.log(`Written summary to ${outputPath}`);
54+
writeSummaryToHtml(summary, options.outputBasename + "-summary.html");
55+
writeSummaryToGroundedCSV(summary, options.outputBasename + "-summaryClaimsAndComments.csv");
9656
}
9757

9858
main();

runner-cli/runner_utils.ts

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,19 @@
1919

2020
import { Sensemaker } from "../src/sensemaker";
2121
import { VertexModel } from "../src/models/vertex_model";
22-
import { Summary, VoteTally, Comment, SummarizationType, Topic } from "../src/types";
22+
import {
23+
Summary,
24+
VoteTally,
25+
Comment,
26+
SummarizationType,
27+
Topic,
28+
SummaryContent,
29+
} from "../src/types";
2330
import * as path from "path";
2431
import * as fs from "fs";
2532
import { parse } from "csv-parse";
33+
import { marked } from "marked";
34+
import { createObjectCsvWriter } from "csv-writer";
2635

2736
/**
2837
* Core comment columns, sans any vote tally rows
@@ -56,6 +65,62 @@ export interface VoteTallyCsvRow {
5665
// A type that combines VoteTallyCsvRow and CoreCommentCsvRow.
5766
export type CommentCsvRow = VoteTallyCsvRow & CoreCommentCsvRow;
5867

68+
/**
 * Builds a lookup from comment id to comment. When several comments share an id the first
 * one wins, which matches what a linear `find` over the same list would return.
 * @param allComments all the comments from the deliberation
 * @returns a map from comment id to the first comment carrying that id
 */
function indexCommentsById(allComments: Comment[]): Map<string, Comment> {
  const commentsById = new Map<string, Comment>();
  for (const comment of allComments) {
    if (!commentsById.has(comment.id)) {
      commentsById.set(comment.id, comment);
    }
  }
  return commentsById;
}

/**
 * Add the text and supporting comments to statementsWithComments. Also adds nested content.
 *
 * Nested content is visited first, so rows for sub-contents are appended before the row of
 * the content that contains them. Content that has neither text nor a title produces no row.
 * Citations that do not match any comment id are silently dropped.
 *
 * @param summaryContent the content and subcontent to add
 * @param allComments all the comments from the deliberation
 * @param statementsWithComments where to add new text and supporting comments (mutated in place)
 * @param commentsById optional id index over allComments; built once on the outermost call and
 *     reused by the recursive calls so each citation lookup is constant time
 * @returns none
 */
function addStatement(
  summaryContent: SummaryContent,
  allComments: Comment[],
  statementsWithComments: { summary: string; comments: string }[],
  commentsById: ReadonlyMap<string, Comment> = indexCommentsById(allComments)
): void {
  for (const subContent of summaryContent.subContents ?? []) {
    addStatement(subContent, allComments, statementsWithComments, commentsById);
  }

  // Pure container nodes (no text and no title) contribute nothing beyond their children.
  if (summaryContent.text.length === 0 && !summaryContent.title) {
    return;
  }

  // The explicit type predicate keeps this narrowing to Comment[] on compilers that do not
  // infer predicates from `!== undefined` filters.
  const comments = (summaryContent.citations ?? [])
    .map((commentId: string) => commentsById.get(commentId))
    .filter((comment): comment is Comment => comment !== undefined);

  statementsWithComments.push({
    // The title (when present) is joined directly to the text, with no separator added.
    summary: (summaryContent.title || "") + summaryContent.text,
    comments: comments.map((comment) => `* [${comment.id}] ${comment.text}`).join("\n"),
  });
}
100+
101+
/**
 * Outputs a CSV where each row represents a statement and its associated comments.
 *
 * The write is asynchronous: the returned promise resolves only after the records have been
 * handed to the CSV writer and written, and the confirmation message is logged at that point.
 * Callers should await the promise so that write failures are surfaced rather than lost.
 *
 * @param summary the summary to split.
 * @param outputFilePath Path to the output CSV file that will have columns "summary" for the statement, and "comments" for the comment texts associated with that statement.
 * @returns a promise that settles once the CSV has been written (rejects if the write fails).
 */
export async function writeSummaryToGroundedCSV(
  summary: Summary,
  outputFilePath: string
): Promise<void> {
  const statementsWithComments: { summary: string; comments: string }[] = [];

  // Flatten the (possibly nested) summary contents into one row per statement.
  for (const summaryContent of summary.contents) {
    addStatement(summaryContent, summary.comments, statementsWithComments);
  }

  const csvWriter = createObjectCsvWriter({
    path: outputFilePath,
    header: [
      { id: "summary", title: "summary" },
      { id: "comments", title: "comments" },
    ],
  });

  // writeRecords returns a promise; wait for it so the success message is only printed
  // after the file has actually been written.
  await csvWriter.writeRecords(statementsWithComments);
  console.log(`Summary statements saved to ${outputFilePath}`);
}
59124
/**
60125
* Identify topics and subtopics when input data has not already been categorized.
61126
* @param project The Vertex GCloud project name
@@ -99,6 +164,55 @@ export async function getSummary(
99164
);
100165
}
101166

167+
/**
 * Returns the topics to use for summarization.
 *
 * If at least one comment already carries topics, the topics are collected from the comments;
 * otherwise they are learned from the comments via the given Vertex project.
 *
 * @param comments the comments to inspect
 * @param vertexProject the Vertex GCloud project name used when topics must be learned
 * @returns the existing or newly learned topics
 */
export async function getTopics(comments: Comment[], vertexProject: string): Promise<Topic[]> {
  // `some` is false for an empty list, so no separate length check is needed.
  const alreadyCategorized = comments.some((comment) => comment.topics);

  if (!alreadyCategorized) {
    console.log("Learning topics from comments.");
    return getTopicsAndSubtopics(vertexProject, comments);
  }

  console.log("Comments already have topics. Skipping topic learning.");
  return getTopicsFromComments(comments);
}
177+
178+
/**
 * Renders a summary as a standalone HTML page and writes it to disk.
 *
 * The summary's markdown text is converted with `marked` and placed inside the page body.
 * When the DEBUG_MODE environment variable is exactly "true", jQuery and DataTables are also
 * loaded from their CDNs and every table on the page is turned into a DataTable.
 *
 * @param summary the summary whose markdown text is rendered
 * @param outputFile path of the HTML file to write (written synchronously)
 */
export function writeSummaryToHtml(summary: Summary, outputFile: string) {
179+
const markdownContent = summary.getText("MARKDOWN");
180+
const htmlContent = `
181+
<!DOCTYPE html>
182+
<html>
183+
<head>
184+
<title>Summary</title>
185+
<style>
186+
body {
187+
font-family: Arial, sans-serif;
188+
line-height: 1.6;
189+
max-width: 800px;
190+
margin: 0 auto;
191+
padding: 20px;
192+
}
193+
</style>
194+
${
195+
// When in DEBUG_MODE, we need to add the DataTables and jQuery libraries, and hook
196+
// into our table elements to add support for features like sorting and search.
197+
process.env.DEBUG_MODE === "true"
198+
? `
199+
<script src="https://code.jquery.com/jquery-3.7.1.js"></script>
200+
<script src="https://cdn.datatables.net/2.2.1/js/dataTables.js"></script>
201+
<link rel="stylesheet" href="https://cdn.datatables.net/2.2.1/css/dataTables.dataTables.css" />
202+
<script>$(document).ready( function () {$('table').DataTable();} )</script>
203+
`
204+
: ""
205+
}
206+
</head>
207+
<body>
208+
${marked(markdownContent)}
209+
</body>
210+
</html>`;
211+
212+
// Synchronous write: the log line below runs only after the file has been written.
fs.writeFileSync(outputFile, htmlContent);
213+
console.log(`Written summary to ${outputFile}`);
214+
}
215+
102216
/**
103217
* Parse a topics string from the categorization_runner.ts into a (possibly) nested topics and subtopics
104218
* array, omitting subtopics if not present in the labels.

0 commit comments

Comments
 (0)