Skip to content

Commit 56e8190

Browse files
alyssachvasta authored and copybara-github committed
Internal change
GitOrigin-RevId: f76835a1d05f78b6cf5d881ad958fe6ba45b07a5
1 parent 9438d37 commit 56e8190

File tree

2 files changed

+93
-23
lines changed

2 files changed

+93
-23
lines changed

src/stats/summary_stats.ts

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -115,15 +115,97 @@ export abstract class SummaryStats {
115115
}
116116

117117
/**
118-
* Sorts topics and their subtopics based on comment count in descending order, with
119-
* "Other" topics and subtopics going last.
118+
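* Ensures each comment is used in only one subtopic. Subtopics are visited from fewest to most comments (topic by topic) and a comment is kept only in the first subtopic that contains it; the "Other":"Other" subtopic is left untouched.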
119+
* @param topicStats the stats by topic to filter repeat comments out of
120+
* @returns the TopicStats with each comment only appearing once.
121+
*/
122+
private useCommentOnlyOnce(topicStats: TopicStats[]): TopicStats[] {
  // Purpose: return a copy of `topicStats` in which every comment appears in at most one
  // subtopic (apart from the "Other":"Other" subtopic, which is passed through unchanged).
  //
  // @param topicStats the stats by topic to filter repeat comments out of
  // @returns new TopicStats entries, sorted by sortTopicStats in its default (descending) order

  // IDs of comments already claimed by a previously visited subtopic. The set is updated in
  // place so each ID is inserted once, rather than rebuilding the whole set per subtopic.
  const alreadyUsedComments = new Set<string>();

  // sortTopicStats may reorder the arrays it is handed, so give it shallow copies of the topic
  // list and of every subtopic list; the caller's arrays keep their original order.
  const topicStatsCopy: TopicStats[] = topicStats.map((topic) =>
    topic.subtopicStats ? { ...topic, subtopicStats: [...topic.subtopicStats] } : topic
  );

  // Consider the subtopics with the least comments first so they will keep the comment and all
  // future duplicates will be removed.
  const ascendingTopicStats = this.sortTopicStats(topicStatsCopy, false);

  const filteredTopicStats: TopicStats[] = [];
  for (const topic of ascendingTopicStats) {
    const subtopicStats: NonNullable<TopicStats["subtopicStats"]> = [];

    for (const subtopic of topic.subtopicStats ?? []) {
      // Ignore repeats in the "Other":"Other" category, these are a special case and should
      // eventually be handled separately.
      if (topic.name === "Other" && subtopic.name === "Other") {
        subtopicStats.push(subtopic);
        continue;
      }

      // Remove comments that have already been used previously and regenerate the
      // SummaryStats object from what is left.
      const unusedComments = subtopic.summaryStats.comments.filter(
        (comment) => !alreadyUsedComments.has(comment.id)
      );
      subtopicStats.push({
        name: subtopic.name,
        commentCount: unusedComments.length,
        // Build the stats with the concrete subclass of the current instance.
        summaryStats: (this.constructor as typeof SummaryStats).create(unusedComments),
      });

      // Claim these comments so later subtopics drop them.
      for (const comment of unusedComments) {
        alreadyUsedComments.add(comment.id);
      }
    }

    // NOTE(review): the topic-level commentCount and summaryStats are carried over unchanged,
    // so they still reflect the topic before any comments were removed from its subtopics.
    // Confirm that this is intended.
    filteredTopicStats.push({
      name: topic.name,
      commentCount: topic.commentCount,
      summaryStats: topic.summaryStats,
      subtopicStats,
    });
  }

  return this.sortTopicStats(filteredTopicStats);
}
164+
165+
/**
166+
* Sorts topics and their subtopics based on comment count, with
167+
* "Other" topics and subtopics going last when sorting in descending order and first when sorting in ascending order.
168+
* @param topicStats what to sort
169+
* @param sortByDescendingCount whether to sort by comment count in descending (true, the default) or ascending (false) order
170+
* @returns the topics and subtopics sorted by comment count
171+
*/
172+
private sortTopicStats(
  topicStats: TopicStats[],
  sortByDescendingCount: boolean = true
): TopicStats[] {
  // Purpose: order topics, and the subtopics inside each topic, by comment count.
  // Entries named "Other" are pinned to the end when sorting in descending order and to the
  // start when sorting in ascending order, so the ascending result is the exact reverse of the
  // descending one.
  //
  // @param topicStats what to sort; neither this array nor the nested subtopic arrays are
  //     modified
  // @param sortByDescendingCount true (default) for most comments first, false for fewest first
  // @returns a new array of topics whose subtopic lists are sorted copies

  // +1 keeps the descending comparison as is; -1 mirrors every comparison result.
  const direction = sortByDescendingCount ? 1 : -1;

  // One comparator shared by topics and subtopics. Two "Other" entries are compared by count
  // like any other pair, so compare(a, b) and compare(b, a) never both claim the same order.
  const compare = (
    a: Pick<TopicStats, "name" | "commentCount">,
    b: Pick<TopicStats, "name" | "commentCount">
  ): number => {
    const aIsOther = a.name === "Other";
    const bIsOther = b.name === "Other";
    if (aIsOther !== bIsOther) {
      return aIsOther ? direction : -direction;
    }
    return direction * (b.commentCount - a.commentCount);
  };

  // Sort copies rather than the caller's arrays.
  return [...topicStats].sort(compare).map((topic) =>
    topic.subtopicStats
      ? { ...topic, subtopicStats: [...topic.subtopicStats].sort(compare) }
      : topic
  );
}
198+
199+
/**
200+
* Gets a sorted list of stats for each topic and subtopic.
201+
*
202+
* @param forceOneSubtopicEach whether to force comments with multiple topic/subtopics to only
203+
* have one. This is done by keeping only the subtopic with the least comments that's not in
204+
* the Other:Other category.
120205
*
121-
* @param commentsByTopic A nested map where keys are topic names, values are maps
122-
* where keys are subtopic names, and values are maps where
123-
* keys are comment IDs and values are comment texts.
124206
* @returns A list of TopicStats objects sorted by comment count with "Other" topics last.
125207
*/
126-
getStatsByTopic(): TopicStats[] {
208+
getStatsByTopic(forceOneSubtopicEach: boolean = false): TopicStats[] {
127209
const commentsByTopic = groupCommentsBySubtopic(this.comments);
128210
const topicStats: TopicStats[] = [];
129211

@@ -155,23 +237,11 @@ export abstract class SummaryStats {
155237
});
156238
}
157239

158-
topicStats.sort((a, b) => {
159-
if (a.name === "Other") return 1;
160-
if (b.name === "Other") return -1;
161-
return b.commentCount - a.commentCount;
162-
});
163-
164-
topicStats.forEach((topic) => {
165-
if (topic.subtopicStats) {
166-
topic.subtopicStats.sort((a, b) => {
167-
if (a.name === "Other") return 1;
168-
if (b.name === "Other") return -1;
169-
return b.commentCount - a.commentCount;
170-
});
171-
}
172-
});
240+
if (forceOneSubtopicEach) {
241+
return this.useCommentOnlyOnce(topicStats);
242+
}
173243

174-
return topicStats;
244+
return this.sortTopicStats(topicStats);
175245
}
176246
}
177247

src/tasks/summarization_subtasks/topics.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ function getDifferencesOfOpinionSingleCommentInstructions(containsGroups: boolea
9595
export class TopicsSummary extends RecursiveSummary<SummaryStats> {
9696
async getSummary(): Promise<SummaryContent> {
9797
// First construct the introductory description for the entire section
98-
const topicStats: TopicStats[] = this.input.getStatsByTopic();
98+
const topicStats: TopicStats[] = this.input.getStatsByTopic(true);
9999
const nTopics: number = topicStats.length;
100100
const nSubtopics: number = topicStats
101101
.map((t) => t.subtopicStats?.length || 0)

0 commit comments

Comments
 (0)