Skip to content

Commit 92ab09d

Browse files
nijat12 and copybara-github
authored and committed
Added pre-commit support to replace husky. Ran the project through formatter to fix formatting issues.
GitOrigin-RevId: 253c778e8c0fd0a6d28b00aa4f1c8232f7eec36a
1 parent bb4502f commit 92ab09d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

69 files changed

+3814
-3490
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ __pycache__
77
private_*
88
# Ignore Gemini CLI files.
99
GEMINI.md
10-
todo.md
10+
todo.md
11+
*.env

.husky/commit-msg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ if [ -e "$PROJECT_ROOT/.git/hooks/commit-msg" ]; then
55
$PROJECT_ROOT/.git/hooks/commit-msg "$@"
66
else
77
exit 0
8-
fi
8+
fi

.husky/pre-commit

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
npm test
2-
npx lint-staged
2+
npx lint-staged

.pre-commit-config.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
repos:
2+
- repo: https://github.com/pre-commit/pre-commit-hooks
3+
rev: v5.0.0
4+
hooks:
5+
- id: end-of-file-fixer
6+
types: [python]
7+
- repo: https://github.com/google/pyink
8+
rev: 24.10.1
9+
hooks:
10+
- id: pyink
11+
types: [python]
12+
language_version: python3.12
13+
- repo: local
14+
hooks:
15+
- id: pytest
16+
name: pytest
17+
entry: bash -c 'python -m ensurepip && pip install -r requirements-dev.txt && python -m pytest'
18+
language: python
19+
types: [python]
20+
pass_filenames: false
21+
- id: npm-test
22+
name: Run npm tests
23+
entry: npm test
24+
language: system
25+
types: [file]
26+
pass_filenames: false
27+
- id: lint-staged
28+
name: Run lint-staged
29+
entry: npx lint-staged
30+
language: system
31+
types: [file]
32+
pass_filenames: false

docker-compose.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ services:
77
- ./napolitan:/app/napolitan
88
- ./models:/app/models
99
environment:
10-
PYTHONPATH: /app
10+
PYTHONPATH: /app

library/bin/download_polis_data.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,3 @@ curl "${EXPORT_URL_BASE}/participant-votes.csv" > "${OUTPUT_DIR}/participants-vo
3535
curl "${EXPORT_URL_BASE}/votes.csv" > "${OUTPUT_DIR}/votes.csv"
3636
curl "${EXPORT_URL_BASE}/summary.csv" > "${OUTPUT_DIR}/summary.csv"
3737
curl "${EXPORT_URL_BASE}/comment-groups.csv" > "${OUTPUT_DIR}/comment-groups.csv"
38-
39-

library/bin/process_polis_data.py

Lines changed: 65 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#!/usr/bin/env python3
22

3-
import re
4-
import pandas as pd
53
import argparse as arg
64
import itertools
5+
import re
6+
import pandas as pd
77

88

99
print("Starting process_polis_data.py program")
@@ -13,18 +13,28 @@
1313

1414
def getargs():
1515
parser = arg.ArgumentParser(
16-
description="Process Polis data from the openData export data.")
16+
description="Process Polis data from the openData export data."
17+
)
1718
parser.add_argument("export_directory", help="Path to export directory.")
18-
parser.add_argument("--participants-votes",
19-
help="Participants votes file (override).")
2019
parser.add_argument(
21-
"--comments", help="Path to the comments file (override).")
22-
parser.add_argument("-o", "--output_file",
23-
help="Path to the output CSV file.", required=True)
24-
parser.add_argument("--exclude-ungrouped-participants",
25-
help="Whether to include ungrouped participants in the output.", action="store_true")
20+
"--participants-votes", help="Participants votes file (override)."
21+
)
22+
parser.add_argument(
23+
"--comments", help="Path to the comments file (override)."
24+
)
25+
parser.add_argument(
26+
"-o", "--output_file", help="Path to the output CSV file.", required=True
27+
)
28+
parser.add_argument(
29+
"--exclude-ungrouped-participants",
30+
help="Whether to include ungrouped participants in the output.",
31+
action="store_true",
32+
)
2633
args = parser.parse_args()
27-
args.participants_votes = args.participants_votes or f"{args.export_directory}/participants-votes.csv"
34+
args.participants_votes = (
35+
args.participants_votes
36+
or f"{args.export_directory}/participants-votes.csv"
37+
)
2838
args.comments = args.comments or f"{args.export_directory}/comments.csv"
2939
return args
3040

@@ -49,29 +59,29 @@ def getargs():
4959
print("Args processed")
5060

5161
# make sure to cast comment ids as ints
52-
comments['comment-id'] = comments['comment-id'].astype(int)
62+
comments["comment-id"] = comments["comment-id"].astype(int)
5363

5464

5565
# their votes on everything.
5666
if args.exclude_ungrouped_participants:
5767
# filter out votes rows where group-id is nan, and make ints
5868
print("Filtering out ungrouped participants")
59-
votes = votes[votes['group-id'].notna()]
69+
votes = votes[votes["group-id"].notna()]
6070
else:
6171
# We fill the ungrouped participant records with -1 for group, which when
6272
# processed below will reserve group-0 for the "ungrouped", which we can
6373
# manually filter into the columns
64-
votes['group-id'] = votes['group-id'].fillna(-1)
74+
votes["group-id"] = votes["group-id"].fillna(-1)
6575

6676
# Increment group ids so they are 1 based instead of 0 (noting that, as described above,
6777
# the "ungrouped" psuedo-group gets bumped here from -1 to 0, to be dealt with later)
68-
votes['group-id'] = votes['group-id'].astype(int) + 1
78+
votes["group-id"] = votes["group-id"].astype(int) + 1
6979
# Sort the ids so they come out in the right order in the output file header
70-
group_ids = sorted(votes['group-id'].unique())
80+
group_ids = sorted(votes["group-id"].unique())
7181
print("Group ids:", group_ids)
7282

7383
# prompt: find all of the column names in the votes df that match a numeric regex
74-
comment_ids = [col for col in votes.columns if re.match(r'^\d+$', col)]
84+
comment_ids = [col for col in votes.columns if re.match(r"^\d+$", col)]
7585

7686
# Create a dictionary for mapping comment to total vote count for each column in
7787
# the votes table, for later verification
@@ -80,23 +90,28 @@ def getargs():
8090
comment_vote_counts[int(comment_id)] = votes[comment_id].value_counts().sum()
8191

8292
# Melt the DataFrame
83-
melted_votes = votes.melt(id_vars=[
84-
"group-id"], value_vars=comment_ids, var_name='comment-id', value_name='value')
85-
melted_votes['comment-id'] = melted_votes['comment-id'].astype(int)
93+
melted_votes = votes.melt(
94+
id_vars=["group-id"],
95+
value_vars=comment_ids,
96+
var_name="comment-id",
97+
value_name="value",
98+
)
99+
melted_votes["comment-id"] = melted_votes["comment-id"].astype(int)
86100
# Group, count, unstack, and fill missing values
87101
result = (
88-
melted_votes.groupby(['comment-id', 'group-id'])['value']
102+
melted_votes.groupby(["comment-id", "group-id"])["value"]
89103
.value_counts()
90104
.unstack(fill_value=0)
91105
.reset_index()
92106
)
93107

94108
# Rename columns
95109
result = result.rename(
96-
columns={-1: 'disagree-count', 0: 'pass-count', 1: 'agree-count'})
110+
columns={-1: "disagree-count", 0: "pass-count", 1: "agree-count"}
111+
)
97112

98113
# Pivot out the group-id column so that each of the vote count columns look like "group-N-VOTE-count"
99-
pivoted = result.pivot(index="comment-id", columns='group-id')
114+
pivoted = result.pivot(index="comment-id", columns="group-id")
100115

101116
# A function for naming groups based on group id.
102117
# Note that for the group_id == 0, the "ungrouped" pseudo-group, this returns "Group-none"
@@ -107,19 +122,20 @@ def group_name(group_id):
107122

108123

109124
# Use the pivoted data to prepare a dataframe for merging
110-
for_merge = pd.DataFrame({'comment-id': pivoted.index})
125+
for_merge = pd.DataFrame({"comment-id": pivoted.index})
111126
for group_id in group_ids:
112127
for count_col in ["disagree-count", "pass-count", "agree-count"]:
113-
for_merge[group_name(group_id) + "-" +
114-
count_col] = pivoted[count_col][group_id].values
128+
for_merge[group_name(group_id) + "-" + count_col] = pivoted[count_col][
129+
group_id
130+
].values
115131

116132
# zero out total vote tallies since incorrect from filtering or database caching
117133
comments["agrees"] = 0
118134
comments["disagrees"] = 0
119135
comments["passes"] = 0
120136

121137
# merge in the per group tallies above
122-
comments = comments.merge(for_merge, on='comment-id')
138+
comments = comments.merge(for_merge, on="comment-id")
123139

124140
# add up from the votes matrix for consistency
125141
for group_id in group_ids:
@@ -128,14 +144,18 @@ def group_name(group_id):
128144
comments["agrees"] += comments[group + "-agree-count"]
129145
comments["passes"] += comments[group + "-pass-count"]
130146

131-
comments["votes"] = comments["agrees"] + \
132-
comments["disagrees"] + comments["passes"]
147+
comments["votes"] = (
148+
comments["agrees"] + comments["disagrees"] + comments["passes"]
149+
)
133150

134151
comments["agree_rate"] = comments["agrees"] / comments["votes"]
135152
comments["disagree_rate"] = comments["disagrees"] / comments["votes"]
136153
comments["pass_rate"] = comments["passes"] / comments["votes"]
137154
comments["difference_of_opinion_rank"] = (
138-
1 - abs(comments["agree_rate"] - comments["disagree_rate"]) - comments["pass_rate"])
155+
1
156+
- abs(comments["agree_rate"] - comments["disagree_rate"])
157+
- comments["pass_rate"]
158+
)
139159

140160

141161
# Go through and check that all of our output comment["votes"] counts are no
@@ -144,10 +164,16 @@ def group_name(group_id):
144164
# a result of filters applied based on who was grouped in the conversation analysis.
145165
print("Validating aggregate vote counts...")
146166
failed_validations = 0
147-
for comment_id in comments['comment-id']:
148-
if comment_vote_counts[comment_id] < comments[comments['comment-id'] == int(comment_id)]["votes"].iloc[0]:
167+
for comment_id in comments["comment-id"]:
168+
if (
169+
comment_vote_counts[comment_id]
170+
< comments[comments["comment-id"] == int(comment_id)]["votes"].iloc[0]
171+
):
149172
print(
150-
f"WARNING: Vote count mismatch for comment {comment_id}. Original count: {comment_vote_counts[comment_id]}, New count: {comments[comments['comment-id'] == int(comment_id)]['votes'].iloc[0]}")
173+
f"WARNING: Vote count mismatch for comment {comment_id}. Original"
174+
f" count: {comment_vote_counts[comment_id]}, New count:"
175+
f" {comments[comments['comment-id'] == int(comment_id)]['votes'].iloc[0]}"
176+
)
151177
failed_validations += 1
152178
if failed_validations == 0:
153179
print("All validations passed!")
@@ -157,14 +183,17 @@ def group_name(group_id):
157183
# to non-strict moderation)
158184
print("N comments total:", len(comments))
159185
print("N votes total:", comments["votes"].sum())
160-
moderated_comments = comments[(comments["moderated"] == 1) | (
161-
(comments["moderated"] == 0) & (comments["votes"] > 1))]
186+
moderated_comments = comments[
187+
(comments["moderated"] == 1)
188+
| ((comments["moderated"] == 0) & (comments["votes"] > 1))
189+
]
162190
print("N comments included after moderation:", len(moderated_comments))
163191
print("N votes after moderation:", moderated_comments["votes"].sum())
164192

165193
# prompt: write out to a CSV file
166194
moderated_comments = moderated_comments.rename(
167-
columns={'comment-body': 'comment_text'})
195+
columns={"comment-body": "comment_text"}
196+
)
168197
moderated_comments.to_csv(args.output_file, index=False)
169198

170199
# Exit with non-zero error code if any validations failed

library/docs/.nojekyll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
TypeDoc added this file to prevent GitHub Pages from using Jekyll. You can turn off this behavior by setting the `githubPages` option to false.
1+
TypeDoc added this file to prevent GitHub Pages from using Jekyll. You can turn off this behavior by setting the `githubPages` option to false.

0 commit comments

Comments
 (0)