-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add script to scrape GitHub for contributors (#45)
Add a script that scrapes Northstar-related repositories and compiles a list of contributors. Co-authored-by: Barnaby <[email protected]>
- Loading branch information
1 parent
0024f71
commit a428a9e
Showing
1 changed file
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
import re | ||
import requests | ||
from typing import List, Optional | ||
|
||
# Optional GitHub API token. Supplying one avoids the API rate limit; leaving
# it as None sends unauthenticated requests.
github_token = None

# TypeScript file that lists the manually maintained contributors.
contributor_list_file = "../src/data/contributors.ts"

# GitHub organisations whose repositories are scanned.
orgs = [
    "R2Northstar",
    "R2NorthstarTools",
]

# Repositories to skip, written as "<org>/<repo>".
excluded_repos = [
    # zlib is kind of empty, so we can exclude it
    "R2Northstar/zlib",
    # NorthstarProton is a fork, and it is difficult to untangle its
    # contributions from upstream
    "R2NorthstarTools/NorthstarProton",
]

# Accounts to exclude from the contributor list (all of them are bots).
excluded_users = [
    "dependabot[bot]",
    "harmony-weblate",
    "weblate",
]
|
||
|
||
def extract_github_usernames(contributor_list_file) -> List[str]:
    """Return the GitHub usernames linked in a contributor list file.

    The file is scanned for double-quoted GitHub profile URLs of the form
    ``"https://github.com/<username>"`` (an optional trailing slash is
    allowed) and the ``<username>`` part of every match is collected.

    Args:
        contributor_list_file: Path of the text file to scan.

    Returns:
        The usernames in the order they occur in the file; duplicates are
        kept. The list is empty when the file contains no matching URL.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The quotes are part of the pattern so that only complete string
    # literals match; longer URLs such as ".../org/repo" are ignored.
    github_url_pattern = r'"https://github\.com/([\w-]+)/?"'

    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which may differ from the encoding the file was saved in.
    with open(contributor_list_file, "r", encoding="utf-8") as f:
        file_contents = f.read()

    # Extract GitHub usernames
    return re.findall(github_url_pattern, file_contents)
|
||
|
||
def get_repos(org_name) -> Optional[List[str]]:
    """Return the names of all repositories of a GitHub organisation.

    Requests are authenticated with the module-level ``github_token`` when it
    is set. Every result page is fetched, so organisations with more
    repositories than fit on one page are listed completely.

    Args:
        org_name: Name of the GitHub organisation to query.

    Returns:
        The repository names (without the organisation prefix), or ``None``
        if any request did not answer with status 200. In that case the
        status code and the response body are printed.
    """
    # Print the parameter rather than a loop variable of the calling script,
    # so the function also works when called on its own.
    print(f"Org: {org_name}")

    headers = {}
    if github_token is not None:
        headers = {"Authorization": f"Bearer {github_token}"}

    repo_names = []
    # The API returns results one page at a time. Ask for the largest page
    # size and follow the "next" link until there is none left.
    url = f"https://api.github.com/orgs/{org_name}/repos?per_page=100"
    while url is not None:
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(
                f"Failed to retrieve repositories. Status code: {response.status_code}"
            )
            print(f"Response: {response.text}")
            return None
        repo_names.extend(repo["name"] for repo in response.json())
        # `links` holds the parsed Link header; "next" is absent on the last page.
        url = response.links.get("next", {}).get("url")

    return repo_names
|
||
|
||
def _fetch_repo_contributors(org_name, repo_name):
    """Return every contributor entry of one repository, across all pages.

    Args:
        org_name: Organisation that owns the repository.
        repo_name: Repository name without the organisation prefix.

    Returns:
        A list of the contributor objects returned by the GitHub API (empty
        for a repository without content), or ``None`` if a request failed.
        On failure the status code and the response body are printed.
    """
    headers = {}
    if github_token is not None:
        headers = {"Authorization": f"Bearer {github_token}"}

    entries = []
    # The API returns results one page at a time. Ask for the largest page
    # size and follow the "next" link until there is none left.
    url = (
        f"https://api.github.com/repos/{org_name}/{repo_name}"
        "/contributors?per_page=100"
    )
    while url is not None:
        response = requests.get(url, headers=headers)
        if response.status_code == 204:
            # The API answers "204 No Content" for an empty repository.
            break
        if response.status_code != 200:
            print(
                f"Failed to retrieve contributors for {org_name}/{repo_name}. "
                f"Status code: {response.status_code}"
            )
            print(f"Response: {response.text}")
            return None
        entries.extend(response.json())
        # `links` holds the parsed Link header; "next" is absent on the last page.
        url = response.links.get("next", {}).get("url")

    return entries


# Exclude manually added contributors
excluded_users += extract_github_usernames(contributor_list_file)

# Membership is tested once per contributor entry, so use a set for the lookup.
excluded_logins = set(excluded_users)

# Maps login -> {"login", "contributions", "avatar_url"}; contributions are
# summed over all scanned repositories.
contributors = {}

for org in orgs:
    repos = get_repos(org)
    if repos is None:
        continue
    for repo in repos:
        if f"{org}/{repo}" in excluded_repos:
            continue

        print(f"Repo: {org}/{repo}")
        repo_contributors = _fetch_repo_contributors(org, repo)
        if repo_contributors is None:
            # The failure has already been reported; carry on with the next repo.
            continue

        for contributor in repo_contributors:
            login = contributor["login"]
            if login in excluded_logins:
                continue

            previous = contributors.get(login)
            earlier_contributions = (
                previous["contributions"] if previous is not None else 0
            )
            contributors[login] = {
                "login": login,
                "contributions": earlier_contributions + contributor["contributions"],
                # Make sure to use lower resolution version to not overload
                # client on load
                "avatar_url": contributor["avatar_url"] + "&s=64",
            }

# Sort contributor list alphabetically by login. This is a plain string
# comparison, so uppercase logins sort before lowercase ones.
sorted_contributors = sorted(contributors.values(), key=lambda x: x["login"])
|
||
|
||
# Generate TypeScript code | ||
def generate_typescript_code(contributor_list):
    """Render contributors as the source text of a TypeScript module.

    The module consists of a header comment, the ``CommunityContributor``
    interface and an exported ``community_contributors`` array holding one
    object literal per contributor.

    Args:
        contributor_list: Iterable of mappings that provide the keys
            ``"login"`` and ``"avatar_url"``. The values are inserted into
            the output verbatim, without escaping.

    Returns:
        The complete TypeScript source as a single string.

    Raises:
        KeyError: If an entry lacks ``"login"`` or ``"avatar_url"``.
    """
    # NOTE(review): the templates below emit unindented TypeScript, matching
    # the source as it was visible for review - confirm against the checked-in
    # generated file.
    file_header_string = "// Auto-generated from Python script\n"

    definition_string = """
export interface CommunityContributor {
url?: string;
icon: string;
name: string;
}
"""
    list_start_string = (
        """export const community_contributors: CommunityContributor[] = ["""
    )
    list_end_string = """
]
"""

    # Build all entries in one pass; each entry starts on a new line and ends
    # with a trailing comma.
    contributor_list_string = "".join(
        f"""
{{
url: "https://github.com/{contributor['login']}",
icon: "{contributor['avatar_url']}",
name: "{contributor['login']}",
}},"""
        for contributor in contributor_list
    )

    return (
        file_header_string
        + definition_string
        + list_start_string
        + contributor_list_string
        + list_end_string
    )
|
||
|
||
# Render the sorted contributor list as TypeScript source.
typscript_code = generate_typescript_code(sorted_contributors)

# Write the generated module. The encoding is given explicitly so the result
# does not depend on the platform's default encoding.
with open("../src/data/community-contributors.ts", "w", encoding="utf-8") as f:
    f.write(typscript_code)