Skip to content

Commit be2b40d

Browse files
committed
feat: languages filter (closes #59)
1 parent dfb5ac9 commit be2b40d

File tree

10 files changed

+84
-20
lines changed

10 files changed

+84
-20
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# CHANGELOG
22

3+
## v6.3.0 (2025-03-13)
4+
5+
- Adds a new `--languages` CLI flag that filters repos by a comma-separated list of GitHub languages (cannot be combined with `--include` or `--exclude`) (closes #59)
6+
- Forking repos is now a threaded operation like all other operations instead of sequential
7+
38
## v6.2.0 (2025-03-13)
49

510
- Forking gists is now a threaded operation like all other operations instead of sequential

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Options:
5050
-f, --fork Pass this flag to fork git assets.
5151
--include INCLUDE Pass a comma separated list of repos to filter what is included in the Archive.
5252
--exclude EXCLUDE Pass a comma separated list of repos to filter what is excluded from the Archive.
53+
--languages LANGUAGES Pass a comma separated list of languages to filter what is included in the Archive.
5354
--forks Pass this flag to include forked git assets (when cloning or pulling).
5455
--location LOCATION The location where you want your GitHub Archive to be stored. Default: /Users/USERNAME/github-archive
5556
--https Use HTTPS URLs instead of SSH.

github_archive/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "6.2.0"
1+
__version__ = "6.3.0"

github_archive/archive.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def __init__(
6060
fork=False,
6161
include=None,
6262
exclude=None,
63+
languages=None,
6364
forks=False,
6465
location=DEFAULT_LOCATION,
6566
use_https=False,
@@ -80,6 +81,7 @@ def __init__(
8081
self.fork = fork
8182
self.include = include.lower().split(',') if include else ''
8283
self.exclude = exclude.lower().split(',') if exclude else ''
84+
self.languages = languages.lower().split(',') if languages else ''
8385
self.forks = forks
8486
self.location = os.path.expanduser(location)
8587
self.use_https = use_https
@@ -147,7 +149,7 @@ def run(self):
147149
_ = iterate_repos_to_archive(self, user_repos, PULL_OPERATION)
148150
if self.fork:
149151
logger.info('# Forking user repos...')
150-
iterate_repos_to_fork(user_repos)
152+
iterate_repos_to_fork(self, user_repos)
151153

152154
# Orgs
153155
if self.orgs:
@@ -167,7 +169,7 @@ def run(self):
167169
_ = iterate_repos_to_archive(self, org_repos, PULL_OPERATION)
168170
if self.fork:
169171
logger.info('# Forking org repos...')
170-
iterate_repos_to_fork(org_repos)
172+
iterate_repos_to_fork(self, org_repos)
171173

172174
# Stars
173175
if self.stars:
@@ -187,7 +189,7 @@ def run(self):
187189
_ = iterate_repos_to_archive(self, starred_repos, PULL_OPERATION)
188190
if self.fork:
189191
logger.info('# Forking starred repos...')
190-
iterate_repos_to_fork(starred_repos)
192+
iterate_repos_to_fork(self, starred_repos)
191193

192194
if failed_repo_dirs:
193195
logger.info('Cleaning up repos...')
@@ -261,6 +263,11 @@ def initialize_project(self):
261263
logger=logger,
262264
message='The include and exclude flags are mutually exclusive. Only one can be used on each run.',
263265
)
266+
elif (self.include or self.exclude) and self.languages:
267+
log_and_raise_value_error(
268+
logger=logger,
269+
message='The include and exclude flags cannot be used with the languages flag.',
270+
)
264271

265272
def authenticated_user_in_users(self) -> bool:
266273
"""Returns True if the authenticated user is in the list of users."""

github_archive/cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,13 @@ def __init__(self):
109109
default=None,
110110
help='Pass a comma separated list of repos to filter what is excluded from the Archive.',
111111
)
112+
parser.add_argument(
113+
'--languages',
114+
type=str,
115+
required=False,
116+
default=None,
117+
help='Pass a comma separated list of languages to filter what is included in the Archive.',
118+
)
112119
parser.add_argument(
113120
'--forks',
114121
action='store_true',
@@ -182,6 +189,7 @@ def run(self):
182189
fork=self.fork,
183190
include=self.include,
184191
exclude=self.exclude,
192+
languages=self.languages,
185193
forks=self.forks,
186194
location=self.location,
187195
use_https=self.https,

github_archive/gists.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,7 @@
2828
)
2929

3030

31-
def iterate_gists_to_archive(
32-
github_archive: GithubArchive, gists: List[Gist.Gist], operation: str
33-
) -> List[Optional[str]]:
31+
def iterate_gists_to_archive(github_archive: GithubArchive, gists: List[Gist.Gist], operation: str) -> None:
3432
"""Iterate over each gist and start a thread if it can be archived."""
3533
pool = ThreadPoolExecutor(github_archive.threads)
3634
thread_list = []
@@ -48,9 +46,6 @@ def iterate_gists_to_archive(
4846
)
4947

5048
wait(thread_list, return_when=ALL_COMPLETED)
51-
failed_gists = [gist.result() for gist in thread_list if gist.result()]
52-
53-
return failed_gists
5449

5550

5651
def view_gists(gists: List[Gist.Gist]):

github_archive/repos.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,14 @@ def iterate_repos_to_archive(
4242

4343
for repo in repos:
4444
if (
45-
(not github_archive.include and not github_archive.exclude)
45+
(
46+
github_archive.languages
47+
and repo.language
48+
and repo.language.lower() in github_archive.languages
49+
and not github_archive.include
50+
and not github_archive.exclude
51+
)
52+
or (not github_archive.languages and not github_archive.include and not github_archive.exclude)
4653
or (github_archive.include and repo.name in github_archive.include)
4754
or (github_archive.exclude and repo.name not in github_archive.exclude)
4855
):
@@ -58,7 +65,7 @@ def iterate_repos_to_archive(
5865
)
5966
)
6067
else:
61-
logger.debug(f'{repo.name} skipped due to include/exclude filtering')
68+
logger.debug(f'{repo.name} skipped due to filtering')
6269

6370
wait(thread_list, return_when=ALL_COMPLETED)
6471
failed_repos = [repo.result() for repo in thread_list if repo.result()]
@@ -75,10 +82,20 @@ def view_repos(repos: List[Repository.Repository]):
7582
logger.info(repo_name)
7683

7784

78-
def iterate_repos_to_fork(repos: List[Repository.Repository]):
85+
def iterate_repos_to_fork(github_archive: GithubArchive, repos: List[Repository.Repository]) -> None:
7986
"""Iterates through a list of repos and attempts to fork them."""
87+
pool = ThreadPoolExecutor(github_archive.threads)
88+
thread_list = []
89+
8090
for repo in repos:
81-
_fork_repo(repo)
91+
thread_list.append(
92+
pool.submit(
93+
_fork_repo,
94+
repo=repo,
95+
)
96+
)
97+
98+
wait(thread_list, return_when=ALL_COMPLETED)
8299

83100

84101
def _fork_repo(repo: Repository.Repository):

test/unit/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@ def mock_git_asset():
1515
mock_git_asset.owner.login = 'mock_username'
1616
mock_git_asset.html_url = 'mock/html_url'
1717
mock_git_asset.ssh_url = 'mock/ssh_url'
18+
mock_git_asset.language = 'Python'
1819

1920
return mock_git_asset

test/unit/test_archive.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ def test_initialize_project(mock_make_dirs, mock_dir_exist, mock_logger):
306306
{'users': 'justintime50', 'clone': True, 'include': 'mock-repo', 'exclude': 'another-mock-repo'},
307307
'The include and exclude flags are mutually exclusive. Only one can be used on each run.',
308308
),
309+
(
310+
{'users': 'justintime50', 'clone': True, 'include': 'mock-repo', 'languages': 'python'},
311+
'The include and exclude flags cannot be used with the languages flag.',
312+
),
309313
],
310314
)
311315
@patch('github_archive.archive.Github.get_user')

test/unit/test_repos.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import copy
12
import subprocess
23
from unittest.mock import (
34
MagicMock,
@@ -51,30 +52,51 @@ def test_iterate_repos_matching_authed_username(mock_archive_repo, mock_github_i
5152
@patch('github_archive.repos._archive_repo')
5253
def test_iterate_repos_include_list(mock_archive_repo, mock_github_instance, mock_git_asset):
5354
"""Tests that we iterate repos that are on the include list."""
54-
repos = [mock_git_asset]
55+
mock_non_include_asset = copy.deepcopy(mock_git_asset)
56+
mock_non_include_asset.name = 'not-the-name'
57+
repos = [mock_git_asset, mock_non_include_asset]
5558
github_archive = GithubArchive(
5659
users='mock_username',
5760
include='mock-asset-name',
5861
)
5962

6063
iterate_repos_to_archive(github_archive, repos, CLONE_OPERATION)
6164

62-
mock_archive_repo.assert_called_once()
65+
mock_archive_repo.assert_called_once() # Called once even though there are two, ensure we filtered
6366

6467

6568
@patch('github_archive.archive.Github')
6669
@patch('github_archive.repos._archive_repo')
6770
def test_iterate_repos_exclude_list(mock_archive_repo, mock_github_instance, mock_git_asset):
6871
"""Tests that we do not iterate repos that are on the exclude list."""
69-
repos = [mock_git_asset]
72+
mock_non_exclude_asset = copy.deepcopy(mock_git_asset)
73+
mock_non_exclude_asset.name = 'not-the-name'
74+
repos = [mock_git_asset, mock_non_exclude_asset]
7075
github_archive = GithubArchive(
7176
users='mock_username',
7277
exclude='mock-asset-name',
7378
)
7479

7580
iterate_repos_to_archive(github_archive, repos, CLONE_OPERATION)
7681

77-
mock_archive_repo.assert_not_called()
82+
mock_archive_repo.assert_called_once() # Called once even though there are two, ensure we filtered
83+
84+
85+
@patch('github_archive.archive.Github')
86+
@patch('github_archive.repos._archive_repo')
87+
def test_iterate_repos_languages_list(mock_archive_repo, mock_github_instance, mock_git_asset):
88+
"""Tests that we iterate repos that are one of the languages in the list."""
89+
mock_non_language_asset = copy.deepcopy(mock_git_asset)
90+
mock_non_language_asset.language = 'Go'
91+
repos = [mock_git_asset, mock_non_language_asset]
92+
github_archive = GithubArchive(
93+
users='mock_username',
94+
languages='python',
95+
)
96+
97+
iterate_repos_to_archive(github_archive, repos, CLONE_OPERATION)
98+
99+
mock_archive_repo.assert_called_once() # Called once even though there are two, ensure we filtered
78100

79101

80102
@patch('logging.Logger.info')
@@ -157,10 +179,14 @@ def test_archive_repo_called_process_error(mock_logger, mock_subprocess, mock_gi
157179
mock_logger.assert_called_once()
158180

159181

182+
@patch('github_archive.archive.Github')
160183
@patch('github_archive.repos._fork_repo')
161-
def test_iterate_repos_to_fork(mock_fork_repo):
184+
def test_iterate_repos_to_fork(mock_fork_repo, mock_github_instance):
162185
repo = MagicMock(spec=Repository.Repository)
163-
iterate_repos_to_fork([repo])
186+
github_archive = GithubArchive(
187+
gists='mock_username',
188+
)
189+
iterate_repos_to_fork(github_archive, [repo])
164190

165191
mock_fork_repo.assert_called_once()
166192

0 commit comments

Comments
 (0)