Skip to content

Commit 3edbfc7

Browse files
authored
Merge pull request #472 from Iamrodos/feature/108-starred-skip-size-over
Add --starred-skip-size-over flag to limit starred repo size (#108)
2 parents 875f09e + 3c43e0f commit 3edbfc7

File tree

4 files changed

+272
-4
lines changed

4 files changed

+272
-4
lines changed

README.rst

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ CLI Help output::
3939
github-backup [-h] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [-q] [--as-app]
4040
[-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i]
4141
[--incremental-by-files]
42-
[--starred] [--all-starred]
42+
[--starred] [--all-starred] [--starred-skip-size-over MB]
4343
[--watched] [--followers] [--following] [--all]
4444
[--issues] [--issue-comments] [--issue-events] [--pulls]
4545
[--pull-comments] [--pull-commits] [--pull-details]
@@ -84,6 +84,8 @@ CLI Help output::
8484
incremental backup based on modification date of files
8585
--starred include JSON output of starred repositories in backup
8686
--all-starred include starred repositories in backup [*]
87+
--starred-skip-size-over MB
88+
skip starred repositories larger than this size in MB
8789
--watched include JSON output of watched repositories in backup
8890
--followers include JSON output of followers in backup
8991
--following include JSON output of following users in backup
@@ -292,10 +294,20 @@ All is not everything
292294

293295
The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more.
294296

295-
Cloning all starred size
296-
------------------------
297+
Starred repository size
298+
-----------------------
299+
300+
Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space.
301+
302+
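To list your starred repositories with their sizes, largest first (requires `GitHub CLI <https://cli.github.com>`_; note that with ``--paginate`` the ``--jq`` filter is applied to each page of results, so the ordering is per page)::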
303+
304+
gh api user/starred --paginate --jq 'sort_by(-.size)[]|"\(.full_name) \(.size/1024|round)MB"'
305+
306+
To limit which starred repositories are cloned, use ``--starred-skip-size-over MB``, where ``MB`` is the size limit in megabytes. For example, ``--starred-skip-size-over 500`` skips any starred repository whose git repository size (code and history) exceeds 500 MB; a repository of exactly 500 MB is still cloned. This limit applies only to the repository itself, not to issues, release assets or other metadata, and it only affects starred repositories: your own repositories are always included regardless of size.
307+
308+
For finer control, avoid using ``--assets`` with starred repos, or use ``--skip-assets-on`` for specific repositories with large release binaries.
297309

298-
Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. commonly starred repos can have tens of thousands of issues, many large assets and the repo itself etc. Consider just storing links to starred repos in JSON format with ``--starred``.
310+
Alternatively, consider just storing links to starred repos in JSON format with ``--starred``.
299311

300312
Incremental Backup
301313
------------------

github_backup/github_backup.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,13 @@ def parse_args(args=None):
211211
dest="all_starred",
212212
help="include starred repositories in backup [*]",
213213
)
214+
parser.add_argument(
215+
"--starred-skip-size-over",
216+
type=int,
217+
metavar="MB",
218+
dest="starred_skip_size_over",
219+
help="skip starred repositories larger than this size in MB",
220+
)
214221
parser.add_argument(
215222
"--watched",
216223
action="store_true",
@@ -1570,6 +1577,25 @@ def filter_repositories(args, unfiltered_repositories):
15701577
]
15711578
if args.skip_archived:
15721579
repositories = [r for r in repositories if not r.get("archived")]
1580+
if args.starred_skip_size_over is not None:
1581+
if args.starred_skip_size_over <= 0:
1582+
logger.warning(
1583+
"--starred-skip-size-over must be greater than 0, ignoring"
1584+
)
1585+
else:
1586+
size_limit_kb = args.starred_skip_size_over * 1024
1587+
filtered = []
1588+
for r in repositories:
1589+
if r.get("is_starred") and r.get("size", 0) > size_limit_kb:
1590+
size_mb = r.get("size", 0) / 1024
1591+
logger.info(
1592+
"Skipping starred repo {0} ({1:.0f} MB) due to --starred-skip-size-over {2}".format(
1593+
r.get("full_name", r.get("name")), size_mb, args.starred_skip_size_over
1594+
)
1595+
)
1596+
else:
1597+
filtered.append(r)
1598+
repositories = filtered
15731599
if args.exclude:
15741600
repositories = [
15751601
r for r in repositories if "name" not in r or r["name"] not in args.exclude

tests/test_case_sensitivity.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ def test_filter_repositories_case_insensitive_user(self):
2626
args.private = False
2727
args.public = False
2828
args.all = True
29+
args.skip_archived = False
30+
args.starred_skip_size_over = None
2931

3032
# Simulate GitHub API returning canonical case
3133
repos = [
@@ -65,6 +67,8 @@ def test_filter_repositories_case_insensitive_org(self):
6567
args.private = False
6668
args.public = False
6769
args.all = True
70+
args.skip_archived = False
71+
args.starred_skip_size_over = None
6872

6973
repos = [
7074
{
@@ -93,6 +97,8 @@ def test_filter_repositories_case_variations(self):
9397
args.private = False
9498
args.public = False
9599
args.all = True
100+
args.skip_archived = False
101+
args.starred_skip_size_over = None
96102

97103
repos = [
98104
{"name": "repo1", "owner": {"login": "test-user"}, "private": False, "fork": False},
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
"""Tests for --starred-skip-size-over flag behavior (issue #108)."""
2+
3+
import pytest
4+
from unittest.mock import Mock
5+
6+
from github_backup import github_backup
7+
8+
9+
class TestStarredSkipSizeOver:
    """Shared base for the --starred-skip-size-over test classes.

    Issue #108 asks for a way to restrict the size of starred repositories
    before they are cloned. Repository sizes follow the GitHub API's 'size'
    field (in KB), whereas the command-line value is given in MB.
    """

    def _create_mock_args(self, **overrides):
        """Return a Mock standing in for the parsed command-line arguments.

        Every keyword in *overrides* replaces the matching default below, or
        is added as an extra attribute when there is no such default.
        """
        settings = {
            "user": "testuser",
            "repository": None,
            "name_regex": None,
            "languages": None,
            "fork": False,
            "private": False,
            "skip_archived": False,
            "starred_skip_size_over": None,
            "exclude": None,
        }
        # Caller-supplied values win over the defaults.
        settings.update(overrides)

        mock_args = Mock()
        for attribute, value in settings.items():
            setattr(mock_args, attribute, value)
        return mock_args
34+
35+
36+
class TestStarredSkipSizeOverArgumentParsing(TestStarredSkipSizeOver):
    """Command-line parsing checks for --starred-skip-size-over."""

    def test_starred_skip_size_over_not_set_defaults_to_none(self):
        """Leaving the flag off the command line yields None."""
        parsed = github_backup.parse_args(["testuser"])
        assert parsed.starred_skip_size_over is None

    def test_starred_skip_size_over_accepts_integer(self):
        """An integer argument is parsed into the matching int value."""
        argv = ["testuser", "--starred-skip-size-over", "500"]
        parsed = github_backup.parse_args(argv)
        assert parsed.starred_skip_size_over == 500

    def test_starred_skip_size_over_rejects_non_integer(self):
        """A non-integer argument makes the parser exit."""
        argv = ["testuser", "--starred-skip-size-over", "abc"]
        with pytest.raises(SystemExit):
            github_backup.parse_args(argv)
53+
54+
55+
class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver):
    """Filtering checks for --starred-skip-size-over."""

    @staticmethod
    def _repo(name, owner, size_mb=None, starred=True):
        """Build a repository dict as passed to filter_repositories.

        *size_mb* is converted to KB for the 'size' key and the key is left
        out entirely when it is None. The 'is_starred' key is only present
        for starred repositories.
        """
        repo = {"name": name, "owner": {"login": owner}}
        if size_mb is not None:
            repo["size"] = size_mb * 1024
        if starred:
            repo["is_starred"] = True
        return repo

    def test_starred_repo_under_limit_is_kept(self):
        """A 100 MB starred repo passes a 500 MB limit."""
        args = self._create_mock_args(starred_skip_size_over=500)
        repos = [self._repo("small-repo", "otheruser", size_mb=100)]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 1
        assert result[0]["name"] == "small-repo"

    def test_starred_repo_over_limit_is_filtered(self):
        """A 600 MB starred repo is dropped by a 500 MB limit."""
        args = self._create_mock_args(starred_skip_size_over=500)
        repos = [self._repo("huge-repo", "otheruser", size_mb=600)]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 0

    def test_own_repo_over_limit_is_kept(self):
        """A repo without the is_starred flag is kept even when over the limit."""
        args = self._create_mock_args(starred_skip_size_over=500)
        # No is_starred flag - this is the user's own repo
        repos = [self._repo("my-huge-repo", "testuser", size_mb=600, starred=False)]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 1
        assert result[0]["name"] == "my-huge-repo"

    def test_starred_repo_at_exact_limit_is_kept(self):
        """A starred repo of exactly 500 MB passes a 500 MB limit."""
        args = self._create_mock_args(starred_skip_size_over=500)
        repos = [self._repo("exact-limit-repo", "otheruser", size_mb=500)]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 1
        assert result[0]["name"] == "exact-limit-repo"

    def test_mixed_repos_filtered_correctly(self):
        """Only the oversized starred repo is removed from a mixed list."""
        args = self._create_mock_args(starred_skip_size_over=500)
        repos = [
            # 1 GB - own repo, should be kept
            self._repo("my-huge-repo", "testuser", size_mb=1000, starred=False),
            # 100 MB - under limit
            self._repo("starred-small", "otheruser", size_mb=100),
            # 2 GB - over limit
            self._repo("starred-huge", "anotheruser", size_mb=2000),
        ]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 2
        kept_names = {repo["name"] for repo in result}
        assert kept_names == {"my-huge-repo", "starred-small"}

    def test_no_size_limit_keeps_all_starred(self):
        """With no limit configured, a 10 GB starred repo is kept."""
        args = self._create_mock_args(starred_skip_size_over=None)
        repos = [self._repo("huge-starred-repo", "otheruser", size_mb=10000)]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 1

    def test_repo_without_size_field_is_kept(self):
        """A starred repo with no size field is kept (size defaults to 0)."""
        args = self._create_mock_args(starred_skip_size_over=500)
        # No size field
        repos = [self._repo("no-size-repo", "otheruser")]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 1

    def test_zero_value_warns_and_is_ignored(self, caplog):
        """A limit of zero logs a warning and filters nothing."""
        args = self._create_mock_args(starred_skip_size_over=0)
        repos = [self._repo("huge-starred-repo", "otheruser", size_mb=10000)]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 1
        assert "must be greater than 0" in caplog.text

    def test_negative_value_warns_and_is_ignored(self, caplog):
        """A negative limit logs a warning and filters nothing."""
        args = self._create_mock_args(starred_skip_size_over=-5)
        repos = [self._repo("huge-starred-repo", "otheruser", size_mb=10000)]

        result = github_backup.filter_repositories(args, repos)

        assert len(result) == 1
        assert "must be greater than 0" in caplog.text
221+
222+
223+
if __name__ == "__main__":
    # Running this module directly hands it to pytest in verbose mode.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)

0 commit comments

Comments
 (0)