Skip to content

Commit a146375

Browse files
committed
list-pulls.py: Minimal support for multi-repository
This adds minimal support for handling the `bitcoin/bitcoin#1234` syntax in merge messages, as well as scaffolding for handling a project that spans multiple repositories. There is currently no metadata backup repository covering `bitcoin-core/gui`, so for now PRs from that repository get placeholder metadata and are always placed in the GUI category.
1 parent 7efae0b commit a146375

File tree

1 file changed

+129
-40
lines changed

1 file changed

+129
-40
lines changed

list-pulls.py

Lines changed: 129 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22
'''
33
Script to parse git commit list, extract github issues to create a changelog in
4-
text and json format.
4+
text and JSON format.
55
66
Run this in the root directory of the repository.
77
@@ -20,7 +20,8 @@
2020
likely needs to be extensively manually edited before ending up in the release
2121
notes.
2222
'''
23-
# W.J. van der Laan 2017-2019 (license: MIT)
23+
# W.J. van der Laan 2017-2021
24+
# SPDX-License-Identifier: MIT
2425
import subprocess
2526
import re
2627
import json
@@ -31,6 +32,7 @@
3132
# == Global environment ==
3233
GIT = os.getenv('GIT', 'git')
3334
GHMETA = os.getenv('GHMETA', '../bitcoin-gh-meta')
35+
DEFAULT_REPO = os.getenv('DEFAULT_REPO', 'bitcoin/bitcoin')
3436

3537
# == Label to category mapping ==
3638
# See: https://github.com/bitcoin/bitcoin/labels
@@ -130,13 +132,91 @@
130132
('wallet', 'Wallet', True),
131133
]
132134

135+
# Per-repository information
136+
REPO_INFO = {
137+
'bitcoin/bitcoin': {
138+
'label_mapping': LABEL_MAPPING,
139+
'prefixes': PREFIXES,
140+
'default_category': UNCATEGORIZED,
141+
'ghmeta': GHMETA,
142+
},
143+
# For now, GUI repository pulls are automatically categorized into the GUI category.
144+
'bitcoin-core/gui': {
145+
'label_mapping': (),
146+
'prefixes': [],
147+
'default_category': 'GUI',
148+
'ghmeta': None,
149+
},
150+
}
151+
152+
# == Utilities ==
153+
133154
def remove_last_if_empty(l):
    '''
    Remove empty last member of list.

    Return `l` without its final element when that element is an empty
    `str` or `bytes` value (as left behind by splitting output that ends
    with a separator); otherwise return `l` itself. An empty list is
    returned unchanged instead of raising IndexError.
    '''
    # Guard on emptiness first: l[-1] is undefined for an empty list.
    if l and (l[-1] == b'' or l[-1] == ''):
        return l[0:-1]
    return l
139160

161+
# Valid chars in GitHub owner and repository names.
# Raw string: '\-' is not a valid Python string escape, so a plain literal
# triggers an invalid-escape warning; the resulting pattern is the same.
VALIDNAMECHARS = r'[0-9a-zA-Z\-_]'
# For parsing owner/repo#id
FQID_RE = re.compile('^(' + VALIDNAMECHARS + '+)/(' + VALIDNAMECHARS + '+)#([0-9]+)$')
# For parsing non-qualified #id (the leading '#' is optional)
PR_RE = re.compile('^#?([0-9]+)$')
167+
168+
class FQId:
    '''
    Fully qualified PR id.

    Identifies a pull request by owner, repository and PR number.
    Instances compare, order and hash by the (owner, repo, pr) tuple, so
    they can be used as dict keys, set members and sort keys.
    '''
    def __init__(self, owner: str, repo: str, pr: int):
        self.owner = owner
        self.repo = repo
        self.pr = pr

    @property
    def _key(self):
        '''Tuple that defines equality, ordering and hashing.'''
        return (self.owner, self.repo, self.pr)

    def __eq__(self, o):
        # Let Python fall back to its default handling for foreign types
        # (e.g. None or str) instead of failing on the missing `_key`.
        if not isinstance(o, FQId):
            return NotImplemented
        return self._key == o._key

    def __lt__(self, o):
        # Ordering against a non-FQId is undefined; NotImplemented makes
        # Python raise TypeError rather than AttributeError.
        if not isinstance(o, FQId):
            return NotImplemented
        return self._key < o._key

    def __hash__(self):
        return hash(self._key)

    def __str__(self):
        return f'{self.owner}/{self.repo}#{self.pr}'

    def __repr__(self):
        return f'FQId({repr(self.owner)}, {repr(self.repo)}, {repr(self.pr)})'

    @classmethod
    def parse(cls, pull, default_repo):
        '''
        Return FQId from 'owner/repo#id' or '#id' or 'id' string.

        `default_repo` is an 'owner/repo' string; it supplies the owner
        and repository when `pull` is not fully qualified.
        Raises ValueError when `pull` matches none of the accepted forms.
        '''
        m = FQID_RE.match(pull)
        if m:
            return cls(m.group(1), m.group(2), int(m.group(3)))
        m = PR_RE.match(pull)
        if m:
            (owner, repo) = default_repo.split('/')
            return cls(owner, repo, int(m.group(1)))
        raise ValueError(f'Cannot parse {pull} as PR specification.')
205+
206+
def tests():
    '''Quick internal sanity tests.'''
    default = 'bitcoin/bitcoin'

    # Parsing: fully qualified, '#'-prefixed and bare numeric forms.
    parse_cases = [
        ('bitcoin/bitcoin#1234', FQId('bitcoin', 'bitcoin', 1234)),
        ('bitcoin-core/gui#1235', FQId('bitcoin-core', 'gui', 1235)),
        ('#1236', FQId('bitcoin', 'bitcoin', 1236)),
        ('1237', FQId('bitcoin', 'bitcoin', 1237)),
    ]
    for text, expected in parse_cases:
        assert FQId.parse(text, default) == expected

    # String form round-trips to the fully qualified syntax.
    assert str(FQId('bitcoin', 'bitcoin', 1239)) == 'bitcoin/bitcoin#1239'

    # Ordering: by PR number within a repository, then by owner/repo.
    ordered_pairs = [
        (FQId('bitcoin', 'bitcoin', 1239), FQId('bitcoin', 'bitcoin', 1240)),
        (FQId('bitcoin', 'bitcoin', 1240), FQId('bitcoin-core', 'gui', 1239)),
    ]
    for lower, higher in ordered_pairs:
        assert lower < higher
        assert not (higher < lower)
217+
218+
# == Main program ==
219+
tests()
140220
ref_from = sys.argv[1] # 'v0.10.0rc1'
141221
ref_to = sys.argv[2] # 'master'
142222

@@ -148,8 +228,8 @@ def remove_last_if_empty(l):
148228
try:
149229
with open(exclude_file, 'r') as f:
150230
d = json.load(f)
151-
exclude_pulls = set(p['id'] for p in d['pulls'])
152-
print('Excluding ', exclude_pulls)
231+
exclude_pulls = set(FQId.parse(str(p['id']), DEFAULT_REPO) for p in d['pulls'])
232+
print(f'Excluding {", ".join(str(p) for p in exclude_pulls)}')
153233
print()
154234
except IOError as e:
155235
print(f'Unable to read exclude file {exclude_file}', file=sys.stderr)
@@ -187,12 +267,13 @@ def parse_commit_message(msg):
187267
'''
188268
retval = CommitMetaData()
189269
for line in msg.splitlines():
190-
m = re.match('Github-Pull: #?(\d+)', line, re.I)
191-
if m:
192-
retval.pull = int(m.group(1))
193-
m = re.match('Rebased-From: (.*)', line, re.I)
194-
if m:
195-
retval.rebased_from = m.group(1).strip().split()
270+
if line.startswith('Github-Pull:'):
271+
param = line[12:].strip()
272+
if param.startswith('#'): # compensate for incorrect #bitcoin-core/gui#148
273+
param = param[1:]
274+
retval.pull = FQId.parse(param, DEFAULT_REPO)
275+
if line.startswith('Rebased-From:'):
276+
retval.rebased_from = line[13:].strip().split()
196277
if retval.pull is not None:
197278
return retval
198279
else:
@@ -202,20 +283,20 @@ def parse_commit_message(msg):
202283
pulls = {}
203284
PullData = namedtuple('PullData', ['id', 'merge', 'commits', 'index'])
204285
orphans = set(commits)
205-
pullreq_re = re.compile('#([0-9]+)')
286+
MERGE_RE = re.compile('Merge (.*?):')
206287
for c in commit_data.values():
207288
# is merge commit
208289
if len(c.parents)>1:
209290
assert(len(c.parents)==2)
210-
match = pullreq_re.search(c.title)
291+
match = MERGE_RE.match(c.title)
211292
if match: # merges a pull request
212293
if c.sha in orphans:
213294
orphans.remove(c.sha)
214295
#print('removing ', c.sha)
215296
sub_commits = subprocess.check_output([GIT, 'rev-list', c.parents[0]+'..'+c.parents[1]])
216297
sub_commits = sub_commits.decode()
217298
sub_commits = set(sub_commits.rstrip().splitlines())
218-
pull = int(match.group(1))
299+
pull = FQId.parse(match.group(1), DEFAULT_REPO)
219300

220301
# remove commits that are not in the global list
221302
sub_commits = sub_commits.intersection(commits)
@@ -239,12 +320,14 @@ def parse_commit_message(msg):
239320
if md:
240321
sub_pulls[md.pull].append(cid)
241322

242-
if not sub_pulls and 'backport' in c.message.lower():
243-
# TODO could check pull label instead, but we don't know that here yet
244-
print('#%i: Merge commit message contains \'backport\' but there are no sub-pulls' % (pull))
323+
if not sub_pulls and 'backport' in c.title.lower():
324+
# just information for manual checking
325+
print(f'{pull}: Merge PR title {repr(c.title)} contains \'backport\' but there are no sub-pulls')
245326

246327
for (sub_pull, sub_pull_commits) in sub_pulls.items():
247328
pulls[sub_pull] = PullData(sub_pull, sub_pull_commits[0], sub_pull_commits, index)
329+
else:
330+
print(f'{c.sha}: Merge commit does not merge a PR: {c.title}')
248331

249332
# Extract remaining pull numbers from orphans, if they're backports
250333
for o in set(orphans):
@@ -261,26 +344,26 @@ def parse_commit_message(msg):
261344
pulls_order = [p.id for p in pulls_order]
262345
# pulls_order = sorted(pulls.keys())
263346

264-
def guess_category_from_labels(labels):
347+
def guess_category_from_labels(repo_info, labels):
265348
'''
266349
Guess category for a PR from github labels.
267350
'''
268351
labels = [l.lower() for l in labels]
269-
for (label_list, category) in LABEL_MAPPING:
352+
for (label_list, category) in repo_info['label_mapping']:
270353
for l in labels:
271354
if l in label_list:
272355
return category
273-
return UNCATEGORIZED
356+
return repo_info['default_category']
274357

275-
def get_category(labels, message):
358+
def get_category(repo_info, labels, message):
276359
'''
277-
Guess category for a PR from labels and message.
360+
Guess category for a PR from repository, labels and message prefixes.
278361
Strip category from message.
279362
'''
280-
category = guess_category_from_labels(labels)
363+
category = guess_category_from_labels(repo_info, labels)
281364
message = message.strip()
282365

283-
for (prefix, p_category, do_strip) in PREFIXES:
366+
for (prefix, p_category, do_strip) in repo_info['prefixes']:
284367
for variant in [('[' + prefix + ']:'), ('[' + prefix + ']'), (prefix + ':')]:
285368
if message.lower().startswith(variant):
286369
category = p_category
@@ -294,19 +377,25 @@ def get_category(labels, message):
294377
pull_labels = {}
295378
per_category = defaultdict(list)
296379
for pull in pulls_order:
297-
filename = f'{GHMETA}/issues/{pull//100}xx/{pull}.json'
298-
try:
299-
with open(filename, 'r') as f:
300-
data0 = json.load(f)
301-
except IOError as e:
302-
data0 = None
303-
304-
filename = f'{GHMETA}/issues/{pull//100}xx/{pull}-PR.json'
305-
try:
306-
with open(filename, 'r') as f:
307-
data1 = json.load(f)
308-
except IOError as e:
309-
data1 = {'title': '{Not found}', 'user': {'login':'unknown'}}
380+
repo_info = REPO_INFO[f'{pull.owner}/{pull.repo}']
381+
382+
# Find github metadata for PR, if available
383+
data0 = None
384+
data1 = {'title': '{Not found}', 'user': {'login':'unknown'}}
385+
if repo_info['ghmeta'] is not None:
386+
filename = f'{repo_info["ghmeta"]}/issues/{pull.pr//100}xx/{pull.pr}.json'
387+
try:
388+
with open(filename, 'r') as f:
389+
data0 = json.load(f)
390+
except IOError as e:
391+
pass
392+
393+
filename = f'{repo_info["ghmeta"]}/issues/{pull.pr//100}xx/{pull.pr}-PR.json'
394+
try:
395+
with open(filename, 'r') as f:
396+
data1 = json.load(f)
397+
except IOError as e:
398+
pass
310399

311400
message = data1['title']
312401
author = data1['user']['login']
@@ -325,7 +414,7 @@ def get_category(labels, message):
325414
message = message[0:-1]
326415

327416
# determine category and new message from message
328-
category, message = get_category(labels, message)
417+
category, message = get_category(repo_info, labels, message)
329418
data1['title'] = message
330419

331420
per_category[category].append((pull, message, author))
@@ -337,13 +426,13 @@ def get_category(labels, message):
337426
continue
338427
print('### %s' % category)
339428
for dd in per_category[category]:
340-
print('- #%i %s (%s)' % dd)
429+
print(f'- {dd[0]} {dd[1]} ({dd[2]})')
341430
print()
342431

343432
if per_category[UNCATEGORIZED]:
344433
print('### %s' % UNCATEGORIZED)
345434
for dd in per_category[UNCATEGORIZED]:
346-
print('- #%i %s (%s) (labels: %s)' % (dd+(pull_labels[dd[0]],)))
435+
print(f'- {dd[0]} {dd[1]} ({dd[2]}) (labels: {pull_labels[dd[0]]})')
347436
print()
348437

349438
print('### Orphan commits')
@@ -360,7 +449,7 @@ def get_category(labels, message):
360449
for pull in sorted(pulls.keys()):
361450
pd = pulls[pull]
362451
pulls_d.append(
363-
{'id': pd.id,
452+
{'id': str(pd.id),
364453
'merge': pd.merge,
365454
'commits': list(pd.commits),
366455
'meta': pull_meta[pd.id]})

0 commit comments

Comments
 (0)