11#!/usr/bin/env python3
22'''
33Script to parse git commit list, extract github issues to create a changelog in
4- text and json format.
4+ text and JSON format.
55
66Run this in the root directory of the repository.
77
2020likely needs to be extensively manually edited before ending up in the release
2121notes.
2222'''
23- # W.J. van der Laan 2017-2019 (license: MIT)
23+ # W.J. van der Laan 2017-2021
24+ # SPDX-License-Identifier: MIT
2425import subprocess
2526import re
2627import json
# == Global environment ==
# Each setting below can be overridden through the environment variable of the same name.
GIT = os.getenv('GIT', 'git')  # git executable to invoke
GHMETA = os.getenv('GHMETA', '../bitcoin-gh-meta')  # directory holding the GitHub issue/PR metadata JSON files
DEFAULT_REPO = os.getenv('DEFAULT_REPO', 'bitcoin/bitcoin')  # 'owner/repo' assumed for PR ids given without a repository
3436
3537# == Label to category mapping ==
3638# See: https://github.com/bitcoin/bitcoin/labels
130132 ('wallet' , 'Wallet' , True ),
131133]
132134
# Per-repository information, keyed by 'owner/repo'.
# Each entry provides:
#   'label_mapping':    (label list, category) pairs used to guess a category from PR labels
#   'prefixes':         (prefix, category, do_strip) entries matched against the start of the PR title
#   'default_category': category used when no label matches
#   'ghmeta':           base directory of the GitHub metadata files, or None to skip the metadata lookup
REPO_INFO = {
    'bitcoin/bitcoin': {
        'label_mapping': LABEL_MAPPING,
        'prefixes': PREFIXES,
        'default_category': UNCATEGORIZED,
        'ghmeta': GHMETA,
    },
    # For now, GUI repository pulls are automatically categorized into the GUI category.
    'bitcoin-core/gui': {
        'label_mapping': (),
        'prefixes': [],
        'default_category': 'GUI',
        'ghmeta': None,
    },
}
151+
152+ # == Utilities ==
153+
def remove_last_if_empty(l):
    '''
    Remove empty last member of list.

    Returns a copy of `l` without its last member when that member is an
    empty str or empty bytes; otherwise returns `l` itself unchanged.
    An empty list is returned as-is instead of raising IndexError.
    '''
    # Guard the empty case first: l[-1] on an empty list would raise.
    if l and (l[-1] == b'' or l[-1] == ''):
        return l[0:-1]
    return l
139160
# Valid chars in github names.
# Raw string: '\-' inside a normal string literal is an invalid escape sequence.
VALIDNAMECHARS = r'[0-9a-zA-Z\-_]'
# For parsing owner/repo#id; groups are (owner, repo, id)
FQID_RE = re.compile('^(' + VALIDNAMECHARS + '+)/(' + VALIDNAMECHARS + '+)#([0-9]+)$')
# For parsing non-qualified #id (the leading '#' is optional); group 1 is the id
PR_RE = re.compile('^#?([0-9]+)$')
167+
class FQId:
    '''
    Fully qualified PR id.

    Identifies a pull request by repository owner, repository name and PR
    number. Instances compare, order and hash by the (owner, repo, pr)
    tuple, so they can be sorted and used as set members or dict keys.
    '''
    def __init__(self, owner: str, repo: str, pr: int):
        self.owner = owner
        self.repo = repo
        self.pr = pr

    @property
    def _key(self):
        '''Tuple used for equality, ordering and hashing.'''
        return (self.owner, self.repo, self.pr)

    def __eq__(self, o):
        # Return NotImplemented for foreign types so that e.g. `fqid == None`
        # evaluates to False instead of raising AttributeError.
        if not isinstance(o, FQId):
            return NotImplemented
        return self._key == o._key

    def __lt__(self, o):
        # Ordering against a foreign type is left to Python (TypeError).
        if not isinstance(o, FQId):
            return NotImplemented
        return self._key < o._key

    def __hash__(self):
        return hash(self._key)

    def __str__(self):
        return f'{self.owner}/{self.repo}#{self.pr}'

    def __repr__(self):
        return f'FQId({self.owner!r}, {self.repo!r}, {self.pr!r})'

    @classmethod
    def parse(cls, pull, default_repo):
        '''
        Return FQId from 'owner/repo#id' or '#id' or 'id' string.

        `default_repo` is an 'owner/repo' string supplying the repository for
        the non-qualified forms. Raises ValueError if `pull` matches neither
        form, or if `default_repo` does not split into exactly owner and repo.
        '''
        m = FQID_RE.match(pull)
        if m:
            return cls(m.group(1), m.group(2), int(m.group(3)))
        m = PR_RE.match(pull)
        if m:
            (owner, repo) = default_repo.split('/')
            return cls(owner, repo, int(m.group(1)))
        raise ValueError(f'Cannot parse {pull} as PR specification.')
205+
def tests():
    '''Quick internal sanity tests for FQId parsing, formatting and ordering.'''
    default = 'bitcoin/bitcoin'

    # Each input string must parse to the FQId next to it.
    parse_cases = [
        ('bitcoin/bitcoin#1234', FQId('bitcoin', 'bitcoin', 1234)),
        ('bitcoin-core/gui#1235', FQId('bitcoin-core', 'gui', 1235)),
        ('#1236', FQId('bitcoin', 'bitcoin', 1236)),
        ('1237', FQId('bitcoin', 'bitcoin', 1237)),
    ]
    for text, expected in parse_cases:
        assert FQId.parse(text, default) == expected

    assert str(FQId('bitcoin', 'bitcoin', 1239)) == 'bitcoin/bitcoin#1239'

    # (smaller, larger) pairs: ordering must hold one way and not the other.
    ordered_pairs = [
        (FQId('bitcoin', 'bitcoin', 1239), FQId('bitcoin', 'bitcoin', 1240)),
        (FQId('bitcoin', 'bitcoin', 1240), FQId('bitcoin-core', 'gui', 1239)),
    ]
    for smaller, larger in ordered_pairs:
        assert smaller < larger
        assert not (larger < smaller)
217+
218+ # == Main program ==
219+ tests ()
140220ref_from = sys .argv [1 ] # 'v0.10.0rc1'
141221ref_to = sys .argv [2 ] # 'master'
142222
@@ -148,8 +228,8 @@ def remove_last_if_empty(l):
148228 try :
149229 with open (exclude_file , 'r' ) as f :
150230 d = json .load (f )
151- exclude_pulls = set (p ['id' ] for p in d ['pulls' ])
152- print ('Excluding ' , exclude_pulls )
231+ exclude_pulls = set (FQId . parse ( str ( p ['id' ]), DEFAULT_REPO ) for p in d ['pulls' ])
232+ print (f 'Excluding { ", " . join ( str ( p ) for p in exclude_pulls ) } ' )
153233 print ()
154234 except IOError as e :
155235 print (f'Unable to read exclude file { exclude_file } ' , file = sys .stderr )
@@ -187,12 +267,13 @@ def parse_commit_message(msg):
187267 '''
188268 retval = CommitMetaData ()
189269 for line in msg .splitlines ():
190- m = re .match ('Github-Pull: #?(\d+)' , line , re .I )
191- if m :
192- retval .pull = int (m .group (1 ))
193- m = re .match ('Rebased-From: (.*)' , line , re .I )
194- if m :
195- retval .rebased_from = m .group (1 ).strip ().split ()
270+ if line .startswith ('Github-Pull:' ):
271+ param = line [12 :].strip ()
272+ if param .startswith ('#' ): # compensate for incorrect #bitcoin-core/gui#148
273+ param = param [1 :]
274+ retval .pull = FQId .parse (param , DEFAULT_REPO )
275+ if line .startswith ('Rebased-From:' ):
276+ retval .rebased_from = line [13 :].strip ().split ()
196277 if retval .pull is not None :
197278 return retval
198279 else :
@@ -202,20 +283,20 @@ def parse_commit_message(msg):
202283pulls = {}
203284PullData = namedtuple ('PullData' , ['id' , 'merge' , 'commits' , 'index' ])
204285orphans = set (commits )
205- pullreq_re = re .compile ('#([0-9]+) ' )
286+ MERGE_RE = re .compile ('Merge (.*?): ' )
206287for c in commit_data .values ():
207288 # is merge commit
208289 if len (c .parents )> 1 :
209290 assert (len (c .parents )== 2 )
210- match = pullreq_re . search (c .title )
291+ match = MERGE_RE . match (c .title )
211292 if match : # merges a pull request
212293 if c .sha in orphans :
213294 orphans .remove (c .sha )
214295 #print('removing ', c.sha)
215296 sub_commits = subprocess .check_output ([GIT , 'rev-list' , c .parents [0 ]+ '..' + c .parents [1 ]])
216297 sub_commits = sub_commits .decode ()
217298 sub_commits = set (sub_commits .rstrip ().splitlines ())
218- pull = int (match .group (1 ))
299+ pull = FQId . parse (match .group (1 ), DEFAULT_REPO )
219300
220301 # remove commits that are not in the global list
221302 sub_commits = sub_commits .intersection (commits )
@@ -239,12 +320,14 @@ def parse_commit_message(msg):
239320 if md :
240321 sub_pulls [md .pull ].append (cid )
241322
242- if not sub_pulls and 'backport' in c .message .lower ():
243- # TODO could check pull label instead, but we don't know that here yet
244- print ('#%i : Merge commit message contains \' backport\' but there are no sub-pulls' % ( pull ) )
323+ if not sub_pulls and 'backport' in c .title .lower ():
324+ # just information for manual checking
325+ print (f' { pull } : Merge PR title { repr ( c . title ) } contains \' backport\' but there are no sub-pulls' )
245326
246327 for (sub_pull , sub_pull_commits ) in sub_pulls .items ():
247328 pulls [sub_pull ] = PullData (sub_pull , sub_pull_commits [0 ], sub_pull_commits , index )
329+ else :
330+ print (f'{ c .sha } : Merge commit does not merge a PR: { c .title } ' )
248331
249332# Extract remaining pull numbers from orphans, if they're backports
250333for o in set (orphans ):
@@ -261,26 +344,26 @@ def parse_commit_message(msg):
261344pulls_order = [p .id for p in pulls_order ]
262345# pulls_order = sorted(pulls.keys())
263346
def guess_category_from_labels(repo_info, labels):
    '''
    Guess category for a PR from github labels.

    Labels are compared in lower case against each entry of
    repo_info['label_mapping'] in order; the category of the first entry
    containing any of the labels is returned. When nothing matches,
    repo_info['default_category'] is returned.
    '''
    lowered = [name.lower() for name in labels]
    for (known_labels, category) in repo_info['label_mapping']:
        if any(name in known_labels for name in lowered):
            return category
    return repo_info['default_category']
274357
275- def get_category (labels , message ):
358+ def get_category (repo_info , labels , message ):
276359 '''
277- Guess category for a PR from labels and message.
360+ Guess category for a PR from repository, labels and message prefixes .
278361 Strip category from message.
279362 '''
280- category = guess_category_from_labels (labels )
363+ category = guess_category_from_labels (repo_info , labels )
281364 message = message .strip ()
282365
283- for (prefix , p_category , do_strip ) in PREFIXES :
366+ for (prefix , p_category , do_strip ) in repo_info [ 'prefixes' ] :
284367 for variant in [('[' + prefix + ']:' ), ('[' + prefix + ']' ), (prefix + ':' )]:
285368 if message .lower ().startswith (variant ):
286369 category = p_category
@@ -294,19 +377,25 @@ def get_category(labels, message):
294377pull_labels = {}
295378per_category = defaultdict (list )
296379for pull in pulls_order :
297- filename = f'{ GHMETA } /issues/{ pull // 100 } xx/{ pull } .json'
298- try :
299- with open (filename , 'r' ) as f :
300- data0 = json .load (f )
301- except IOError as e :
302- data0 = None
303-
304- filename = f'{ GHMETA } /issues/{ pull // 100 } xx/{ pull } -PR.json'
305- try :
306- with open (filename , 'r' ) as f :
307- data1 = json .load (f )
308- except IOError as e :
309- data1 = {'title' : '{Not found}' , 'user' : {'login' :'unknown' }}
380+ repo_info = REPO_INFO [f'{ pull .owner } /{ pull .repo } ' ]
381+
382+ # Find github metadata for PR, if available
383+ data0 = None
384+ data1 = {'title' : '{Not found}' , 'user' : {'login' :'unknown' }}
385+ if repo_info ['ghmeta' ] is not None :
386+ filename = f'{ repo_info ["ghmeta" ]} /issues/{ pull .pr // 100 } xx/{ pull .pr } .json'
387+ try :
388+ with open (filename , 'r' ) as f :
389+ data0 = json .load (f )
390+ except IOError as e :
391+ pass
392+
393+ filename = f'{ repo_info ["ghmeta" ]} /issues/{ pull .pr // 100 } xx/{ pull .pr } -PR.json'
394+ try :
395+ with open (filename , 'r' ) as f :
396+ data1 = json .load (f )
397+ except IOError as e :
398+ pass
310399
311400 message = data1 ['title' ]
312401 author = data1 ['user' ]['login' ]
@@ -325,7 +414,7 @@ def get_category(labels, message):
325414 message = message [0 :- 1 ]
326415
327416 # determine category and new message from message
328- category , message = get_category (labels , message )
417+ category , message = get_category (repo_info , labels , message )
329418 data1 ['title' ] = message
330419
331420 per_category [category ].append ((pull , message , author ))
@@ -337,13 +426,13 @@ def get_category(labels, message):
337426 continue
338427 print ('### %s' % category )
339428 for dd in per_category [category ]:
340- print ('- #%i %s (%s)' % dd )
429+ print (f '- { dd [ 0 ] } { dd [ 1 ] } ( { dd [ 2 ] } )' )
341430 print ()
342431
343432if per_category [UNCATEGORIZED ]:
344433 print ('### %s' % UNCATEGORIZED )
345434 for dd in per_category [UNCATEGORIZED ]:
346- print ('- #%i %s (%s ) (labels: %s)' % ( dd + ( pull_labels [dd [0 ]],)) )
435+ print (f '- { dd [ 0 ] } { dd [ 1 ] } ( { dd [ 2 ] } ) (labels: { pull_labels [dd [0 ]]} )' )
347436 print ()
348437
349438print ('### Orphan commits' )
@@ -360,7 +449,7 @@ def get_category(labels, message):
360449for pull in sorted (pulls .keys ()):
361450 pd = pulls [pull ]
362451 pulls_d .append (
363- {'id' : pd .id ,
452+ {'id' : str ( pd .id ) ,
364453 'merge' : pd .merge ,
365454 'commits' : list (pd .commits ),
366455 'meta' : pull_meta [pd .id ]})
0 commit comments