@@ -4,7 +4,7 @@ use git2::{BlameOptions, Diff, DiffFindOptions, DiffOptions, FileMode, Oid, Patc
44use indicatif:: { ProgressBar , ProgressStyle } ;
55use log:: { debug, info, warn} ;
66use rayon:: prelude:: * ;
7- use std:: { cmp, collections:: HashMap } ;
7+ use std:: { cell :: RefCell , cmp, collections:: HashMap } ;
88use structopt:: StructOpt ;
99use thread_local:: ThreadLocal ;
1010
@@ -31,9 +31,9 @@ struct Opt {
3131 #[ structopt( long, default_value = "1" ) ]
3232 context : u32 ,
3333
34- /// Don't look further back than this when blaming files
34+ /// Try not to look further back than this commit when blaming files
3535 #[ structopt( long) ]
36- first_commit : Option < String > ,
36+ stop_at : Option < String > ,
3737
3838 // Maximum number of threads. 0 is auto.
3939 #[ structopt( long, short = "j" , default_value = "0" ) ]
@@ -81,30 +81,28 @@ fn main() -> Result<()> {
8181 . context ( "unable to find compare" ) ?
8282 . id ( ) ;
8383 info ! ( "compare: {}" , compare) ;
84- let first_commit = if let Some ( first_commit) = opt. first_commit {
85- let commit = repo
86- . revparse_single ( & first_commit)
87- . context ( "unable to find first_commit" ) ?
84+ let merge_base = repo
85+ . merge_base ( base, compare)
86+ . context ( "unable to find merge base" ) ?;
87+ info ! ( "merge base: {:?}" , merge_base) ;
88+ let stop_at = if let Some ( stop_at) = opt. stop_at {
89+ let mut commit = repo
90+ . revparse_single ( & stop_at)
91+ . context ( "unable to find stop_at commit" ) ?
8892 . id ( ) ;
89- info ! ( "first commit: {}" , commit) ;
93+ let base = repo. merge_base ( merge_base, commit) ?;
94+ if base != commit {
95+ warn ! (
96+ "stop_at ({}) not an ancestor of {} and {}. using {} instead." ,
97+ commit, merge_base, compare, base
98+ ) ;
99+ }
100+ commit = base;
101+ info ! ( "stopping at commit: {}" , commit) ;
90102 Some ( commit)
91103 } else {
92104 None
93105 } ;
94- let merge_base = repo
95- . merge_base ( base, compare)
96- . context ( "unable to find merge base" ) ?;
97- if let Some ( commit) = first_commit {
98- if let Ok ( base) = repo. merge_base ( merge_base, commit) {
99- if base != commit {
100- warn ! (
101- "first_commit ({}) not an ancestor of {} and {}" ,
102- commit, base, compare
103- ) ;
104- }
105- }
106- }
107- info ! ( "merge base: {:?}" , merge_base) ;
108106 let diff =
109107 get_diff ( & repo, merge_base, compare, opt. context ) . context ( "error calculating diff" ) ?;
110108 progress. tick ( ) ;
@@ -116,6 +114,8 @@ fn main() -> Result<()> {
116114 type ModifiedMap = HashMap < ( Option < String > , Option < String > ) , usize > ;
117115 let repo_tls: ThreadLocal < Repository > = ThreadLocal :: new ( ) ;
118116 let diff_tls: ThreadLocal < Diff > = ThreadLocal :: new ( ) ;
117+ type MergeBaseMap = HashMap < ( Oid , Oid ) , Option < Oid > > ;
118+ let merge_base_tls: ThreadLocal < RefCell < MergeBaseMap > > = ThreadLocal :: new ( ) ;
119119 let modified = ( 0 ..num_deltas) . into_par_iter ( ) . map ( |deltaidx| -> Result < ModifiedMap > {
120120 let mut modified: ModifiedMap = HashMap :: new ( ) ;
121121 let repo = repo_tls. get_or_try ( get_repo) ?;
@@ -135,84 +135,97 @@ fn main() -> Result<()> {
135135 } else if delta. old_file ( ) . is_binary ( ) || delta. new_file ( ) . is_binary ( ) {
136136 debug ! ( "skipping blame of {:?} because it is binary" , old_path) ;
137137 } else {
138- if !delta. new_file ( ) . exists ( ) {
139- info ! ( "processing {:?} -> [deleted]" , old_path) ;
140- } else if let Some ( new_path) = delta. new_file ( ) . path ( ) {
141- if old_path == new_path {
142- info ! ( "processing {:?}" , old_path) ;
143- } else {
144- info ! ( "processing {:?} -> {:?}" , old_path, new_path) ;
145- }
138+ if !delta. new_file ( ) . exists ( ) {
139+ info ! ( "processing {:?} -> [deleted]" , old_path) ;
140+ } else if let Some ( new_path) = delta. new_file ( ) . path ( ) {
141+ if old_path == new_path {
142+ info ! ( "processing {:?}" , old_path) ;
146143 } else {
147- debug ! ( "new new_file for {:?}" , old_path) ;
148- }
149- let mut min_line = None ;
150- let mut max_line = None ;
151- for hunkidx in 0 ..patch. num_hunks ( ) {
152- let ( hunk, _) = patch. hunk ( hunkidx) ?;
153- min_line = Some ( cmp:: min (
154- min_line. unwrap_or ( std:: u32:: MAX ) ,
155- hunk. old_start ( ) ,
156- ) ) ;
157- max_line = Some ( cmp:: max (
158- max_line. unwrap_or ( std:: u32:: MIN ) ,
159- hunk. old_start ( ) + hunk. old_lines ( ) ,
160- ) ) ;
161- }
162- let mut blame_options = BlameOptions :: new ( ) ;
163- blame_options
164- . newest_commit ( merge_base)
165- . use_mailmap ( true )
166- // not sure what this one does, but it sounds useful
167- //.track_copies_same_commit_moves(true);
168- ;
169- if let ( Some ( min) , Some ( max) ) = ( min_line, max_line) {
170- blame_options. min_line ( min as usize ) . max_line ( max as usize ) ;
171- }
172- if let Some ( commit) = first_commit {
173- blame_options. oldest_commit ( commit) ;
144+ info ! ( "processing {:?} -> {:?}" , old_path, new_path) ;
174145 }
175- match repo. blame_file ( old_path, Some ( & mut blame_options) ) {
176- Ok ( blame) => {
177- for hunkidx in 0 ..patch. num_hunks ( ) {
178- let ( hunk, _) = patch. hunk ( hunkidx) ?;
179- for line in
180- hunk. old_start ( ) ..( hunk. old_start ( ) + hunk. old_lines ( ) )
181- {
182- if let Some ( oldhunk) = blame. get_line ( line as usize ) {
183- let sign = oldhunk. final_signature ( ) ;
184- // !!! hack to work around bug in libgit2 (?)
185- struct HackSignature {
186- raw : * const std:: ffi:: c_void ,
187- _owned : bool ,
188- }
189- let signptr: & HackSignature =
190- unsafe { & * ( & sign as * const git2:: Signature as * const HackSignature ) } ;
191- if signptr. raw . is_null ( ) {
192- warn ! ( "bad signature found in file: {:?}. might be an author without an email or something (bug in libgit2)" , old_path) ;
193- } else {
194- let author = (
195- sign. name ( ) . map ( String :: from) ,
196- sign. email ( ) . map ( String :: from) ,
197- ) ;
198- modified
199- . entry ( author)
200- . and_modify ( |e| * e += 1 )
201- . or_insert ( 1 ) ;
146+ } else {
147+ debug ! ( "new new_file for {:?}" , old_path) ;
148+ }
149+ let mut min_line = None ;
150+ let mut max_line = None ;
151+ for hunkidx in 0 ..patch. num_hunks ( ) {
152+ let ( hunk, _) = patch. hunk ( hunkidx) ?;
153+ min_line = Some ( cmp:: min (
154+ min_line. unwrap_or ( std:: u32:: MAX ) ,
155+ hunk. old_start ( ) ,
156+ ) ) ;
157+ max_line = Some ( cmp:: max (
158+ max_line. unwrap_or ( std:: u32:: MIN ) ,
159+ hunk. old_start ( ) + hunk. old_lines ( ) ,
160+ ) ) ;
161+ }
162+ let mut blame_options = BlameOptions :: new ( ) ;
163+ blame_options
164+ . newest_commit ( merge_base)
165+ . use_mailmap ( true )
166+ // not sure what this one does, but it sounds useful
167+ //.track_copies_same_commit_moves(true);
168+ ;
169+ // TODO blame each separate continuous chunk of changed lines instead?
170+ if let ( Some ( min) , Some ( max) ) = ( min_line, max_line) {
171+ blame_options. min_line ( min as usize ) . max_line ( max as usize ) ;
172+ }
173+ if let Some ( commit) = stop_at {
174+ blame_options. oldest_commit ( commit) ;
175+ }
176+ match repo. blame_file ( old_path, Some ( & mut blame_options) ) {
177+ Ok ( blame) => {
178+ for hunkidx in 0 ..patch. num_hunks ( ) {
179+ let ( hunk, _) = patch. hunk ( hunkidx) ?;
180+ for line in
181+ hunk. old_start ( ) ..( hunk. old_start ( ) + hunk. old_lines ( ) )
182+ {
183+ if let Some ( oldhunk) = blame. get_line ( line as usize ) {
184+ if let Some ( commit) = stop_at {
185+ let key = ( commit, oldhunk. final_commit_id ( ) ) ;
186+ let mut map = merge_base_tls. get_or_default ( ) . borrow_mut ( ) ;
187+ let base = map. entry ( key) . or_insert_with ( || repo. merge_base ( key. 0 , key. 1 ) . ok ( ) ) ;
188+ if let Some ( b) = base {
189+ if * b != commit {
190+ // this seems to happen a lot. oldest_commit on blame options doesn't seem to do what I expected :(
191+ continue ;
192+ }
202193 }
194+ }
195+ let sign = oldhunk. final_signature ( ) ;
196+ // !!! hack to work around bug in libgit2 (?)
197+ struct HackSignature {
198+ raw : * const std:: ffi:: c_void ,
199+ _owned : bool ,
200+ }
201+ let signptr: & HackSignature =
202+ unsafe { & * ( & sign as * const git2:: Signature as * const HackSignature ) } ;
203+ if signptr. raw . is_null ( ) {
204+ warn ! ( "bad signature found in file: {:?}. might be an author without an email or something (bug in libgit2)" , old_path) ;
203205 } else {
204- debug ! (
205- "line {} not found in {:?}@{}" ,
206- line, old_path, merge_base
206+ debug ! ( "path: {:?} commit: {} line: {}" , old_path, oldhunk. final_commit_id( ) , line) ;
207+ let author = (
208+ sign. name ( ) . map ( String :: from) ,
209+ sign. email ( ) . map ( String :: from) ,
207210 ) ;
211+ modified
212+ . entry ( author)
213+ . and_modify ( |e| * e += 1 )
214+ . or_insert ( 1 ) ;
208215 }
216+ } else {
217+ debug ! (
218+ "line {} not found in {:?}@{}" ,
219+ line, old_path, merge_base
220+ ) ;
209221 }
210222 }
211223 }
212- Err ( e) => {
213- debug ! ( "error blaming {:?}: {}" , old_path, e) ;
214- }
215224 }
225+ Err ( e) => {
226+ debug ! ( "error blaming {:?}: {}" , old_path, e) ;
227+ }
228+ }
216229 }
217230 } else {
218231 debug ! (
@@ -237,6 +250,7 @@ fn main() -> Result<()> {
237250 }
238251 Ok ( acc)
239252 } ) ?;
253+ drop ( merge_base_tls) ;
240254 drop ( diff_tls) ;
241255 drop ( repo_tls) ;
242256 let mut modified_sorted = modified. into_iter ( ) . collect :: < Vec < _ > > ( ) ;
0 commit comments