@@ -7,22 +7,23 @@ use gix_object::{
7
7
bstr:: { BStr , BString } ,
8
8
FindExt ,
9
9
} ;
10
+ use gix_traverse:: commit:: find as find_commit;
11
+ use smallvec:: SmallVec ;
10
12
use std:: num:: NonZeroU32 ;
11
13
use std:: ops:: Range ;
12
14
13
15
/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
14
- /// at `traverse[0] :<file_path>` originated in.
16
+ /// at `suspect :<file_path>` originated in.
15
17
///
16
18
/// ## Paramters
17
19
///
18
20
/// * `odb`
19
21
/// - Access to database objects, also for used for diffing.
20
22
/// - Should have an object cache for good diff performance.
21
- /// * `traverse`
22
- /// - The list of commits from the most recent to prior ones, following all parents sorted
23
- /// by time.
24
- /// - It's paramount that older commits are returned after newer ones.
25
- /// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`.
23
+ /// * `suspect`
24
+ /// - The first commit to be responsible for parts of `file_path`.
25
+ /// * `cache`
26
+ /// - Optionally, the commitgraph cache.
26
27
/// * `file_path`
27
28
/// - A *slash-separated* worktree-relative path to the file to blame.
28
29
/// * `range`
@@ -60,20 +61,14 @@ use std::ops::Range;
60
61
// <---><----------><-------><-----><------->
61
62
// <---><---><-----><-------><-----><------->
62
63
// <---><---><-----><-------><-----><-><-><->
63
- pub fn file < E > (
64
+ pub fn file (
64
65
odb : impl gix_object:: Find + gix_object:: FindHeader ,
65
- traverse : impl IntoIterator < Item = Result < gix_traverse:: commit:: Info , E > > ,
66
+ suspect : ObjectId ,
67
+ cache : Option < gix_commitgraph:: Graph > ,
66
68
resource_cache : & mut gix_diff:: blob:: Platform ,
67
69
file_path : & BStr ,
68
70
range : Option < Range < u32 > > ,
69
- ) -> Result < Outcome , Error >
70
- where
71
- E : Into < Box < dyn std:: error:: Error + Send + Sync + ' static > > ,
72
- {
73
- let mut traverse = traverse. into_iter ( ) . peekable ( ) ;
74
- let Some ( Ok ( suspect) ) = traverse. peek ( ) . map ( |res| res. as_ref ( ) . map ( |item| item. id ) ) else {
75
- return Err ( Error :: EmptyTraversal ) ;
76
- } ;
71
+ ) -> Result < Outcome , Error > {
77
72
let _span = gix_trace:: coarse!( "gix_blame::file()" , ?file_path, ?suspect) ;
78
73
79
74
let mut stats = Statistics :: default ( ) ;
84
79
commit_id : suspect,
85
80
} ) ?;
86
81
let blamed_file_blob = odb. find_blob ( & blamed_file_entry_id, & mut buf) ?. data . to_vec ( ) ;
87
- let num_lines_in_blamed = {
88
- let mut interner = gix_diff:: blob:: intern:: Interner :: new ( blamed_file_blob. len ( ) / 100 ) ;
89
- tokens_for_diffing ( & blamed_file_blob)
90
- . tokenize ( )
91
- . map ( |token| interner. intern ( token) )
92
- . count ( )
93
- } as u32 ;
82
+ let num_lines_in_blamed = tokens_for_diffing ( & blamed_file_blob) . tokenize ( ) . count ( ) as u32 ;
94
83
95
84
// Binary or otherwise empty?
96
85
if num_lines_in_blamed == 0 {
@@ -103,30 +92,40 @@ where
103
92
suspects: [ ( suspect, range_in_blamed_file) ] . into( ) ,
104
93
} ] ;
105
94
95
+ let ( mut buf, mut buf2) = ( Vec :: new ( ) , Vec :: new ( ) ) ;
96
+ let commit = find_commit ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
97
+ let mut queue: gix_revwalk:: PriorityQueue < CommitTime , ObjectId > = gix_revwalk:: PriorityQueue :: new ( ) ;
98
+ queue. insert ( commit_time ( commit) ?, suspect) ;
99
+
106
100
let mut out = Vec :: new ( ) ;
107
101
let mut diff_state = gix_diff:: tree:: State :: default ( ) ;
108
102
let mut previous_entry: Option < ( ObjectId , ObjectId ) > = None ;
109
- ' outer: while let Some ( item) = traverse. next ( ) {
103
+ ' outer: while let Some ( suspect) = queue. pop_value ( ) {
104
+ stats. commits_traversed += 1 ;
110
105
if hunks_to_blame. is_empty ( ) {
111
106
break ;
112
107
}
113
- let commit = item. map_err ( |err| Error :: Traverse ( err. into ( ) ) ) ?;
114
- let suspect = commit. id ;
115
- stats. commits_traversed += 1 ;
116
108
117
- let parent_ids = commit. parent_ids ;
109
+ let is_still_suspect = hunks_to_blame. iter ( ) . any ( |hunk| hunk. suspects . contains_key ( & suspect) ) ;
110
+ if !is_still_suspect {
111
+ // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with
112
+ // the next one.
113
+ continue ' outer;
114
+ }
115
+
116
+ let commit = find_commit ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
117
+ let parent_ids: ParentIds = collect_parents ( commit, & odb, cache. as_ref ( ) , & mut buf2) ?;
118
118
if parent_ids. is_empty ( ) {
119
- if traverse . peek ( ) . is_none ( ) {
120
- // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of
121
- // the last `item` that was yielded by `traverse `, so it makes sense to assign the
122
- // remaining lines to it, even though we don’t explicitly check whether that is true
123
- // here. We could perhaps use diff-tree-to-tree to compare `suspect`
124
- // against an empty tree to validate this assumption.
119
+ if queue . is_empty ( ) {
120
+ // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the
121
+ // `id` of the last `item` that was yielded by `queue `, so it makes sense to assign
122
+ // the remaining lines to it, even though we don’t explicitly check whether that is
123
+ // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
124
+ // an empty tree to validate this assumption.
125
125
if unblamed_to_out_is_done ( & mut hunks_to_blame, & mut out, suspect) {
126
126
break ' outer;
127
127
}
128
128
}
129
-
130
129
// There is more, keep looking.
131
130
continue ;
132
131
}
@@ -143,7 +142,41 @@ where
143
142
continue ;
144
143
} ;
145
144
146
- for ( pid, parent_id) in parent_ids. iter ( ) . enumerate ( ) {
145
+ // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are
146
+ // identical to the corresponding lines in the *Source File*.
147
+ #[ cfg( debug_assertions) ]
148
+ {
149
+ let source_blob = odb. find_blob ( & entry_id, & mut buf) ?. data . to_vec ( ) ;
150
+ let mut source_interner = gix_diff:: blob:: intern:: Interner :: new ( source_blob. len ( ) / 100 ) ;
151
+ let source_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & source_blob)
152
+ . tokenize ( )
153
+ . map ( |token| source_interner. intern ( token) )
154
+ . collect ( ) ;
155
+
156
+ let mut blamed_interner = gix_diff:: blob:: intern:: Interner :: new ( blamed_file_blob. len ( ) / 100 ) ;
157
+ let blamed_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & blamed_file_blob)
158
+ . tokenize ( )
159
+ . map ( |token| blamed_interner. intern ( token) )
160
+ . collect ( ) ;
161
+
162
+ for hunk in hunks_to_blame. iter ( ) {
163
+ if let Some ( range_in_suspect) = hunk. suspects . get ( & suspect) {
164
+ let range_in_blamed_file = hunk. range_in_blamed_file . clone ( ) ;
165
+
166
+ for ( blamed_line_number, source_line_number) in range_in_blamed_file. zip ( range_in_suspect. clone ( ) ) {
167
+ let source_token = source_lines_as_tokens[ source_line_number as usize ] ;
168
+ let blame_token = blamed_lines_as_tokens[ blamed_line_number as usize ] ;
169
+
170
+ let source_line = BString :: new ( source_interner[ source_token] . into ( ) ) ;
171
+ let blamed_line = BString :: new ( blamed_interner[ blame_token] . into ( ) ) ;
172
+
173
+ assert_eq ! ( source_line, blamed_line) ;
174
+ }
175
+ }
176
+ }
177
+ }
178
+
179
+ for ( pid, ( parent_id, parent_commit_time) ) in parent_ids. iter ( ) . enumerate ( ) {
147
180
if let Some ( parent_entry_id) =
148
181
find_path_entry_in_commit ( & odb, parent_id, file_path, & mut buf, & mut buf2, & mut stats) ?
149
182
{
@@ -153,17 +186,19 @@ where
153
186
}
154
187
if no_change_in_entry {
155
188
pass_blame_from_to ( suspect, * parent_id, & mut hunks_to_blame) ;
189
+ queue. insert ( * parent_commit_time, * parent_id) ;
156
190
continue ' outer;
157
191
}
158
192
}
159
193
}
160
194
161
195
let more_than_one_parent = parent_ids. len ( ) > 1 ;
162
- for parent_id in parent_ids {
196
+ for ( parent_id, parent_commit_time) in parent_ids {
197
+ queue. insert ( parent_commit_time, parent_id) ;
163
198
let changes_for_file_path = tree_diff_at_file_path (
164
199
& odb,
165
200
file_path,
166
- commit . id ,
201
+ suspect ,
167
202
parent_id,
168
203
& mut stats,
169
204
& mut diff_state,
@@ -588,8 +623,51 @@ fn find_path_entry_in_commit(
588
623
Ok ( res. map ( |e| e. oid ) )
589
624
}
590
625
591
- /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them
592
- /// so the later access shows the right thing.
626
+ type CommitTime = i64 ;
627
+
628
+ fn commit_time ( commit : gix_traverse:: commit:: Either < ' _ , ' _ > ) -> Result < CommitTime , gix_object:: decode:: Error > {
629
+ match commit {
630
+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
631
+ commit_ref_iter. committer ( ) . map ( |c| c. time . seconds )
632
+ }
633
+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => Ok ( commit. committer_timestamp ( ) as i64 ) ,
634
+ }
635
+ }
636
+
637
+ type ParentIds = SmallVec < [ ( gix_hash:: ObjectId , i64 ) ; 2 ] > ;
638
+
639
+ fn collect_parents (
640
+ commit : gix_traverse:: commit:: Either < ' _ , ' _ > ,
641
+ odb : & impl gix_object:: Find ,
642
+ cache : Option < & gix_commitgraph:: Graph > ,
643
+ buf : & mut Vec < u8 > ,
644
+ ) -> Result < ParentIds , Error > {
645
+ let mut parent_ids: ParentIds = Default :: default ( ) ;
646
+ match commit {
647
+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => {
648
+ let cache = cache
649
+ . as_ref ( )
650
+ . expect ( "find returned a cached commit, so we expect cache to be present" ) ;
651
+ for parent_pos in commit. iter_parents ( ) {
652
+ let parent = cache. commit_at ( parent_pos?) ;
653
+ parent_ids. push ( ( parent. id ( ) . to_owned ( ) , parent. committer_timestamp ( ) as i64 ) ) ;
654
+ }
655
+ }
656
+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
657
+ for id in commit_ref_iter. parent_ids ( ) {
658
+ let parent = odb. find_commit_iter ( id. as_ref ( ) , buf) . ok ( ) ;
659
+ let parent_commit_time = parent
660
+ . and_then ( |parent| parent. committer ( ) . ok ( ) . map ( |committer| committer. time . seconds ) )
661
+ . unwrap_or_default ( ) ;
662
+ parent_ids. push ( ( id, parent_commit_time) ) ;
663
+ }
664
+ }
665
+ } ;
666
+ Ok ( parent_ids)
667
+ }
668
+
669
+ /// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
670
+ /// to unify them so the later access shows the right thing.
593
671
pub ( crate ) fn tokens_for_diffing ( data : & [ u8 ] ) -> impl TokenSource < Token = & [ u8 ] > {
594
672
gix_diff:: blob:: sources:: byte_lines_with_terminator ( data)
595
673
}
0 commit comments