Skip to content

Commit 0cad7c6

Browse files
author
Jaakko Kangasharju
committed
Speed up initial commit processing
There is no reason to run blame on files in the initial commit, since every line there is known to have the commit author as its author. Rewrite the initial commit processing to only read the files, which substantially speeds up processing of the first commit.
1 parent 46017b8 commit 0cad7c6

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

githammer/hammer.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
import datetime
1616
import os
17+
import io
1718
import re
1819
from operator import itemgetter
1920

@@ -115,26 +116,32 @@ def _build_commit_map(self, session):
115116
if db_detail.test_count:
116117
self._shas_to_commits[db_detail.commit_id].test_counts[db_detail.author] = db_detail.test_count
117118

119+
def _process_lines_into_line_counts(self, repository, commit, path, lines, line_counts, test_counts):
120+
is_test_file = _is_test_file(repository.configuration, path)
121+
author = self._names_to_authors[_author_line(commit)]
122+
line_counts[author] = line_counts.get(author, 0) + len(lines)
123+
if is_test_file:
124+
for line in lines:
125+
if re.search(repository.test_line_regex, line):
126+
test_counts[author] = test_counts.get(author, 0) + 1
127+
118128
def _blame_blob_into_line_counts(self, repository, commit_to_blame, path, line_counts, test_counts):
119129
if not _is_source_file(repository.configuration, path):
120130
return
121-
is_test_file = _is_test_file(repository.configuration, path)
122131
blame = repository.git_repository.blame(commit_to_blame, path, w=True)
123132
for commit, lines in blame:
124-
author = self._names_to_authors[_author_line(commit)]
125-
line_counts[author] = line_counts.get(author, 0) + len(lines)
126-
if is_test_file:
127-
for line in lines:
128-
if re.search(repository.test_line_regex, line):
129-
test_counts[author] = test_counts.get(author, 0) + 1
133+
self._process_lines_into_line_counts(repository, commit, path, lines, line_counts, test_counts)
130134

131135
def _make_full_commit_stats(self, repository, commit):
132136
line_counts = {}
133137
test_counts = {}
134138
for git_object in commit.tree.traverse(visit_once=True):
135139
if git_object.type != 'blob':
136140
continue
137-
self._blame_blob_into_line_counts(repository, commit, git_object.path, line_counts, test_counts)
141+
if not _is_source_file(repository.configuration, git_object.path):
142+
continue
143+
lines = [line.decode('utf-8') for line in io.BytesIO(git_object.data_stream.read()).readlines()]
144+
self._process_lines_into_line_counts(repository, commit, git_object.path, lines, line_counts, test_counts)
138145
return line_counts, test_counts
139146

140147
def _make_diffed_commit_stats(self, repository, commit, previous_commit, previous_commit_line_counts,

0 commit comments

Comments
 (0)