Skip to content

Add incremental blame support #409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 14, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions git/compat.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
# flake8: noqa

import sys
import six

from gitdb.utils.compat import (
PY3,
@@ -46,6 +47,20 @@ def mviter(d):
def mviter(d):
    """Return whatever ``d.itervalues()`` produces for the mapping ``d``."""
    values = d.itervalues()
    return values

# True when the running interpreter is older than Python 2.7.
PRE_PY27 = sys.version_info < (2, 7)


def safe_decode(s):
    """Safely decode a binary string to unicode.

    :param s: text (returned unchanged) or bytes (decoded using ``defenc``).
    :return: the text form of ``s``; bytes that cannot be decoded are
        substituted with the replacement character instead of raising.
    :raise TypeError: if ``s`` is neither text nor bytes.
    """
    if isinstance(s, six.text_type):
        return s
    if isinstance(s, six.binary_type):
        # Pass ``errors`` positionally: ``decode`` only accepts keyword
        # arguments from Python 2.7 on, whereas the positional form works on
        # every interpreter, so older versions no longer fall back to strict
        # decoding (which raised UnicodeDecodeError on undecodable input).
        return s.decode(defenc, 'replace')
    raise TypeError('Expected bytes or text, but got %r' % (s,))


def with_metaclass(meta, *bases):
"""copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15"""
68 changes: 65 additions & 3 deletions git/repo/base.py
Original file line number Diff line number Diff line change
@@ -52,12 +52,14 @@
from git.compat import (
text_type,
defenc,
PY3
PY3,
safe_decode,
)

import os
import sys
import re
from six.moves import range

DefaultDBType = GitCmdObjectDB
if sys.version_info[:2] < (2, 5): # python 2.4 compatiblity
@@ -655,7 +657,64 @@ def active_branch(self):
:return: Head to the active branch"""
return self.head.reference

def blame(self, rev, file):
def blame_incremental(self, rev, file, **kwargs):
    """Iterator for blame information for the given file at the given revision.

    Unlike .blame(), this does not return the actual file's contents, only
    a stream of (commit, range) tuples.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :param file: path of the file to blame; passed to git after ``--``.
    :param kwargs: additional keyword arguments forwarded to ``self.git.blame``.
    :return: lazy iterator of (git.Commit, range) tuples, where the commit
        indicates the commit to blame for the line, and range
        indicates a span of line numbers in the resulting file.

    If you combine all line number ranges outputted by this command, you
    should get a continuous range spanning all line numbers in the file.
    """
    data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
    commits = dict()

    stream = iter(data.splitlines())
    while True:
        # Use a sentinel instead of letting StopIteration escape: inside a
        # generator an uncaught StopIteration is turned into RuntimeError
        # (PEP 479), so exhaustion must end the generator with a plain return.
        line = next(stream, None)
        if line is None:
            return

        # Entry header: <hexsha> <original line> <final line> <number of lines>
        hexsha, _, lineno, num_lines = line.split()
        lineno = int(lineno)
        num_lines = int(num_lines)
        if hexsha not in commits:
            # Now read the next few lines and build up a dict of properties
            # for this commit
            props = dict()
            while True:
                line = next(stream, None)
                if line is None:
                    # Output stopped in the middle of an entry; end the
                    # iteration quietly rather than raising.
                    return
                if line == b'boundary':
                    # "boundary" indicates a root commit and occurs
                    # instead of the "previous" tag
                    continue

                tag, value = line.split(b' ', 1)
                props[tag] = value
                if tag == b'filename':
                    # "filename" formally terminates the entry for --incremental
                    break

            c = Commit(self, hex_to_bin(hexsha),
                       author=Actor(safe_decode(props[b'author']),
                                    safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                       authored_date=int(props[b'author-time']),
                       committer=Actor(safe_decode(props[b'committer']),
                                       safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                       committed_date=int(props[b'committer-time']),
                       message=safe_decode(props[b'summary']))
            # Cache the commit so later entries for the same sha reuse it
            commits[hexsha] = c
        else:
            # Discard the next line (it's a filename end tag)
            line = next(stream, None)
            if line is None:
                return
            assert line.startswith(b'filename'), 'Unexpected git blame output'

        yield commits[hexsha], range(lineno, lineno + num_lines)

def blame(self, rev, file, incremental=False, **kwargs):
"""The blame information for the given file at the given revision.
:parm rev: revision specifier, see git-rev-parse for viable options.
@@ -664,7 +723,10 @@ def blame(self, rev, file):
A list of tuples associating a Commit object with a list of lines that
changed within the given commit. The Commit objects will be given in order
of appearance."""
data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
if incremental:
return self.blame_incremental(rev, file, **kwargs)

data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False, **kwargs)
commits = dict()
blames = list()
info = None
30 changes: 30 additions & 0 deletions git/test/fixtures/blame_incremental
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
82b8902e033430000481eb355733cd7065342037 2 2 1
author Sebastian Thiel
author-mail <[email protected]>
author-time 1270634931
author-tz +0200
committer Sebastian Thiel
committer-mail <[email protected]>
committer-time 1270634931
committer-tz +0200
summary Used this release for a first beta of the 0.2 branch of development
previous 501bf602abea7d21c3dbb409b435976e92033145 AUTHORS
filename AUTHORS
82b8902e033430000481eb355733cd7065342037 14 14 1
filename AUTHORS
c76852d0bff115720af3f27acdb084c59361e5f6 1 1 1
author Michael Trier
author-mail <[email protected]>
author-time 1232829627
author-tz -0500
committer Michael Trier
committer-mail <[email protected]>
committer-time 1232829627
committer-tz -0500
summary Lots of spring cleaning and added in Sphinx documentation.
previous bcd57e349c08bd7f076f8d6d2f39b702015358c1 AUTHORS
filename AUTHORS
c76852d0bff115720af3f27acdb084c59361e5f6 2 3 11
filename AUTHORS
c76852d0bff115720af3f27acdb084c59361e5f6 13 15 2
filename AUTHORS
24 changes: 24 additions & 0 deletions git/test/test_repo.py
Original file line number Diff line number Diff line change
@@ -50,6 +50,16 @@
from nose import SkipTest


def iter_flatten(lol):
    """Lazily yield every element of every iterable contained in ``lol``, in order."""
    for sublist in lol:
        for element in sublist:
            yield element


def flatten(lol):
    """Return a single list holding the elements of every iterable in ``lol``, in order."""
    return [element for sublist in lol for element in sublist]


class TestRepo(TestBase):

@raises(InvalidGitRepositoryError)
@@ -323,6 +333,20 @@ def test_blame_real(self):
assert c, "Should have executed at least one blame command"
assert nml, "There should at least be one blame commit that contains multiple lines"

@patch.object(Git, '_call_process')
def test_blame_incremental(self, git):
    """Check blame_incremental against the canned 'blame_incremental' fixture output."""
    git.return_value = fixture('blame_incremental')
    entries = list(self.rorepo.blame_incremental('9debf6b0aafb6f7781ea9d1383c86939a1aacde3', 'AUTHORS'))
    assert len(entries) == 5

    # Check all outputted line numbers, in the order the entries were produced
    expected_spans = [range(2, 3), range(14, 15), range(1, 2), range(3, 14), range(15, 17)]
    ranges = flatten([span for _, span in entries])
    assert ranges == flatten(expected_spans), str(ranges)

    # Each entry must be attributed to the expected (abbreviated) commit
    commits = [commit.hexsha[:7] for commit, _ in entries]
    assert commits == ['82b8902', '82b8902', 'c76852d', 'c76852d', 'c76852d'], str(commits)

@patch.object(Git, '_call_process')
def test_blame_complex_revision(self, git):
git.return_value = fixture('blame_complex_revision')