Skip to content

Commit 8c1a87d

Browse files
committed
commit: refactored existing code to decode commits from streams - performance is slightly better
git.cmd: added a method to provide direct access to the content stream. This is more efficient when large objects are handled, provided it is actually used.
test.helpers: removed unnecessary code
1 parent df08923 commit 8c1a87d

File tree

9 files changed

+810
-718
lines changed

9 files changed

+810
-718
lines changed

Diff for: lib/git/cmd.py

+496-405
Large diffs are not rendered by default.

Diff for: lib/git/objects/base.py

+209-209
Large diffs are not rendered by default.

Diff for: lib/git/objects/commit.py

+74-65
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
import git.stats as stats
1010
from git.actor import Actor
1111
from tree import Tree
12+
from cStringIO import StringIO
1213
import base
1314
import utils
1415
import time
1516
import os
1617

17-
class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
18+
19+
class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable):
1820
"""
1921
Wraps a git Commit object.
2022
@@ -91,7 +93,8 @@ def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author
9193
self._set_self_from_args_(locals())
9294

9395
if parents is not None:
94-
self.parents = tuple( self.__class__(repo, p) for p in parents )
96+
cls = type(self)
97+
self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls))
9598
# END for each parent to convert
9699

97100
if self.sha and tree is not None:
@@ -109,20 +112,9 @@ def _set_cache_(self, attr):
109112
We set all values at once.
110113
"""
111114
if attr in Commit.__slots__:
112-
# prepare our data lines to match rev-list
113-
data_lines = self.data.splitlines()
114-
data_lines.insert(0, "commit %s" % self.sha)
115-
temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
116-
self.parents = temp.parents
117-
self.tree = temp.tree
118-
self.author = temp.author
119-
self.authored_date = temp.authored_date
120-
self.author_tz_offset = temp.author_tz_offset
121-
self.committer = temp.committer
122-
self.committed_date = temp.committed_date
123-
self.committer_tz_offset = temp.committer_tz_offset
124-
self.message = temp.message
125-
self.encoding = temp.encoding
115+
# read the data in one chunk, as it's faster - then provide a file wrapper
116+
hexsha, typename, size, data = self.repo.git.get_object_data(self)
117+
self._deserialize(StringIO(data))
126118
else:
127119
super(Commit, self)._set_cache_(attr)
128120

@@ -260,59 +252,18 @@ def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
260252
iterator returning Commit objects
261253
"""
262254
stream = proc_or_stream
263-
if not hasattr(stream,'next'):
255+
if not hasattr(stream,'readline'):
264256
stream = proc_or_stream.stdout
265257

266-
for line in stream:
267-
commit_tokens = line.split()
258+
while True:
259+
line = stream.readline()
260+
if not line:
261+
break
262+
commit_tokens = line.split()
268263
id = commit_tokens[1]
269264
assert commit_tokens[0] == "commit"
270-
tree = stream.next().split()[1]
271-
272-
parents = []
273-
next_line = None
274-
for parent_line in stream:
275-
if not parent_line.startswith('parent'):
276-
next_line = parent_line
277-
break
278-
# END abort reading parents
279-
parents.append(parent_line.split()[-1])
280-
# END for each parent line
281-
282-
author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
283-
committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
284-
285265

286-
# empty line
287-
encoding = stream.next()
288-
encoding.strip()
289-
if encoding:
290-
encoding = encoding[encoding.find(' ')+1:]
291-
# END parse encoding
292-
293-
message_lines = list()
294-
if from_rev_list:
295-
for msg_line in stream:
296-
if not msg_line.startswith(' '):
297-
# and forget about this empty marker
298-
break
299-
# END abort message reading
300-
# strip leading 4 spaces
301-
message_lines.append(msg_line[4:])
302-
# END while there are message lines
303-
else:
304-
# a stream from our data simply gives us the plain message
305-
for msg_line in stream:
306-
message_lines.append(msg_line)
307-
# END message parsing
308-
message = '\n'.join(message_lines)
309-
310-
311-
yield Commit(repo, id, tree,
312-
author, authored_date, author_tz_offset,
313-
committer, committed_date, committer_tz_offset,
314-
message, tuple(parents),
315-
encoding or cls.default_encoding)
266+
yield Commit(repo, id)._deserialize(stream, from_rev_list)
316267
# END for each line in stream
317268

318269

@@ -393,7 +344,7 @@ def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
393344

394345
# assume utf8 encoding
395346
enc_section, enc_option = cls.conf_encoding.split('.')
396-
conf_encoding = cr.get_value(enc_section, enc_option, default_encoding)
347+
conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
397348

398349
author = Actor(author_name, author_email)
399350
committer = Actor(committer_name, committer_email)
@@ -429,3 +380,61 @@ def __str__(self):
429380
def __repr__(self):
430381
return '<git.Commit "%s">' % self.sha
431382

383+
#{ Serializable Implementation
384+
385+
def _serialize(self, stream):
    """Serialize this commit into the given stream by delegating to the next
    ``_serialize`` implementation in the method resolution order.

    :note: for now, this is very inefficient and in fact shouldn't be used like this
    :param stream: a file-like object, handed on unchanged
    :return: whatever the delegated ``_serialize`` call returns"""
    parent_impl = super(Commit, self)
    return parent_impl._serialize(stream)
388+
389+
def _deserialize(self, stream, from_rev_list=False):
    """Populate this commit's attributes from a serialized commit stream.

    The stream is consumed in this order: one ``tree`` line, zero or more
    ``parent`` lines, the author line, the committer line, an optional
    ``encoding`` line, one empty separator line and finally the message.

    :param stream: file-like object providing ``readline()`` and ``read()``,
        positioned at the ``tree`` line
    :param from_rev_list: if true, the stream format is coming from the rev-list
        command: message lines carry a four character indentation and the
        message ends at the first line that does not start with a space.
        Otherwise it is assumed to be a plain data stream from our object and
        the remainder of the stream is the message
    :return: self"""
    readline = stream.readline
    self.tree = Tree(self.repo, readline().split()[1], 0, '')

    # Collect parents until the first line that is not a parent line. That
    # line is the author line and is parsed right below.
    cls = type(self)
    parents = list()
    next_line = readline()
    while next_line.startswith('parent'):
        parents.append(cls(self.repo, next_line.split()[-1]))
        next_line = readline()
    # END for each parent line
    self.parents = tuple(parents)

    self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
    self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline())

    # The next line is either the encoding header or the empty line separating
    # the header from the message. The stripped value must be kept: otherwise
    # the bare newline of the separator is truthy and replaces the default
    # encoding with "\n".
    self.encoding = self.default_encoding
    enc = readline().strip()
    if enc:
        self.encoding = enc[enc.find(' ')+1:]
        # an encoding header was present, so the separator line still follows;
        # consume it to keep it out of the message
        readline()
    # END parse encoding

    if from_rev_list:
        message_lines = list()
        while True:
            msg_line = readline()
            if not msg_line.startswith(' '):
                # end of message (or end of stream) - forget about this marker
                break
            # END abort message reading
            # strip leading 4 spaces
            message_lines.append(msg_line[4:])
        # END while there are message lines

        # cut the last newline to get rid of the artificial newline added
        # by the rev-list command. A commit without any message lines has
        # nothing to cut, so guard against indexing an empty list.
        if message_lines:
            message_lines[-1] = message_lines[-1][:-1]
        self.message = ''.join(message_lines)
    else:
        # a stream from our data simply gives us the plain message
        # The end of our message stream is marked with a newline that we strip
        self.message = stream.read()[:-1]
    # END message parsing
    return self
439+
440+
#} END serializable implementation

Diff for: lib/git/objects/tree.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def traverse( self, predicate = lambda i,d: True,
209209
visit_once = False, ignore_self=1 ):
210210
"""For documentation, see utils.Traversable.traverse
211211
212-
Trees are set to visist_once = False to gain more performance in the traversal"""
212+
Trees are set to visit_once = False to gain more performance in the traversal"""
213213
return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
214214

215215
# List protocol

Diff for: lib/git/objects/utils.py

+17
Original file line numberDiff line numberDiff line change
@@ -280,3 +280,20 @@ def addToStack( stack, item, branch_first, depth ):
280280

281281
addToStack( stack, item, branch_first, nd )
282282
# END for each item on work stack
283+
284+
285+
class Serializable(object):
    """Interface for objects which can write themselves to, and restore
    themselves from, a data stream.

    Both methods only raise ``NotImplementedError`` here; subclasses are
    expected to provide the actual implementations."""

    def _serialize(self, stream):
        """Write the data of this object into the given data stream

        :note: a serialized object would ``_deserialize`` into the same object
        :param stream: a file-like object
        :return: self"""
        reason = "To be implemented in subclass"
        raise NotImplementedError(reason)

    def _deserialize(self, stream):
        """Read all information regarding this object back from the stream

        :param stream: a file-like object
        :return: self"""
        reason = "To be implemented in subclass"
        raise NotImplementedError(reason)

Diff for: test/git/test_commit.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def test_rev_list_bisect_all(self, git):
129129
bisect_all=True)
130130
assert_true(git.called)
131131

132-
commits = Commit._iter_from_process_or_stream(self.rorepo, ListProcessAdapter(revs), True)
132+
commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs), True)
133133
expected_ids = (
134134
'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b',
135135
'33ebe7acec14b25c5f84f35a664803fcab2f7781',

Diff for: test/git/test_diff.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ def _assert_diff_format(self, diffs):
2020
return diffs
2121

2222
def test_list_from_string_new_mode(self):
23-
output = ListProcessAdapter(fixture('diff_new_mode'))
23+
output = StringProcessAdapter(fixture('diff_new_mode'))
2424
diffs = Diff._index_from_patch_format(self.rorepo, output.stdout)
2525
self._assert_diff_format(diffs)
2626

2727
assert_equal(1, len(diffs))
2828
assert_equal(10, len(diffs[0].diff.splitlines()))
2929

3030
def test_diff_with_rename(self):
31-
output = ListProcessAdapter(fixture('diff_rename'))
31+
output = StringProcessAdapter(fixture('diff_rename'))
3232
diffs = Diff._index_from_patch_format(self.rorepo, output.stdout)
3333
self._assert_diff_format(diffs)
3434

@@ -47,7 +47,7 @@ def test_diff_patch_format(self):
4747
"diff_tree_numstat_root" )
4848

4949
for fixture_name in fixtures:
50-
diff_proc = ListProcessAdapter(fixture(fixture_name))
50+
diff_proc = StringProcessAdapter(fixture(fixture_name))
5151
diffs = Diff._index_from_patch_format(self.rorepo, diff_proc.stdout)
5252
# END for each fixture
5353

Diff for: test/git/test_repo.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def test_heads_should_populate_head_data(self):
4848

4949
def test_tree_from_revision(self):
5050
tree = self.rorepo.tree('0.1.6')
51+
assert len(tree.sha) == 40
5152
assert tree.type == "tree"
5253
assert self.rorepo.tree(tree) == tree
5354

@@ -56,9 +57,9 @@ def test_tree_from_revision(self):
5657

5758
@patch_object(Git, '_call_process')
5859
def test_commits(self, git):
59-
git.return_value = ListProcessAdapter(fixture('rev_list'))
60+
git.return_value = StringProcessAdapter(fixture('rev_list'))
6061

61-
commits = list( self.rorepo.iter_commits('master', max_count=10) )
62+
commits = list(self.rorepo.iter_commits('master', max_count=10))
6263

6364
c = commits[0]
6465
assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', c.sha)

Diff for: test/testlib/helper.py

+6-32
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from unittest import TestCase
1010
import tempfile
1111
import shutil
12+
import cStringIO
1213

1314
GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
1415

@@ -23,40 +24,13 @@ def absolute_project_path():
2324
return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
2425

2526

26-
class ListProcessAdapter(object):
27-
"""Allows to use lists as Process object as returned by SubProcess.Popen.
27+
class StringProcessAdapter(object):
28+
"""Allows to use strings as Process object as returned by SubProcess.Popen.
2829
Its tailored to work with the test system only"""
2930

30-
class Stream(object):
31-
"""Simple stream emulater meant to work only with tests"""
32-
def __init__(self, data):
33-
self.data = data
34-
self.cur_iter = None
35-
36-
def __iter__(self):
37-
dat = self.data
38-
if isinstance(dat, basestring):
39-
dat = dat.splitlines()
40-
if self.cur_iter is None:
41-
self.cur_iter = iter(dat)
42-
return self.cur_iter
43-
44-
def read(self):
45-
dat = self.data
46-
if isinstance(dat, (tuple,list)):
47-
dat = "\n".join(dat)
48-
return dat
49-
50-
def next(self):
51-
if self.cur_iter is None:
52-
self.cur_iter = iter(self)
53-
return self.cur_iter.next()
54-
55-
# END stream
56-
57-
def __init__(self, input_list_or_string):
58-
self.stdout = self.Stream(input_list_or_string)
59-
self.stderr = self.Stream('')
31+
def __init__(self, input_string):
32+
self.stdout = cStringIO.StringIO(input_string)
33+
self.stderr = cStringIO.StringIO()
6034

6135
def wait(self):
6236
return 0

0 commit comments

Comments
 (0)