Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 69dd875

Browse files
committedJun 21, 2010
index.write_tree: initial version implemented, although it's not yet working correctly; a test to explicitly compare the git version with the Python implementation is still missing
Tree and Index internally use 20-byte shas, converting them only as needed to reduce memory footprint and processing time. objects: started own 'fun' module containing the most important tree functions; more are likely to be added soon
1 parent 1044116 commit 69dd875

File tree

13 files changed

+298
-208
lines changed

13 files changed

+298
-208
lines changed
 

‎CHANGES

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ CHANGES
55
===
66
* ConcurrentWriteOperation was removed, and replaced by LockedFD
77
* IndexFile.get_entries_key was renamed to entry_key
8+
* IndexEntry instances contained in IndexFile.entries now use binary sha's. Use
9+
the .hexsha property to obtain the hexadecimal version
10+
* IndexFile.write_tree: removed missing_ok keyword, it's always True now
11+
Instead of raising GitCommandError it raises UnmergedEntriesError
12+
* diff.Diff.null_hex_sha renamed to NULL_HEX_SHA, to conform with
13+
the naming in the Object base class
14+
815

916
0.2 Beta 2
1017
===========

‎lib/git/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _init_externals():
2828
from git.objects import *
2929
from git.refs import *
3030
from git.diff import *
31-
from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError
31+
from git.errors import *
3232
from git.cmd import Git
3333
from git.repo import Repo
3434
from git.remote import *

‎lib/git/db.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
OStream
55
)
66

7+
from gitdb.util import to_hex_sha
8+
79
from gitdb.db import GitDB
810
from gitdb.db import LooseObjectDB
911

12+
1013
__all__ = ('GitCmdObjectDB', 'GitDB' )
1114

1215
#class GitCmdObjectDB(CompoundDB, ObjectDBW):
@@ -24,11 +27,11 @@ def __init__(self, root_path, git):
2427
self._git = git
2528

2629
def info(self, sha):
27-
t = self._git.get_object_header(sha)
30+
t = self._git.get_object_header(to_hex_sha(sha))
2831
return OInfo(*t)
2932

3033
def stream(self, sha):
3134
"""For now, all lookup is done by git itself"""
32-
t = self._git.stream_object_data(sha)
35+
t = self._git.stream_object_data(to_hex_sha(sha))
3336
return OStream(*t)
3437

‎lib/git/diff.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ class Diff(object):
196196
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
197197
""", re.VERBOSE | re.MULTILINE)
198198
# can be used for comparisons
199-
null_hex_sha = "0"*40
199+
NULL_HEX_SHA = "0"*40
200200

201201
__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file",
202202
"rename_from", "rename_to", "diff")

‎lib/git/errors.py

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,43 +6,51 @@
66
""" Module containing all exceptions thrown througout the git package, """
77

88
class InvalidGitRepositoryError(Exception):
9-
""" Thrown if the given repository appears to have an invalid format. """
9+
""" Thrown if the given repository appears to have an invalid format. """
1010

1111

1212
class NoSuchPathError(OSError):
13-
""" Thrown if a path could not be access by the system. """
13+
""" Thrown if a path could not be access by the system. """
1414

1515

1616
class GitCommandError(Exception):
17-
""" Thrown if execution of the git command fails with non-zero status code. """
18-
def __init__(self, command, status, stderr=None):
19-
self.stderr = stderr
20-
self.status = status
21-
self.command = command
22-
23-
def __str__(self):
24-
return ("'%s' returned exit status %i: %s" %
25-
(' '.join(str(i) for i in self.command), self.status, self.stderr))
17+
""" Thrown if execution of the git command fails with non-zero status code. """
18+
def __init__(self, command, status, stderr=None):
19+
self.stderr = stderr
20+
self.status = status
21+
self.command = command
22+
23+
def __str__(self):
24+
return ("'%s' returned exit status %i: %s" %
25+
(' '.join(str(i) for i in self.command), self.status, self.stderr))
2626

2727

2828
class CheckoutError( Exception ):
29-
"""Thrown if a file could not be checked out from the index as it contained
30-
changes.
31-
32-
The .failed_files attribute contains a list of relative paths that failed
33-
to be checked out as they contained changes that did not exist in the index.
34-
35-
The .failed_reasons attribute contains a string informing about the actual
36-
cause of the issue.
37-
38-
The .valid_files attribute contains a list of relative paths to files that
39-
were checked out successfully and hence match the version stored in the
40-
index"""
41-
def __init__(self, message, failed_files, valid_files, failed_reasons):
42-
Exception.__init__(self, message)
43-
self.failed_files = failed_files
44-
self.failed_reasons = failed_reasons
45-
self.valid_files = valid_files
46-
47-
def __str__(self):
48-
return Exception.__str__(self) + ":%s" % self.failed_files
29+
"""Thrown if a file could not be checked out from the index as it contained
30+
changes.
31+
32+
The .failed_files attribute contains a list of relative paths that failed
33+
to be checked out as they contained changes that did not exist in the index.
34+
35+
The .failed_reasons attribute contains a string informing about the actual
36+
cause of the issue.
37+
38+
The .valid_files attribute contains a list of relative paths to files that
39+
were checked out successfully and hence match the version stored in the
40+
index"""
41+
def __init__(self, message, failed_files, valid_files, failed_reasons):
42+
Exception.__init__(self, message)
43+
self.failed_files = failed_files
44+
self.failed_reasons = failed_reasons
45+
self.valid_files = valid_files
46+
47+
def __str__(self):
48+
return Exception.__str__(self) + ":%s" % self.failed_files
49+
50+
51+
class CacheError(Exception):
52+
"""Base for all errors related to the git index, which is called cache internally"""
53+
54+
class UnmergedEntriesError(CacheError):
55+
"""Thrown if an operation cannot proceed as there are still unmerged
56+
entries in the cache"""

‎lib/git/index/base.py

Lines changed: 35 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
66
"""Module containing Index implementation, allowing to perform all kinds of index
77
manipulations such as querying and merging. """
8-
import binascii
98
import tempfile
109
import os
1110
import sys
1211
import subprocess
1312
import glob
1413
from cStringIO import StringIO
14+
from binascii import b2a_hex
1515

1616
from stat import (
1717
S_ISLNK,
@@ -25,16 +25,12 @@
2525
from typ import (
2626
BaseIndexEntry,
2727
IndexEntry,
28-
CE_NAMEMASK,
29-
CE_STAGESHIFT
3028
)
3129

3230
from util import (
3331
TemporaryFileSwap,
3432
post_clear_cache,
3533
default_index,
36-
pack,
37-
unpack
3834
)
3935

4036
import git.objects
@@ -60,20 +56,17 @@
6056
LockedFD,
6157
join_path_native,
6258
file_contents_ro,
63-
LockFile
64-
)
65-
66-
67-
from gitdb.base import (
68-
IStream
6959
)
7060

7161
from fun import (
7262
write_cache,
7363
read_cache,
64+
write_tree_from_cache,
7465
entry_key
7566
)
7667

68+
from gitdb.base import IStream
69+
7770
__all__ = ( 'IndexFile', 'CheckoutError' )
7871

7972

@@ -161,10 +154,15 @@ def _deserialize(self, stream):
161154
self.version, self.entries, self._extension_data, conten_sha = read_cache(stream)
162155
return self
163156

164-
def _serialize(self, stream, ignore_tree_extension_data=False):
157+
def _entries_sorted(self):
158+
""":return: list of entries, in a sorted fashion, first by path, then by stage"""
165159
entries_sorted = self.entries.values()
166-
entries_sorted.sort(key=lambda e: (e[3], e.stage)) # use path/stage as sort key
167-
write_cache(entries_sorted,
160+
entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key
161+
return entries_sorted
162+
163+
def _serialize(self, stream, ignore_tree_extension_data=False):
164+
entries = self._entries_sorted()
165+
write_cache(entries,
168166
stream,
169167
(ignore_tree_extension_data and None) or self._extension_data)
170168
return self
@@ -403,7 +401,7 @@ def iter_blobs(self, predicate = lambda t: True):
403401
# TODO: is it necessary to convert the mode ? We did that when adding
404402
# it to the index, right ?
405403
mode = self._stat_mode_to_index_mode(entry.mode)
406-
blob = Blob(self.repo, entry.sha, mode, entry.path)
404+
blob = Blob(self.repo, entry.hexsha, mode, entry.path)
407405
blob.size = entry.size
408406
output = (entry.stage, blob)
409407
if predicate(output):
@@ -490,33 +488,31 @@ def update(self):
490488
# allows to lazily reread on demand
491489
return self
492490

493-
def _write_tree(self, missing_ok=False):
491+
def write_tree(self):
494492
"""Writes this index to a corresponding Tree object into the repository's
495493
object database and return it.
496-
497-
:param missing_ok:
498-
If True, missing objects referenced by this index will not result
499-
in an error.
500-
501-
:return: Tree object representing this index"""
494+
495+
:return: Tree object representing this index
496+
:note: The tree will be written even if one or more objects the tree refers to
497+
does not yet exist in the object database. This could happen if you added
498+
Entries to the index directly.
499+
:raise ValueError: if there are no entries in the cache
500+
:raise UnmergedEntriesError: """
502501
# we obtain no lock as we just flush our contents to disk as tree
503502
if not self.entries:
504503
raise ValueError("Cannot write empty index")
505504

505+
# TODO: use memory db, this helps to prevent IO if the resulting tree
506+
# already exists
507+
entries = self._entries_sorted()
508+
binsha, tree_items = write_tree_from_cache(entries, self.repo.odb, slice(0, len(entries)))
506509

510+
# note: additional deserialization could be saved if write_tree_from_cache
511+
# would return sorted tree entries
512+
root_tree = Tree(self.repo, b2a_hex(binsha), path='')
513+
root_tree._cache = tree_items
514+
return root_tree
507515

508-
return Tree(self.repo, tree_sha, 0, '')
509-
510-
def write_tree(self, missing_ok = False):
511-
index_path = self._index_path()
512-
tmp_index_mover = TemporaryFileSwap(index_path)
513-
514-
self.write(index_path, ignore_tree_extension_data=True)
515-
tree_sha = self.repo.git.write_tree(missing_ok=missing_ok)
516-
517-
del(tmp_index_mover) # as soon as possible
518-
return Tree(self.repo, tree_sha, 0, '')
519-
520516
def _process_diff_args(self, args):
521517
try:
522518
args.pop(args.index(self))
@@ -525,7 +521,6 @@ def _process_diff_args(self, args):
525521
# END remove self
526522
return args
527523

528-
529524
def _to_relative_path(self, path):
530525
""":return: Version of path relative to our git directory or raise ValueError
531526
if it is not within our git direcotory"""
@@ -599,7 +594,7 @@ def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=Non
599594
600595
- BaseIndexEntry or type
601596
Handling equals the one of Blob objects, but the stage may be
602-
explicitly set.
597+
explicitly set. Please note that Index Entries require binary sha's.
603598
604599
:param force:
605600
If True, otherwise ignored or excluded files will be
@@ -666,7 +661,7 @@ def store_path(filepath):
666661
fprogress(filepath, True, filepath)
667662

668663
return BaseIndexEntry((self._stat_mode_to_index_mode(st.st_mode),
669-
istream.sha, 0, filepath))
664+
istream.binsha, 0, filepath))
670665
# END utility method
671666

672667

@@ -691,14 +686,14 @@ def store_path(filepath):
691686

692687
# HANLDE ENTRY OBJECT CREATION
693688
# create objects if required, otherwise go with the existing shas
694-
null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ]
689+
null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ]
695690
if null_entries_indices:
696691
for ei in null_entries_indices:
697692
null_entry = entries[ei]
698693
new_entry = store_path(null_entry.path)
699694

700695
# update null entry
701-
entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.sha, null_entry.stage, null_entry.path))
696+
entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path))
702697
# END for each entry index
703698
# END null_entry handling
704699

@@ -707,7 +702,7 @@ def store_path(filepath):
707702
# all object sha's
708703
if path_rewriter:
709704
for i,e in enumerate(entries):
710-
entries[i] = BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e)))
705+
entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e)))
711706
# END for each entry
712707
# END handle path rewriting
713708

‎lib/git/index/fun.py

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
Contains standalone functions to accompany the index implementation and make it
33
more versatile
44
"""
5+
from stat import S_IFDIR
6+
from cStringIO import StringIO
7+
8+
from git.errors import UnmergedEntriesError
9+
from git.objects.fun import tree_to_stream
510
from git.utils import (
611
IndexFileSHA1Writer,
712
)
@@ -16,12 +21,11 @@
1621
unpack
1722
)
1823

19-
from binascii import (
20-
hexlify,
21-
unhexlify
22-
)
24+
from gitdb.base import IStream
25+
from gitdb.typ import str_tree_type
26+
from binascii import a2b_hex
2327

24-
__all__ = ('write_cache', 'read_cache' )
28+
__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key' )
2529

2630
def write_cache_entry(entry, stream):
2731
"""Write the given entry to the stream"""
@@ -34,7 +38,7 @@ def write_cache_entry(entry, stream):
3438
assert plen == len(path), "Path %s too long to fit into index" % entry[3]
3539
flags = plen | entry[2]
3640
write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0],
37-
entry[8], entry[9], entry[10], unhexlify(entry[1]), flags))
41+
entry[8], entry[9], entry[10], entry[1], flags))
3842
write(path)
3943
real_size = ((stream.tell() - beginoffset + 8) & ~7)
4044
write("\0" * ((beginoffset + real_size) - stream.tell()))
@@ -80,7 +84,7 @@ def read_entry(stream):
8084

8185
real_size = ((stream.tell() - beginoffset + 8) & ~7)
8286
data = stream.read((beginoffset + real_size) - stream.tell())
83-
return IndexEntry((mode, hexlify(sha), flags, path, ctime, mtime, dev, ino, uid, gid, size))
87+
return IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size))
8488

8589
def read_header(stream):
8690
"""Return tuple(version_long, num_entries) from the given stream"""
@@ -136,3 +140,58 @@ def read_cache(stream):
136140

137141
return (version, entries, extension_data, content_sha)
138142

143+
def write_tree_from_cache(entries, odb, sl, si=0):
144+
"""Create a tree from the given sorted list of entries and put the respective
145+
trees into the given object database
146+
:param entries: **sorted** list of IndexEntries
147+
:param odb: object database to store the trees in
148+
:param si: start index at which we should start creating subtrees
149+
:param sl: slice indicating the range we should process on the entries list
150+
:return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
151+
tree entries being a tuple of hexsha, mode, name"""
152+
tree_items = list()
153+
ci = sl.start
154+
end = sl.stop
155+
while ci < end:
156+
entry = entries[ci]
157+
if entry.stage != 0:
158+
raise UnmergedEntriesError(entry)
159+
# END abort on unmerged
160+
ci += 1
161+
rbound = entry.path.find('/', si)
162+
if rbound == -1:
163+
# its not a tree
164+
tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
165+
else:
166+
# find common base range
167+
base = entry.path[si:rbound]
168+
xi = ci
169+
while xi < end:
170+
oentry = entries[xi]
171+
xi += 1
172+
orbound = oentry.path.find('/')
173+
if orbound == -1 or oentry.path[si:orbound] != base:
174+
break
175+
# END abort on base mismatch
176+
# END find common base
177+
178+
# enter recursion
179+
# ci - 1 as we want to count our current item as well
180+
sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1)
181+
tree_items.append((sha, S_IFDIR, base))
182+
183+
# skip ahead
184+
ci = xi
185+
# END handle bounds
186+
# END for each entry
187+
188+
# finally create the tree
189+
sio = StringIO()
190+
tree_to_stream(tree_items, sio.write)
191+
sio.seek(0)
192+
193+
istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
194+
return (istream.binsha, tree_items)
195+
196+
197+

‎lib/git/index/typ.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
unpack
66
)
77

8+
from binascii import (
9+
b2a_hex,
10+
a2b_hex
11+
)
12+
813
__all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry')
914

1015
#{ Invariants
@@ -50,17 +55,22 @@ class BaseIndexEntry(tuple):
5055
use numeric indices for performance reasons. """
5156

5257
def __str__(self):
53-
return "%o %s %i\t%s" % (self.mode, self.sha, self.stage, self.path)
58+
return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path)
5459

5560
@property
5661
def mode(self):
5762
""" File Mode, compatible to stat module constants """
5863
return self[0]
5964

6065
@property
61-
def sha(self):
62-
""" hex sha of the blob """
66+
def binsha(self):
67+
"""binary sha of the blob """
6368
return self[1]
69+
70+
@property
71+
def hexsha(self):
72+
"""hex version of our sha"""
73+
return b2a_hex(self[1])
6474

6575
@property
6676
def stage(self):
@@ -88,7 +98,7 @@ def flags(self):
8898
@classmethod
8999
def from_blob(cls, blob, stage = 0):
90100
""":return: Fully equipped BaseIndexEntry at the given stage"""
91-
return cls((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path))
101+
return cls((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path))
92102

93103

94104
class IndexEntry(BaseIndexEntry):
@@ -145,12 +155,12 @@ def from_base(cls, base):
145155
146156
:param base: Instance of type BaseIndexEntry"""
147157
time = pack(">LL", 0, 0)
148-
return IndexEntry((base.mode, base.sha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
158+
return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
149159

150160
@classmethod
151161
def from_blob(cls, blob, stage = 0):
152162
""":return: Minimal entry resembling the given blob object"""
153163
time = pack(">LL", 0, 0)
154-
return IndexEntry((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
164+
return IndexEntry((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
155165

156166

‎lib/git/objects/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class Object(LazyMixin):
2121
inst.data # byte string containing the whole data of the object
2222
"""
2323
NULL_HEX_SHA = '0'*40
24+
NULL_BIN_SHA = '\0'*20
2425
TYPES = ("blob", "tree", "commit", "tag")
2526
__slots__ = ("repo", "sha", "size", "data" )
2627
type = None # to be set by subclass

‎lib/git/objects/fun.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""Module with functions which are supposed to be as fast as possible"""
2+
3+
__all__ = ('tree_to_stream', 'tree_entries_from_data')
4+
5+
def tree_to_stream(entries, write):
6+
"""Write the give list of entries into a stream using its write method
7+
:param entries: **sorted** list of tuples with (binsha, mode, name)
8+
:param write: write method which takes a data string"""
9+
ord_zero = ord('0')
10+
bit_mask = 7 # 3 bits set
11+
12+
for binsha, mode, name in entries:
13+
mode_str = ''
14+
for i in xrange(6):
15+
mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
16+
# END for each 8 octal value
17+
18+
# git slices away the first octal if its zero
19+
if mode_str[0] == '0':
20+
mode_str = mode_str[1:]
21+
# END save a byte
22+
23+
write("%s %s\0%s" % (mode_str, name, binsha))
24+
# END for each item
25+
26+
27+
def tree_entries_from_data(data):
28+
"""Reads the binary representation of a tree and returns tuples of Tree items
29+
:param data: data block with tree data
30+
:return: list(tuple(binsha, mode, tree_relative_path), ...)"""
31+
ord_zero = ord('0')
32+
len_data = len(data)
33+
i = 0
34+
out = list()
35+
while i < len_data:
36+
mode = 0
37+
38+
# read mode
39+
# Some git versions truncate the leading 0, some don't
40+
# The type will be extracted from the mode later
41+
while data[i] != ' ':
42+
# move existing mode integer up one level being 3 bits
43+
# and add the actual ordinal value of the character
44+
mode = (mode << 3) + (ord(data[i]) - ord_zero)
45+
i += 1
46+
# END while reading mode
47+
48+
# byte is space now, skip it
49+
i += 1
50+
51+
# parse name, it is NULL separated
52+
53+
ns = i
54+
while data[i] != '\0':
55+
i += 1
56+
# END while not reached NULL
57+
name = data[ns:i]
58+
59+
# byte is NULL, get next 20
60+
i += 1
61+
sha = data[i:i+20]
62+
i = i + 20
63+
64+
out.append((sha, mode, name))
65+
# END for each byte in data stream
66+
return out

‎lib/git/objects/tree.py

Lines changed: 28 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@
55
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
66

77
import os
8+
import utils
9+
import base
10+
811
from blob import Blob
912
from submodule import Submodule
10-
import base
11-
import binascii
1213
import git.diff as diff
13-
import utils
14-
from git.utils import join_path
15-
1614
join = os.path.join
1715

18-
def sha_to_hex(sha):
19-
"""Takes a string and returns the hex of the sha within"""
20-
hexsha = binascii.hexlify(sha)
21-
return hexsha
22-
16+
from fun import (
17+
tree_entries_from_data,
18+
tree_to_stream
19+
)
20+
21+
from gitdb.util import to_bin_sha
22+
from binascii import b2a_hex
2323

2424
class TreeModifier(object):
2525
"""A utility class providing methods to alter the underlying cache in a list-like
@@ -51,44 +51,44 @@ def set_done(self):
5151
#} END interface
5252

5353
#{ Mutators
54-
def add(self, hexsha, mode, name, force=False):
54+
def add(self, sha, mode, name, force=False):
5555
"""Add the given item to the tree. If an item with the given name already
5656
exists, nothing will be done, but a ValueError will be raised if the
5757
sha and mode of the existing item do not match the one you add, unless
5858
force is True
59-
:param hexsha: The 40 byte sha of the item to add
59+
:param sha: The 20 or 40 byte sha of the item to add
6060
:param mode: int representing the stat compatible mode of the item
6161
:param force: If True, an item with your name and information will overwrite
6262
any existing item with the same name, no matter which information it has
6363
:return: self"""
6464
if '/' in name:
6565
raise ValueError("Name must not contain '/' characters")
66-
if len(hexsha) != 40:
67-
raise ValueError("Hexsha required, got %r" % hexsha)
6866
if (mode >> 12) not in Tree._map_id_to_type:
6967
raise ValueError("Invalid object type according to mode %o" % mode)
70-
68+
69+
sha = to_bin_sha(sha)
7170
index = self._index_by_name(name)
72-
item = (hexsha, mode, name)
71+
item = (sha, mode, name)
7372
if index == -1:
7473
self._cache.append(item)
7574
else:
7675
if force:
7776
self._cache[index] = item
7877
else:
7978
ex_item = self._cache[index]
80-
if ex_item[0] != hexsha or ex_item[1] != mode:
79+
if ex_item[0] != sha or ex_item[1] != mode:
8180
raise ValueError("Item %r existed with different properties" % name)
8281
# END handle mismatch
8382
# END handle force
8483
# END handle name exists
8584
return self
8685

87-
def add_unchecked(self, hexsha, mode, name):
86+
def add_unchecked(self, binsha, mode, name):
8887
"""Add the given item to the tree, its correctness is assumed, which
8988
puts the caller into responsibility to assure the input is correct.
90-
For more information on the parameters, see ``add``"""
91-
self._cache.append((hexsha, mode, name))
89+
For more information on the parameters, see ``add``
90+
:param binsha: 20 byte binary sha"""
91+
self._cache.append((binsha, mode, name))
9292

9393
def __delitem__(self, name):
9494
"""Deletes an item with the given name if it exists"""
@@ -146,70 +146,21 @@ def _get_intermediate_items(cls, index_object):
146146
def _set_cache_(self, attr):
147147
if attr == "_cache":
148148
# Set the data when we need it
149-
self._cache = self._get_tree_cache(self.data)
149+
self._cache = tree_entries_from_data(self.data)
150150
else:
151151
super(Tree, self)._set_cache_(attr)
152152

153-
def _get_tree_cache(self, data):
154-
""" :return: list(object_instance, ...)
155-
:param data: data string containing our serialized information"""
156-
return list(self._iter_from_data(data))
157-
158153
def _iter_convert_to_object(self, iterable):
159154
"""Iterable yields tuples of (hexsha, mode, name), which will be converted
160155
to the respective object representation"""
161-
for hexsha, mode, name in iterable:
156+
for binsha, mode, name in iterable:
162157
path = join(self.path, name)
163158
type_id = mode >> 12
164159
try:
165-
yield self._map_id_to_type[type_id](self.repo, hexsha, mode, path)
160+
yield self._map_id_to_type[type_id](self.repo, b2a_hex(binsha), mode, path)
166161
except KeyError:
167162
raise TypeError( "Unknown type %i found in tree data for path '%s'" % (type_id, path))
168163
# END for each item
169-
170-
def _iter_from_data(self, data):
171-
"""
172-
Reads the binary non-pretty printed representation of a tree and converts
173-
it into Blob, Tree or Commit objects.
174-
175-
Note: This method was inspired by the parse_tree method in dulwich.
176-
177-
:yield: Tuple(hexsha, mode, tree_relative_path)
178-
"""
179-
ord_zero = ord('0')
180-
len_data = len(data)
181-
i = 0
182-
while i < len_data:
183-
mode = 0
184-
185-
# read mode
186-
# Some git versions truncate the leading 0, some don't
187-
# The type will be extracted from the mode later
188-
while data[i] != ' ':
189-
# move existing mode integer up one level being 3 bits
190-
# and add the actual ordinal value of the character
191-
mode = (mode << 3) + (ord(data[i]) - ord_zero)
192-
i += 1
193-
# END while reading mode
194-
195-
# byte is space now, skip it
196-
i += 1
197-
198-
# parse name, it is NULL separated
199-
200-
ns = i
201-
while data[i] != '\0':
202-
i += 1
203-
# END while not reached NULL
204-
name = data[ns:i]
205-
206-
# byte is NULL, get next 20
207-
i += 1
208-
sha = data[i:i+20]
209-
i = i + 20
210-
211-
yield (sha_to_hex(sha), mode, name)
212-
# END for each byte in data stream
213164

214165
def __div__(self, file):
215166
"""
@@ -250,7 +201,7 @@ def __div__(self, file):
250201
else:
251202
for info in self._cache:
252203
if info[2] == file: # [2] == name
253-
return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
204+
return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
254205
# END for each obj
255206
raise KeyError( msg % file )
256207
# END handle long paths
@@ -304,7 +255,7 @@ def __len__(self):
304255
def __getitem__(self, item):
305256
if isinstance(item, int):
306257
info = self._cache[item]
307-
return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
258+
return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
308259

309260
if isinstance(item, basestring):
310261
# compatability
@@ -335,32 +286,16 @@ def __contains__(self, item):
335286
def __reversed__(self):
336287
return reversed(self._iter_convert_to_object(self._cache))
337288

338-
def _serialize(self, stream, presort=False):
289+
def _serialize(self, stream):
339290
"""Serialize this tree into the stream. Please note that we will assume
340291
our tree data to be in a sorted state. If this is not the case, serialization
341292
will not generate a correct tree representation as these are assumed to be sorted
342293
by algorithms"""
343-
ord_zero = ord('0')
344-
bit_mask = 7 # 3 bits set
345-
hex_to_bin = binascii.a2b_hex
346-
347-
for hexsha, mode, name in self._cache:
348-
mode_str = ''
349-
for i in xrange(6):
350-
mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
351-
# END for each 8 octal value
352-
353-
# git slices away the first octal if its zero
354-
if mode_str[0] == '0':
355-
mode_str = mode_str[1:]
356-
# END save a byte
357-
358-
stream.write("%s %s\0%s" % (mode_str, name, hex_to_bin(hexsha)))
359-
# END for each item
294+
tree_to_stream(self._cache, stream.write)
360295
return self
361296

362297
def _deserialize(self, stream):
363-
self._cache = self._get_tree_cache(stream.read())
298+
self._cache = tree_entries_from_data(stream.read())
364299
return self
365300

366301

‎lib/git/utils.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
import tempfile
1111

1212
from gitdb.util import (
13-
make_sha,
14-
FDStreamWrapper,
15-
LockedFD,
16-
file_contents_ro,
17-
LazyMixin
18-
)
13+
make_sha,
14+
FDStreamWrapper,
15+
LockedFD,
16+
file_contents_ro,
17+
LazyMixin,
18+
to_hex_sha,
19+
to_bin_sha
20+
)
1921

2022

2123
def stream_copy(source, destination, chunk_size=512*1024):

‎test/git/test_index.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_index_file_base(self):
5555
last_val = None
5656
entry = index.entries.itervalues().next()
5757
for attr in ("path","ctime","mtime","dev","inode","mode","uid",
58-
"gid","size","sha","stage"):
58+
"gid","size","binsha", "hexsha", "stage"):
5959
val = getattr(entry, attr)
6060
# END for each method
6161

@@ -128,7 +128,7 @@ def test_index_file_from_tree(self):
128128

129129

130130
# writing a tree should fail with an unmerged index
131-
self.failUnlessRaises(GitCommandError, three_way_index.write_tree)
131+
self.failUnlessRaises(UnmergedEntriesError, three_way_index.write_tree)
132132

133133
# removed unmerged entries
134134
unmerged_blob_map = three_way_index.unmerged_blobs()
@@ -159,27 +159,27 @@ def test_index_merge_tree(self, rw_repo):
159159
manifest_entry = rw_repo.index.entries[manifest_key]
160160
rw_repo.index.merge_tree(next_commit)
161161
# only one change should be recorded
162-
assert manifest_entry.sha != rw_repo.index.entries[manifest_key].sha
162+
assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha
163163

164164
rw_repo.index.reset(rw_repo.head)
165-
assert rw_repo.index.entries[manifest_key].sha == manifest_entry.sha
165+
assert rw_repo.index.entries[manifest_key].binsha == manifest_entry.binsha
166166

167167
# FAKE MERGE
168168
#############
169169
# Add a change with a NULL sha that should conflict with next_commit. We
170170
# pretend there was a change, but we do not even bother adding a proper
171171
# sha for it ( which makes things faster of course )
172-
manifest_fake_entry = BaseIndexEntry((manifest_entry[0], Diff.null_hex_sha, 0, manifest_entry[3]))
172+
manifest_fake_entry = BaseIndexEntry((manifest_entry[0], "\0"*20, 0, manifest_entry[3]))
173173
rw_repo.index.add([manifest_fake_entry])
174174
# add actually resolves the null-hex-sha for us as a feature, but we can
175175
# edit the index manually
176-
assert rw_repo.index.entries[manifest_key].sha != Diff.null_hex_sha
176+
assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
177177
# must operate on the same index for this ! Its a bit problematic as
178178
# it might confuse people
179179
index = rw_repo.index
180180
index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
181181
index.write()
182-
assert rw_repo.index.entries[manifest_key].sha == Diff.null_hex_sha
182+
assert rw_repo.index.entries[manifest_key].hexsha == Diff.NULL_HEX_SHA
183183

184184
# a three way merge would result in a conflict and fails as the command will
185185
# not overwrite any entries in our index and hence leave them unmerged. This is
@@ -189,10 +189,11 @@ def test_index_merge_tree(self, rw_repo):
189189
# the only way to get the merged entries is to save the current index away into a tree,
190190
# which is like a temporary commit for us. This fails as well as the NULL sha does not
191191
# have a corresponding object
192-
self.failUnlessRaises(GitCommandError, index.write_tree)
192+
# NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
193+
# self.failUnlessRaises(GitCommandError, index.write_tree)
193194

194-
# if missing objects are okay, this would work though
195-
tree = index.write_tree(missing_ok = True)
195+
# if missing objects are okay, this would work though ( they are always okay now )
196+
tree = index.write_tree()
196197

197198
# now make a proper three way merge with unmerged entries
198199
unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
@@ -348,7 +349,7 @@ def mixed_iterator():
348349
if type_id == 0: # path
349350
yield entry.path
350351
elif type_id == 1: # blob
351-
yield Blob(rw_repo, entry.sha, entry.mode, entry.path)
352+
yield Blob(rw_repo, entry.hexsha, entry.mode, entry.path)
352353
elif type_id == 2: # BaseIndexEntry
353354
yield BaseIndexEntry(entry[:4])
354355
elif type_id == 3: # IndexEntry
@@ -442,18 +443,19 @@ def mixed_iterator():
442443
old_blob = new_commit.parents[0].tree.blobs[0]
443444
entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add)
444445
self._assert_fprogress(entries)
445-
assert index.entries[(old_blob.path,0)].sha == old_blob.sha and len(entries) == 1
446+
assert index.entries[(old_blob.path,0)].hexsha == old_blob.sha and len(entries) == 1
446447

447448
# mode 0 not allowed
448-
null_sha = "0"*40
449-
self.failUnlessRaises(ValueError, index.reset(new_commit).add, [BaseIndexEntry((0, null_sha,0,"doesntmatter"))])
449+
null_hex_sha = Diff.NULL_HEX_SHA
450+
null_bin_sha = "\0" * 20
451+
self.failUnlessRaises(ValueError, index.reset(new_commit).add, [BaseIndexEntry((0, null_bin_sha,0,"doesntmatter"))])
450452

451453
# add new file
452454
new_file_relapath = "my_new_file"
453455
new_file_path = self._make_file(new_file_relapath, "hello world", rw_repo)
454-
entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_sha, 0, new_file_relapath))], fprogress=self._fprogress_add)
456+
entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_bin_sha, 0, new_file_relapath))], fprogress=self._fprogress_add)
455457
self._assert_fprogress(entries)
456-
assert len(entries) == 1 and entries[0].sha != null_sha
458+
assert len(entries) == 1 and entries[0].hexsha != null_hex_sha
457459

458460
# add symlink
459461
if sys.platform != "win32":
@@ -467,21 +469,21 @@ def mixed_iterator():
467469
assert S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode)
468470

469471
# we expect only the target to be written
470-
assert index.repo.odb.stream(entries[0].sha).read() == target
472+
assert index.repo.odb.stream(entries[0].binsha).read() == target
471473
# END real symlink test
472474

473475
# add fake symlink and ensure it checks out as a symlink
474476
fake_symlink_relapath = "my_fake_symlink"
475477
link_target = "/etc/that"
476478
fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo)
477-
fake_entry = BaseIndexEntry((0120000, null_sha, 0, fake_symlink_relapath))
479+
fake_entry = BaseIndexEntry((0120000, null_hex_sha, 0, fake_symlink_relapath))
478480
entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add)
479481
self._assert_fprogress(entries)
480-
assert entries[0].sha != null_sha
482+
assert entries[0].hexsha != null_hex_sha
481483
assert len(entries) == 1 and S_ISLNK(entries[0].mode)
482484

483485
# assure this also works with an alternate method
484-
full_index_entry = IndexEntry.from_base(BaseIndexEntry((0120000, entries[0].sha, 0, entries[0].path)))
486+
full_index_entry = IndexEntry.from_base(BaseIndexEntry((0120000, entries[0].binsha, 0, entries[0].path)))
485487
entry_key = index.entry_key(full_index_entry)
486488
index.reset(new_commit)
487489

@@ -493,7 +495,7 @@ def mixed_iterator():
493495
assert S_ISLNK(new_entry.mode)
494496

495497
# a tree created from this should contain the symlink
496-
tree = index.write_tree(True)
498+
tree = index.write_tree()
497499
assert fake_symlink_relapath in tree
498500

499501
# checkout the fakelink, should be a link then
@@ -567,3 +569,5 @@ def make_paths():
567569
for filenum in range(len(paths)):
568570
assert index.entry_key(str(filenum), 0) in index.entries
569571

572+
def test_compare_write_tree(self):
573+
self.fail("compare git-write-tree with python implementation, must have same output")

0 commit comments

Comments
 (0)
Please sign in to comment.