
Commit c38bd19

Increased initial size of decompressed data to obtain loose object header information
This appears to fix gitpython-developers/GitPython#220, in this particular case. Nonetheless, we might just have gotten lucky here; the actual issue may not be solved yet and could thus recur. It would certainly be best to churn through plenty of loose objects to verify this truly works now. Maybe the pack could be recompressed as loose objects to obtain a sufficiently large data set.
1 parent ab45206 commit c38bd19
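
For context on why the decompressed-header budget matters: a loose object is a zlib stream whose decompressed form starts with "<type> <size>\0" followed by the raw payload, and the header parser only needs enough decompressed bytes to reach that NUL byte. Below is a minimal sketch of that layout, not the gitdb implementation; parse_loose_header and the path argument are illustrative.

import zlib

def parse_loose_header(path, maxb=8192):
    # Read a bounded amount of compressed input, then decompress at most maxb
    # bytes, which must be enough to cover "<type> <size>\0" at the stream start.
    with open(path, 'rb') as fp:
        raw = fp.read(maxb)
    hdr = zlib.decompressobj().decompress(raw, maxb)
    hdrend = hdr.find(b'\0')
    if hdrend < 0:
        raise ValueError("no header terminator within %d decompressed bytes" % maxb)
    type_string, size = hdr[:hdrend].split(b' ')
    return type_string, int(size)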

File tree

4 files changed (+15 -8 lines)


Diff for: gitdb/stream.py

+5 -2
@@ -100,7 +100,9 @@ def _parse_header_info(self):
 
         :return: parsed type_string, size"""
         # read header
-        maxb = 512   # should really be enough, cgit uses 8192 I believe
+        # should really be enough, cgit uses 8192 I believe
+        # And for good reason !! This needs to be that high for the header to be read correctly in all cases
+        maxb = 8192
         self._s = maxb
         hdr = self.read(maxb)
         hdrend = hdr.find(NULL_BYTE)

@@ -243,7 +245,7 @@ def read(self, size=-1):
         # moving the window into the memory map along as we decompress, which keeps
         # the tail smaller than our chunk-size. This causes 'only' the chunk to be
         # copied once, and another copy of a part of it when it creates the unconsumed
-        # tail. We have to use it to hand in the appropriate amount of bytes durin g
+        # tail. We have to use it to hand in the appropriate amount of bytes during
         # the next read.
         tail = self._zip.unconsumed_tail
         if tail:

@@ -284,6 +286,7 @@ def read(self, size=-1):
         else:
             unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
         # end handle very special case ...
+
         self._cbr += len(indata) - unused_datalen
         self._br += len(dcompdat)
 
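
The comments in the read() hunks describe feeding zlib bounded chunks of the memory map and carrying unconsumed_tail into the next call. Here is a minimal stand-alone sketch of that pattern, illustrative only and not the DecompressMemMapReader code; decompress_all_chunked is a hypothetical helper.

import zlib

def decompress_all_chunked(compressed, chunk_size=4096, out_step=8192):
    # Feed the decompressor bounded input chunks and prepend its unconsumed_tail
    # to the next chunk, so no compressed byte is lost between calls.
    zobj = zlib.decompressobj()
    out = []
    pos = 0
    tail = b''
    while not zobj.eof and (tail or pos < len(compressed)):
        indata = tail + compressed[pos:pos + chunk_size]
        pos += chunk_size
        out.append(zobj.decompress(indata, out_step))
        tail = zobj.unconsumed_tail
    return b''.join(out)

The unused_data attribute, referenced in the last hunk, holds input bytes that follow the end of the zlib stream; both it and unconsumed_tail have to be subtracted when counting how much compressed input was actually consumed.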

Binary file not shown.

Diff for: gitdb/test/performance/test_pack.py

+2 -1
@@ -63,7 +63,8 @@ def test_pack_random_access(self):
         st = time()
         for sha in sha_list[:max_items]:
             stream = pdb_stream(sha)
-            stream.read()
+            read_len = len(stream.read())
+            assert read_len == stream.size
             total_size += stream.size
         elapsed = time() - st
         total_kib = total_size / 1000
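
The commit message suggests recompressing the pack contents as loose objects to get a larger data set for the header-parsing path. A hedged sketch of how that could look with gitdb's database classes, assuming PackedDB, LooseObjectDB and IStream behave as their interfaces suggest; repack_as_loose and both directory arguments are illustrative.

from gitdb.base import IStream
from gitdb.db.loose import LooseObjectDB
from gitdb.db.pack import PackedDB

def repack_as_loose(pack_dir, loose_dir):
    # Write every packed object back out as a loose object, exercising the
    # loose-object header write/read path on a large, varied data set.
    pdb = PackedDB(pack_dir)
    ldb = LooseObjectDB(loose_dir)
    for sha in pdb.sha_iter():
        ostream = pdb.stream(sha)
        ldb.store(IStream(ostream.type, ostream.size, ostream))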

Diff for: gitdb/test/test_stream.py

+8 -5
@@ -144,8 +144,11 @@ def test_compressed_writer(self):
 
     def test_decompress_reader_special_case(self):
         odb = LooseObjectDB(fixture_path('objects'))
-        ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))
-
-        # if there is a bug, we will be missing one byte exactly !
-        data = ostream.read()
-        assert len(data) == ostream.size
+        for sha in ('888401851f15db0eed60eb1bc29dec5ddcace911',
+                    '7bb839852ed5e3a069966281bb08d50012fb309b',):
+            ostream = odb.stream(hex_to_bin(sha))
+
+            # if there is a bug, we will be missing one byte exactly !
+            data = ostream.read()
+            assert len(data) == ostream.size
+        # end for each loose object sha to test
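
To "churn through plenty of loose objects", as the commit message proposes, the same assertion could be run over every object in a loose database rather than just the two fixture shas. A small sketch; verify_loose_objects is a hypothetical helper and assumes LooseObjectDB.sha_iter() yields all binary shas in the database.

from gitdb.db.loose import LooseObjectDB

def verify_loose_objects(objects_dir):
    # Read back every loose object and check that the decompressed payload
    # length matches the size recorded in its header.
    odb = LooseObjectDB(objects_dir)
    for sha in odb.sha_iter():
        ostream = odb.stream(sha)
        assert len(ostream.read()) == ostream.size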
