
Commit c38bd19

Increased initial size of decompressed data to obtain loose object header information
This appears to fix gitpython-developers/GitPython#220, in this particular case. Nonetheless, we might just have gotten lucky here; the actual issue may not be solved yet and could thus recur. It would certainly be best to churn through plenty of loose objects to verify this truly works now. Maybe the pack could be recompressed as loose objects to obtain a sufficiently large data set.
1 parent ab45206 commit c38bd19
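
For context on why the decompressed-header budget matters: a loose object is a zlib stream whose decompressed form starts with "<type> <size>\0" followed by the raw payload, and the header parser only needs enough decompressed bytes to reach that NUL byte. Below is a minimal sketch of that layout, not the gitdb implementation; parse_loose_header and the path argument are illustrative.

import zlib

def parse_loose_header(path, maxb=8192):
    # Read a bounded amount of compressed input, then decompress at most maxb
    # bytes, which must be enough to cover "<type> <size>\0" at the stream start.
    with open(path, 'rb') as fp:
        raw = fp.read(maxb)
    hdr = zlib.decompressobj().decompress(raw, maxb)
    hdrend = hdr.find(b'\0')
    if hdrend < 0:
        raise ValueError("no header terminator within %d decompressed bytes" % maxb)
    type_string, size = hdr[:hdrend].split(b' ')
    return type_string, int(size)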

File tree

4 files changed (+15 -8 lines)


Diff for: gitdb/stream.py

+5 -2
@@ -100,7 +100,9 @@ def _parse_header_info(self):
 
         :return: parsed type_string, size"""
         # read header
-        maxb = 512   # should really be enough, cgit uses 8192 I believe
+        # should really be enough, cgit uses 8192 I believe
+        # And for good reason !! This needs to be that high for the header to be read correctly in all cases
+        maxb = 8192
         self._s = maxb
         hdr = self.read(maxb)
         hdrend = hdr.find(NULL_BYTE)

@@ -243,7 +245,7 @@ def read(self, size=-1):
         # moving the window into the memory map along as we decompress, which keeps
         # the tail smaller than our chunk-size. This causes 'only' the chunk to be
         # copied once, and another copy of a part of it when it creates the unconsumed
-        # tail. We have to use it to hand in the appropriate amount of bytes durin g
+        # tail. We have to use it to hand in the appropriate amount of bytes during
         # the next read.
         tail = self._zip.unconsumed_tail
         if tail:

@@ -284,6 +286,7 @@ def read(self, size=-1):
         else:
             unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
         # end handle very special case ...
+
         self._cbr += len(indata) - unused_datalen
         self._br += len(dcompdat)
 
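
The comments in the read() hunks describe feeding zlib bounded chunks of the memory map and carrying unconsumed_tail into the next call. Here is a minimal stand-alone sketch of that pattern, illustrative only and not the DecompressMemMapReader code; decompress_all_chunked is a hypothetical helper.

import zlib

def decompress_all_chunked(compressed, chunk_size=4096, out_step=8192):
    # Feed the decompressor bounded input chunks and prepend its unconsumed_tail
    # to the next chunk, so no compressed byte is lost between calls.
    zobj = zlib.decompressobj()
    out = []
    pos = 0
    tail = b''
    while not zobj.eof and (tail or pos < len(compressed)):
        indata = tail + compressed[pos:pos + chunk_size]
        pos += chunk_size
        out.append(zobj.decompress(indata, out_step))
        tail = zobj.unconsumed_tail
    return b''.join(out)

The unused_data attribute, referenced in the last hunk, holds input bytes that follow the end of the zlib stream; both it and unconsumed_tail have to be subtracted when counting how much compressed input was actually consumed.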

Binary file not shown.

Diff for: gitdb/test/performance/test_pack.py

+2 -1
@@ -63,7 +63,8 @@ def test_pack_random_access(self):
         st = time()
         for sha in sha_list[:max_items]:
             stream = pdb_stream(sha)
-            stream.read()
+            read_len = len(stream.read())
+            assert read_len == stream.size
             total_size += stream.size
         elapsed = time() - st
         total_kib = total_size / 1000
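
The commit message suggests recompressing the pack contents as loose objects to get a larger data set for the header-parsing path. A hedged sketch of how that could look with gitdb's database classes, assuming PackedDB, LooseObjectDB and IStream behave as their interfaces suggest; repack_as_loose and both directory arguments are illustrative.

from gitdb.base import IStream
from gitdb.db.loose import LooseObjectDB
from gitdb.db.pack import PackedDB

def repack_as_loose(pack_dir, loose_dir):
    # Write every packed object back out as a loose object, exercising the
    # loose-object header write/read path on a large, varied data set.
    pdb = PackedDB(pack_dir)
    ldb = LooseObjectDB(loose_dir)
    for sha in pdb.sha_iter():
        ostream = pdb.stream(sha)
        ldb.store(IStream(ostream.type, ostream.size, ostream))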

Diff for: gitdb/test/test_stream.py

+8 -5
@@ -144,8 +144,11 @@ def test_compressed_writer(self):
 
     def test_decompress_reader_special_case(self):
         odb = LooseObjectDB(fixture_path('objects'))
-        ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))
-
-        # if there is a bug, we will be missing one byte exactly !
-        data = ostream.read()
-        assert len(data) == ostream.size
+        for sha in ('888401851f15db0eed60eb1bc29dec5ddcace911',
+                    '7bb839852ed5e3a069966281bb08d50012fb309b',):
+            ostream = odb.stream(hex_to_bin(sha))
+
+            # if there is a bug, we will be missing one byte exactly !
+            data = ostream.read()
+            assert len(data) == ostream.size
+        # end for each loose object sha to test
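
To "churn through plenty of loose objects", as the commit message proposes, the same assertion could be run over every object in a loose database rather than just the two fixture shas. A small sketch; verify_loose_objects is a hypothetical helper and assumes LooseObjectDB.sha_iter() yields all binary shas in the database.

from gitdb.db.loose import LooseObjectDB

def verify_loose_objects(objects_dir):
    # Read back every loose object and check that the decompressed payload
    # length matches the size recorded in its header.
    odb = LooseObjectDB(objects_dir)
    for sha in odb.sha_iter():
        ostream = odb.stream(sha)
        assert len(ostream.read()) == ostream.size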
