Skip to content

Commit 0d22c80

Browse files
committed
Added an integrity test for loose objects, to search large datasets for
the issue described in gitpython-developers/GitPython#220. See the test notes for proper usage; it all depends on a useful dataset with high entropy.
1 parent 6b32bbc commit 0d22c80

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

Diff for: gitdb/test/performance/test_pack.py

+31
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
TestBigRepoR
1010
)
1111

12+
from gitdb import (
13+
MemoryDB,
14+
IStream,
15+
)
16+
from gitdb.typ import str_blob_type
1217
from gitdb.exc import UnsupportedOperation
1318
from gitdb.db.pack import PackedDB
1419
from gitdb.utils.compat import xrange
@@ -70,6 +75,32 @@ def test_pack_random_access(self):
7075
total_kib = total_size / 1000
7176
print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr)
7277

78+
@skip_on_travis_ci
79+
def test_loose_correctness(self):
80+
"""based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
81+
into the loose object db (memory).
82+
This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
83+
faster
84+
:note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
85+
data files, like archives."""
86+
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
87+
mdb = MemoryDB()
88+
for c, sha in enumerate(pdb.sha_iter()):
89+
ostream = pdb.stream(sha)
90+
# the issue only showed on larger files which are hardly compressible ...
91+
if ostream.type != str_blob_type:
92+
continue
93+
istream = IStream(ostream.type, ostream.size, ostream.stream)
94+
mdb.store(istream)
95+
assert istream.binsha == sha
96+
# this can fail ... sometimes, so the packs dataset should be huge
97+
assert len(mdb.stream(sha).read()) == ostream.size
98+
99+
if c and c % 1000 == 0:
100+
print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
101+
mdb._cache.clear()
102+
# end for each sha to copy
103+
73104
@skip_on_travis_ci
74105
def test_correctness(self):
75106
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

0 commit comments

Comments
 (0)