
Commit e746f96

committed
Fixed implementation after the design change - all tests run, but more thorough testing is needed next
1 parent a1e8044 commit e746f96

File tree

14 files changed: 275 additions & 180 deletions


lib/git/__init__.py

Lines changed: 3 additions & 0 deletions

@@ -22,5 +22,8 @@
 from git.index import *
 from git.utils import LockFile, BlockingLockFile
 
+# odb is NOT imported intentionally - if you really want it, you should get it
+# yourself as it's part of the core
+
 __all__ = [ name for name, obj in locals().items()
             if not (name.startswith('_') or inspect.ismodule(obj)) ]
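
Note: since odb is no longer re-exported, callers must now import it explicitly. A minimal sketch of what that looks like (IStream is confirmed by the commit.py hunk below; any other name should be checked against git.odb):

    import git                   # the star-import above no longer pulls in odb names
    from git.odb import IStream  # explicit import from the core package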

lib/git/objects/base.py

Lines changed: 9 additions & 8 deletions

@@ -76,12 +76,14 @@ def _set_cache_(self, attr):
         Retrieve object information
         """
         if attr == "size":
-            typename, self.size = self.repo.odb.info(self.sha)
-            assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
+            oinfo = self.repo.odb.info(self.sha)
+            self.size = oinfo.size
+            assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type)
         elif attr == "data":
-            typename, self.size, stream = self.repo.odb.stream(self.sha)
-            self.data = stream.read()    # once we have an own odb, we can delay reading
-            assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
+            ostream = self.repo.odb.stream(self.sha)
+            self.size = ostream.size
+            self.data = ostream.read()
+            assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type)
         else:
             super(Object,self)._set_cache_(attr)
 
@@ -124,14 +126,13 @@ def __repr__(self):
     def data_stream(self):
         """ :return: File Object compatible stream to the uncompressed raw data of the object
         :note: returned streams must be read in order"""
-        type, size, stream = self.repo.odb.stream(self.sha)
-        return stream
+        return self.repo.odb.stream(self.sha)
 
     def stream_data(self, ostream):
         """Writes our data directly to the given output stream
         :param ostream: File object compatible stream object.
         :return: self"""
-        type, size, istream = self.repo.odb.stream(self.sha)
+        istream = self.repo.odb.stream(self.sha)
         stream_copy(istream, ostream)
         return self
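
The odb query methods now return OInfo and OStream instances instead of bare tuples, and Object caches size and data from their attributes. A minimal read-side sketch (the repository path and sha are hypothetical placeholders):

    from git import Repo

    repo = Repo("/path/to/repo")                          # hypothetical repository
    sha = "ba206b53de5f7f3e1e1976ee022fcae2a2ba1e11"      # hypothetical object sha

    oinfo = repo.odb.info(sha)       # OInfo carries sha, type and size
    print oinfo.type, oinfo.size

    ostream = repo.odb.stream(sha)   # OStream adds a read() method on top
    data = ostream.read()            # note: streams must be read in order
    assert len(data) == ostream.size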

lib/git/objects/commit.py

Lines changed: 3 additions & 1 deletion

@@ -9,6 +9,7 @@
 import git.stats as stats
 from git.actor import Actor
 from tree import Tree
+from git.odb import IStream
 from cStringIO import StringIO
 import base
 import utils

@@ -346,7 +347,8 @@ def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
         streamlen = stream.tell()
         stream.seek(0)
 
-        new_commit.sha = repo.odb.store(cls.type, streamlen, stream, sha_as_hex=True)
+        istream = repo.odb.store(IStream(cls.type, streamlen, stream))
+        new_commit.sha = istream.sha
 
         if head:
             try:
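
On the write side, store() now takes an IStream and hands it back with its sha attribute filled in, which is exactly how create_from_tree uses it above. A hedged sketch of storing a blob this way (payload illustrative, repo as in the earlier sketch):

    from cStringIO import StringIO
    from git.odb import IStream

    data = "illustrative blob payload"
    istream = IStream("blob", len(data), StringIO(data))

    repo.odb.store(istream)          # returns the very same IStream instance
    assert istream.sha is not None   # sha is set as a side effect, hex by nature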

lib/git/odb/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -1,2 +1,6 @@
 """Initialize the object database module"""
 
+# default imports
+from db import *
+from stream import *
+
lib/git/odb/db.py

Lines changed: 58 additions & 58 deletions

@@ -8,7 +8,10 @@
 
 from stream import (
         DecompressMemMapReader,
-        FDCompressedSha1Writer
+        FDCompressedSha1Writer,
+        Sha1Writer,
+        OStream,
+        OInfo
     )
 
 from utils import (
@@ -34,11 +37,13 @@
 import os
 
 
+__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'LooseObjectDB', 'PackedDB',
+            'CompoundDB', 'ReferenceDB', 'GitObjectDB' )
+
 class ObjectDBR(object):
     """Defines an interface for object database lookup.
     Objects are identified either by hex-sha (40 bytes) or
     by sha (20 bytes)"""
-    __slots__ = tuple()
 
     def __contains__(self, sha):
         return self.has_obj
@@ -52,35 +57,34 @@ def has_object(self, sha):
         raise NotImplementedError("To be implemented in subclass")
 
     def info(self, sha):
-        """ :return: ODB_Info instance
+        """ :return: OInfo instance
         :param sha: 40 bytes hexsha or 20 bytes binary sha
         :raise BadObject:"""
         raise NotImplementedError("To be implemented in subclass")
 
     def info_async(self, input_channel):
         """Retrieve information of a multitude of objects asynchronously
         :param input_channel: Channel yielding the sha's of the objects of interest
-        :return: Channel yielding ODB_Info|InvalidODB_Info, in any order"""
+        :return: Channel yielding OInfo|InvalidOInfo, in any order"""
         raise NotImplementedError("To be implemented in subclass")
 
     def stream(self, sha):
-        """:return: ODB_OStream instance
+        """:return: OStream instance
         :param sha: 40 bytes hexsha or 20 bytes binary sha
         :raise BadObject:"""
         raise NotImplementedError("To be implemented in subclass")
 
     def stream_async(self, input_channel):
-        """Retrieve the ODB_OStream of multiple objects
+        """Retrieve the OStream of multiple objects
         :param input_channel: see ``info``
         :param max_threads: see ``ObjectDBW.store``
-        :return: Channel yielding ODB_OStream|InvalidODB_OStream instances in any order"""
+        :return: Channel yielding OStream|InvalidOStream instances in any order"""
         raise NotImplementedError("To be implemented in subclass")
 
     #} END query interface
 
 class ObjectDBW(object):
     """Defines an interface to create objects in the database"""
-    __slots__ = "_ostream"
 
     def __init__(self, *args, **kwargs):
         self._ostream = None
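
With the trivial fallback implementation removed (see the store_async hunk below), ObjectDBR and ObjectDBW are now pure interfaces. Purely as an illustration of the read contract (not part of the commit, and assuming the stream module's star-exports include OInfo and OStream), a toy in-memory backend might look like:

    from cStringIO import StringIO
    from git.odb import ObjectDBR, OInfo, OStream

    class MemoryObjectDB(ObjectDBR):
        """Hypothetical toy backend, only to illustrate the read contract"""
        def __init__(self):
            self._objects = dict()            # sha -> (type, data)

        def add(self, sha, type, data):       # sketch-only helper
            self._objects[sha] = (type, data)

        def has_object(self, sha):
            return sha in self._objects

        def info(self, sha):
            type, data = self._objects[sha]
            return OInfo(sha, type, len(data))

        def stream(self, sha):
            type, data = self._objects[sha]
            return OStream(sha, type, len(data), StringIO(data))
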
@@ -99,12 +103,12 @@ def set_ostream(self, stream):
     def ostream(self):
         """:return: overridden output stream this instance will write to, or None
         if it will write to the default stream"""
-        return self._ostream
+        return self._ostream
 
     def store(self, istream):
         """Create a new object in the database
         :return: the input istream object with its sha set to its corresponding value
-        :param istream: ODB_IStream compatible instance. If its sha is already set
+        :param istream: IStream compatible instance. If its sha is already set
         to a value, the object will just be stored in the our database format,
         in which case the input stream is expected to be in object format ( header + contents ).
         :raise IOError: if data could not be written"""
@@ -115,30 +119,23 @@ def store_async(self, input_channel):
         return right away, returning an output channel which receives the results as
         they are computed.
 
-        :return: Channel yielding your ODB_IStream which served as input, in any order.
+        :return: Channel yielding your IStream which served as input, in any order.
         The IStreams sha will be set to the sha it received during the process,
         or its error attribute will be set to the exception informing about the error.
-        :param input_channel: Channel yielding ODB_IStream instance.
+        :param input_channel: Channel yielding IStream instance.
         As the same instances will be used in the output channel, you can create a map
         between the id(istream) -> istream
         :note:As some ODB implementations implement this operation as atomic, they might
         abort the whole operation if one item could not be processed. Hence check how
         many items have actually been produced."""
-        # a trivial implementation, ignoring the threads for now
-        # TODO: add configuration to the class to determine whether we may
-        # actually use multiple threads, default False of course. If the add
-        shas = list()
-        for args in iter_info:
-            shas.append(self.store(dry_run=dry_run, sha_as_hex=sha_as_hex, *args))
-        return shas
+        raise NotImplementedError("To be implemented in subclass")
 
     #} END edit interface
 
 
 class FileDBBase(object):
     """Provides basic facilities to retrieve files of interest, including
     caching facilities to help mapping hexsha's to objects"""
-    __slots__ = ('_root_path', )
 
     def __init__(self, root_path):
         """Initialize this instance to look for its files at the given root path
@@ -164,15 +161,11 @@ def db_path(self, rela_path):
         return join(self._root_path, rela_path)
     #} END interface
 
-    #{ Utiltities
-
-
-    #} END utilities
 
 
 class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
     """A database which operates on loose object files"""
-    __slots__ = ('_hexsha_to_file', '_fd_open_flags')
+
     # CONFIGURATION
     # chunks in which data will be copied between streams
     stream_chunk_size = chunk_size
@@ -238,21 +231,26 @@ def _map_loose_object(self, sha):
         finally:
             os.close(fd)
         # END assure file is closed
+
+    def set_ostream(self, stream):
+        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+        if stream is not None and not isinstance(stream, Sha1Writer):
+            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
+        return super(LooseObjectDB, self).set_ostream(stream)
 
     def info(self, sha):
         m = self._map_loose_object(sha)
         try:
-            return loose_object_header_info(m)
+            type, size = loose_object_header_info(m)
+            return OInfo(sha, type, size)
         finally:
             m.close()
         # END assure release of system resources
 
-    def object(self, sha):
+    def stream(self, sha):
         m = self._map_loose_object(sha)
-        reader = DecompressMemMapReader(m, close_on_deletion = True)
-        type, size = reader.initialize()
-
-        return type, size, reader
+        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+        return OStream(sha, type, size, stream)
 
     def has_object(self, sha):
         try:
@@ -263,27 +261,33 @@ def has_object(self, sha):
         # END check existance
 
     def store(self, istream):
-        # open a tmp file to write the data to
-        # todo: implement ostream properly
-        fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
-        writer = FDCompressedSha1Writer(fd)
+        """note: The sha we produce will be hex by nature"""
+        assert istream.sha is None, "Direct istream writing not yet implemented"
+        tmp_path = None
+        writer = self.ostream()
+        if writer is None:
+            # open a tmp file to write the data to
+            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+            writer = FDCompressedSha1Writer(fd)
+        # END handle custom writer
 
         try:
-            write_object(type, size, stream, writer,
-                        close_target_stream=True, chunk_size=self.stream_chunk_size)
-        except:
-            os.remove(tmp_path)
-            raise
-        # END assure tmpfile removal on error
-
+            try:
+                write_object(istream.type, istream.size, istream.read, writer.write,
+                            chunk_size=self.stream_chunk_size)
+            except:
+                if tmp_path:
+                    os.remove(tmp_path)
+                raise
+            # END assure tmpfile removal on error
+        finally:
+            if tmp_path:
+                writer.close()
+        # END assure target stream is closed
 
-        # in dry-run mode, we delete the file afterwards
         sha = writer.sha(as_hex=True)
 
-        if dry_run:
-            os.remove(tmp_path)
-        else:
-            # rename the file into place
+        if tmp_path:
             obj_path = self.db_path(self.object_path(sha))
             obj_dir = dirname(obj_path)
             if not isdir(obj_dir):

@@ -292,11 +296,8 @@ def store(self, istream):
             rename(tmp_path, obj_path)
         # END handle dry_run
 
-        if not sha_as_hex:
-            sha = hex_to_bin(sha)
-        # END handle sha format
-
-        return sha
+        istream.sha = sha
+        return istream
 
 
 class PackedDB(FileDBBase, ObjectDBR):
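
LooseObjectDB.store now honours a caller-provided output stream: if set_ostream() installed a Sha1Writer, the data goes through it and the tmp-file/rename path is skipped entirely. A hedged sketch of the type check (it assumes Sha1Writer can be instantiated directly):

    from git.odb import LooseObjectDB
    from git.odb.stream import Sha1Writer

    db = LooseObjectDB("/tmp/objects")        # hypothetical loose-object directory

    db.set_ostream(Sha1Writer())              # accepted: it is a Sha1Writer
    db.set_ostream(None)                      # accepted: back to the default tmp-file writer
    try:
        db.set_ostream(open("/dev/null", "w"))    # plain file objects are rejected
    except TypeError:
        pass                                  # as documented above
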
@@ -320,18 +321,17 @@ class GitObjectDB(LooseObjectDB):
     :note: for now, we use the git command to do all the lookup, just until he
     have packs and the other implementations
     """
-    __slots__ = ('_git', )
     def __init__(self, root_path, git):
         """Initialize this instance with the root and a git command"""
         super(GitObjectDB, self).__init__(root_path)
         self._git = git
 
     def info(self, sha):
-        discard, type, size = self._git.get_object_header(sha)
-        return type, size
+        t = self._git.get_object_header(sha)
+        return OInfo(t[0], t[1], t[2])
 
-    def object(self, sha):
+    def stream(self, sha):
         """For now, all lookup is done by git itself"""
-        discard, type, size, stream = self._git.stream_object_data(sha)
-        return type, size, stream
+        t = self._git.stream_object_data(sha)
+        return OStream(t[0], t[1], t[2], t[3])
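
GitObjectDB wraps the raw tuples coming back from the git command into the same OInfo/OStream shape, so both backends look identical to callers (per the code above, t[0] is the sha). A hedged wiring sketch; the constructor arguments and paths are illustrative:

    from git import Git
    from git.odb.db import GitObjectDB

    git_cmd = Git("/path/to/repo")                           # hypothetical working tree
    db = GitObjectDB("/path/to/repo/.git/objects", git_cmd)  # root path + git command

    sha = "ba206b53de5f7f3e1e1976ee022fcae2a2ba1e11"         # hypothetical hexsha
    oinfo = db.info(sha)        # OInfo built from git.get_object_header()
    ostream = db.stream(sha)    # OStream built from git.stream_object_data()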

lib/git/odb/fun.py

Lines changed: 20 additions & 26 deletions

@@ -21,6 +21,8 @@
 # used when dealing with larger streams
 chunk_size = 1000*1000
 
+__all__ = ('is_loose_object', 'loose_object_header_info', 'object_header_info',
+            'write_object' )
 
 #{ Routines
 

@@ -73,42 +75,34 @@ def object_header_info(m):
     raise BadObjectType(type_id)
     # END handle exceptions
 
-def write_object(type, size, source_stream, target_stream, close_target_stream=True,
-                    chunk_size=chunk_size):
+def write_object(type, size, read, write, chunk_size=chunk_size):
     """Write the object as identified by type, size and source_stream into the
     target_stream
 
     :param type: type string of the object
     :param size: amount of bytes to write from source_stream
-    :param source_stream: stream as file-like object providing at least size bytes
-    :param target_stream: stream as file-like object to receive the data
+    :param read: read method of a stream providing the content data
+    :param write: write method of the output stream
     :param close_target_stream: if True, the target stream will be closed when
     the routine exits, even if an error is thrown
-    :param chunk_size: size of chunks to read from source. Larger values can be beneficial
-    for io performance, but cost more memory as well
     :return: The actual amount of bytes written to stream, which includes the header and a trailing newline"""
     tbw = 0        # total num bytes written
     dbw = 0        # num data bytes written
-    try:
-        # WRITE HEADER: type SP size NULL
-        tbw += target_stream.write("%s %i\0" % (type, size))
-
-        # WRITE ALL DATA UP TO SIZE
-        while True:
-            cs = min(chunk_size, size-dbw)
-            data_len = target_stream.write(source_stream.read(cs))
-            dbw += data_len
-            if data_len < cs or dbw == size:
-                tbw += dbw
-                break
-            # END check for stream end
-        # END duplicate data
-        return tbw
-    finally:
-        if close_target_stream:
-            target_stream.close()
-        # END handle stream closing
-    # END assure file was closed
 
+    # WRITE HEADER: type SP size NULL
+    tbw += write("%s %i\0" % (type, size))
+
+    # WRITE ALL DATA UP TO SIZE
+    while True:
+        cs = min(chunk_size, size-dbw)
+        data_len = write(read(cs))
+        dbw += data_len
+        if data_len < cs or dbw == size:
+            tbw += dbw
+            break
+        # END check for stream end
+    # END duplicate data
+    return tbw
+
 
 #} END routines
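
write_object now receives bound read/write callables instead of stream objects, so stream lifetime (and closing) is entirely the caller's business. A minimal in-memory sketch; note that the function expects write() to return the byte count, which plain StringIO does not do, hence the small adapter (payload illustrative):

    from cStringIO import StringIO
    from git.odb.fun import write_object

    class CountingWriter(object):
        """Adapter: returns the byte count from write(), as write_object requires"""
        def __init__(self):
            self.buf = StringIO()
        def write(self, data):
            self.buf.write(data)
            return len(data)

    data = "illustrative content"
    source = StringIO(data)
    target = CountingWriter()

    # pass the bound methods; the caller keeps ownership of both streams
    write_object("blob", len(data), source.read, target.write)
    assert target.buf.getvalue() == "blob %i\0%s" % (len(data), data)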
