@@ -8,7 +8,10 @@

from stream import (
        DecompressMemMapReader,
-       FDCompressedSha1Writer
+       FDCompressedSha1Writer,
+       Sha1Writer,
+       OStream,
+       OInfo
    )

from utils import (

@@ -34,11 +37,13 @@
import os


+ __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'LooseObjectDB', 'PackedDB',
+            'CompoundDB', 'ReferenceDB', 'GitObjectDB')
+
class ObjectDBR(object):
    """Defines an interface for object database lookup.
    Objects are identified either by hex-sha (40 bytes) or
    by sha (20 bytes)"""
-   __slots__ = tuple()

    def __contains__(self, sha):
        return self.has_object(sha)
@@ -52,35 +57,34 @@ def has_object(self, sha):
        raise NotImplementedError("To be implemented in subclass")

    def info(self, sha):
-       """:return: ODB_Info instance
+       """:return: OInfo instance
        :param sha: 40 bytes hexsha or 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def info_async(self, input_channel):
        """Retrieve information about a multitude of objects asynchronously
        :param input_channel: Channel yielding the shas of the objects of interest
-       :return: Channel yielding ODB_Info|InvalidODB_Info, in any order"""
+       :return: Channel yielding OInfo|InvalidOInfo, in any order"""
        raise NotImplementedError("To be implemented in subclass")

    def stream(self, sha):
-       """:return: ODB_OStream instance
+       """:return: OStream instance
        :param sha: 40 bytes hexsha or 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def stream_async(self, input_channel):
-       """Retrieve the ODB_OStream of multiple objects
+       """Retrieve the OStream of multiple objects
        :param input_channel: see ``info``
        :param max_threads: see ``ObjectDBW.store``
-       :return: Channel yielding ODB_OStream|InvalidODB_OStream instances in any order"""
+       :return: Channel yielding OStream|InvalidOStream instances in any order"""
        raise NotImplementedError("To be implemented in subclass")

    #} END query interface

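
# Usage sketch (illustrative, not part of this commit): reading through the
# query interface above. The import path, the placeholder sha and the paths
# used here are assumptions, not taken from the diff.
from db import LooseObjectDB                       # assumed module path

db = LooseObjectDB("/path/to/repo/.git/objects")   # hypothetical path
hexsha = "a" * 40                                  # placeholder hexsha
if db.has_object(hexsha):
    info = db.info(hexsha)                         # OInfo carries sha, type and size
    ostream = db.stream(hexsha)                    # OStream additionally wraps a stream
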
class ObjectDBW(object):
    """Defines an interface to create objects in the database"""
-   __slots__ = "_ostream"

    def __init__(self, *args, **kwargs):
        self._ostream = None
@@ -99,12 +103,12 @@ def set_ostream(self, stream):
    def ostream(self):
        """:return: overridden output stream this instance will write to, or None
        if it will write to the default stream"""
-       return self._ostream
+       return self._ostream

    def store(self, istream):
        """Create a new object in the database
        :return: the input istream object with its sha set to its corresponding value
-       :param istream: ODB_IStream compatible instance. If its sha is already set
+       :param istream: IStream compatible instance. If its sha is already set
        to a value, the object will just be stored in our database format,
        in which case the input stream is expected to be in object format (header + contents).
        :raise IOError: if data could not be written"""

@@ -115,30 +119,23 @@ def store_async(self, input_channel):
        return right away, returning an output channel which receives the results as
        they are computed.

-       :return: Channel yielding your ODB_IStream which served as input, in any order.
+       :return: Channel yielding your IStream which served as input, in any order.
        The IStream's sha will be set to the sha it received during the process,
        or its error attribute will be set to the exception informing about the error.
-       :param input_channel: Channel yielding ODB_IStream instance.
+       :param input_channel: Channel yielding IStream instances.
        As the same instances will be used in the output channel, you can create a map
        between id(istream) -> istream
        :note: As some ODB implementations implement this operation as atomic, they might
        abort the whole operation if one item could not be processed. Hence check how
        many items have actually been produced."""
-       # a trivial implementation, ignoring the threads for now
-       # TODO: add configuration to the class to determine whether we may
-       # actually use multiple threads, default False of course. If the add
-       shas = list()
-       for args in iter_info:
-           shas.append(self.store(dry_run=dry_run, sha_as_hex=sha_as_hex, *args))
-       return shas
+       raise NotImplementedError("To be implemented in subclass")

    #} END edit interface

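
# Usage sketch (illustrative, not part of this commit): storing through the
# edit interface. IStream is only referenced by the docstrings above, so its
# import location and constructor signature are assumptions.
from cStringIO import StringIO
from db import LooseObjectDB                          # assumed module path
from stream import IStream                            # assumed location

db = LooseObjectDB("/path/to/repo/.git/objects")      # hypothetical path
data = "my file contents"
istream = IStream("blob", len(data), StringIO(data))  # assumed signature
istream = db.store(istream)                           # same instance back, sha now set
print istream.sha
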
class FileDBBase(object):
    """Provides basic facilities to retrieve files of interest, including
    caching facilities to help map hexshas to objects"""
-   __slots__ = ('_root_path', )

    def __init__(self, root_path):
        """Initialize this instance to look for its files at the given root path

@@ -164,15 +161,11 @@ def db_path(self, rela_path):
        return join(self._root_path, rela_path)
    #} END interface

-   #{ Utiltities
-
-
-   #} END utilities

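
# Sketch (illustrative): db_path resolves a path relative to the configured
# root; LooseObjectDB builds its fan-out object paths on top of this.
# Import path and the relative path below are hypothetical.
from db import FileDBBase                     # assumed module path

fdb = FileDBBase("/repo/.git/objects")        # hypothetical root
print fdb.db_path("ca/fe90")                  # -> /repo/.git/objects/ca/fe90
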
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
    """A database which operates on loose object files"""
-   __slots__ = ('_hexsha_to_file', '_fd_open_flags')
+
    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

@@ -238,21 +231,26 @@ def _map_loose_object(self, sha):
        finally:
            os.close(fd)
        # END assure file is closed
+
+   def set_ostream(self, stream):
+       """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+       if stream is not None and not isinstance(stream, Sha1Writer):
+           raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
+       return super(LooseObjectDB, self).set_ostream(stream)

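
# Sketch (illustrative): only Sha1Writer compatible streams are accepted,
# because store() asks the writer for the resulting sha afterwards.
# Import paths are assumptions; Sha1Writer comes from the import block above.
from db import LooseObjectDB                  # assumed module path
from stream import Sha1Writer

db = LooseObjectDB("/repo/.git/objects")      # hypothetical path
db.set_ostream(Sha1Writer())                  # fine: hashes everything written
try:
    db.set_ostream(open("/dev/null", "wb"))   # plain files are rejected
except TypeError:
    pass
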
    def info(self, sha):
        m = self._map_loose_object(sha)
        try:
-           return loose_object_header_info(m)
+           type, size = loose_object_header_info(m)
+           return OInfo(sha, type, size)
        finally:
            m.close()
        # END assure release of system resources

-   def object(self, sha):
+   def stream(self, sha):
        m = self._map_loose_object(sha)
-       reader = DecompressMemMapReader(m, close_on_deletion=True)
-       type, size = reader.initialize()
-
-       return type, size, reader
+       type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
+       return OStream(sha, type, size, stream)

    def has_object(self, sha):
        try:

@@ -263,27 +261,33 @@ def has_object(self, sha):
        # END check existence

    def store(self, istream):
-       # open a tmp file to write the data to
-       # todo: implement ostream properly
-       fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
-       writer = FDCompressedSha1Writer(fd)
+       """:note: the sha we produce will be hex by nature"""
+       assert istream.sha is None, "Direct istream writing not yet implemented"
+       tmp_path = None
+       writer = self.ostream()
+       if writer is None:
+           # open a tmp file to write the data to
+           fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+           writer = FDCompressedSha1Writer(fd)
+       # END handle custom writer

        try:
-           write_object(type, size, stream, writer,
-                        close_target_stream=True, chunk_size=self.stream_chunk_size)
-       except:
-           os.remove(tmp_path)
-           raise
-       # END assure tmpfile removal on error
-
+           try:
+               write_object(istream.type, istream.size, istream.read, writer.write,
+                            chunk_size=self.stream_chunk_size)
+           except:
+               if tmp_path:
+                   os.remove(tmp_path)
+               raise
+           # END assure tmpfile removal on error
+       finally:
+           if tmp_path:
+               writer.close()
+       # END assure target stream is closed

-       # in dry-run mode, we delete the file afterwards
        sha = writer.sha(as_hex=True)

-       if dry_run:
-           os.remove(tmp_path)
-       else:
-           # rename the file into place
+       if tmp_path:
            obj_path = self.db_path(self.object_path(sha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):

@@ -292,11 +296,8 @@ def store(self, istream):
            rename(tmp_path, obj_path)
        # END handle dry_run

-       if not sha_as_hex:
-           sha = hex_to_bin(sha)
-       # END handle sha format
-
-       return sha
+       istream.sha = sha
+       return istream

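
# The store() above follows a classic safe-write pattern: stream into a unique
# temp file inside the target directory, then rename it into place so readers
# never observe a partially written object. A standalone sketch of the same
# idea (function and argument names are illustrative):
import os
import tempfile

def atomic_write(root, name, data):
    fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=root)
    try:
        os.write(fd, data)
    finally:
        os.close(fd)
    # atomic as long as tmp_path and the target share a filesystem
    os.rename(tmp_path, os.path.join(root, name))
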
class PackedDB(FileDBBase, ObjectDBR):

@@ -320,18 +321,17 @@ class GitObjectDB(LooseObjectDB):
    :note: for now, we use the git command to do all the lookup, just until we
    have packs and the other implementations
    """
-   __slots__ = ('_git', )
    def __init__(self, root_path, git):
        """Initialize this instance with the root and a git command"""
        super(GitObjectDB, self).__init__(root_path)
        self._git = git

    def info(self, sha):
-       discard, type, size = self._git.get_object_header(sha)
-       return type, size
+       t = self._git.get_object_header(sha)
+       return OInfo(t[0], t[1], t[2])

-   def object(self, sha):
+   def stream(self, sha):
        """For now, all lookup is done by git itself"""
-       discard, type, size, stream = self._git.stream_object_data(sha)
-       return type, size, stream
+       t = self._git.stream_object_data(sha)
+       return OStream(t[0], t[1], t[2], t[3])
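
# Design note (illustrative): the git-command lookups return plain tuples in
# the order (hexsha, type, size[, stream]); wrapping them in OInfo/OStream
# gives every database in this module the same return types. The values below
# are placeholders, not real object data.
from stream import OInfo                      # as imported at the top of this diff

t = ("a" * 40, "blob", 42)                    # placeholder header tuple
info = OInfo(t[0], t[1], t[2])
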