88
99from fun import (
1010 pack_object_header_info ,
11+ stream_copy ,
12+ chunk_size ,
1113 OFS_DELTA ,
1214 REF_DELTA
1315 )
2022 )
2123from stream import (
2224 DecompressMemMapReader ,
25+ NullStream
2326 )
2427
2528from struct import (
3437
def pack_object_at(data, as_stream):
    """
    Read the header of the pack entry located at the beginning of ``data`` and
    wrap it into an info or stream object.

    :return: tuple(num_header_bytes, PackInfo|PackStream)
        Tuple of number of additional bytes read from data until the data stream begins
        and object of the correct type according to the type of the object.
        If as_stream is True, the object will contain a stream, allowing the
        data to be read decompressed.
    :param data: randomly accessible data at which the header of an object can be read
    :param as_stream: if True, a stream object will be returned that can read
        the data, otherwise you receive an info object only
    :note: a bit redundant, but it needs to be as fast as possible !"""
    type_id, uncomp_size, data_offset = pack_object_header_info(data)
    total_offset = None     # set in each branch: offset at which the data stream begins
    obj = None
    if type_id == OFS_DELTA:
        # The distance to the base object is stored most-significant group
        # first: every byte carries 7 payload bits, a set high bit announces
        # another byte, and each continuation adds a bias of one before the
        # shift (see the git pack format description). Accumulating the
        # groups as `(c & 0x7f) << s` with a growing shift does not decode it.
        i = data_offset
        c = ord(data[i])
        i += 1
        delta_offset = c & 0x7f
        while c & 0x80:
            c = ord(data[i])
            i += 1
            delta_offset += 1
            delta_offset = (delta_offset << 7) + (c & 0x7f)
        # END character loop
        total_offset = i
        if as_stream:
            stream = DecompressMemMapReader(buffer(data, total_offset), False, uncomp_size)
            obj = ODeltaPackStream(type_id, uncomp_size, delta_offset, stream)
        else:
            obj = ODeltaPackInfo(type_id, uncomp_size, delta_offset)
        # END handle stream
    elif type_id == REF_DELTA:
        # the 20 bytes following the entry header name the base object
        total_offset = data_offset + 20
        ref_sha = data[data_offset:total_offset]

        if as_stream:
            stream = DecompressMemMapReader(buffer(data, total_offset), False, uncomp_size)
            obj = ODeltaPackStream(type_id, uncomp_size, ref_sha, stream)
        else:
            obj = ODeltaPackInfo(type_id, uncomp_size, ref_sha)
        # END handle stream
    else:
        # assume it's a base object: the data stream directly follows the header
        total_offset = data_offset
        if as_stream:
            # the uncompressed size is already known from the entry header,
            # so it is handed to the reader explicitly
            stream = DecompressMemMapReader(buffer(data, total_offset), False, uncomp_size)
            obj = OPackStream(type_id, uncomp_size, stream)
        else:
            obj = OPackInfo(type_id, uncomp_size)
        # END handle as_stream
    # END handle type id

    return total_offset, obj
94+
8195
8296#} END utilities
8397
@@ -267,7 +281,8 @@ class PackFile(LazyMixin):
267281 __slots__ = ('_packpath' , '_data' , '_size' , '_version' )
268282
269283 # offset into our data at which the first object starts
270- _first_object_offset = 3 * 4
284+ _first_object_offset = 3 * 4 # header bytes
285+ _footer_size = 20 # final sha
271286
272287 def __init__ (self , packpath ):
273288 self ._packpath = packpath
@@ -287,16 +302,28 @@ def _set_cache_(self, attr):
287302 assert self ._version in (2 , 3 ), "Cannot handle pack format version %i" % self ._version
288303 # END handle header
289304
def _iter_objects(self, start_offset, as_stream=True):
    """Generator walking the pack's objects one after another.

    :param start_offset: offset of the first object to visit; any false
        value (such as 0) starts at ``_first_object_offset`` instead
    :param as_stream: if True, the stream of each object is rewound with
        ``seek(0)`` before the object is yielded
    :return: yields the object produced by ``pack_object_at`` for every
        offset visited, until the footer region of the data is reached"""
    pack_data = self._data
    # the last _footer_size bytes are not object data, stop in front of them
    end_of_objects = len(pack_data) - self._footer_size
    if start_offset:
        position = start_offset
    else:
        position = self._first_object_offset
    # END pick initial offset

    sink = NullStream()
    while position < end_of_objects:
        header_size, entry = pack_object_at(buffer(pack_data, position), True)

        # Scrub the stream to its end: this decompresses the object, but it is
        # what tells us how many compressed bytes the object occupies, and
        # with that where the next object begins
        stream_copy(entry.read, sink.write, entry.size, chunk_size)
        position = position + header_size + entry.stream.compressed_bytes_read()

        # A caller that wants to read from the stream gets it reset first.
        # Otherwise the stream object is handed out as it is - it is derived
        # from the info object
        if as_stream:
            entry.stream.seek(0)
        # END handle stream reset
        yield entry
    # END until we have read everything
301328
302329 #{ Interface
@@ -329,6 +356,15 @@ def stream(self, offset):
329356 :return: OPackStream instance, the actual type differs depending on the type_id attribute"""
330357 raise NotImplementedError ()
331358
def stream_iter(self, start_offset=0):
    """
    Iterate the objects of this pack directly.

    :return: iterator yielding OPackStream compatible instances, allowing
        to access the data in the pack directly.
    :param start_offset: offset to the first object to iterate. If 0, iteration
        starts at the very first object in the pack.
    :note: Iterating a pack directly is costly as the datastream has to be decompressed
        to determine the bounds between the objects"""
    object_iterator = self._iter_objects(start_offset, as_stream=True)
    return object_iterator
367+
332368 #} END Read-Database like Interface
333369
334370
0 commit comments