33 BadObject ,
44 )
55from util import (
6+ zlib ,
67 LockedFD ,
78 LazyMixin ,
89 unpack_from ,
1213from fun import (
1314 pack_object_header_info ,
1415 type_id_to_type_map ,
16+ write_object ,
1517 stream_copy ,
1618 chunk_size ,
1719 delta_types ,
3133from stream import (
3234 DecompressMemMapReader ,
3335 DeltaApplyReader ,
36+ Sha1Writer ,
3437 NullStream ,
3538 )
3639
3740from struct import (
3841 pack ,
3942 )
4043
41- __all__ = ('PackIndexFile' , 'PackFile' )
44+ import os
45+ __all__ = ('PackIndexFile' , 'PackFile' , 'PackEntity' )
4246
4347
4448
@@ -237,6 +241,10 @@ def size(self):
237241 """:return: amount of objects referred to by this index"""
238242 return self ._fanout_table [255 ]
239243
def path(self):
    """Tell where this index file lives.

    :return: the path stored in ``_indexpath``"""
    return self._indexpath
247+
def packfile_checksum(self):
    """Read the pack file's checksum as recorded in the index data.

    :return: the 20 bytes located 40 to 20 bytes before the end of the
        index data, documented as the sha1 hash of the pack file"""
    return self._data[-40:-20]
@@ -288,8 +296,8 @@ class PackFile(LazyMixin):
288296 __slots__ = ('_packpath' , '_data' , '_size' , '_version' )
289297
290298 # offset into our data at which the first object starts
291- _first_object_offset = 3 * 4 # header bytes
292- _footer_size = 20 # final sha
299+ first_object_offset = 3 * 4 # header bytes
300+ footer_size = 20 # final sha
293301
294302 def __init__ (self , packpath ):
295303 self ._packpath = packpath
@@ -312,8 +320,8 @@ def _set_cache_(self, attr):
312320 def _iter_objects (self , start_offset , as_stream = True ):
313321 """Handle the actual iteration of objects within this pack"""
314322 data = self ._data
315- content_size = len (data ) - self ._footer_size
316- cur_offset = start_offset or self ._first_object_offset
323+ content_size = len (data ) - self .footer_size
324+ cur_offset = start_offset or self .first_object_offset
317325
318326 null = NullStream ()
319327 while cur_offset < content_size :
@@ -343,10 +351,18 @@ def version(self):
343351 """:return: the version of this pack"""
344352 return self ._version
345353
def data(self):
    """Expose the raw contents of this pack.

    :return: the object held in ``_data``. It is meant to be treated as
        read-only, offers random access and usually is a memory map"""
    return self._data
358+
def checksum(self):
    """Read the checksum stored in the pack's footer.

    :return: the final 20 bytes of the pack data, documented as the sha1
        hash on all object sha's contained in this file"""
    return self._data[-20:]
349-
362+
def path(self):
    """Tell where this pack file lives.

    :return: the path stored in ``_packpath``"""
    return self._packpath
350366 #} END pack information
351367
352368 #{ Pack Specific
@@ -383,13 +399,13 @@ def info(self, offset):
383399 """Retrieve information about the object at the given file-absolute offset
384400 :param offset: byte offset
385401 :return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
386- return pack_object_at (self ._data , offset or self ._first_object_offset , False )
402+ return pack_object_at (self ._data , offset or self .first_object_offset , False )
387403
def stream(self, offset):
    """Fetch the object at the given offset as a stream, along with its information.

    :param offset: byte offset of the object. A false value (0 or None)
        selects the first object, located at ``first_object_offset``
    :return: OPackStream instance, the actual type differs depending on the
        type_id attribute"""
    start = offset or self.first_object_offset
    return pack_object_at(self._data, start, True)
393409
394410 def stream_iter (self , start_offset = 0 ):
395411 """:return: iterator yielding OPackStream compatible instances, allowing
@@ -403,7 +419,7 @@ def stream_iter(self, start_offset=0):
403419 #} END Read-Database like Interface
404420
405421
406- class PackFileEntity (object ):
422+ class PackEntity (object ):
407423 """Combines the PackIndexFile and the PackFile into one, allowing the
408424 actual objects to be resolved and iterated"""
409425
@@ -412,11 +428,12 @@ class PackFileEntity(object):
412428 IndexFileCls = PackIndexFile
413429 PackFileCls = PackFile
414430
def __init__(self, pack_or_index_path):
    """Set up the index and pack file objects belonging to one pack.

    :param pack_or_index_path: path to either the pack or the index file.
        Its extension is stripped and replaced by .idx and .pack to locate
        both files"""
    stem = os.path.splitext(pack_or_index_path)[0]
    # the index is created before the pack, each through its overridable factory
    self._index = self.IndexFileCls("%s.idx" % stem)
    self._pack = self.PackFileCls("%s.pack" % stem)
418436
419-
420437 def _sha_to_index (self , sha ):
421438 """:return: index for the given sha, or raise"""
422439 index = self ._index .sha_to_index (sha )
@@ -426,12 +443,20 @@ def _sha_to_index(self, sha):
426443
def _iter_objects(self, as_stream):
    """Walk every entry of our index and yield the matching object.

    :param as_stream: handed on to ``_object`` unchanged, selecting whether
        OInfo or OStream instances are produced
    :return: generator yielding one object per index entry, in index order"""
    index_file = self._index
    resolve = self._object
    total = index_file.size()
    position = 0
    while position < total:
        # pass the position along so _object does not have to look the sha up again
        yield resolve(index_file.sha(position), as_stream, position)
        position += 1
    # END for each index
452+
453+ def _object (self , sha , as_stream , index = - 1 ):
454+ """:return: OInfo or OStream object providing information about the given sha
455+ :param index: if not -1, its assumed to be the sha's index in the IndexFile"""
433456 # its a little bit redundant here, but it needs to be efficient
434- offset = self ._index .offset (self ._sha_to_index (sha ))
457+ if index < 0 :
458+ index = self ._sha_to_index (sha )
459+ offset = self ._index .offset (index )
435460 type_id , uncomp_size , data_rela_offset = pack_object_header_info (buffer (self ._pack ._data , offset ))
436461 if as_stream :
437462 if type_id not in delta_types :
@@ -447,7 +472,7 @@ def _object(self, sha, as_stream):
447472 offset , src_size = msb_size (buf )
448473 offset , target_size = msb_size (buf , offset )
449474
450- streams [0 ].seek (0 ) # assure it can be read by the delta reader
475+ streams [0 ].stream . seek (0 ) # assure it can be read by the delta reader
451476 dstream = DeltaApplyReader .new (streams )
452477
453478 return OStream (sha , dstream .type , target_size , dstream )
@@ -476,20 +501,79 @@ def info(self, sha):
476501 """Retrieve information about the object identified by the given sha
477502 :param sha: 20 byte sha1
478503 :raise BadObject:
479- :return: OInfo instance"""
504+ :return: OInfo instance, with 20 byte sha """
480505 return self ._object (sha , as_stream = False )
481506
def stream(self, sha):
    """Look up the object with the given sha and return it as a stream
    together with its information.

    :param sha: 20 byte sha1
    :raise BadObject:
    :return: OStream instance, with 20 byte sha"""
    resolve = self._object
    return resolve(sha, as_stream=True)
488513
489514 #} END Read-Database like Interface
490515
491516 #{ Interface
492-
517+
def pack(self):
    """Give access to the pack file half of this entity.

    :return: the instance stored in ``_pack``"""
    return self._pack
521+
def index(self):
    """Give access to the index file half of this entity.

    :return: the instance stored in ``_index``"""
    return self._index
525+
def is_valid_stream(self, sha, use_crc=False):
    """Verify that the stream at the given sha is valid.

    :param sha: 20 byte sha1 of the object whose stream to verify
    :param use_crc: if True, the index' crc for the sha is compared against
        a crc32 computed over the object's compressed bytes in the pack. If
        the object is a delta, this only verifies that the delta's data is
        valid, not the data of the actual undeltified object, as it depends
        on more than just this stream.
        If False, the object is decompressed and written in loose object
        form into a sha1 writer; the resulting sha must match the given sha
    :return: True if the stream is valid, False otherwise
    :raise UnsupportedOperation: If the index is version 1 only
        (NOTE(review): not checked here - presumably raised by the index'
        crc() method, confirm against PackIndexFile)
    :raise BadObject: sha was not found"""
    if not use_crc:
        shawriter = Sha1Writer()
        stream = self._object(sha, as_stream=True)
        # write a loose object, which is the basis for the sha
        write_object(stream.type, stream.size, stream.read, shawriter.write)
        return shawriter.sha(as_hex=False) == sha
    # END handle sha verification

    indexfile = self._index
    index = self._sha_to_index(sha)
    offset = indexfile.offset(index)
    pack_data = self._pack.data()

    # The object's compressed data ends where the next object in the pack
    # begins, or at the footer for the last object. Index entries are looked
    # up by sha, so the neighbouring index entry is not necessarily the
    # neighbour within the pack - search for the closest following offset.
    # NOTE: this is a linear scan over the index for each call.
    next_offset = len(pack_data) - self._pack.footer_size
    offset_at = indexfile.offset
    count = indexfile.size()
    position = 0
    while position < count:
        candidate = offset_at(position)
        if offset < candidate < next_offset:
            next_offset = candidate
        position += 1
    # END find end of object data

    crc_value = indexfile.crc(index)
    this_crc_value = 0
    crc_update = zlib.crc32

    # create the current crc value, on the compressed object data
    # Read it in chunks, without copying the data
    cur_pos = offset
    while cur_pos < next_offset:
        size = min(chunk_size, next_offset - cur_pos)
        # crc32 returns the updated checksum, it has to be carried along
        this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value)
        cur_pos += size
    # END window size loop

    # crc32 may yield a signed value depending on the python version, hence
    # both sides are compared as unsigned 32 bit integers
    return (this_crc_value & 0xffffffff) == (crc_value & 0xffffffff)
576+
493577 def info_iter (self ):
494578 """:return: Iterator over all objects in this pack. The iterator yields
495579 OInfo instances"""
0 commit comments