11"""Contains PackIndexFile and PackFile implementations"""
22from gitdb .exc import (
3- BadObject ,
3+ BadObject ,
4+ UnsupportedOperation
45 )
56from util import (
67 zlib ,
4142 pack ,
4243 )
4344
45+ from itertools import izip
46+ import array
4447import os
4548__all__ = ('PackIndexFile' , 'PackFile' , 'PackEntity' )
4649
@@ -253,6 +256,21 @@ def indexfile_checksum(self):
253256 """:return: 20 byte sha representing the sha1 hash of this index file"""
254257 return self ._data [- 20 :]
255258
def offsets(self):
    """:return: sequence of all offsets in the order in which they were written
    :note: return value can be random accessed, but may be immutable"""
    if self._version == 2:
        # local import keeps this method self-contained
        import sys

        # Read the offset table straight into an array of C unsigned ints.
        # NOTE(review): this relies on 'I' items being 4 bytes wide, which
        # holds on common platforms -- confirm if exotic ABIs matter.
        a = array.array('I')
        a.fromstring(buffer(self._data, self._pack_offset, self._pack_64_offset - self._pack_offset))

        # The table is stored in network (big-endian) byte order, so the
        # items only need swapping when the host is little-endian. Swapping
        # unconditionally would corrupt the values on big-endian hosts.
        if sys.byteorder == 'little':
            a.byteswap()
        # END handle host byte order
        return a
    else:
        # other index versions: collect the offsets one index at a time
        return tuple(self.offset(index) for index in xrange(self.size()))
    # END handle version
273+
256274 def sha_to_index (self , sha ):
257275 """
258276 :return: index usable with the ``offset`` or ``entry`` method, or None
@@ -419,11 +437,14 @@ def stream_iter(self, start_offset=0):
419437 #} END Read-Database like Interface
420438
421439
422- class PackEntity (object ):
440+ class PackEntity (LazyMixin ):
423441 """Combines the PackIndexFile and the PackFile into one, allowing the
424442 actual objects to be resolved and iterated"""
425443
426- __slots__ = ('_index' , '_pack' )
444+ __slots__ = ( '_index' , # our index file
445+ '_pack' , # our pack file
446+ '_offset_map' # on demand dict mapping one offset to the next consecutive one
447+ )
427448
428449 IndexFileCls = PackIndexFile
429450 PackFileCls = PackFile
@@ -433,6 +454,28 @@ def __init__(self, pack_or_index_path):
433454 basename , ext = os .path .splitext (pack_or_index_path )
434455 self ._index = self .IndexFileCls ("%s.idx" % basename ) # PackIndexFile instance
435456 self ._pack = self .PackFileCls ("%s.pack" % basename ) # corresponding PackFile instance
457+
def _set_cache_(self, attr):
    """Build the on-demand ``_offset_map`` attribute.

    The map associates every object offset found in the index with the
    next larger offset. The largest offset maps to the position at which
    the pack's footer begins (pack data length minus footer size).

    :param attr: name of the attribute to fill; it is not inspected, as
        ``_offset_map`` is currently the only cached attribute"""
    ordered = sorted(self._index.offsets())
    pack_end = len(self._pack.data()) - self._pack.footer_size
    assert ordered, "Cannot handle empty indices"

    # Pair each offset with its successor by zipping the sorted list with
    # an iterator that is one step ahead. For a single offset the pairing
    # is empty and only the final assignment below contributes.
    successors = iter(ordered)
    successors.next()
    mapping = dict(izip(ordered, successors))

    # the largest offset has no successor, it ends where the footer starts
    mapping[ordered[-1]] = pack_end
    self._offset_map = mapping
436479
437480 def _sha_to_index (self , sha ):
438481 """:return: index for the given sha, or raise"""
@@ -537,33 +580,31 @@ def is_valid_stream(self, sha, use_crc=False):
537580 :raise UnsupportedOperation: If the index is version 1 only
538581 :raise BadObject: sha was not found"""
539582 if use_crc :
583+ if self ._index .version () < 2 :
584+ raise UnsupportedOperation ("Version 1 indices do not contain crc's, verify by sha instead" )
585+ # END handle index version
586+
540587 index = self ._sha_to_index (sha )
541588 offset = self ._index .offset (index )
542- pack_data = self ._pack .data ()
543- next_index = min (self ._index .size ()- 1 , index + 1 )
544- next_offset = 0
545- if next_index == index :
546- next_offset = len (pack_data ) - self ._pack .footer_size
547- else :
548- next_offset = self ._index .offset (next_index )
549- # END get next offset
589+ next_offset = self ._offset_map [offset ]
550590 crc_value = self ._index .crc (index )
551591
552- this_crc_value = 0
553- crc_update = zlib .crc32
554-
555592 # create the current crc value, on the compressed object data
556593 # Read it in chunks, without copying the data
594+ crc_update = zlib .crc32
595+ pack_data = self ._pack .data ()
557596 cur_pos = offset
597+ this_crc_value = 0
558598 while cur_pos < next_offset :
559599 rbound = min (cur_pos + chunk_size , next_offset )
560600 size = rbound - cur_pos
561- crc_update (buffer (pack_data , cur_pos , size ), this_crc_value )
601+ this_crc_value = crc_update (buffer (pack_data , cur_pos , size ), this_crc_value )
562602 cur_pos += size
563603 # END window size loop
564604
565- assert this_crc_value == crc_value
566- return this_crc_value == crc_value
605+ # crc returns signed 32 bit numbers, the AND op forces it into unsigned
606+ # mode ... wow, sneaky, from dulwich.
607+ return (this_crc_value & 0xffffffff ) == crc_value
567608 else :
568609 shawriter = Sha1Writer ()
569610 stream = self ._object (sha , as_stream = True )
0 commit comments