Skip to content

Commit f4b6e27

Browse files
committed
Implemented main PackEntity object retrieval method and moved constructor for delta_streams out of the PackFile, into the stream itself where it belongs.
All this is still to be tested
1 parent 6a4eee2 commit f4b6e27

4 files changed

Lines changed: 165 additions & 40 deletions

File tree

fun.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
# INVARIANTS
1515
OFS_DELTA = 6
1616
REF_DELTA = 7
17+
delta_types = (OFS_DELTA, REF_DELTA)
18+
1719
type_id_to_type_map = {
1820
0 : "", # EXT 1
1921
1 : "commit",

pack.py

Lines changed: 109 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,24 @@
55
from util import (
66
LockedFD,
77
LazyMixin,
8-
file_contents_ro,
9-
unpack_from
8+
unpack_from,
9+
file_contents_ro,
1010
)
1111

1212
from fun import (
1313
pack_object_header_info,
14+
type_id_to_type_map,
1415
stream_copy,
1516
chunk_size,
17+
delta_types,
1618
OFS_DELTA,
17-
REF_DELTA
19+
REF_DELTA,
20+
msb_size
1821
)
1922

20-
from base import (
23+
from base import ( # Amazing !
24+
OInfo,
25+
OStream,
2126
OPackInfo,
2227
OPackStream,
2328
ODeltaPackInfo,
@@ -35,7 +40,7 @@
3540

3641
__all__ = ('PackIndexFile', 'PackFile')
3742

38-
_delta_types = (OFS_DELTA, REF_DELTA)
43+
3944

4045

4146
#{ Utilities
@@ -95,8 +100,6 @@ def pack_object_at(data, offset, as_stream):
95100
# END handle info
96101
# END handle stream
97102

98-
99-
100103
#} END utilities
101104

102105

@@ -355,6 +358,7 @@ def collect_streams(self, offset):
355358
the last one is either a full object, or a REF_Delta stream. The latter
356359
type needs its reference object to be looked up in an ODB to form a valid
357360
delta chain.
361+
If the object at offset is no delta, the size of the list is 1.
358362
:param offset: specifies the first byte of the object within this pack"""
359363
out = list()
360364
while True:
@@ -370,31 +374,7 @@ def collect_streams(self, offset):
370374
# END handle type
371375
# END while chaining streams
372376
return out
373-
374-
def to_delta_stream(self, stream_list):
375-
"""Convert the given list of streams into a stream which resolves deltas
376-
(if availble) when reading from it.
377-
:param stream_list: one or more stream objects. If the first stream is a Delta,
378-
there must be at least two streams in the list. The list's last stream
379-
must be a non-delta stream.
380-
:return: Non-Delta OPackStream object whose stream can be used to obtain
381-
the decompressed resolved data
382-
:raise ValueError: if the stream list cannot be handled due to a missing base object"""
383-
if len(stream_list) == 1:
384-
if stream_list[0].type_id in _delta_types:
385-
raise ValueError("Cannot resolve deltas if only one stream is given", stream_list[0].type)
386-
# its an object, no need to resolve anything
387-
return stream_list[0]
388-
# END single object special handling
389-
390-
if stream_list[-1].type_id in _delta_types:
391-
raise ValueError("Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type)
392-
# END check stream
393-
394-
# just create the respective stream wrapper
395-
return DeltaApplyReader(stream_list)
396-
397-
377+
398378
#} END pack specific
399379

400380
#{ Read-Database like Interface
@@ -437,8 +417,58 @@ def __init__(self, basename):
437417
self._pack = self.PackFileCls("%s.pack" % basename) # corresponding PackFile instance
438418

439419

420+
def _sha_to_index(self, sha):
421+
""":return: index for the given sha, or raise"""
422+
index = self._index.sha_to_index(sha)
423+
if index is None:
424+
raise BadObject(sha)
425+
return index
426+
440427
def _iter_objects(self, as_stream):
441-
raise NotImplementedError
428+
"""Iterate over all objects in our index and yield their OInfo or OStream instances"""
429+
raise NotImplementedError()
430+
431+
def _object(self, sha, as_stream):
432+
""":return: OInfo or OStream object providing information about the given sha"""
433+
# it's a little bit redundant here, but it needs to be efficient
434+
offset = self._index.offset(self._sha_to_index(sha))
435+
type_id, uncomp_size, data_rela_offset = pack_object_header_info(buffer(self._pack._data, offset))
436+
if as_stream:
437+
if type_id not in delta_types:
438+
packstream = self._pack.stream(offset)
439+
return OStream(sha, packstream.type, packstream.size, packstream.stream)
440+
# END handle non-deltas
441+
442+
# produce a delta stream containing all info
443+
# To prevent it from applying the deltas when querying the size,
444+
# we extract it from the delta stream ourselves
445+
streams = self.collect_streams_at_offset(offset)
446+
buf = streams[0].read(512)
447+
offset, src_size = msb_size(buf)
448+
offset, target_size = msb_size(buf, offset)
449+
450+
streams[0].seek(0) # assure it can be read by the delta reader
451+
dstream = DeltaApplyReader.new(streams)
452+
453+
return OStream(sha, dstream.type, target_size, dstream)
454+
else:
455+
if type_id not in delta_types:
456+
return OInfo(sha, type_id_to_type_map[type_id], uncomp_size)
457+
# END handle non-deltas
458+
459+
# deltas are a little tougher - unpack the first bytes to obtain
460+
# the actual target size, as opposed to the size of the delta data
461+
streams = self.collect_streams_at_offset(offset)
462+
buf = streams[0].read(512)
463+
offset, src_size = msb_size(buf)
464+
offset, target_size = msb_size(buf, offset)
465+
466+
# collect the streams to obtain the actual object type
467+
if streams[-1].type_id in delta_types:
468+
raise BadObject(sha, "Could not resolve delta object")
469+
470+
return OInfo(sha, streams[-1].type, target_size)
471+
# END handle stream
442472

443473
#{ Read-Database like Interface
444474

@@ -447,14 +477,14 @@ def info(self, sha):
447477
:param sha: 20 byte sha1
448478
:raise BadObject:
449479
:return: OInfo instance"""
450-
raise NotImplementedError()
480+
return self._object(sha, as_stream=False)
451481

452482
def stream(self, sha):
453483
"""Retrieve an object stream along with its information as identified by the given sha
454484
:param sha: 20 byte sha1
455485
:raise BadObject:
456486
:return: OStream instance"""
457-
raise NotImplementedError()
487+
return self._object(sha, as_stream=True)
458488

459489
#} END Read-Database like Interface
460490

@@ -470,4 +500,47 @@ def stream_iter(self):
470500
OStream instances"""
471501
return self._iter_objects(as_stream=True)
472502

473-
#} Interface
503+
def collect_streams_at_offset(self, offset):
504+
"""As the version in the PackFile, but can resolve REF deltas within this pack
505+
For more info, see ``collect_streams``
506+
:param offset: offset into the pack file at which the object can be found"""
507+
streams = self._pack.collect_streams(offset)
508+
509+
# try to resolve the last one if needed. It is assumed to be either
510+
# a REF delta, or a base object, as OFFSET deltas are resolved by the pack
511+
if streams[-1].type_id == REF_DELTA:
512+
stream = streams[-1]
513+
while stream.type_id in delta_types:
514+
if stream.type_id == REF_DELTA:
515+
sindex = self._index.sha_to_index(stream.delta_info)
516+
if sindex is None:
517+
break
518+
stream = self._pack.stream(self._index.offset(sindex))
519+
streams.append(stream)
520+
else:
521+
# must be another OFS DELTA - this could happen if a REF
522+
# delta we resolved previously points to an OFS delta. Who
523+
# would do that ;) ? We can handle it though
524+
stream = self._pack.stream(stream.delta_info)
525+
streams.append(stream)
526+
# END handle ref delta
527+
# END resolve ref streams
528+
# END resolve streams
529+
530+
return streams
531+
532+
def collect_streams(self, sha):
533+
"""As ``PackFile.collect_streams``, but takes a sha instead of an offset.
534+
Additionally, ref_delta streams will be resolved within this pack.
535+
If this is not possible, the stream will be left alone, hence it is advised
536+
to check for unresolved ref-deltas and resolve them before attempting to
537+
construct a delta stream.
538+
:param sha: 20 byte sha1 specifying the object whose related streams you want to collect
539+
:return: list of streams, first being the actual object delta, the last being
540+
a possibly unresolved base object.
541+
:raise BadObject:"""
542+
return self.collect_streams_at_offset(self._index.offset(self._sha_to_index(sha)))
543+
544+
545+
546+
#} END interface

stream.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from fun import (
88
msb_size,
99
stream_copy,
10-
apply_delta_data
10+
apply_delta_data,
11+
delta_types
1112
)
1213

1314
from util import (
@@ -19,7 +20,7 @@
1920
zlib
2021
)
2122

22-
__all__ = ('DecompressMemMapReader', 'FDCompressedSha1Writer')
23+
__all__ = ('DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader')
2324

2425

2526
#{ RO Streams
@@ -418,6 +419,50 @@ def seek(self, offset, whence=os.SEEK_SET):
418419
raise ValueError("Can only seek to position 0")
419420
# END handle offset
420421
self._size
422+
423+
#{ Interface
424+
425+
@classmethod
426+
def new(cls, stream_list):
427+
"""Convert the given list of streams into a stream which resolves deltas
428+
when reading from it.
429+
:param stream_list: two or more stream objects, first stream is a Delta
430+
to the object that you want to resolve, followed by N additional delta
431+
streams. The list's last stream must be a non-delta stream.
432+
:return: instance of this class, which can be read to obtain
433+
the decompressed resolved data
434+
:raise ValueError: if the stream list cannot be handled"""
435+
if len(stream_list) < 2:
436+
raise ValueError("Need at least two streams")
437+
# END single object special handling
438+
439+
if stream_list[-1].type_id in delta_types:
440+
raise ValueError("Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type)
441+
# END check stream
442+
443+
return cls(stream_list)
444+
445+
#} END interface
446+
447+
448+
#{ OInfo like Interface
449+
450+
@property
451+
def type(self):
452+
return self._bstream.type
453+
454+
@property
455+
def type_id(self):
456+
return self._bstream.type_id
457+
458+
@property
459+
def size(self):
460+
""":return: number of uncompressed bytes in the stream"""
461+
return self._size
462+
463+
#} END oinfo like interface
464+
465+
421466
#} END RO streams
422467

423468

test/test_pack.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
with_packs_rw,
66
fixture_path
77
)
8+
from gitdb.stream import (
9+
DeltaApplyReader
10+
)
11+
812
from gitdb.pack import (
913
PackIndexFile,
1014
PackFile
@@ -71,15 +75,16 @@ def _assert_pack_file(self, pack, version, size):
7175

7276
# read the stream
7377
try:
74-
dstream = pack.to_delta_stream(streams)
78+
dstream = DeltaApplyReader.new(streams)
7579
except ValueError:
7680
# ignore these, old git versions use only ref deltas,
7781
# which we haven't resolved ( as we are without an index )
82+
# Also ignore non-delta streams
7883
continue
7984
# END get deltastream
8085

8186
# read all
82-
assert len(dstream.read())
87+
assert len(dstream.read())
8388

8489
# read chunks
8590
# NOTE: the current implementation is safe, it basically transfers

0 commit comments

Comments
 (0)