Skip to content

Commit ae6d08e

Browse files
committed
Removed the data-offset field from PackInfo, as it is not needed in most cases. Instead, pack_object_at returns the data offset alongside the info object, slightly improving performance and reducing memory demands.
1 parent fc6253d commit ae6d08e

5 files changed

Lines changed: 29 additions & 35 deletions

File tree

base.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ class OPackInfo(tuple):
6464
location in the pack at which that actual data stream can be found."""
6565
__slots__ = tuple()
6666

67-
def __new__(cls, packoffset, dataoffset, type, size):
68-
return tuple.__new__(cls, (packoffset, dataoffset, type, size))
67+
def __new__(cls, packoffset, type, size):
68+
return tuple.__new__(cls, (packoffset,type, size))
6969

7070
def __init__(self, *args):
7171
tuple.__init__(self)
@@ -76,21 +76,17 @@ def __init__(self, *args):
7676
def pack_offset(self):
7777
return self[0]
7878

79-
@property
80-
def data_offset(self):
81-
return self[1]
82-
8379
@property
8480
def type(self):
85-
return type_id_to_type_map[self[2]]
81+
return type_id_to_type_map[self[1]]
8682

8783
@property
8884
def type_id(self):
89-
return self[2]
85+
return self[1]
9086

9187
@property
9288
def size(self):
93-
return self[3]
89+
return self[2]
9490

9591
#} END interface
9692

@@ -102,13 +98,13 @@ class ODeltaPackInfo(OPackInfo):
10298
the pack offset of the base object"""
10399
__slots__ = tuple()
104100

105-
def __new__(cls, packoffset, dataoffset, type, size, delta_info):
106-
return tuple.__new__(cls, (packoffset, dataoffset, type, size, delta_info))
101+
def __new__(cls, packoffset, type, size, delta_info):
102+
return tuple.__new__(cls, (packoffset, type, size, delta_info))
107103

108104
#{ Interface
109105
@property
110106
def delta_info(self):
111-
return self[4]
107+
return self[3]
112108
#} END interface
113109

114110

@@ -142,35 +138,35 @@ class OPackStream(OPackInfo):
142138
is provided"""
143139
__slots__ = tuple()
144140

145-
def __new__(cls, packoffset, dataoffset, type, size, stream, *args):
141+
def __new__(cls, packoffset, type, size, stream, *args):
146142
"""Helps with the initialization of subclasses"""
147-
return tuple.__new__(cls, (packoffset, dataoffset, type, size, stream))
143+
return tuple.__new__(cls, (packoffset, type, size, stream))
148144

149145
#{ Stream Reader Interface
150146
def read(self, size=-1):
151-
return self[4].read(size)
147+
return self[3].read(size)
152148

153149
@property
154150
def stream(self):
155-
return self[4]
151+
return self[3]
156152
#} END stream reader interface
157153

158154

159155
class ODeltaPackStream(ODeltaPackInfo):
160156
"""Provides a stream outputting the uncompressed offset delta information"""
161157
__slots__ = tuple()
162158

163-
def __new__(cls, packoffset, dataoffset, type, size, delta_info, stream):
164-
return tuple.__new__(cls, (packoffset, dataoffset, type, size, delta_info, stream))
159+
def __new__(cls, packoffset, type, size, delta_info, stream):
160+
return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
165161

166162

167163
#{ Stream Reader Interface
168164
def read(self, size=-1):
169-
return self[5].read(size)
165+
return self[4].read(size)
170166

171167
@property
172168
def stream(self):
173-
return self[5]
169+
return self[4]
174170
#} END stream reader interface
175171

176172

pack.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555

5656
def pack_object_at(data, offset, as_stream):
5757
"""
58-
:return: PackInfo|PackStream
58+
:return: Tuple(abs_data_offset, PackInfo|PackStream)
5959
an object of the correct type according to the type_id of the object.
6060
If as_stream is True, the object will contain a stream, allowing the
6161
data to be read decompressed.
@@ -97,14 +97,14 @@ def pack_object_at(data, offset, as_stream):
9797
if as_stream:
9898
stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size)
9999
if delta_info is None:
100-
return OPackStream(offset, abs_data_offset, type_id, uncomp_size, stream)
100+
return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream)
101101
else:
102-
return ODeltaPackStream(offset, abs_data_offset, type_id, uncomp_size, delta_info, stream)
102+
return abs_data_offset, ODeltaPackStream(offset, type_id, uncomp_size, delta_info, stream)
103103
else:
104104
if delta_info is None:
105-
return OPackInfo(offset, abs_data_offset, type_id, uncomp_size)
105+
return abs_data_offset, OPackInfo(offset, type_id, uncomp_size)
106106
else:
107-
return ODeltaPackInfo(offset, abs_data_offset, type_id, uncomp_size, delta_info)
107+
return abs_data_offset, ODeltaPackInfo(offset, type_id, uncomp_size, delta_info)
108108
# END handle info
109109
# END handle stream
110110

@@ -278,6 +278,7 @@ def sha_to_index(self, sha):
278278
if the sha was not found in this pack index
279279
:param sha: 20 byte sha to lookup"""
280280
first_byte = ord(sha[0])
281+
get_sha = self.sha
281282
lo = 0 # lower index, the left bound of the bisection
282283
if first_byte != 0:
283284
lo = self._fanout_table[first_byte-1]
@@ -286,7 +287,7 @@ def sha_to_index(self, sha):
286287
# bisect until we have the sha
287288
while lo < hi:
288289
mid = (lo + hi) / 2
289-
c = cmp(sha, self.sha(mid))
290+
c = cmp(sha, get_sha(mid))
290291
if c < 0:
291292
hi = mid
292293
elif not c:
@@ -346,12 +347,12 @@ def _iter_objects(self, start_offset, as_stream=True):
346347

347348
null = NullStream()
348349
while cur_offset < content_size:
349-
ostream = pack_object_at(data, cur_offset, True)
350+
data_offset, ostream = pack_object_at(data, cur_offset, True)
350351
# scrub the stream to the end - this decompresses the object, but yields
351352
# the amount of compressed bytes we need to get to the next offset
352353

353354
stream_copy(ostream.read, null.write, ostream.size, chunk_size)
354-
cur_offset += (ostream.data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read()
355+
cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read()
355356

356357

357358
# if a stream is requested, reset it beforehand
@@ -399,7 +400,7 @@ def collect_streams(self, offset):
399400
:param offset: specifies the first byte of the object within this pack"""
400401
out = list()
401402
while True:
402-
ostream = pack_object_at(self._data, offset, True)
403+
ostream = pack_object_at(self._data, offset, True)[1]
403404
out.append(ostream)
404405
if ostream.type_id == OFS_DELTA:
405406
offset = ostream.pack_offset - ostream.delta_info
@@ -420,13 +421,13 @@ def info(self, offset):
420421
"""Retrieve information about the object at the given file-absolute offset
421422
:param offset: byte offset
422423
:return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
423-
return pack_object_at(self._data, offset or self.first_object_offset, False)
424+
return pack_object_at(self._data, offset or self.first_object_offset, False)[1]
424425

425426
def stream(self, offset):
426427
"""Retrieve an object at the given file-relative offset as stream along with its information
427428
:param offset: byte offset
428429
:return: OPackStream instance, the actual type differs depending on the type_id attribute"""
429-
return pack_object_at(self._data, offset or self.first_object_offset, True)
430+
return pack_object_at(self._data, offset or self.first_object_offset, True)[1]
430431

431432
def stream_iter(self, start_offset=0):
432433
""":return: iterator yielding OPackStream compatible instances, allowing

test/performance/test_db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ class TestGitDBPerformance(TestBigRepoR):
1414

1515
def test_pack_random_access(self):
1616
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
17-
assert len(pdb._entities) > 1
1817

1918
# sha lookup
2019
st = time()
@@ -72,3 +71,4 @@ def test_pack_random_access(self):
7271
elapsed = time() - st
7372
total_kib = total_size / 1000
7473
print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
74+

test/test_base.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,12 @@ def test_streams(self):
3535
assert pinfo.type == str_blob_type
3636
assert pinfo.type_id == blob_id
3737
assert pinfo.pack_offset == 0
38-
assert pinfo.data_offset == 1
3938

4039
dpinfo = ODeltaPackInfo(0, 1, blob_id, s, sha)
4140
assert dpinfo.type == str_blob_type
4241
assert dpinfo.type_id == blob_id
4342
assert dpinfo.delta_info == sha
4443
assert dpinfo.pack_offset == 0
45-
assert dpinfo.data_offset == 1
4644

4745

4846
# test ostream

test/test_pack.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ def _assert_pack_file(self, pack, version, size):
7272
stream = pack.stream(obj.pack_offset)
7373

7474
assert info.pack_offset == stream.pack_offset
75-
assert info.data_offset == stream.data_offset
7675
assert info.type_id == stream.type_id
7776
assert hasattr(stream, 'read')
7877

0 commit comments

Comments
 (0)