Skip to content

Commit e9c5cf3

Browse files
committed
delta-apply now works after fixing a stupid typo: instead of i + 1 I wrote i + i ... argh!
1 parent 8ab9b4f commit e9c5cf3

3 files changed

Lines changed: 28 additions & 19 deletions

File tree

fun.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,14 +155,16 @@ def stream_copy(read, write, size, chunk_size):
155155
return dbw
156156

157157

158-
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, target_file):
158+
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, target_file,
159+
target_size):
159160
"""Apply data from a delta buffer using a source buffer to the target file,
160161
which will be written to
161162
:param src_buf: random access data from which the delta was created
162163
:param src_buf_size: size of the source buffer in bytes
163164
:param delta_buf_size: size of the delta buffer in bytes
164165
:param delta_buf: random access delta data
165166
:param target_file: file like object to write the result to
167+
:param target_size: size of the target buffer
166168
:note: transcribed to python from the similar routine in patch-delta.c"""
167169
i = 0
168170
twrite = target_file.write
@@ -180,7 +182,7 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, target_fi
180182
i += 1
181183
if (c & 0x04):
182184
cp_off |= (ord(db[i]) << 16)
183-
i += i
185+
i += 1
184186
if (c & 0x08):
185187
cp_off |= (ord(db[i]) << 24)
186188
i += 1
@@ -196,14 +198,20 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, target_fi
196198

197199
if not cp_size:
198200
cp_size = 0x10000
199-
# maybe skip this check ?
200-
if (cp_off + cp_size < cp_size or
201-
cp_off + cp_size > src_buf_size):
201+
202+
rbound = cp_off + cp_size
203+
if (rbound < cp_size or
204+
rbound > src_buf_size or
205+
cp_size > target_size):
202206
break
203207
twrite(buffer(src_buf, cp_off, cp_size))
208+
target_size -= cp_size
204209
elif c:
210+
if c > target_size:
211+
break
205212
twrite(db[i:i+c])
206213
i += c
214+
target_size -= c
207215
else:
208216
raise ValueError("unexpected delta opcode 0")
209217
# END handle command byte

stream.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -327,19 +327,15 @@ def __init__(self, stream_list):
327327

328328
def _set_cache_(self, attr):
329329
"""If we are here, we apply the actual deltas"""
330-
# fill in delta info structures, providing the source and target buffer
331-
# sizes.
332-
buffer_offset_list = list()
333-
final_target_size = None
330+
331+
# prefetch information
332+
buffer_info_list = list()
334333
max_target_size = 0
335334
for dstream in self._dstreams:
336335
buf = dstream.read(512) # read the header information + X
337336
offset, src_size = msb_size(buf)
338337
offset, target_size = msb_size(buf, offset)
339-
if final_target_size is None:
340-
final_target_size = target_size
341-
# END set final target size
342-
buffer_offset_list.append((buffer(buf, offset), offset))
338+
buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
343339
max_target_size = max(max_target_size, target_size)
344340
# END for each delta stream
345341

@@ -358,7 +354,7 @@ def _set_cache_(self, attr):
358354
# Allocate private memory map big enough to hold the first base buffer
359355
# We need random access to it
360356
bbuf = allocate_memory(base_size)
361-
stream_copy(self._bstream.read, bbuf.write, base_size, 256*mmap.PAGESIZE)
357+
stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)
362358

363359
# allocate memory map large enough for the largest (intermediate) target
364360
# We will use it as scratch space for all delta ops. If the final
@@ -370,7 +366,8 @@ def _set_cache_(self, attr):
370366
# work on the op-codes to reconstruct everything.
371367
# For the actual copying, we use a seek and write pattern of buffer
372368
# slices.
373-
for (dbuf, offset), dstream in reversed(zip(buffer_offset_list, self._dstreams)):
369+
final_target_size = None
370+
for (dbuf, offset, src_size, target_size), dstream in reversed(zip(buffer_info_list, self._dstreams)):
374371
# allocate a buffer to hold all delta data - fill in the data for
375372
# fast access. We do this as we know that reading individual bytes
376373
# from our stream would be slower than necessary ( although possible )
@@ -381,15 +378,16 @@ def _set_cache_(self, attr):
381378
# read the rest from the stream. The size we give is larger than necessary
382379
stream_copy(dstream.read, ddata.write, dstream.size, 256*mmap.PAGESIZE)
383380

384-
################################################################
385-
apply_delta_data(bbuf, len(bbuf), ddata, len(ddata), tbuf)
386-
################################################################
381+
#######################################################################
382+
apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf, target_size)
383+
#######################################################################
387384

388385
# finally, swap out source and target buffers. The target is now the
389386
# base for the next delta to apply
390387
bbuf, tbuf = tbuf, bbuf
391388
bbuf.seek(0)
392389
tbuf.seek(0)
390+
final_target_size = target_size
393391
# END for each delta to apply
394392

395393
# it's already seeked to 0; constrain it to the actual size

test/performance/test_db.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,12 @@ def test_pack_random_access(self):
6363
# retrieve stream and read all
6464
max_items = 5000
6565
pdb_stream = pdb.stream
66+
total_size = 0
6667
st = time()
6768
for sha in sha_list[:max_items]:
6869
stream = pdb_stream(sha)
6970
stream.read()
71+
total_size += stream.size
7072
elapsed = time() - st
71-
print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes in %f s ( %f info/s )" % (max_items, elapsed, max_items / elapsed)
73+
total_kib = total_size / 1000
74+
print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)

0 commit comments

Comments
 (0)