44import mmap
55import os
66
7+ from fun import (
8+ msb_size ,
9+ stream_copy ,
10+ apply_delta_data
11+ )
12+
713from util import (
14+ allocate_memory ,
815 LazyMixin ,
916 make_sha ,
1017 write ,
@@ -300,9 +307,11 @@ class DeltaApplyReader(LazyMixin):
300307 * cmd == 0 - invalid operation ( or error in delta stream )
301308 """
# Instance attributes of the reader. Every name has to match the attribute
# assigned on the instance exactly - no surrounding whitespace.
__slots__ = (
    "_bstream",     # base stream to which to apply the deltas
    "_dstreams",    # tuple of delta stream readers
    "_mm_target",   # memory map of the delta-applied data
    "_size",        # actual number of bytes in _mm_target
    "_br",          # number of bytes read
)
307316
308317 def __init__ (self , stream_list ):
@@ -311,31 +320,81 @@ def __init__(self, stream_list):
311320 base object onto which to apply the deltas"""
312321 assert len (stream_list ) > 1 , "Need at least one delta and one base stream"
313322
314- self ._streams = tuple (stream_list )
315- self ._readers = None # TODO
323+ self ._bstream = stream_list [- 1 ]
324+ self ._dstreams = tuple (stream_list [:- 1 ])
325+ self ._br = 0
316326
def _set_cache_(self, attr):
    """Apply all deltas onto the base stream and cache the result.

    The header of every delta stream is parsed, one base and one target
    buffer are allocated, the base buffer is filled from the base stream
    and the deltas are then applied starting with the last entry of
    ``self._dstreams`` and ending with the first one. The two buffers are
    swapped after every step. On return ``_mm_target`` holds the buffer with
    the final data and ``_size`` the target size announced by the first
    delta stream.

    :param attr: unused by this implementation"""
    chunk_size = 256 * mmap.PAGESIZE

    # fill in delta info structures, providing the source and target buffer
    # sizes.
    buffer_offset_list = list()
    final_target_size = None
    max_target_size = 0
    for dstream in self._dstreams:
        buf = dstream.read(512)         # read the header information + X
        # two consecutive MSB encoded sizes head the delta; the first one is
        # parsed only to advance the offset
        offset, _src_size = msb_size(buf)
        offset, target_size = msb_size(buf, offset)
        if final_target_size is None:
            # the first delta in the list is applied last, hence its target
            # size is the size of the final result
            final_target_size = target_size
        # END set final target size
        buffer_offset_list.append((buffer(buf, offset), offset))
        max_target_size = max(max_target_size, target_size)
    # END for each delta stream

    base_size = self._bstream.size
    target_size = max_target_size

    # if we have more than 1 delta to apply, we will swap buffers, hence we must
    # assure that all buffers we use are large enough to hold all the results
    if len(self._dstreams) > 1:
        base_size = target_size = max(base_size, max_target_size)
    # END adjust buffer sizes

    # Allocate private memory big enough to hold the first base buffer and
    # load the base data into it - without this the deltas would be applied
    # to an empty buffer. We need random access to it.
    bbuf = allocate_memory(base_size)
    stream_copy(self._bstream.read, bbuf.write, self._bstream.size, chunk_size)
    bbuf.seek(0)

    # allocate memory large enough for the largest (intermediate) target
    # We will use it as scratch space for all delta ops. If the final
    # target buffer is smaller than our allocated space, we just use parts
    # of it upon return.
    tbuf = allocate_memory(target_size)

    # for each delta to apply, buffer the decompressed delta and
    # work on the op-codes to reconstruct everything.
    for (dbuf, offset), dstream in zip(reversed(buffer_offset_list), reversed(self._dstreams)):
        # allocate a buffer to hold all delta data - fill in the data for
        # fast access. We do this as we know that reading individual bytes
        # from our stream would be slower than necessary ( although possible )
        # The dbuf buffer contains commands after the first two MSB sizes, the
        # offset specifies the amount of bytes read to get the sizes.
        ddata = allocate_memory(dstream.size - offset)
        ddata.write(dbuf)
        # read the rest from the stream. The size we give is larger than necessary
        stream_copy(dstream.read, ddata.write, dstream.size, chunk_size)

        apply_delta_data(bbuf, len(bbuf), ddata, len(ddata), tbuf)

        # finally, swap out source and target buffers. The target is now the
        # base for the next delta to apply
        bbuf, tbuf = tbuf, bbuf
        bbuf.seek(0)
        tbuf.seek(0)
    # END for each delta to apply

    # It is already positioned at 0; readers constrain it to the actual size.
    # NOTE: at the end of the loop the buffers are swapped, hence our target
    # buffer is not tbuf, but bbuf !
    self._mm_target = bbuf
    self._size = final_target_size
339398
340399 # TODO: Once that works, figure out the ordering of the opcodes. If they
341400 # are always in-order/sequential, an alternate implementation could
@@ -344,10 +403,21 @@ def _set_cache_(self, attr):
344403 # concatenated opcode list which indicates what to copy from which delta
345404 # to which position. This preprocessing would allow true streaming
346405
347- def read (self , size = 0 ):
348- # pass the call to our lazy-loaded delta-applied data
349- return self ._mm_target .read (size )
350-
def read(self, count=0):
    """Read up to ``count`` bytes of the delta-applied data.

    :param count: number of bytes to read. Values smaller than 1 or larger
        than the number of bytes left are replaced by the number of bytes left
    :return: the data read from the target buffer"""
    remaining = self._size - self._br
    if not 1 <= count <= remaining:
        count = remaining
    # END clamp count to the bytes left
    chunk = self._mm_target.read(count)
    self._br += len(chunk)
    return chunk
413+
def seek(self, offset, whence=os.SEEK_SET):
    """Allows to reset the stream to restart reading

    :param offset: must be 0
    :param whence: must be os.SEEK_SET
    :raise ValueError: If offset and whence are not 0"""
    if offset != 0 or whence != os.SEEK_SET:
        raise ValueError("Can only seek to position 0")
    # END handle offset
    # restart the byte accounting and rewind the target buffer, so the next
    # read starts at the first byte again
    self._br = 0
    self._mm_target.seek(0)
351421#} END RO streams
352422
353423
0 commit comments