@@ -272,7 +272,82 @@ def read(self, size=-1):
272272 dcompdat += self .read (size - len (dcompdat ))
273273 # END handle special case
274274 return dcompdat
275+
276+
class DeltaApplyReader(LazyMixin):
    """A reader which dynamically applies pack deltas to a base object, keeping the
    memory demands to a minimum.

    The size of the final object is only obtainable once all deltas have been
    applied, unless it is retrieved from a pack index.

    The uncompressed Delta has the following layout (MSB being a most significant
    bit encoded dynamic size):

    * MSB Source Size - the size of the base against which the delta was created
    * MSB Target Size - the size of the resulting data after the delta was applied
    * A list of one byte commands (cmd) which are followed by a specific protocol:

     * cmd & 0x80 - copy base_data[offset:offset+size]

      * Followed by an encoded offset into the base (source) data
      * Followed by an encoded size of the chunk to copy

     * cmd & 0x7f - insert

      * insert cmd bytes from the delta buffer into the output stream

     * cmd == 0 - invalid operation ( or error in delta stream )
    """
    __slots__ = (
        "_streams",     # tuple of our stream objects
        "_readers",     # list of read methods from our streams
        "_mm_target",   # memory map of the delta-applied data
    )

    def __init__(self, stream_list):
        """Initialize this instance with a list of streams, the first stream being
        the delta to apply on top of all following deltas, the last stream being the
        base object onto which to apply the deltas

        :param stream_list: sequence of at least two streams, ordered from the
            topmost delta down to the base object. It is stored as a tuple.
        :raise AssertionError: if fewer than two streams are given"""
        assert len(stream_list) > 1, "Need at least one delta and one base stream"

        self._streams = tuple(stream_list)
        self._readers = None    # TODO: not populated yet

    def _set_cache_(self, attr):
        """If we are here, we apply the actual deltas

        :param attr: name of the attribute that is being requested
        :note: this is still a stub - it documents the intended algorithm but
            does not assign any attribute yet, so ``_mm_target`` stays unset.
            Presumably LazyMixin calls this on first access of an unset
            slot - confirm against LazyMixin."""
        # fill in delta info structures, providing the source and target buffer
        # sizes.

        # Allocate private memory map big enough to hold the first base buffer
        # It can be swapped out if it is too large. We need random access to it

        # allocate memory map large enough for the largest (intermediate) target
        # We will use it as scratch space for all delta ops. If the final
        # target buffer is smaller than our allocated space, we just use parts
        # of it

        # for each delta to apply, memory map the decompressed delta and
        # work on the op-codes to reconstruct everything.
        # For the actual copying, we use a seek and write pattern of buffer
        # slices.

        # NOTE: on py pre 2.5, all memory maps must actually be some kind
        # of memory buffer, like StringIO ( ouch ;) )

        # TODO: Once that works, figure out the ordering of the opcodes. If they
        # are always in-order/sequential, an alternate implementation could
        # use stream access only. Of course this would mean we would read
        # all deltas in advance, analyse the opcode ranges to determine a final
        # concatenated opcode list which indicates what to copy from which delta
        # to which position. This preprocessing would allow true streaming

    def read(self, size=0):
        """Read bytes from the delta-applied target data

        :param size: maximum amount of bytes to read. Any value smaller than 1,
            including the default, reads everything that is left.
        :return: the data returned by the target buffer's ``read``"""
        # Forwarding the default 0 verbatim would request zero bytes and hence
        # never yield any data. Map every non-positive size to -1, which
        # file-like buffers interpret as 'read until the end'.
        if size < 1:
            size = -1
        # END handle read-all request

        # pass the call to our lazy-loaded delta-applied data
        return self._mm_target.read(size)
350+
276351#} END RO streams
277352
278353
0 commit comments