@@ -106,9 +106,12 @@ bool TSI_copy_stream_from_object(ToplevelStreamInfo* info)
106106 if (!ptmp ){
107107 return 0 ;
108108 }
109+ uint ofs = (uint )(info -> cstart - info -> tds );
109110 memcpy ((void * )ptmp , info -> tds , info -> tdslen );
111+
110112 info -> tds = ptmp ;
111- info -> cstart = ptmp ;
113+ info -> cstart = ptmp + ofs ;
114+
112115 Py_DECREF (info -> parent_object );
113116 info -> parent_object = 0 ;
114117
@@ -125,8 +128,10 @@ bool TSI_resize(ToplevelStreamInfo* info, uint num_bytes)
125128 if (num_bytes <= info -> tdslen ){
126129 return 1 ;
127130 }
131+ uint ofs = (uint )(info -> cstart - info -> tds );
128132 info -> tds = PyMem_Realloc ((void * )info -> tds , num_bytes );
129- info -> cstart = info -> tds ;
133+ info -> tdslen = num_bytes ;
134+ info -> cstart = info -> tds + ofs ;
130135
131136 return info -> tds != NULL ;
132137}
@@ -182,19 +187,21 @@ void DC_apply(const DeltaChunk* dc, const uchar* base, PyObject* writer, PyObjec
182187
183188// Encode the information in the given delta chunk and write the byte-stream
184189// into the given output stream
190+ // Only `size` bytes of the chunk are encoded, starting at the offset `ofs`
191+ // relative to the start of the chunk. Both values are assumed to be correct,
192+ // i.e. the range [ofs, ofs + size) must fit within the unaltered dc
185193inline
186- void DC_encode_to (const DeltaChunk * dc , uchar * * pout )
194+ void DC_encode_to (const DeltaChunk * dc , uchar * * pout , uint ofs , uint size )
187195{
188196 uchar * out = * pout ;
189197 if (dc -> data ){
190- * out ++ = (uchar )dc -> ts ;
191- memcpy (out , dc -> data , dc -> ts );
192- out += dc -> ts ;
198+ * out ++ = (uchar )size ;
199+ memcpy (out , dc -> data + ofs , size );
200+ out += size ;
193201 } else {
194202 uchar i = 0x80 ;
195203 uchar * op = out ++ ;
196- uint moff = dc -> so ;
197- uint msize = dc -> ts ;
204+ uint moff = dc -> so + ofs ;
198205
199206 if (moff & 0x000000ff )
200207 * out ++ = moff >> 0 , i |= 0x01 ;
@@ -205,10 +212,10 @@ void DC_encode_to(const DeltaChunk* dc, uchar** pout)
205212 if (moff & 0xff000000 )
206213 * out ++ = moff >> 24 , i |= 0x08 ;
207214
208- if (msize & 0x00ff )
209- * out ++ = msize >> 0 , i |= 0x10 ;
210- if (msize & 0xff00 )
211- * out ++ = msize >> 8 , i |= 0x20 ;
215+ if (size & 0x00ff )
216+ * out ++ = size >> 0 , i |= 0x10 ;
217+ if (size & 0xff00 )
218+ * out ++ = size >> 8 , i |= 0x20 ;
212219
213220 * op = i ;
214221 }
@@ -224,13 +231,13 @@ ushort DC_count_encode_bytes(const DeltaChunk* dc)
224231 } else {
225232 ushort c = 1 ; // cmd byte
226233 uint ts = dc -> ts ;
227- ull to = dc -> to ;
234+ ull so = dc -> so ;
228235
229236 // offset
230- c += to & 0x000000FF ;
231- c += to & 0x0000FF00 ;
232- c += to & 0x00FF0000 ;
233- c += to & 0xFF000000 ;
237+ c += so & 0x000000FF ;
238+ c += so & 0x0000FF00 ;
239+ c += so & 0x00FF0000 ;
240+ c += so & 0xFF000000 ;
234241
235242 // size - max size is 0x10000, its encoded with 0 size bits
236243 c += ts & 0x000000FF ;
@@ -485,179 +492,201 @@ const uchar* next_delta_info(const uchar*, DeltaChunk*);
485492
486493// Return the amount of chunks a slice at the given spot would have, as well as
487494// its size in bytes it would have if the possibly partial chunks would be encoded
488- // The bytes will be added
495+ // and added to the spot marked by sdc
489496inline
490- uint DIV_count_slice_chunks_and_bytes (const DeltaInfoVector * src , ull ofs , ull size , uint * out_bytes )
497+ uint DIV_count_slice_bytes (const DeltaInfoVector * src , uint ofs , uint size )
491498{
492- uint num_dc = 0 ;
499+ uint num_bytes = 0 ;
493500 DeltaInfo * cdi = DIV_closest_chunk (src , ofs );
494501
502+
503+
495504 DeltaChunk dc ;
496505 DC_init (& dc , 0 , 0 , 0 , NULL );
497506
498507 // partial overlap
499508 if (cdi -> to != ofs ) {
500509 const ull relofs = ofs - cdi -> to ;
501510 const uint cdisize = DIV_info_size (src , cdi );
502- size -= cdisize - relofs < size ? cdisize - relofs : size ;
503- num_dc += 1 ;
504- cdi += 1 ;
511+ const uint actual_size = cdisize - relofs < size ? cdisize - relofs : size ;
512+ size -= actual_size ;
505513
506514 // get the size in bytes the info would have
507515 next_delta_info (src -> dstream + cdi -> dso , & dc );
508- * out_bytes += DC_count_encode_bytes (& dc );
516+ dc .so += relofs ;
517+ dc .ts = actual_size ;
518+ num_bytes += DC_count_encode_bytes (& dc );
519+
520+ cdi += 1 ;
509521
510522 if (size == 0 ){
511- return num_dc ;
523+ return num_bytes ;
512524 }
513525 }
514526
515527 const DeltaInfo const * vecend = DIV_end (src );
516- const DeltaInfo const * veclast = DIV_last (src );
517- for ( ;(cdi < vecend ) && size ; ++ cdi ){
518- num_dc += 1 ;
519-
520- const uint cdisize = DIV_info_size2 (src , cdi , veclast );
521-
528+ for ( ;cdi < vecend ; ++ cdi ){
522529 next_delta_info (src -> dstream + cdi -> dso , & dc );
523- * out_bytes += DC_count_encode_bytes (& dc );
524530
525- if (cdisize < size ) {
526- size -= cdisize ;
531+ if (dc .ts < size ) {
532+ num_bytes += DC_count_encode_bytes (& dc );
533+ size -= dc .ts ;
527534 } else {
535+ dc .ts = size ;
536+ num_bytes += DC_count_encode_bytes (& dc );
528537 size = 0 ;
529538 break ;
530539 }
531540 }
532541
533- * out_bytes += 0 ;
534- return num_dc ;
542+ assert ( size == 0 ) ;
543+ return num_bytes ;
535544}
536545
537546// Write a slice as defined by its absolute offset in bytes and its size into the given
538- // destination memory. The individual chunks written will be a deep copy of the source
539- // data chunks
547+ // destination memory. The individual chunks written will be a byte copy of the source
548+ // data chunk stream
540549// Return: number of chunks in the slice
541550inline
542- uint DIV_copy_slice_to (const DeltaInfoVector * src , DeltaInfo * dest , ull ofs , ull size )
551+ uint DIV_copy_slice_to (const DeltaInfoVector * src , uchar * dest , ull tofs , uint size )
543552{
544- /*
545- assert(DIV_lbound(src) <= ofs);
546- assert((ofs + size) <= DIV_last(src)->to + src->di_last_size);
553+ assert (DIV_lbound (src ) <= tofs );
554+ assert ((tofs + size ) <= DIV_last (src )-> to + src -> di_last_size );
547555
548- DeltaInfo* cdc = DIV_closest_chunk(src, ofs);
556+ DeltaChunk dc ;
557+ DC_init (& dc , 0 , 0 , 0 , NULL );
558+
559+ DeltaInfo * cdi = DIV_closest_chunk (src , tofs );
549560 uint num_chunks = 0 ;
550561
551562 // partial overlap
552- if (cdc->to != ofs) {
553- const ull relofs = ofs - cdc->to;
554- DC_offset_copy_to(cdc, dest, relofs, cdc->ts - relofs < size ? cdc->ts - relofs : size);
555- cdc += 1;
556- size -= dest->ts;
557- dest += 1; // must be here, we are reading the size !
563+ if (cdi -> to != tofs ) {
564+ const uint relofs = tofs - cdi -> to ;
565+ next_delta_info (src -> dstream + cdi -> dso , & dc );
566+ const uint cdisize = dc .ts ;
567+ const uint actual_size = cdisize - relofs < size ? cdisize - relofs : size ;
568+
569+ size -= actual_size ;
570+
571+ // adjust dc proportions
572+
573+ DC_encode_to (& dc , & dest , relofs , actual_size );
574+
558575 num_chunks += 1 ;
576+ cdi += 1 ;
559577
560578 if (size == 0 ){
561579 return num_chunks ;
562580 }
563581 }
564582
565583 const DeltaInfo * vecend = DIV_end (src );
566- for( ;(cdc < vecend) && size ; ++cdc )
584+ for ( ;cdi < vecend ; ++ cdi )
567585 {
568586 num_chunks += 1 ;
569- if (cdc->ts < size) {
570- DC_copy_to(cdc, dest++);
571- size -= cdc->ts;
587+ next_delta_info (src -> dstream + cdi -> dso , & dc );
588+ if (dc .ts < size ) {
589+ // Full copy would be possible, but the final length of the dstream
590+ // needs to be used as well to know how many bytes to copy
591+ // TODO: make a DIV_ function for this
592+ DC_encode_to (& dc , & dest , 0 , dc .ts );
593+ size -= dc .ts ;
572594 } else {
573- DC_offset_copy_to(cdc, dest, 0, size);
595+ DC_encode_to ( & dc , & dest , 0 , size );
574596 size = 0 ;
575597 break ;
576598 }
577599 }
578600
579601 assert (size == 0 );
580602 return num_chunks ;
581- */
582- assert (0 ); // TODO
583- return 0 ;
584603}
585604
605+
586606// Take slices of div into the corresponding area of the tsi, which is the topmost
587607// delta to apply.
588608bool DIV_connect_with_base (ToplevelStreamInfo * tsi , DeltaInfoVector * div )
589609{
590610 assert (tsi -> num_chunks );
591611
592- uint * const offset_array = PyMem_Malloc (tsi -> num_chunks * sizeof (uint ));
612+ typedef struct {
613+ uint bofs ; // byte-offset of delta stream
614+ uint dofs ; // delta stream offset relative to tsi->cstart
615+ } OffsetInfo ;
616+
617+
618+ OffsetInfo * const offset_array = PyMem_Malloc (tsi -> num_chunks * sizeof (OffsetInfo ));
593619 if (!offset_array ){
594620 return 0 ;
595621 }
596622
597- uint * pofs = offset_array ;
598- uint num_addchunks = 0 ;
623+ OffsetInfo * pofs = offset_array ;
599624 uint num_addbytes = 0 ;
600625
601626 const uchar * data = TSI_first (tsi );
627+ const uchar * prev_data = data ;
602628 const uchar const * dend = TSI_end (tsi );
629+
603630 DeltaChunk dc ;
604631 DC_init (& dc , 0 , 0 , 0 , NULL );
605632
606633 // OFFSET RUN
607- for (;data < dend ; pofs ++ )
634+ for (;data < dend ; pofs ++ , prev_data = data )
608635 {
636+
637+ pofs -> bofs = num_addbytes ;
638+ pofs -> dofs = (uint )(prev_data - data );
639+
609640 // Data chunks don't need processing
610- * pofs = num_addchunks ;
611641 data = next_delta_info (data , & dc );
612642
613643 if (dc .data ){
614644 continue ;
615645 }
616646
617647 // offset the next chunk by the amount of chunks in the slice
618- // - 1, because we replace our own chunk
619- num_addchunks += DIV_count_slice_chunks_and_bytes (div , dc .so , dc .ts , & num_addbytes ) - 1 ;
620- assert (num_addbytes );
648+ // - N, because we replace our own chunk's bytes
649+ num_addbytes += DIV_count_slice_bytes (div , dc .so , dc .ts ) - (data - prev_data );
621650 }
622651
623- /*
624- // reserve enough memory to hold all the new chunks
625- // reinit pointers, array could have been reallocated
626- TSI_resize(tsis, tsi->tdslen + num_addbytes);
627- dc = DIV_last(tdcv);
628- dcend = DIV_first(tdcv) - 1;
629652
630- // now, that we have our pointers with the old size
631- tdcv->size += num_addchunks;
653+
654+ // reserve enough memory to hold all the new chunks
655+ TSI_resize (tsi , tsi -> tdslen + num_addbytes );
656+ const OffsetInfo const * pofs_start = offset_array - 1 ;
657+ const OffsetInfo * cpofs ;
658+ uchar * ds ; // pointer into the delta stream
659+ const uchar * nds ; // next pointer, used for size retrieving the size
660+ uint num_addchunks = 0 ; // total amount of chunks added
632661
633662 // Insert slices, from the end to the beginning, which allows memcpy
634663 // to be used, with a little help of the offset array
635- for (pofs -= 1; dc > dcend; dc--, pofs-- )
664+ for (cpofs = pofs - 1 ; cpofs > pofs_start ; cpofs -- )
636665 {
666+ ds = (uchar * )(tsi -> cstart + cpofs -> dofs );
667+ nds = next_delta_info (ds , & dc );
668+
637669 // Data chunks don't need processing
638- const uint ofs = *pofs;
639- if (dc->data){
670+ if (dc .data ){
640671 // NOTE: could peek the preceeding chunks to figure out whether they are
641672 // all just moved by ofs. In that case, they can move as a whole!
642673 // tests showed that this is very rare though, even in huge deltas, so its
643674 // not worth the extra effort
644- if (ofs ){
645- memcpy((void*)(dc + ofs ), (void*)dc, sizeof(DeltaInfo) );
675+ if (pofs -> bofs ){
676+ memcpy ((void * )(ds + cpofs -> bofs ), (void * )ds , nds - ds );
646677 }
647678 continue ;
648679 }
649680
650- // Copy Chunks, and move their target offset into place
651- // As we could override dc when slicing, we get the data here
652- const ull relofs = dc->to - dc->so;
653-
654- DeltaInfo* tdc = dc + ofs;
655- DeltaInfo* tdcend = tdc + DIV_copy_slice_to(bdcv, tdc, dc->so, dc->ts);
656- for(;tdc < tdcend; tdc++){
657- tdc->to += relofs;
658- }
681+ // Copy Chunks - target offset is determined by their location and size
682+ // hence it doesn't need specific adjustment
683+ // -1 chunks because we overwrite our own chunk ( by not copying it )
684+ num_addchunks += DIV_copy_slice_to (div , ds + cpofs -> bofs , dc .so , dc .ts );
685+ num_addchunks -= 1 ;
659686 }
660- */
687+
688+ tsi -> num_chunks += num_addchunks ;
689+
661690 PyMem_Free (offset_array );
662691 return 1 ;
663692
@@ -823,7 +852,7 @@ const uchar* next_delta_info(const uchar* data, DeltaChunk* dc)
823852 if (cp_size == 0 ) cp_size = 0x10000 ;
824853
825854 dc -> to += dc -> ts ;
826- dc -> data = 0 ;
855+ dc -> data = NULL ;
827856 dc -> so = cp_off ;
828857 dc -> ts = cp_size ;
829858
0 commit comments