Skip to content

Commit 8693a7e

Browse files
committed
Implemented connect_with, which includes all the slicing functions; these now operate on the delta stream data directly. It's yet to be tested, though, and I am afraid of this
1 parent 9e62c54 commit 8693a7e

1 file changed

Lines changed: 118 additions & 89 deletions

File tree

_delta_apply.c

Lines changed: 118 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,12 @@ bool TSI_copy_stream_from_object(ToplevelStreamInfo* info)
106106
if (!ptmp){
107107
return 0;
108108
}
109+
uint ofs = (uint)(info->cstart - info->tds);
109110
memcpy((void*)ptmp, info->tds, info->tdslen);
111+
110112
info->tds = ptmp;
111-
info->cstart = ptmp;
113+
info->cstart = ptmp + ofs;
114+
112115
Py_DECREF(info->parent_object);
113116
info->parent_object = 0;
114117

@@ -125,8 +128,10 @@ bool TSI_resize(ToplevelStreamInfo* info, uint num_bytes)
125128
if (num_bytes <= info->tdslen){
126129
return 1;
127130
}
131+
uint ofs = (uint)(info->cstart - info->tds);
128132
info->tds = PyMem_Realloc((void*)info->tds, num_bytes);
129-
info->cstart = info->tds;
133+
info->tdslen = num_bytes;
134+
info->cstart = info->tds + ofs;
130135

131136
return info->tds != NULL;
132137
}
@@ -182,19 +187,21 @@ void DC_apply(const DeltaChunk* dc, const uchar* base, PyObject* writer, PyObjec
182187

183188
// Encode the information in the given delta chunk and write the byte-stream
184189
// into the given output stream
190+
// It will be copied into the given bounds, the given size must be the final size
191+
// and work with the given relative offset - hence the bounds are assumed to be
192+
// correct and to fit within the unaltered dc
185193
inline
186-
void DC_encode_to(const DeltaChunk* dc, uchar** pout)
194+
void DC_encode_to(const DeltaChunk* dc, uchar** pout, uint ofs, uint size)
187195
{
188196
uchar* out = *pout;
189197
if (dc->data){
190-
*out++ = (uchar)dc->ts;
191-
memcpy(out, dc->data, dc->ts);
192-
out += dc->ts;
198+
*out++ = (uchar)size;
199+
memcpy(out, dc->data+ofs, size);
200+
out += size;
193201
} else {
194202
uchar i = 0x80;
195203
uchar* op = out++;
196-
uint moff = dc->so;
197-
uint msize = dc->ts;
204+
uint moff = dc->so+ofs;
198205

199206
if (moff & 0x000000ff)
200207
*out++ = moff >> 0, i |= 0x01;
@@ -205,10 +212,10 @@ void DC_encode_to(const DeltaChunk* dc, uchar** pout)
205212
if (moff & 0xff000000)
206213
*out++ = moff >> 24, i |= 0x08;
207214

208-
if (msize & 0x00ff)
209-
*out++ = msize >> 0, i |= 0x10;
210-
if (msize & 0xff00)
211-
*out++ = msize >> 8, i |= 0x20;
215+
if (size & 0x00ff)
216+
*out++ = size >> 0, i |= 0x10;
217+
if (size & 0xff00)
218+
*out++ = size >> 8, i |= 0x20;
212219

213220
*op = i;
214221
}
@@ -224,13 +231,13 @@ ushort DC_count_encode_bytes(const DeltaChunk* dc)
224231
} else {
225232
ushort c = 1; // cmd byte
226233
uint ts = dc->ts;
227-
ull to = dc->to;
234+
ull so = dc->so;
228235

229236
// offset
230-
c += to & 0x000000FF;
231-
c += to & 0x0000FF00;
232-
c += to & 0x00FF0000;
233-
c += to & 0xFF000000;
237+
c += so & 0x000000FF;
238+
c += so & 0x0000FF00;
239+
c += so & 0x00FF0000;
240+
c += so & 0xFF000000;
234241

235242
// size - max size is 0x10000, it's encoded with 0 size bits
236243
c += ts & 0x000000FF;
@@ -485,179 +492,201 @@ const uchar* next_delta_info(const uchar*, DeltaChunk*);
485492

486493
// Return the amount of chunks a slice at the given spot would have, as well as
487494
// its size in bytes it would have if the possibly partial chunks would be encoded
488-
// The bytes will be added
495+
// and added to the spot marked by sdc
489496
inline
490-
uint DIV_count_slice_chunks_and_bytes(const DeltaInfoVector* src, ull ofs, ull size, uint* out_bytes)
497+
uint DIV_count_slice_bytes(const DeltaInfoVector* src, uint ofs, uint size)
491498
{
492-
uint num_dc = 0;
499+
uint num_bytes = 0;
493500
DeltaInfo* cdi = DIV_closest_chunk(src, ofs);
494501

502+
503+
495504
DeltaChunk dc;
496505
DC_init(&dc, 0, 0, 0, NULL);
497506

498507
// partial overlap
499508
if (cdi->to != ofs) {
500509
const ull relofs = ofs - cdi->to;
501510
const uint cdisize = DIV_info_size(src, cdi);
502-
size -= cdisize - relofs < size ? cdisize - relofs : size;
503-
num_dc += 1;
504-
cdi += 1;
511+
const uint actual_size = cdisize - relofs < size ? cdisize - relofs : size;
512+
size -= actual_size;
505513

506514
// get the size in bytes the info would have
507515
next_delta_info(src->dstream + cdi->dso, &dc);
508-
*out_bytes += DC_count_encode_bytes(&dc);
516+
dc.so += relofs;
517+
dc.ts = actual_size;
518+
num_bytes += DC_count_encode_bytes(&dc);
519+
520+
cdi += 1;
509521

510522
if (size == 0){
511-
return num_dc;
523+
return num_bytes;
512524
}
513525
}
514526

515527
const DeltaInfo const* vecend = DIV_end(src);
516-
const DeltaInfo const* veclast = DIV_last(src);
517-
for( ;(cdi < vecend) && size; ++cdi){
518-
num_dc += 1;
519-
520-
const uint cdisize = DIV_info_size2(src, cdi, veclast);
521-
528+
for( ;cdi < vecend; ++cdi){
522529
next_delta_info(src->dstream + cdi->dso, &dc);
523-
*out_bytes += DC_count_encode_bytes(&dc);
524530

525-
if (cdisize < size) {
526-
size -= cdisize;
531+
if (dc.ts < size) {
532+
num_bytes += DC_count_encode_bytes(&dc);
533+
size -= dc.ts;
527534
} else {
535+
dc.ts = size;
536+
num_bytes += DC_count_encode_bytes(&dc);
528537
size = 0;
529538
break;
530539
}
531540
}
532541

533-
*out_bytes += 0;
534-
return num_dc;
542+
assert(size == 0);
543+
return num_bytes;
535544
}
536545

537546
// Write a slice as defined by its absolute offset in bytes and its size into the given
538-
// destination memory. The individual chunks written will be a deep copy of the source
539-
// data chunks
547+
// destination memory. The individual chunks written will be a byte copy of the source
548+
// data chunk stream
540549
// Return: number of chunks in the slice
541550
inline
542-
uint DIV_copy_slice_to(const DeltaInfoVector* src, DeltaInfo* dest, ull ofs, ull size)
551+
uint DIV_copy_slice_to(const DeltaInfoVector* src, uchar* dest, ull tofs, uint size)
543552
{
544-
/*
545-
assert(DIV_lbound(src) <= ofs);
546-
assert((ofs + size) <= DIV_last(src)->to + src->di_last_size);
553+
assert(DIV_lbound(src) <= tofs);
554+
assert((tofs + size) <= DIV_last(src)->to + src->di_last_size);
547555

548-
DeltaInfo* cdc = DIV_closest_chunk(src, ofs);
556+
DeltaChunk dc;
557+
DC_init(&dc, 0, 0, 0, NULL);
558+
559+
DeltaInfo* cdi = DIV_closest_chunk(src, tofs);
549560
uint num_chunks = 0;
550561

551562
// partial overlap
552-
if (cdc->to != ofs) {
553-
const ull relofs = ofs - cdc->to;
554-
DC_offset_copy_to(cdc, dest, relofs, cdc->ts - relofs < size ? cdc->ts - relofs : size);
555-
cdc += 1;
556-
size -= dest->ts;
557-
dest += 1; // must be here, we are reading the size !
563+
if (cdi->to != tofs) {
564+
const uint relofs = tofs - cdi->to;
565+
next_delta_info(src->dstream + cdi->dso, &dc);
566+
const uint cdisize = dc.ts;
567+
const uint actual_size = cdisize - relofs < size ? cdisize - relofs : size;
568+
569+
size -= actual_size;
570+
571+
// adjust dc proportions
572+
573+
DC_encode_to(&dc, &dest, relofs, actual_size);
574+
558575
num_chunks += 1;
576+
cdi += 1;
559577

560578
if (size == 0){
561579
return num_chunks;
562580
}
563581
}
564582

565583
const DeltaInfo* vecend = DIV_end(src);
566-
for( ;(cdc < vecend) && size; ++cdc)
584+
for( ;cdi < vecend; ++cdi)
567585
{
568586
num_chunks += 1;
569-
if (cdc->ts < size) {
570-
DC_copy_to(cdc, dest++);
571-
size -= cdc->ts;
587+
next_delta_info(src->dstream + cdi->dso, &dc);
588+
if (dc.ts < size) {
589+
// Full copy would be possible, but the final length of the dstream
590+
// needs to be used as well to know how many bytes to copy
591+
// TODO: make a DIV_ function for this
592+
DC_encode_to(&dc, &dest, 0, dc.ts);
593+
size -= dc.ts;
572594
} else {
573-
DC_offset_copy_to(cdc, dest, 0, size);
595+
DC_encode_to(&dc, &dest, 0, size);
574596
size = 0;
575597
break;
576598
}
577599
}
578600

579601
assert(size == 0);
580602
return num_chunks;
581-
*/
582-
assert(0); // TODO
583-
return 0;
584603
}
585604

605+
586606
// Take slices of div into the corresponding area of the tsi, which is the topmost
587607
// delta to apply.
588608
bool DIV_connect_with_base(ToplevelStreamInfo* tsi, DeltaInfoVector* div)
589609
{
590610
assert(tsi->num_chunks);
591611

592-
uint *const offset_array = PyMem_Malloc(tsi->num_chunks * sizeof(uint));
612+
typedef struct {
613+
uint bofs; // byte-offset of delta stream
614+
uint dofs; // delta stream offset relative to tsi->cstart
615+
} OffsetInfo;
616+
617+
618+
OffsetInfo *const offset_array = PyMem_Malloc(tsi->num_chunks * sizeof(OffsetInfo));
593619
if (!offset_array){
594620
return 0;
595621
}
596622

597-
uint* pofs = offset_array;
598-
uint num_addchunks = 0;
623+
OffsetInfo* pofs = offset_array;
599624
uint num_addbytes = 0;
600625

601626
const uchar* data = TSI_first(tsi);
627+
const uchar* prev_data = data;
602628
const uchar const* dend = TSI_end(tsi);
629+
603630
DeltaChunk dc;
604631
DC_init(&dc, 0, 0, 0, NULL);
605632

606633
// OFFSET RUN
607-
for (;data < dend; pofs++)
634+
for (;data < dend; pofs++, prev_data = data)
608635
{
636+
637+
pofs->bofs = num_addbytes;
638+
pofs->dofs = (uint)(prev_data - data);
639+
609640
// Data chunks don't need processing
610-
*pofs = num_addchunks;
611641
data = next_delta_info(data, &dc);
612642

613643
if (dc.data){
614644
continue;
615645
}
616646

617647
// offset the next chunk by the amount of chunks in the slice
618-
// - 1, because we replace our own chunk
619-
num_addchunks += DIV_count_slice_chunks_and_bytes(div, dc.so, dc.ts, &num_addbytes) - 1;
620-
assert(num_addbytes);
648+
// - N, because we replace our own chunk's bytes
649+
num_addbytes += DIV_count_slice_bytes(div, dc.so, dc.ts) - (data - prev_data);
621650
}
622651

623-
/*
624-
// reserve enough memory to hold all the new chunks
625-
// reinit pointers, array could have been reallocated
626-
TSI_resize(tsis, tsi->tdslen + num_addbytes);
627-
dc = DIV_last(tdcv);
628-
dcend = DIV_first(tdcv) - 1;
629652

630-
// now, that we have our pointers with the old size
631-
tdcv->size += num_addchunks;
653+
654+
// reserve enough memory to hold all the new chunks
655+
TSI_resize(tsi, tsi->tdslen + num_addbytes);
656+
const OffsetInfo const* pofs_start = offset_array - 1;
657+
const OffsetInfo* cpofs;
658+
uchar* ds; // pointer into the delta stream
659+
const uchar* nds; // next pointer, used for size retrieving the size
660+
uint num_addchunks = 0; // total amount of chunks added
632661

633662
// Insert slices, from the end to the beginning, which allows memcpy
634663
// to be used, with a little help of the offset array
635-
for (pofs -= 1; dc > dcend; dc--, pofs-- )
664+
for (cpofs = pofs - 1; cpofs > pofs_start; cpofs--)
636665
{
666+
ds = (uchar*)(tsi->cstart + cpofs->dofs);
667+
nds = next_delta_info(ds, &dc);
668+
637669
// Data chunks don't need processing
638-
const uint ofs = *pofs;
639-
if (dc->data){
670+
if (dc.data){
640671
// NOTE: could peek the preceding chunks to figure out whether they are
641672
// all just moved by ofs. In that case, they can move as a whole!
642673
// tests showed that this is very rare though, even in huge deltas, so it's
643674
// not worth the extra effort
644-
if (ofs){
645-
memcpy((void*)(dc + ofs), (void*)dc, sizeof(DeltaInfo));
675+
if (pofs->bofs){
676+
memcpy((void*)(ds + cpofs->bofs), (void*)ds, nds - ds);
646677
}
647678
continue;
648679
}
649680

650-
// Copy Chunks, and move their target offset into place
651-
// As we could override dc when slicing, we get the data here
652-
const ull relofs = dc->to - dc->so;
653-
654-
DeltaInfo* tdc = dc + ofs;
655-
DeltaInfo* tdcend = tdc + DIV_copy_slice_to(bdcv, tdc, dc->so, dc->ts);
656-
for(;tdc < tdcend; tdc++){
657-
tdc->to += relofs;
658-
}
681+
// Copy Chunks - target offset is determined by their location and size
682+
// hence it doesn't need specific adjustment
683+
// -1 chunks because we overwrite our own chunk ( by not copying it )
684+
num_addchunks += DIV_copy_slice_to(div, ds + cpofs->bofs, dc.so, dc.ts);
685+
num_addchunks -= 1;
659686
}
660-
*/
687+
688+
tsi->num_chunks += num_addchunks;
689+
661690
PyMem_Free(offset_array);
662691
return 1;
663692

@@ -823,7 +852,7 @@ const uchar* next_delta_info(const uchar* data, DeltaChunk* dc)
823852
if (cp_size == 0) cp_size = 0x10000;
824853

825854
dc->to += dc->ts;
826-
dc->data = 0;
855+
dc->data = NULL;
827856
dc->so = cp_off;
828857
dc->ts = cp_size;
829858

0 commit comments

Comments
 (0)