@@ -464,16 +464,21 @@ void _idtr_update_halo(DTypeId ddpttype, int64_t ndims, int64_t *ownedOff,
464464 auto ownedRows = ownedShape[0 ];
465465 auto ownedRowEnd = ownedRowStart + ownedRows;
466466 // all remaining dims are treated as one large column
467- auto ownedCols = std::accumulate (&ownedShape[1 ], &ownedShape[ndims], 1 ,
467+ auto ownedTotCols = std::accumulate (&ownedShape[1 ], &ownedShape[ndims], 1 ,
468+ std::multiplies<int64_t >());
469+ auto bbTotCols = std::accumulate (&bbShape[1 ], &bbShape[ndims], 1 ,
468470 std::multiplies<int64_t >());
469471
470472 // find local elements to send to next workers (destination leftHalo)
471473 // and previous workers (destination rightHalo)
472474 std::vector<int > lSendOff (nworkers, 0 ), rSendOff (nworkers, 0 );
473475 std::vector<int > lSendSize (nworkers, 0 ), rSendSize (nworkers, 0 );
474476
475- // use send buffer if owned data is strided
476- bool bufferizeSend = !is_contiguous (ownedShape, ownedStride, ndims);
477+ // use send buffer if owned data is strided or sending a subview
478+ bool bufferizeSend = (!is_contiguous (ownedShape, ownedStride, ndims) ||
479+ bbTotCols != ownedTotCols);
480+
481+ // assert(!bufferizeSend);
477482 std::vector<int64_t > lBufferStart (nworkers * ndims, 0 );
478483 std::vector<int64_t > lBufferSize (nworkers * ndims, 0 );
479484 std::vector<int64_t > rBufferStart (nworkers * ndims, 0 );
@@ -495,9 +500,9 @@ void _idtr_update_halo(DTypeId ddpttype, int64_t ndims, int64_t *ownedOff,
495500 auto globalRowStart = std::max (ownedRowStart, bRowStart);
496501 auto globalRowEnd = std::min (ownedRowEnd, bRowEnd);
497502 auto localRowStart = globalRowStart - ownedRowStart;
498- auto localStart = (int )(localRowStart)*ownedCols ;
503+ auto localStart = (int )(localRowStart)*ownedTotCols ;
499504 auto nRows = globalRowEnd - globalRowStart;
500- auto nSend = (int )(nRows)*ownedCols ;
505+ auto nSend = (int )(nRows)*bbTotCols ;
501506
502507 if (i < myWorkerIndex) {
503508 // target is rightHalo
@@ -506,8 +511,8 @@ void _idtr_update_halo(DTypeId ddpttype, int64_t ndims, int64_t *ownedOff,
506511 rBufferStart[i * ndims] = localRowStart;
507512 rBufferSize[i * ndims] = nRows;
508513 for (auto j = 1 ; j < ndims; ++j) {
509- rBufferStart[i * ndims + j] = ownedOff [j];
510- rBufferSize[i * ndims + j] = ownedShape [j];
514+ rBufferStart[i * ndims + j] = bbOff [j];
515+ rBufferSize[i * ndims + j] = bbShape [j];
511516 }
512517 } else {
513518 rSendOff[i] = localStart;
@@ -521,8 +526,8 @@ void _idtr_update_halo(DTypeId ddpttype, int64_t ndims, int64_t *ownedOff,
521526 lBufferStart[i * ndims] = localRowStart;
522527 lBufferSize[i * ndims] = nRows;
523528 for (auto j = 1 ; j < ndims; ++j) {
524- lBufferStart[i * ndims + j] = ownedOff [j];
525- lBufferSize[i * ndims + j] = ownedShape [j];
529+ lBufferStart[i * ndims + j] = bbOff [j];
530+ lBufferSize[i * ndims + j] = bbShape [j];
526531 }
527532 } else {
528533 lSendOff[i] = localStart;
0 commit comments