@@ -433,11 +433,11 @@ void TimeFrameGPU<NLayers>::loadCellsDevice()
433433{
434434 GPUTimer timer (mGpuStreams , " loading cell seeds" , NLayers - 2 );
435435 for (auto iLayer{0 }; iLayer < NLayers - 2 ; ++iLayer) {
436- GPULog (" gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB." , this ->mCells [iLayer].size (), iLayer, this ->mCells [iLayer].size () * sizeof (CellSeedN ) / constants::MB);
437- allocMemAsync (reinterpret_cast <void **>(&mCellsDevice [iLayer]), this ->mCells [iLayer].size () * sizeof (CellSeedN ), mGpuStreams [iLayer], this ->hasFrameworkAllocator ());
436+ GPULog (" gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB." , this ->mCells [iLayer].size (), iLayer, this ->mCells [iLayer].size () * sizeof (CellSeed ) / constants::MB);
437+ allocMemAsync (reinterpret_cast <void **>(&mCellsDevice [iLayer]), this ->mCells [iLayer].size () * sizeof (CellSeed ), mGpuStreams [iLayer], this ->hasFrameworkAllocator ());
438438 allocMemAsync (reinterpret_cast <void **>(&mNeighboursIndexTablesDevice [iLayer]), (this ->mCells [iLayer].size () + 1 ) * sizeof (int ), mGpuStreams [iLayer], this ->hasFrameworkAllocator ()); // accessory for the neigh. finding.
439439 GPUChkErrS (cudaMemsetAsync (mNeighboursIndexTablesDevice [iLayer], 0 , (this ->mCells [iLayer].size () + 1 ) * sizeof (int ), mGpuStreams [iLayer].get ()));
440- GPUChkErrS (cudaMemcpyAsync (mCellsDevice [iLayer], this ->mCells [iLayer].data (), this ->mCells [iLayer].size () * sizeof (CellSeedN ), cudaMemcpyHostToDevice, mGpuStreams [iLayer].get ()));
440+ GPUChkErrS (cudaMemcpyAsync (mCellsDevice [iLayer], this ->mCells [iLayer].data (), this ->mCells [iLayer].size () * sizeof (CellSeed ), cudaMemcpyHostToDevice, mGpuStreams [iLayer].get ()));
441441 }
442442}
443443
@@ -465,8 +465,8 @@ void TimeFrameGPU<NLayers>::createCellsBuffersArray(const int iteration)
465465{
466466 if (!iteration) {
467467 GPUTimer timer (" creating cells buffers array" );
468- allocMem (reinterpret_cast <void **>(&mCellsDeviceArray ), (NLayers - 2 ) * sizeof (CellSeedN *), this ->hasFrameworkAllocator ());
469- GPUChkErrS (cudaMemcpy (mCellsDeviceArray , mCellsDevice .data (), mCellsDevice .size () * sizeof (CellSeedN *), cudaMemcpyHostToDevice));
468+ allocMem (reinterpret_cast <void **>(&mCellsDeviceArray ), (NLayers - 2 ) * sizeof (CellSeed *), this ->hasFrameworkAllocator ());
469+ GPUChkErrS (cudaMemcpy (mCellsDeviceArray , mCellsDevice .data (), mCellsDevice .size () * sizeof (CellSeed *), cudaMemcpyHostToDevice));
470470 }
471471}
472472
@@ -477,10 +477,10 @@ void TimeFrameGPU<NLayers>::createCellsBuffers(const int layer)
477477 mNCells [layer] = 0 ;
478478 GPUChkErrS (cudaMemcpyAsync (&mNCells [layer], mCellsLUTDevice [layer] + mNTracklets [layer], sizeof (int ), cudaMemcpyDeviceToHost, mGpuStreams [layer].get ()));
479479 mGpuStreams [layer].sync (); // ensure number of cells is correct
480- GPULog (" gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB." , mNCells [layer], layer, mNCells [layer] * sizeof (CellSeedN ) / constants::MB);
481- allocMemAsync (reinterpret_cast <void **>(&mCellsDevice [layer]), mNCells [layer] * sizeof (CellSeedN ), mGpuStreams [layer], this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
482- GPUChkErrS (cudaMemsetAsync (mCellsDevice [layer], 0 , mNCells [layer] * sizeof (CellSeedN ), mGpuStreams [layer].get ()));
483- GPUChkErrS (cudaMemcpyAsync (&mCellsDeviceArray [layer], &mCellsDevice [layer], sizeof (CellSeedN *), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
480+ GPULog (" gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB." , mNCells [layer], layer, mNCells [layer] * sizeof (CellSeed ) / constants::MB);
481+ allocMemAsync (reinterpret_cast <void **>(&mCellsDevice [layer]), mNCells [layer] * sizeof (CellSeed ), mGpuStreams [layer], this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
482+ GPUChkErrS (cudaMemsetAsync (mCellsDevice [layer], 0 , mNCells [layer] * sizeof (CellSeed ), mGpuStreams [layer].get ()));
483+ GPUChkErrS (cudaMemcpyAsync (&mCellsDeviceArray [layer], &mCellsDevice [layer], sizeof (CellSeed *), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
484484}
485485
486486template <int NLayers>
@@ -495,12 +495,12 @@ void TimeFrameGPU<NLayers>::loadCellsLUTDevice()
495495}
496496
497497template <int NLayers>
498- void TimeFrameGPU<NLayers>::loadTrackSeedsDevice(bounded_vector<CellSeedN >& seeds)
498+ void TimeFrameGPU<NLayers>::loadTrackSeedsDevice(bounded_vector<TrackSeedN >& seeds)
499499{
500500 GPUTimer timer (" loading track seeds" );
501- GPULog (" gpu-transfer: loading {} track seeds, for {:.2f} MB." , seeds.size (), seeds.size () * sizeof (CellSeedN ) / constants::MB);
502- allocMem (reinterpret_cast <void **>(&mTrackSeedsDevice ), seeds.size () * sizeof (CellSeedN ), this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
503- GPUChkErrS (cudaMemcpy (mTrackSeedsDevice , seeds.data (), seeds.size () * sizeof (CellSeedN ), cudaMemcpyHostToDevice));
501+ GPULog (" gpu-transfer: loading {} track seeds, for {:.2f} MB." , seeds.size (), seeds.size () * sizeof (TrackSeedN ) / constants::MB);
502+ allocMem (reinterpret_cast <void **>(&mTrackSeedsDevice ), seeds.size () * sizeof (TrackSeedN ), this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
503+ GPUChkErrS (cudaMemcpy (mTrackSeedsDevice , seeds.data (), seeds.size () * sizeof (TrackSeedN ), cudaMemcpyHostToDevice));
504504 GPULog (" gpu-transfer: creating {} track seeds LUT, for {:.2f} MB." , seeds.size () + 1 , (seeds.size () + 1 ) * sizeof (int ) / constants::MB);
505505 allocMem (reinterpret_cast <void **>(&mTrackSeedsLUTDevice ), (seeds.size () + 1 ) * sizeof (int ), this ->hasFrameworkAllocator (), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
506506 GPUChkErrS (cudaMemset (mTrackSeedsLUTDevice , 0 , (seeds.size () + 1 ) * sizeof (int )));
@@ -537,9 +537,9 @@ void TimeFrameGPU<NLayers>::downloadCellsDevice()
537537{
538538 GPUTimer timer (mGpuStreams , " downloading cells" , NLayers - 2 );
539539 for (int iLayer{0 }; iLayer < NLayers - 2 ; ++iLayer) {
540- GPULog (" gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB." , mNCells [iLayer], iLayer, mNCells [iLayer] * sizeof (CellSeedN ) / constants::MB);
540+ GPULog (" gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB." , mNCells [iLayer], iLayer, mNCells [iLayer] * sizeof (CellSeed ) / constants::MB);
541541 this ->mCells [iLayer].resize (mNCells [iLayer]);
542- GPUChkErrS (cudaMemcpyAsync (this ->mCells [iLayer].data (), this ->mCellsDevice [iLayer], mNCells [iLayer] * sizeof (CellSeedN ), cudaMemcpyDeviceToHost, mGpuStreams [iLayer].get ()));
542+ GPUChkErrS (cudaMemcpyAsync (this ->mCells [iLayer].data (), this ->mCellsDevice [iLayer], mNCells [iLayer] * sizeof (CellSeed ), cudaMemcpyDeviceToHost, mGpuStreams [iLayer].get ()));
543543 }
544544}
545545
0 commit comments