Skip to content

Commit 425f555

Browse files
authored
ITS: factor out CPU/GPU common code & separate Cell class (#15293)
* ITS: add memory stats Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch> * ITS: clear tracklets after cell finding Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch> * ITS: separate into cell and track seed class Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch> * ITS: factor common functions out Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch> --------- Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 271f3d7 commit 425f555

21 files changed

Lines changed: 953 additions & 851 deletions

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ namespace o2::its::gpu
2727
template <int NLayers>
2828
class TimeFrameGPU final : public TimeFrame<NLayers>
2929
{
30-
using typename TimeFrame<NLayers>::CellSeedN;
3130
using typename TimeFrame<NLayers>::IndexTableUtilsN;
3231
using typename TimeFrame<NLayers>::ROFOverlapTableN;
3332
using typename TimeFrame<NLayers>::ROFVertexLookupTableN;
3433
using typename TimeFrame<NLayers>::ROFMaskTableN;
34+
using typename TimeFrame<NLayers>::TrackSeedN;
3535

3636
public:
3737
TimeFrameGPU() = default;
@@ -72,7 +72,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
7272
void loadCellsLUTDevice();
7373
void loadTrackSeedsDevice();
7474
void loadTrackSeedsChi2Device();
75-
void loadTrackSeedsDevice(bounded_vector<CellSeedN>&);
75+
void loadTrackSeedsDevice(bounded_vector<TrackSeedN>&);
7676
void createTrackletsBuffers(const int);
7777
void createTrackletsBuffersArray(const int);
7878
void createCellsBuffers(const int);
@@ -136,8 +136,8 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
136136
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
137137
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
138138
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
139-
CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; }
140-
CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
139+
CellSeed** getDeviceArrayCells() { return mCellsDeviceArray; }
140+
TrackSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
141141
int* getDeviceTrackSeedsLUT() { return mTrackSeedsLUTDevice; }
142142
auto getNTrackSeeds() const { return mNTracks; }
143143
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
@@ -157,7 +157,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
157157
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
158158
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
159159
gsl::span<Tracklet*> getDeviceTracklets() { return mTrackletsDevice; }
160-
gsl::span<CellSeedN*> getDeviceCells() { return mCellsDevice; }
160+
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
161161

162162
// Overridden getters
163163
size_t getNumberOfTracklets() const final;
@@ -203,10 +203,10 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
203203
int** mNeighboursCellDeviceArray{nullptr};
204204
int** mNeighboursCellLUTDeviceArray{nullptr};
205205
int** mTrackletsLUTDeviceArray{nullptr};
206-
std::array<CellSeedN*, NLayers - 2> mCellsDevice;
207-
CellSeedN** mCellsDeviceArray;
206+
std::array<CellSeed*, NLayers - 2> mCellsDevice;
207+
CellSeed** mCellsDeviceArray;
208208
std::array<int*, NLayers - 3> mNeighboursIndexTablesDevice;
209-
CellSeedN* mTrackSeedsDevice{nullptr};
209+
TrackSeedN* mTrackSeedsDevice{nullptr};
210210
int* mTrackSeedsLUTDevice{nullptr};
211211
unsigned int mNTracks{0};
212212
std::array<o2::track::TrackParCovF*, NLayers - 2> mCellSeedsDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
namespace o2::its
2020
{
2121

22-
template <int NLayers = 7>
22+
template <int NLayers>
2323
class TrackerTraitsGPU final : public TrackerTraits<NLayers>
2424
{
2525
using typename TrackerTraits<NLayers>::IndexTableUtilsN;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@
2424

2525
namespace o2::its
2626
{
27-
template <int>
2827
class CellSeed;
28+
template <int>
29+
class TrackSeed;
2930
class TrackingFrameInfo;
3031
class Tracklet;
3132
template <int>
@@ -34,7 +35,7 @@ class Cluster;
3435
class TrackITSExt;
3536
class ExternalAllocator;
3637

37-
template <int NLayers = 7>
38+
template <int NLayers>
3839
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
3940
const typename ROFMaskTable<NLayers>::View& rofMask,
4041
const int layer,
@@ -62,7 +63,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
6263
o2::its::ExternalAllocator* alloc,
6364
gpu::Streams& streams);
6465

65-
template <int NLayers = 7>
66+
template <int NLayers>
6667
void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
6768
const typename ROFMaskTable<NLayers>::View& rofMask,
6869
const int layer,
@@ -101,13 +102,14 @@ void countCellsHandler(const Cluster** sortedClusters,
101102
int** trackletsLUT,
102103
const int nTracklets,
103104
const int layer,
104-
CellSeed<NLayers>* cells,
105+
CellSeed* cells,
105106
int** cellsLUTsDeviceArray,
106107
int* cellsLUTsHost,
107108
const float bz,
108109
const float maxChi2ClusterAttachment,
109110
const float cellDeltaTanLambdaSigma,
110111
const float nSigmaCut,
112+
const std::vector<float>& layerxX0Host,
111113
o2::its::ExternalAllocator* alloc,
112114
gpu::Streams& streams);
113115

@@ -119,17 +121,18 @@ void computeCellsHandler(const Cluster** sortedClusters,
119121
int** trackletsLUT,
120122
const int nTracklets,
121123
const int layer,
122-
CellSeed<NLayers>* cells,
124+
CellSeed* cells,
123125
int** cellsLUTsDeviceArray,
124126
int* cellsLUTsHost,
125127
const float bz,
126128
const float maxChi2ClusterAttachment,
127129
const float cellDeltaTanLambdaSigma,
128130
const float nSigmaCut,
131+
const std::vector<float>& layerxX0Host,
129132
gpu::Streams& streams);
130133

131134
template <int NLayers>
132-
void countCellNeighboursHandler(CellSeed<NLayers>** cellsLayersDevice,
135+
void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
133136
int* neighboursLUTs,
134137
int** cellsLUTs,
135138
gpuPair<int, int>* cellNeighbours,
@@ -145,7 +148,7 @@ void countCellNeighboursHandler(CellSeed<NLayers>** cellsLayersDevice,
145148
gpu::Stream& stream);
146149

147150
template <int NLayers>
148-
void computeCellNeighboursHandler(CellSeed<NLayers>** cellsLayersDevice,
151+
void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
149152
int* neighboursLUTs,
150153
int** cellsLUTs,
151154
gpuPair<int, int>* cellNeighbours,
@@ -165,31 +168,33 @@ int filterCellNeighboursHandler(gpuPair<int, int>*,
165168
gpu::Stream&,
166169
o2::its::ExternalAllocator* = nullptr);
167170

168-
template <int NLayers = 7>
171+
template <int NLayers>
169172
void processNeighboursHandler(const int startLayer,
170173
const int startLevel,
171-
CellSeed<NLayers>** allCellSeeds,
172-
CellSeed<NLayers>* currentCellSeeds,
174+
CellSeed** allCellSeeds,
175+
CellSeed* currentCellSeeds,
173176
std::array<int, NLayers - 2>& nCells,
174177
const unsigned char** usedClusters,
175178
std::array<int*, NLayers - 2>& neighbours,
176179
gsl::span<int*> neighboursDeviceLUTs,
177180
const TrackingFrameInfo** foundTrackingFrameInfo,
178-
bounded_vector<CellSeed<NLayers>>& seedsHost,
181+
bounded_vector<TrackSeed<NLayers>>& seedsHost,
179182
const float bz,
180183
const float MaxChi2ClusterAttachment,
181184
const float maxChi2NDF,
185+
const std::vector<float>& layerxX0Host,
182186
const o2::base::Propagator* propagator,
183187
const o2::base::PropagatorF::MatCorrType matCorrType,
184188
o2::its::ExternalAllocator* alloc);
185189

186-
template <int NLayers = 7>
187-
void countTrackSeedHandler(CellSeed<NLayers>* trackSeeds,
190+
template <int NLayers>
191+
void countTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
188192
const TrackingFrameInfo** foundTrackingFrameInfo,
189193
const Cluster** unsortedClusters,
190194
int* seedLUT,
191195
const std::vector<float>& layerRadiiHost,
192196
const std::vector<float>& minPtsHost,
197+
const std::vector<float>& layerxX0Host,
193198
const unsigned int nSeeds,
194199
const float Bz,
195200
const int startLevel,
@@ -202,14 +207,15 @@ void countTrackSeedHandler(CellSeed<NLayers>* trackSeeds,
202207
const o2::base::PropagatorF::MatCorrType matCorrType,
203208
o2::its::ExternalAllocator* alloc);
204209

205-
template <int NLayers = 7>
206-
void computeTrackSeedHandler(CellSeed<NLayers>* trackSeeds,
210+
template <int NLayers>
211+
void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
207212
const TrackingFrameInfo** foundTrackingFrameInfo,
208213
const Cluster** unsortedClusters,
209214
o2::its::TrackITSExt* tracks,
210215
const int* seedLUT,
211216
const std::vector<float>& layerRadiiHost,
212217
const std::vector<float>& minPtsHost,
218+
const std::vector<float>& layerxX0Host,
213219
const unsigned int nSeeds,
214220
const unsigned int nTracks,
215221
const float Bz,

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -343,27 +343,6 @@ struct TypedAllocator {
343343
ExternalAllocator* mInternalAllocator;
344344
};
345345

346-
template <int nLayers>
347-
GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
348-
const o2::its::IndexTableUtils<nLayers>* utils,
349-
const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
350-
{
351-
const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
352-
const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi;
353-
const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
354-
const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi;
355-
356-
if (zRangeMax < -utils->getLayerZ(layerIndex) ||
357-
zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
358-
return {};
359-
}
360-
361-
return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
362-
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
363-
o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
364-
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
365-
}
366-
367346
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
368347
const int* roframesPV,
369348
const int nROF,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -433,11 +433,11 @@ void TimeFrameGPU<NLayers>::loadCellsDevice()
433433
{
434434
GPUTimer timer(mGpuStreams, "loading cell seeds", NLayers - 2);
435435
for (auto iLayer{0}; iLayer < NLayers - 2; ++iLayer) {
436-
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeedN) / constants::MB);
437-
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeedN), mGpuStreams[iLayer], this->hasFrameworkAllocator());
436+
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / constants::MB);
437+
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeed), mGpuStreams[iLayer], this->hasFrameworkAllocator());
438438
allocMemAsync(reinterpret_cast<void**>(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->hasFrameworkAllocator()); // accessory for the neigh. finding.
439439
GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get()));
440-
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeedN), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
440+
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeed), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
441441
}
442442
}
443443

@@ -465,8 +465,8 @@ void TimeFrameGPU<NLayers>::createCellsBuffersArray(const int iteration)
465465
{
466466
if (!iteration) {
467467
GPUTimer timer("creating cells buffers array");
468-
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeedN*), this->hasFrameworkAllocator());
469-
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeedN*), cudaMemcpyHostToDevice));
468+
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeed*), this->hasFrameworkAllocator());
469+
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice));
470470
}
471471
}
472472

@@ -477,10 +477,10 @@ void TimeFrameGPU<NLayers>::createCellsBuffers(const int layer)
477477
mNCells[layer] = 0;
478478
GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
479479
mGpuStreams[layer].sync(); // ensure number of cells is correct
480-
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeedN) / constants::MB);
481-
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
482-
GPUChkErrS(cudaMemsetAsync(mCellsDevice[layer], 0, mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer].get()));
483-
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
480+
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB);
481+
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
482+
GPUChkErrS(cudaMemsetAsync(mCellsDevice[layer], 0, mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer].get()));
483+
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeed*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
484484
}
485485

486486
template <int NLayers>
@@ -495,12 +495,12 @@ void TimeFrameGPU<NLayers>::loadCellsLUTDevice()
495495
}
496496

497497
template <int NLayers>
498-
void TimeFrameGPU<NLayers>::loadTrackSeedsDevice(bounded_vector<CellSeedN>& seeds)
498+
void TimeFrameGPU<NLayers>::loadTrackSeedsDevice(bounded_vector<TrackSeedN>& seeds)
499499
{
500500
GPUTimer timer("loading track seeds");
501-
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeedN) / constants::MB);
502-
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
503-
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeedN), cudaMemcpyHostToDevice));
501+
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(TrackSeedN) / constants::MB);
502+
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(TrackSeedN), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
503+
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(TrackSeedN), cudaMemcpyHostToDevice));
504504
GPULog("gpu-transfer: creating {} track seeds LUT, for {:.2f} MB.", seeds.size() + 1, (seeds.size() + 1) * sizeof(int) / constants::MB);
505505
allocMem(reinterpret_cast<void**>(&mTrackSeedsLUTDevice), (seeds.size() + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
506506
GPUChkErrS(cudaMemset(mTrackSeedsLUTDevice, 0, (seeds.size() + 1) * sizeof(int)));
@@ -537,9 +537,9 @@ void TimeFrameGPU<NLayers>::downloadCellsDevice()
537537
{
538538
GPUTimer timer(mGpuStreams, "downloading cells", NLayers - 2);
539539
for (int iLayer{0}; iLayer < NLayers - 2; ++iLayer) {
540-
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeedN) / constants::MB);
540+
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB);
541541
this->mCells[iLayer].resize(mNCells[iLayer]);
542-
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeedN), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
542+
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
543543
}
544544
}
545545

0 commit comments

Comments
 (0)