Skip to content

Commit d353cb5

Browse files
committed
ITS: separate into cell and track seed class
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 2296451 commit d353cb5

13 files changed

Lines changed: 219 additions & 156 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ namespace o2::its::gpu
2727
template <int NLayers>
2828
class TimeFrameGPU final : public TimeFrame<NLayers>
2929
{
30-
using typename TimeFrame<NLayers>::CellSeedN;
3130
using typename TimeFrame<NLayers>::IndexTableUtilsN;
3231
using typename TimeFrame<NLayers>::ROFOverlapTableN;
3332
using typename TimeFrame<NLayers>::ROFVertexLookupTableN;
3433
using typename TimeFrame<NLayers>::ROFMaskTableN;
34+
using typename TimeFrame<NLayers>::TrackSeedN;
3535

3636
public:
3737
TimeFrameGPU() = default;
@@ -72,7 +72,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
7272
void loadCellsLUTDevice();
7373
void loadTrackSeedsDevice();
7474
void loadTrackSeedsChi2Device();
75-
void loadTrackSeedsDevice(bounded_vector<CellSeedN>&);
75+
void loadTrackSeedsDevice(bounded_vector<TrackSeedN>&);
7676
void createTrackletsBuffers(const int);
7777
void createTrackletsBuffersArray(const int);
7878
void createCellsBuffers(const int);
@@ -136,8 +136,8 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
136136
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
137137
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
138138
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
139-
CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; }
140-
CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
139+
CellSeed** getDeviceArrayCells() { return mCellsDeviceArray; }
140+
TrackSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
141141
int* getDeviceTrackSeedsLUT() { return mTrackSeedsLUTDevice; }
142142
auto getNTrackSeeds() const { return mNTracks; }
143143
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
@@ -157,7 +157,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
157157
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
158158
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
159159
gsl::span<Tracklet*> getDeviceTracklets() { return mTrackletsDevice; }
160-
gsl::span<CellSeedN*> getDeviceCells() { return mCellsDevice; }
160+
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
161161

162162
// Overridden getters
163163
size_t getNumberOfTracklets() const final;
@@ -203,10 +203,10 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
203203
int** mNeighboursCellDeviceArray{nullptr};
204204
int** mNeighboursCellLUTDeviceArray{nullptr};
205205
int** mTrackletsLUTDeviceArray{nullptr};
206-
std::array<CellSeedN*, NLayers - 2> mCellsDevice;
207-
CellSeedN** mCellsDeviceArray;
206+
std::array<CellSeed*, NLayers - 2> mCellsDevice;
207+
CellSeed** mCellsDeviceArray;
208208
std::array<int*, NLayers - 3> mNeighboursIndexTablesDevice;
209-
CellSeedN* mTrackSeedsDevice{nullptr};
209+
TrackSeedN* mTrackSeedsDevice{nullptr};
210210
int* mTrackSeedsLUTDevice{nullptr};
211211
unsigned int mNTracks{0};
212212
std::array<o2::track::TrackParCovF*, NLayers - 2> mCellSeedsDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
namespace o2::its
2020
{
2121

22-
template <int NLayers = 7>
22+
template <int NLayers>
2323
class TrackerTraitsGPU final : public TrackerTraits<NLayers>
2424
{
2525
using typename TrackerTraits<NLayers>::IndexTableUtilsN;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@
2424

2525
namespace o2::its
2626
{
27-
template <int>
2827
class CellSeed;
28+
template <int>
29+
class TrackSeed;
2930
class TrackingFrameInfo;
3031
class Tracklet;
3132
template <int>
@@ -34,7 +35,7 @@ class Cluster;
3435
class TrackITSExt;
3536
class ExternalAllocator;
3637

37-
template <int NLayers = 7>
38+
template <int NLayers>
3839
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
3940
const typename ROFMaskTable<NLayers>::View& rofMask,
4041
const int layer,
@@ -62,7 +63,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
6263
o2::its::ExternalAllocator* alloc,
6364
gpu::Streams& streams);
6465

65-
template <int NLayers = 7>
66+
template <int NLayers>
6667
void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
6768
const typename ROFMaskTable<NLayers>::View& rofMask,
6869
const int layer,
@@ -101,7 +102,7 @@ void countCellsHandler(const Cluster** sortedClusters,
101102
int** trackletsLUT,
102103
const int nTracklets,
103104
const int layer,
104-
CellSeed<NLayers>* cells,
105+
CellSeed* cells,
105106
int** cellsLUTsDeviceArray,
106107
int* cellsLUTsHost,
107108
const float bz,
@@ -119,7 +120,7 @@ void computeCellsHandler(const Cluster** sortedClusters,
119120
int** trackletsLUT,
120121
const int nTracklets,
121122
const int layer,
122-
CellSeed<NLayers>* cells,
123+
CellSeed* cells,
123124
int** cellsLUTsDeviceArray,
124125
int* cellsLUTsHost,
125126
const float bz,
@@ -129,7 +130,7 @@ void computeCellsHandler(const Cluster** sortedClusters,
129130
gpu::Streams& streams);
130131

131132
template <int NLayers>
132-
void countCellNeighboursHandler(CellSeed<NLayers>** cellsLayersDevice,
133+
void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
133134
int* neighboursLUTs,
134135
int** cellsLUTs,
135136
gpuPair<int, int>* cellNeighbours,
@@ -145,7 +146,7 @@ void countCellNeighboursHandler(CellSeed<NLayers>** cellsLayersDevice,
145146
gpu::Stream& stream);
146147

147148
template <int NLayers>
148-
void computeCellNeighboursHandler(CellSeed<NLayers>** cellsLayersDevice,
149+
void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
149150
int* neighboursLUTs,
150151
int** cellsLUTs,
151152
gpuPair<int, int>* cellNeighbours,
@@ -165,26 +166,26 @@ int filterCellNeighboursHandler(gpuPair<int, int>*,
165166
gpu::Stream&,
166167
o2::its::ExternalAllocator* = nullptr);
167168

168-
template <int NLayers = 7>
169+
template <int NLayers>
169170
void processNeighboursHandler(const int startLayer,
170171
const int startLevel,
171-
CellSeed<NLayers>** allCellSeeds,
172-
CellSeed<NLayers>* currentCellSeeds,
172+
CellSeed** allCellSeeds,
173+
CellSeed* currentCellSeeds,
173174
std::array<int, NLayers - 2>& nCells,
174175
const unsigned char** usedClusters,
175176
std::array<int*, NLayers - 2>& neighbours,
176177
gsl::span<int*> neighboursDeviceLUTs,
177178
const TrackingFrameInfo** foundTrackingFrameInfo,
178-
bounded_vector<CellSeed<NLayers>>& seedsHost,
179+
bounded_vector<TrackSeed<NLayers>>& seedsHost,
179180
const float bz,
180181
const float MaxChi2ClusterAttachment,
181182
const float maxChi2NDF,
182183
const o2::base::Propagator* propagator,
183184
const o2::base::PropagatorF::MatCorrType matCorrType,
184185
o2::its::ExternalAllocator* alloc);
185186

186-
template <int NLayers = 7>
187-
void countTrackSeedHandler(CellSeed<NLayers>* trackSeeds,
187+
template <int NLayers>
188+
void countTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
188189
const TrackingFrameInfo** foundTrackingFrameInfo,
189190
const Cluster** unsortedClusters,
190191
int* seedLUT,
@@ -202,8 +203,8 @@ void countTrackSeedHandler(CellSeed<NLayers>* trackSeeds,
202203
const o2::base::PropagatorF::MatCorrType matCorrType,
203204
o2::its::ExternalAllocator* alloc);
204205

205-
template <int NLayers = 7>
206-
void computeTrackSeedHandler(CellSeed<NLayers>* trackSeeds,
206+
template <int NLayers>
207+
void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
207208
const TrackingFrameInfo** foundTrackingFrameInfo,
208209
const Cluster** unsortedClusters,
209210
o2::its::TrackITSExt* tracks,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -433,11 +433,11 @@ void TimeFrameGPU<NLayers>::loadCellsDevice()
433433
{
434434
GPUTimer timer(mGpuStreams, "loading cell seeds", NLayers - 2);
435435
for (auto iLayer{0}; iLayer < NLayers - 2; ++iLayer) {
436-
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeedN) / constants::MB);
437-
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeedN), mGpuStreams[iLayer], this->hasFrameworkAllocator());
436+
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / constants::MB);
437+
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeed), mGpuStreams[iLayer], this->hasFrameworkAllocator());
438438
allocMemAsync(reinterpret_cast<void**>(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->hasFrameworkAllocator()); // accessory for the neigh. finding.
439439
GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get()));
440-
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeedN), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
440+
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeed), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
441441
}
442442
}
443443

@@ -465,8 +465,8 @@ void TimeFrameGPU<NLayers>::createCellsBuffersArray(const int iteration)
465465
{
466466
if (!iteration) {
467467
GPUTimer timer("creating cells buffers array");
468-
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeedN*), this->hasFrameworkAllocator());
469-
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeedN*), cudaMemcpyHostToDevice));
468+
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeed*), this->hasFrameworkAllocator());
469+
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice));
470470
}
471471
}
472472

@@ -477,10 +477,10 @@ void TimeFrameGPU<NLayers>::createCellsBuffers(const int layer)
477477
mNCells[layer] = 0;
478478
GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
479479
mGpuStreams[layer].sync(); // ensure number of cells is correct
480-
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeedN) / constants::MB);
481-
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
482-
GPUChkErrS(cudaMemsetAsync(mCellsDevice[layer], 0, mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer].get()));
483-
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
480+
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB);
481+
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
482+
GPUChkErrS(cudaMemsetAsync(mCellsDevice[layer], 0, mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer].get()));
483+
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeed*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
484484
}
485485

486486
template <int NLayers>
@@ -495,12 +495,12 @@ void TimeFrameGPU<NLayers>::loadCellsLUTDevice()
495495
}
496496

497497
template <int NLayers>
498-
void TimeFrameGPU<NLayers>::loadTrackSeedsDevice(bounded_vector<CellSeedN>& seeds)
498+
void TimeFrameGPU<NLayers>::loadTrackSeedsDevice(bounded_vector<TrackSeedN>& seeds)
499499
{
500500
GPUTimer timer("loading track seeds");
501-
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeedN) / constants::MB);
502-
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
503-
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeedN), cudaMemcpyHostToDevice));
501+
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(TrackSeedN) / constants::MB);
502+
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(TrackSeedN), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
503+
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(TrackSeedN), cudaMemcpyHostToDevice));
504504
GPULog("gpu-transfer: creating {} track seeds LUT, for {:.2f} MB.", seeds.size() + 1, (seeds.size() + 1) * sizeof(int) / constants::MB);
505505
allocMem(reinterpret_cast<void**>(&mTrackSeedsLUTDevice), (seeds.size() + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
506506
GPUChkErrS(cudaMemset(mTrackSeedsLUTDevice, 0, (seeds.size() + 1) * sizeof(int)));
@@ -537,9 +537,9 @@ void TimeFrameGPU<NLayers>::downloadCellsDevice()
537537
{
538538
GPUTimer timer(mGpuStreams, "downloading cells", NLayers - 2);
539539
for (int iLayer{0}; iLayer < NLayers - 2; ++iLayer) {
540-
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeedN) / constants::MB);
540+
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB);
541541
this->mCells[iLayer].resize(mNCells[iLayer]);
542-
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeedN), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
542+
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
543543
}
544544
}
545545

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ void TrackerTraitsGPU<NLayers>::findRoads(const int iteration)
265265
{
266266
for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) {
267267
const int minimumLayer{startLevel - 1};
268-
bounded_vector<CellSeed<NLayers>> trackSeeds(this->getMemoryPool().get());
268+
bounded_vector<TrackSeed<NLayers>> trackSeeds(this->getMemoryPool().get());
269269
for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) {
270270
if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) {
271271
continue;

0 commit comments

Comments
 (0)