Skip to content

Commit fcc46c2

Browse files
committed
GPU: Restructure parameters for TPC track merging, simplifying and working around an NVIDIA compiler bug for Blackwell
1 parent 76626a3 commit fcc46c2

7 files changed

Lines changed: 75 additions & 70 deletions

File tree

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ class GPUChainTracking : public GPUChain
306306
std::pair<uint32_t, uint32_t> TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment);
307307
void TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment);
308308
#endif
309-
void RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType);
309+
void RunTPCTrackingMerger_MergeBorderTracks(uint8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType);
310310
void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType);
311311
void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
312312
bool NeedTPCClustersOnGPU();

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,22 +28,22 @@
2828

2929
using namespace o2::gpu;
3030

31-
void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
31+
void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(uint8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
3232
{
3333
GPUTPCGMMerger& Merger = processors()->tpcMerger;
3434
bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging;
3535
GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
3636
if (GetProcessingSettings().deterministicGPUReconstruction) {
37-
uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS);
37+
uint32_t nBorderTracks = (mergeMode & GPUTPCGMMerger::mergeModes::mergeWithinSector) ? NSECTORS : (2 * NSECTORS);
3838
runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0);
3939
}
40-
uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS;
40+
uint32_t n = (mergeMode & GPUTPCGMMerger::mergeModes::mergeAcrossCE) ? NSECTORS / 2 : NSECTORS;
4141
if (GetProcessingSettings().alternateBorderSort == -1 ? mRec->getGPUParameters(doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings().alternateBorderSort) {
4242
RecordMarker(&mEvents->single, 0);
4343
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init);
4444
for (uint32_t i = 0; i < n; i++) {
4545
int32_t stream = i % mRec->NStreams();
46-
runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode);
46+
runKernel<GPUTPCGMMergerMergeBorders, 0>({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, mergeMode);
4747
}
4848
ReleaseEvent(mEvents->single);
4949
SynchronizeEventAndRelease(mEvents->init);
@@ -52,12 +52,12 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto
5252
int32_t n1, n2;
5353
GPUTPCGMBorderTrack *b1, *b2;
5454
int32_t jSector;
55-
Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode);
55+
Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, mergeMode);
5656
gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i);
5757
gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks();
5858
runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0);
5959
runKernel<GPUTPCGMMergerMergeBorders, 3>({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1);
60-
runKernel<GPUTPCGMMergerMergeBorders, 2>({GetGridAuto(stream, deviceType)}, i, withinSector, mergeMode);
60+
runKernel<GPUTPCGMMergerMergeBorders, 2>({GetGridAuto(stream, deviceType)}, i, mergeMode);
6161
}
6262
int32_t ne = std::min<int32_t>(n, mRec->NStreams()) - 1; // Stream 0 must wait for all streams, Note n > 1
6363
for (int32_t j = 0; j < ne; j++) {
@@ -66,14 +66,14 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto
6666
StreamWaitForEvents(0, &mEvents->sector[0], ne);
6767
} else {
6868
for (uint32_t i = 0; i < n; i++) {
69-
runKernel<GPUTPCGMMergerMergeBorders, 0>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
69+
runKernel<GPUTPCGMMergerMergeBorders, 0>(GetGridAuto(0, deviceType), i, mergeMode);
7070
}
71-
runKernel<GPUTPCGMMergerMergeBorders, 1>({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSector, mergeMode);
71+
runKernel<GPUTPCGMMergerMergeBorders, 1>({{2 * n, -WarpSize(), 0, deviceType}}, 0, mergeMode);
7272
for (uint32_t i = 0; i < n; i++) {
73-
runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, withinSector, mergeMode);
73+
runKernel<GPUTPCGMMergerMergeBorders, 2>(GetGridAuto(0, deviceType), i, mergeMode);
7474
}
7575
}
76-
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode);
76+
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, mergeMode);
7777
mRec->ReturnVolatileDeviceMemory();
7878
}
7979

@@ -138,25 +138,29 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
138138
}
139139
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingSectorTracks, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile);
140140

141+
// Merge within Sectors
141142
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
142143
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
143144
runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
144-
RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
145+
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeWithinSector, deviceType);
145146
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
146147
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile);
147148

149+
// Merge between sectors - transport to the middle of the sector and rotate vertically to the border on the left / right
148150
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
149151
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
150152
runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
151-
RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
153+
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeAtMidRow, deviceType);
152154
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
155+
// Merge between sectors - transport to the left / right edge of the sector and rotate horizontally
153156
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
154157
runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
155-
RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
158+
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector, deviceType);
156159
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
160+
// Merge between sectors - use original track param
157161
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
158162
runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
159-
RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
163+
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeWithOriginalParameters, deviceType);
160164
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
161165
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile);
162166

@@ -175,8 +179,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
175179

176180
if (param().rec.tpc.mergeCE) {
177181
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), true);
178-
RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType);
179-
RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType);
182+
// Merge across CE - compare at row 63
183+
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeAcrossCE, deviceType);
184+
// Merge across CE - compare at row of cluster
185+
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeAcrossCE | GPUTPCGMMerger::mergeModes::mergeAtCluster, deviceType);
180186
runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto(0, deviceType));
181187
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCE, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile);
182188
}

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -780,12 +780,11 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT
780780
}
781781

782782
template <>
783-
GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
783+
GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, uint8_t mergeMode)
784784
{
785785
CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode));
786786
GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
787787
GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
788-
bool sameSector = (iSector1 == iSector2);
789788
for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) {
790789
const GPUTPCGMBorderTrack& b = B1[itr];
791790
float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
@@ -795,16 +794,13 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea
795794
d = 3;
796795
}
797796
CADEBUG(printf(" Input Sector 1 %d Track %d: ", iSector1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
798-
GPUTPCGMBorderRange range;
799-
range.fId = itr;
800-
range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
801-
range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
797+
const GPUTPCGMBorderRange range = {.fId = itr, .fMin = b.Par()[1] + b.ZOffsetLinear() - d, .fMax = b.Par()[1] + b.ZOffsetLinear() + d};
802798
range1[itr] = range;
803-
if (sameSector) {
799+
if (iSector1 == iSector2) {
804800
range2[itr] = range;
805801
}
806802
}
807-
if (!sameSector) {
803+
if (iSector1 != iSector2) {
808804
for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) {
809805
const GPUTPCGMBorderTrack& b = B2[itr];
810806
float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
@@ -814,17 +810,13 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea
814810
d = 3;
815811
}
816812
CADEBUG(printf(" Input Sector 2 %d Track %d: ", iSector2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d));
817-
GPUTPCGMBorderRange range;
818-
range.fId = itr;
819-
range.fMin = b.Par()[1] + b.ZOffsetLinear() - d;
820-
range.fMax = b.Par()[1] + b.ZOffsetLinear() + d;
821-
range2[itr] = range;
813+
range2[itr] = {.fId = itr, .fMin = b.Par()[1] + b.ZOffsetLinear() - d, .fMax = b.Par()[1] + b.ZOffsetLinear() + d};
822814
}
823815
}
824816
}
825817

826818
template <>
827-
GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
819+
GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, uint8_t mergeMode)
828820
{
829821
#if !defined(GPUCA_GPUCODE_COMPILEKERNELS)
830822
GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
@@ -857,7 +849,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThrea
857849
}
858850

859851
template <>
860-
GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
852+
GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, uint8_t mergeMode)
861853
{
862854
// int32_t statAll = 0, statMerged = 0;
863855
float factor2ys = Param().rec.tpc.trackMergerFactor2YS;
@@ -913,9 +905,9 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea
913905
if (b2.NClusters() < lBest2) {
914906
CADEBUG2(continue, printf("!NCl1\n"));
915907
}
916-
if (mergeMode > 0) {
908+
if (mergeMode & mergeModes::mergeAcrossCE) {
917909
// Merging CE tracks
918-
int32_t maxRowDiff = mergeMode == 2 ? 1 : 3; // TODO: check cut
910+
int32_t maxRowDiff = (mergeMode & mergeModes::mergeAtCluster) ? 1 : 3; // TODO: check cut
919911
if (CAMath::Abs(b1.Row() - b2.Row()) > maxRowDiff) {
920912
CADEBUG2(continue, printf("!ROW\n"));
921913
}
@@ -965,22 +957,22 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea
965957
CADEBUG(GPUInfo("Found match %d %d", b1.TrackID(), iBest2));
966958

967959
mTrackLinks[b1.TrackID()] = iBest2;
968-
if (mergeMode > 0) {
960+
if (mergeMode & mergeModes::mergeAcrossCE) {
969961
GPUCA_DETERMINISTIC_CODE(CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()), mTrackLinks[iBest2] = b1.TrackID());
970962
}
971963
}
972964
// GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged);
973965
}
974966

975-
GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) const
967+
GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, uint8_t mergeMode) const
976968
{
977-
if (withinSector == 1) { // Merge tracks within the same sector
969+
if (mergeMode & mergeModes::mergeWithinSector) { // Merge tracks within the same sector
978970
jSector = iSector;
979971
n1 = n2 = mMemory->tmpCounter[iSector];
980972
b1 = b2 = mBorder[iSector];
981-
} else if (withinSector == -1) { // Merge tracks across the central electrode
973+
} else if (mergeMode & mergeModes::mergeAcrossCE) { // Merge tracks across the central electrode
982974
jSector = (iSector + NSECTORS / 2);
983-
const int32_t offset = mergeMode == 2 ? NSECTORS : 0;
975+
const int32_t offset = (mergeMode & mergeModes::mergeAtCluster) ? NSECTORS : 0;
984976
n1 = mMemory->tmpCounter[iSector + offset];
985977
n2 = mMemory->tmpCounter[jSector + offset];
986978
b1 = mBorder[iSector + offset];
@@ -995,19 +987,19 @@ GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, G
995987
}
996988

997989
template <int32_t I>
998-
GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode)
990+
GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, uint8_t mergeMode)
999991
{
1000992
int32_t n1, n2;
1001993
GPUTPCGMBorderTrack *b1, *b2;
1002994
int32_t jSector;
1003-
MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, withinSector, mergeMode);
995+
MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, mergeMode);
1004996
MergeBorderTracks<I>(nBlocks, nThreads, iBlock, iThread, iSector, b1, n1, jSector, b2, n2, mergeMode);
1005997
}
1006998

1007999
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
1008-
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
1009-
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
1010-
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode);
1000+
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, uint8_t mergeMode);
1001+
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, uint8_t mergeMode);
1002+
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, uint8_t mergeMode);
10111003
#endif
10121004

10131005
GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
@@ -1350,7 +1342,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const
13501342
int32_t sector = track->Sector();
13511343
for (int32_t attempt = 0; attempt < 2; attempt++) {
13521344
GPUTPCGMBorderTrack b;
1353-
const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row);
1345+
const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row); // TODO: Fix 63
13541346
if (track->TransportToX(this, x0, Param().bzCLight, b, constants::MAX_SIN_PHI_LOW)) {
13551347
b.SetTrackID(itr);
13561348
b.SetNClusters(mMergedTracks[itr].NClusters());

0 commit comments

Comments
 (0)