2828
2929using namespace o2 ::gpu;
3030
31- void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks (int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
31+ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks (uint8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType)
3232{
3333 GPUTPCGMMerger& Merger = processors ()->tpcMerger ;
3434 bool doGPU = GetRecoStepsGPU () & RecoStep::TPCMerging;
3535 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow ()->tpcMerger : Merger;
3636 if (GetProcessingSettings ().deterministicGPUReconstruction ) {
37- uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS);
37+ uint32_t nBorderTracks = (mergeMode & GPUTPCGMMerger::mergeModes::mergeWithinSector) ? NSECTORS : (2 * NSECTORS);
3838 runKernel<GPUTPCGlobalDebugSortKernels, GPUTPCGlobalDebugSortKernels::borderTracks>({{nBorderTracks, -WarpSize (), 0 , deviceType}}, 0 );
3939 }
40- uint32_t n = withinSector == - 1 ? NSECTORS / 2 : NSECTORS;
40+ uint32_t n = (mergeMode & GPUTPCGMMerger::mergeModes::mergeAcrossCE) ? NSECTORS / 2 : NSECTORS;
4141 if (GetProcessingSettings ().alternateBorderSort == -1 ? mRec ->getGPUParameters (doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings ().alternateBorderSort ) {
4242 RecordMarker (&mEvents ->single , 0 );
4343 TransferMemoryResourceLinkToHost (RecoStep::TPCMerging, Merger.MemoryResMemory (), 0 , &mEvents ->init );
4444 for (uint32_t i = 0 ; i < n; i++) {
4545 int32_t stream = i % mRec ->NStreams ();
46- runKernel<GPUTPCGMMergerMergeBorders, 0 >({GetGridAuto (stream, deviceType), krnlRunRangeNone, {nullptr , stream && i < (uint32_t )mRec ->NStreams () ? &mEvents->single : nullptr}}, i, withinSector, mergeMode);
46+ runKernel<GPUTPCGMMergerMergeBorders, 0 >({GetGridAuto (stream, deviceType), krnlRunRangeNone, {nullptr , stream && i < (uint32_t )mRec ->NStreams () ? &mEvents->single : nullptr}}, i, mergeMode);
4747 }
4848 ReleaseEvent (mEvents ->single );
4949 SynchronizeEventAndRelease (mEvents ->init );
@@ -52,12 +52,12 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto
5252 int32_t n1, n2;
5353 GPUTPCGMBorderTrack *b1, *b2;
5454 int32_t jSector;
55- Merger.MergeBorderTracksSetup (n1, n2, b1, b2, jSector, i, withinSector, mergeMode);
55+ Merger.MergeBorderTracksSetup (n1, n2, b1, b2, jSector, i, mergeMode);
5656 gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange (i);
5757 gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange (jSector) + *processors ()->tpcTrackers [jSector].NTracks ();
5858 runKernel<GPUTPCGMMergerMergeBorders, 3 >({{1 , -WarpSize (), stream, deviceType}}, range1, n1, 0 );
5959 runKernel<GPUTPCGMMergerMergeBorders, 3 >({{1 , -WarpSize (), stream, deviceType}}, range2, n2, 1 );
60- runKernel<GPUTPCGMMergerMergeBorders, 2 >({GetGridAuto (stream, deviceType)}, i, withinSector, mergeMode);
60+ runKernel<GPUTPCGMMergerMergeBorders, 2 >({GetGridAuto (stream, deviceType)}, i, mergeMode);
6161 }
6262 int32_t ne = std::min<int32_t >(n, mRec ->NStreams ()) - 1; // Stream 0 must wait for all streams, Note n > 1
6363 for (int32_t j = 0 ; j < ne; j++) {
@@ -66,14 +66,14 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto
6666 StreamWaitForEvents (0 , &mEvents ->sector [0 ], ne);
6767 } else {
6868 for (uint32_t i = 0 ; i < n; i++) {
69- runKernel<GPUTPCGMMergerMergeBorders, 0 >(GetGridAuto (0 , deviceType), i, withinSector, mergeMode);
69+ runKernel<GPUTPCGMMergerMergeBorders, 0 >(GetGridAuto (0 , deviceType), i, mergeMode);
7070 }
71- runKernel<GPUTPCGMMergerMergeBorders, 1 >({{2 * n, -WarpSize (), 0 , deviceType}}, 0 , withinSector, mergeMode);
71+ runKernel<GPUTPCGMMergerMergeBorders, 1 >({{2 * n, -WarpSize (), 0 , deviceType}}, 0 , mergeMode);
7272 for (uint32_t i = 0 ; i < n; i++) {
73- runKernel<GPUTPCGMMergerMergeBorders, 2 >(GetGridAuto (0 , deviceType), i, withinSector, mergeMode);
73+ runKernel<GPUTPCGMMergerMergeBorders, 2 >(GetGridAuto (0 , deviceType), i, mergeMode);
7474 }
7575 }
76- DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile , withinSector, mergeMode);
76+ DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile , mergeMode);
7777 mRec ->ReturnVolatileDeviceMemory ();
7878}
7979
@@ -138,25 +138,29 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
138138 }
139139 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingSectorTracks, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile );
140140
141+ // Merge within sectors
141142 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto (0 , deviceType), false );
142143 runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter (), NSECTORS * sizeof (*MergerShadowAll.TmpCounter ()));
143144 runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto (0 , deviceType));
144- RunTPCTrackingMerger_MergeBorderTracks (1 , 0 , deviceType);
145+ RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeWithinSector , deviceType);
145146 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
146147 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile );
147148
149+ // Merge between sectors - transport to the middle of the sector and rotate vertically to the border on the left / right
148150 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto (0 , deviceType), false );
149151 runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll.TmpCounter ()));
150152 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 2 , 3 , 0 );
151- RunTPCTrackingMerger_MergeBorderTracks (0 , 0 , deviceType);
153+ RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeAtMidRow , deviceType);
152154 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
155+ // Merge between sectors - transport to the left / right edge of the sector and rotate horizontally
153156 runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll.TmpCounter ()));
154157 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 0 , 1 , 0 );
155- RunTPCTrackingMerger_MergeBorderTracks (0 , 0 , deviceType);
158+ RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeBetweenSector , deviceType);
156159 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
160+ // Merge between sectors - use original track param
157161 runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll.TmpCounter ()));
158162 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 0 , 1 , 1 );
159- RunTPCTrackingMerger_MergeBorderTracks (0 , - 1 , deviceType);
163+ RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeWithOriginalParameters , deviceType);
160164 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
161165 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile );
162166
@@ -175,8 +179,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
175179
176180 if (param ().rec .tpc .mergeCE ) {
177181 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto (0 , deviceType), true );
178- RunTPCTrackingMerger_MergeBorderTracks (-1 , 1 , deviceType);
179- RunTPCTrackingMerger_MergeBorderTracks (-1 , 2 , deviceType);
182+ // Merge across CE - compare at row 63
183+ RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeAcrossCE, deviceType);
184+ // Merge across CE - compare at row of cluster
185+ RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeAcrossCE | GPUTPCGMMerger::mergeModes::mergeAtCluster, deviceType);
180186 runKernel<GPUTPCGMMergerMergeCE>(GetGridAuto (0 , deviceType));
181187 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCE, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile );
182188 }
0 commit comments