@@ -96,7 +96,6 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
9696 uint32_t numBlocks = (!mRec ->IsGPU () || doGPU) ? BlockCount () : 1 ;
9797 GPUTPCGMMerger& Merger = processors ()->tpcMerger ;
9898 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow ()->tpcMerger : Merger;
99- GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow ()->tpcMerger : Merger;
10099 const int32_t outputStream = OutputStream ();
101100 if (GetProcessingSettings ().debugLevel >= 2 ) {
102101 GPUInfo (" Running TPC Merger" );
@@ -142,31 +141,31 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
142141
143142 // Merge within Sectors
144143 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto (0 , deviceType), false );
145- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
144+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
146145 runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto (0 , deviceType));
147146 RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeWithinSector, deviceType);
148147 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
149148 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile );
150149
151150 // Merge between sectors - transport to the middle of the sector and rotate vertically to the border on the left / right
152151 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto (0 , deviceType), false );
153- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
152+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
154153 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 2 , 3 , 0 );
155154 RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeAtMidRow, deviceType);
156155 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
156+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow.TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow.TmpCounter ()));
157157 // Merge between sectors - transport to the left / right edge of the sector and rotate horizontally
158- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll.TmpCounter ()));
159158 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 0 , 1 , 0 );
160159 RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeBetweenSector, deviceType);
161160 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
161+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow.TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow.TmpCounter ()));
162162 // Merge between sectors - use original track param
163- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll.TmpCounter ()));
164163 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk (std::max (2u , numBlocks), 0 , deviceType), 0 , 1 , 1 );
165164 RunTPCTrackingMerger_MergeBorderTracks (GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeWithOriginalParameters, deviceType);
166165 RunTPCTrackingMerger_Resolve (0 , 1 , deviceType);
167166 DoDebugAndDump (RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile );
168167
169- runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadowAll .TmpCounter ()));
168+ runKernel<GPUMemClean16>({{1 , -WarpSize (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter (), 2 * NSECTORS * sizeof (*MergerShadow .TmpCounter ()));
170169
171170 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto (0 , deviceType));
172171 if (GetProcessingSettings ().mergerSanityCheck ) {
@@ -208,8 +207,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
208207 if (maxId > Merger.NMaxClusters ()) {
209208 throw std::runtime_error (" mNMaxClusters too small" );
210209 }
211- runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .SharedCount (), maxId * sizeof (*MergerShadowAll .SharedCount ()));
212- runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadowAll .ClusterAttachment (), maxId * sizeof (*MergerShadowAll .ClusterAttachment ()));
210+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .SharedCount (), maxId * sizeof (*MergerShadow .SharedCount ()));
211+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow .ClusterAttachment (), maxId * sizeof (*MergerShadow .ClusterAttachment ()));
213212 runKernel<GPUTPCGMMergerPrepareForFit, 0 >(GetGridAuto (0 , deviceType));
214213 CondWaitEvent (waitForTransfer, &mEvents ->single );
215214 runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto (0 , deviceType));
@@ -234,6 +233,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
234233 mOutputQueue .clear ();
235234 }
236235
236+ if (param ().rec .tpc .rebuildTrackInFit ) {
237+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount (), 0 , deviceType, RecoStep::TPCMerging}}, MergerShadow.ClusterCandidates (), Merger.NMergedTracks () * GPUTPCGeometry::NROWS * param ().rec .tpc .rebuildTrackInFitClusterCandidates * sizeof (*MergerShadow.ClusterCandidates ()));
238+ }
237239 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid (Merger.NMergedTracks (), 0 ) : GetGridAuto (0 ), mergerSortTracks ? 1 : 0 , 0 );
238240 if (param ().rec .tpc .rebuildTrackInFit ) {
239241 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid (Merger.NMergedTracks (), 0 ) : GetGridAuto (0 ), mergerSortTracks ? 1 : 0 , 1 );
@@ -268,13 +270,13 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
268270 throw std::runtime_error (" QA Scratch buffer exceeded" );
269271 }
270272 }
271- GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracks (), MergerShadowAll .MergedTracks (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracks ()), outputStream, 0 , nullptr , waitEvent);
273+ GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracks (), MergerShadow .MergedTracks (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracks ()), outputStream, 0 , nullptr , waitEvent);
272274 waitEvent = nullptr ;
273275 if (param ().dodEdxEnabled ) {
274- GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracksdEdx (), MergerShadowAll .MergedTracksdEdx (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracksdEdx ()), outputStream, 0 );
276+ GPUMemCpy (RecoStep::TPCMerging, Merger.MergedTracksdEdx (), MergerShadow .MergedTracksdEdx (), Merger.NMergedTracks () * sizeof (*Merger.MergedTracksdEdx ()), outputStream, 0 );
275277 }
276- GPUMemCpy (RecoStep::TPCMerging, Merger.Clusters (), MergerShadowAll .Clusters (), Merger.NMergedTrackClusters () * sizeof (*Merger.Clusters ()), outputStream, 0 );
277- GPUMemCpy (RecoStep::TPCMerging, Merger.ClusterAttachment (), MergerShadowAll .ClusterAttachment (), Merger.NMaxClusters () * sizeof (*Merger.ClusterAttachment ()), outputStream, 0 );
278+ GPUMemCpy (RecoStep::TPCMerging, Merger.Clusters (), MergerShadow .Clusters (), Merger.NMergedTrackClusters () * sizeof (*Merger.Clusters ()), outputStream, 0 );
279+ GPUMemCpy (RecoStep::TPCMerging, Merger.ClusterAttachment (), MergerShadow .ClusterAttachment (), Merger.NMaxClusters () * sizeof (*Merger.ClusterAttachment ()), outputStream, 0 );
278280 }
279281 if (GetProcessingSettings ().outputSharedClusterMap ) {
280282 TransferMemoryResourceLinkToHost (RecoStep::TPCMerging, Merger.MemoryResOutputState (), outputStream, nullptr , waitEvent);
0 commit comments