Skip to content

Commit e661eb8

Browse files
committed
GPU TPC: Search hit closest to interpolated point in track rebuilding
1 parent 4cf0e6b commit e661eb8

8 files changed

Lines changed: 149 additions & 26 deletions

File tree

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Fin
139139
AddOptionRTC(cfEdgeTwoPads, uint8_t, 0, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster")
140140
AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger (must be odd to end with inward fit)")
141141
AddOptionRTC(rebuildTrackInFit, uint8_t, 1, "", 0, "Rebuild track completely during fit based on clusters closed to interpolated track positions")
142+
AddOptionRTC(rebuildTrackInFitClusterCandidates, uint8_t, 3, "", 0, "Number of cluster candidates per row for rebuilt track")
142143
AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits")
143144
AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 1, "", 0, "Reject clusters that get the IFC mask error during refit")
144145
AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128")

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
9696
uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
9797
GPUTPCGMMerger& Merger = processors()->tpcMerger;
9898
GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
99-
GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
10099
const int32_t outputStream = OutputStream();
101100
if (GetProcessingSettings().debugLevel >= 2) {
102101
GPUInfo("Running TPC Merger");
@@ -142,31 +141,31 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
142141

143142
// Merge within Sectors
144143
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
145-
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
144+
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.TmpCounter(), NSECTORS * sizeof(*MergerShadow.TmpCounter()));
146145
runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
147146
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeWithinSector, deviceType);
148147
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
149148
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile);
150149

151150
// Merge between sectors - transport to the middle of the sector and rotate vertically to the border on the left / right
152151
runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
153-
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
152+
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow.TmpCounter()));
154153
runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
155154
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeAtMidRow, deviceType);
156155
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
156+
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow.TmpCounter()));
157157
// Merge between sectors - transport to the left / right edge of the sector and rotate horizontally
158-
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
159158
runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
160159
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector, deviceType);
161160
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
161+
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow.TmpCounter()));
162162
// Merge between sectors - use original track param
163-
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
164163
runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
165164
RunTPCTrackingMerger_MergeBorderTracks(GPUTPCGMMerger::mergeModes::mergeBetweenSector | GPUTPCGMMerger::mergeModes::mergeWithOriginalParameters, deviceType);
166165
RunTPCTrackingMerger_Resolve(0, 1, deviceType);
167166
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile);
168167

169-
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter()));
168+
runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow.TmpCounter()));
170169

171170
runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto(0, deviceType));
172171
if (GetProcessingSettings().mergerSanityCheck) {
@@ -208,8 +207,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
208207
if (maxId > Merger.NMaxClusters()) {
209208
throw std::runtime_error("mNMaxClusters too small");
210209
}
211-
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount()));
212-
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment()));
210+
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.SharedCount(), maxId * sizeof(*MergerShadow.SharedCount()));
211+
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.ClusterAttachment(), maxId * sizeof(*MergerShadow.ClusterAttachment()));
213212
runKernel<GPUTPCGMMergerPrepareForFit, 0>(GetGridAuto(0, deviceType));
214213
CondWaitEvent(waitForTransfer, &mEvents->single);
215214
runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
@@ -234,6 +233,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
234233
mOutputQueue.clear();
235234
}
236235

236+
if (param().rec.tpc.rebuildTrackInFit) {
237+
runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.ClusterCandidates(), Merger.NMergedTracks() * GPUTPCGeometry::NROWS * param().rec.tpc.rebuildTrackInFitClusterCandidates * sizeof(*MergerShadow.ClusterCandidates()));
238+
}
237239
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 0);
238240
if (param().rec.tpc.rebuildTrackInFit) {
239241
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 1);
@@ -268,13 +270,13 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
268270
throw std::runtime_error("QA Scratch buffer exceeded");
269271
}
270272
}
271-
GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent);
273+
GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadow.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent);
272274
waitEvent = nullptr;
273275
if (param().dodEdxEnabled) {
274-
GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
276+
GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadow.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
275277
}
276-
GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
277-
GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
278+
GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadow.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
279+
GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadow.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
278280
}
279281
if (GetProcessingSettings().outputSharedClusterMap) {
280282
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputState(), outputStream, nullptr, waitEvent);

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ void* GPUTPCGMMerger::SetPointersMerger(void* mem)
403403
memMax = (void*)std::max((size_t)mem, (size_t)memMax);
404404
mem = memBase;
405405
computePointerWithAlignment(mem, mLoopData, mNMaxTracks); // GPUTPCGMMergerTrackFit - GPUTPCGMMergerFollowLoopers, Reducing mNMaxTracks for mLoopData does not save memory, other parts are larger anyway
406+
computePointerWithAlignment(mem, mClusterCandidates, mNMaxTracks * GPUTPCGeometry::NROWS * Param().rec.tpc.rebuildTrackInFitClusterCandidates);
406407
memMax = (void*)std::max((size_t)mem, (size_t)memMax);
407408
mem = memBase;
408409
computePointerWithAlignment(mem, mLooperCandidates, mNMaxLooperMatches); // MergeLoopers 1-3
@@ -1655,7 +1656,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
16551656
const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()];
16561657
const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i];
16571658
uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0];
1658-
*c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector()};
1659+
*c2 = trackCluster{.id = id, .row = (uint8_t)ic.RowIndex(), .sector = t->Sector(), .error = 0.f};
16591660
}
16601661
nHits += nTrackHits;
16611662
}
@@ -1963,7 +1964,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads,
19631964
for (uint32_t k = 0;k < trk.NClusters();k++) {
19641965
float xx, yy, zz;
19651966
const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num];
1966-
GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTOffset());
1967+
GetConstantMem()->calibObjects.fastTransform->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTOffset());
19671968
float sa2, ca2;
19681969
CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2);
19691970
float cx = ca2 * xx - sa2 * yy;

GPU/GPUTracking/Merger/GPUTPCGMMerger.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,11 @@ class GPUTPCGMMerger : public GPUProcessor
7878
GPUAtomic(uint32_t) nLooperMatchCandidates;
7979
};
8080

81-
struct trackCluster {
81+
struct trackCluster { // TODO: Reduce size of this struct!
8282
uint32_t id;
8383
uint8_t row;
8484
uint8_t sector;
85+
float error;
8586
};
8687

8788
struct tmpSort {
@@ -125,6 +126,7 @@ class GPUTPCGMMerger : public GPUProcessor
125126
GPUhdi() uint32_t NMergedTrackClusters() const { return mMemory->nMergedTrackClusters; }
126127
GPUhdi() const GPUTPCGMMergedTrackHit* Clusters() const { return mClusters; }
127128
GPUhdi() GPUTPCGMMergedTrackHit* Clusters() { return (mClusters); }
129+
GPUhdi() trackCluster* ClusterCandidates() { return (mClusterCandidates); }
128130
GPUhdi() GPUAtomic(uint32_t) * ClusterAttachment() const { return mClusterAttachment; }
129131
GPUhdi() uint32_t* TrackOrderAttach() const { return mTrackOrderAttach; }
130132
GPUhdi() uint32_t* TrackOrderProcess() const { return mTrackOrderProcess; }
@@ -272,6 +274,7 @@ class GPUTPCGMMerger : public GPUProcessor
272274

273275
int32_t mNClusters = 0; // Total number of incoming clusters (from sector tracks)
274276
GPUTPCGMMergedTrack* mMergedTracks = nullptr; //* array of output merged tracks
277+
trackCluster* mClusterCandidates = nullptr;
275278
GPUdEdxInfo* mMergedTracksdEdx = nullptr; //* dEdx information
276279
GPUdEdxInfo* mMergedTracksdEdxAlt = nullptr; //* dEdx alternative information
277280
GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks

0 commit comments

Comments
 (0)