Skip to content

Commit 4cf0e6b

Browse files
committed
GPU TPC: Add possibility to run last way of TPC fit in separate kernel to rebuild track ion between
1 parent 5d732b6 commit 4cf0e6b

8 files changed

Lines changed: 42 additions & 25 deletions

File tree

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Dif
138138
AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression, relative as fraction of 255")
139139
AddOptionRTC(cfEdgeTwoPads, uint8_t, 0, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster")
140140
AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger (must be odd to end with inward fit)")
141+
AddOptionRTC(rebuildTrackInFit, uint8_t, 1, "", 0, "Rebuild track completely during fit based on clusters closed to interpolated track positions")
141142
AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits")
142143
AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 1, "", 0, "Reject clusters that get the IFC mask error during refit")
143144
AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128")

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,10 @@ bool GPUChainTracking::ValidateSettings()
261261
GPUError("Cannot do error interpolation with NWays < 3!");
262262
return false;
263263
}
264+
if (param().rec.tpc.rebuildTrackInFit && !param().rec.tpc.mergerInterpolateErrors) {
265+
GPUError("Need error interpolation to rebuild tracks during fit");
266+
return false;
267+
}
264268
if (param().continuousMaxTimeBin > (int32_t)GPUSettings::TPC_MAX_TF_TIME_BIN) {
265269
GPUError("configured max time bin exceeds 256 orbits");
266270
return false;

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
234234
mOutputQueue.clear();
235235
}
236236

237-
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0);
237+
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 0);
238+
if (param().rec.tpc.rebuildTrackInFit) {
239+
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 1);
240+
}
238241
runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
239242

240243
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile);

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
using namespace o2::gpu;
2020

2121
template <>
22-
GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode)
22+
GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode, int32_t rebuilt)
2323
{
2424
GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, merger.NMergedTracks(), {
2525
const int32_t i = mode ? merger.TrackOrderProcess()[ii] : ii;
26-
GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger);
26+
GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger, rebuilt);
2727
});
2828
}
2929

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class GPUTPCGMMergerTrackFit : public GPUTPCGMMergerGeneral
3636
{
3737
public:
3838
template <int32_t iKernel = defaultKernel>
39-
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t mode);
39+
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t mode, int32_t rebuilt);
4040
};
4141

4242
class GPUTPCGMMergerFollowLoopers : public GPUTPCGMMergerGeneral

GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
using namespace o2::gpu;
4848
using namespace o2::tpc;
4949

50-
GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, float maxSinPhi, GPUTPCGMMergedTrack& GPUrestrict() track)
50+
GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, float maxSinPhi, GPUTPCGMMergedTrack& GPUrestrict() track, bool rebuilt)
5151
{
5252
static constexpr float kDeg2Rad = M_PI / 180.f;
5353
CADEBUG(static constexpr float kSectAngle = 2 * M_PI / 18.f);
@@ -60,7 +60,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
6060
prop.SetMaterialTPC();
6161
prop.SetPolynomialField(&param.polynomialField);
6262
prop.SetMaxSinPhi(maxSinPhi);
63-
if (param.rec.tpc.mergerInterpolateErrors) {
63+
if (param.rec.tpc.mergerInterpolateErrors && !rebuilt) {
6464
for (int32_t i = 0; i < N; i++) {
6565
interpolation.hit[i].errorY = -1;
6666
}
@@ -70,12 +70,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
7070
const int32_t maxN = N;
7171
int32_t ihitStart = 0;
7272
float covYYUpd = 0.f;
73-
float lastUpdateX = -1.f;
74-
uint8_t lastRow = 255;
75-
uint8_t lastSector = 255;
7673
float deltaZ = 0.f;
7774

78-
for (int32_t iWay = 0; iWay < nWays; iWay++) {
75+
for (int32_t iWay = rebuilt ? nWays - 1 : 0; iWay < nWays; iWay++) { // DR: Unrolling has no performance improvement on GPU, why?
7976
int32_t nMissed = 0, nMissed2 = 0;
8077
float sumInvSqrtCharge = 0.f;
8178
int32_t nAvgCharge = 0;
@@ -95,12 +92,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
9592
prop.SetFitInProjections(true); // param.rec.fitInProjections == -1 ? (iWay == 0) : param.rec.fitInProjections); // TODO: Reenable once fixed
9693
prop.SetPropagateBzOnly(param.rec.fitPropagateBzOnly == -1 ? !finalFit : param.rec.fitPropagateBzOnly);
9794
prop.SetMatLUT((param.rec.useMatLUT && finalFit) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr);
98-
prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha);
95+
prop.SetTrack(this, iWay && !rebuilt ? prop.GetAlpha() : Alpha);
9996
ConstrainSinPhi(iWay == 0 ? 0.95f : constants::MAX_SIN_PHI_LOW);
10097
CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha()));
10198

10299
N = 0;
103-
lastUpdateX = -1;
100+
uint8_t lastRow = 255;
101+
uint8_t lastSector = 255;
102+
float lastUpdateX = -1;
104103
const bool inFlyDirection = iWay & 1;
105104
const int32_t wayDirection = (iWay & 1) ? -1 : 1;
106105

@@ -110,9 +109,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
110109
lastSector = clusters[ihit].sector;
111110
}
112111

113-
if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) {
112+
if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) || (rebuilt && (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagHighIncl))) {
114113
CADEBUG(printf("\tSkipping hit %d, %d hits rejected, flag %X\n", ihit, nMissed, (int32_t)clusters[ihit].state));
115-
if (finalOutInFit && !(clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject)) {
114+
if (rebuilt && (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagHighIncl)) {
115+
NTolerated++;
116+
}
117+
if (finalOutInFit && !(clusters[ihit].state & (GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagHighIncl))) {
116118
clusters[ihit].state |= GPUTPCGMMergedTrackHit::flagRejectErr;
117119
}
118120
continue;
@@ -327,6 +329,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
327329
} else {
328330
deltaZ = 0.f;
329331
}
332+
333+
if (param.rec.tpc.rebuildTrackInFit && iWay == nWays - 2) {
334+
Alpha = prop.GetAlpha();
335+
if (ihitStart != 0) {
336+
MarkClusters(clusters, 0, ihitStart - 1, 1, GPUTPCGMMergedTrackHit::flagHighIncl);
337+
}
338+
return true;
339+
}
330340
}
331341
ConstrainSinPhi();
332342

@@ -894,7 +904,7 @@ GPUd() bool GPUTPCGMTrackParam::CheckNumericalQuality(float overrideCovYY) const
894904
return ok;
895905
}
896906

897-
GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() track, int32_t iTrk, GPUTPCGMMerger* GPUrestrict() merger) // VS: GPUd changed to GPUdii. No change in output and no performance penalty.
907+
GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() track, int32_t iTrk, GPUTPCGMMerger* GPUrestrict() merger, bool rebuilt) // VS: GPUd changed to GPUdii. No change in output and no performance penalty.
898908
{
899909
if (!track.OK()) {
900910
return;
@@ -908,21 +918,21 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict()
908918
int32_t NTolerated = 0; // Clusters not fit but tollerated for track length cut
909919
GPUTPCGMTrackParam t = track.Param();
910920
float Alpha = track.Alpha();
911-
CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt());
912-
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, constants::MAX_SIN_PHI, track);
913-
CADEBUG(printf("Finished Fit Track %d\n", iTrk));
914-
CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, OK %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits)));
921+
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, constants::MAX_SIN_PHI, track, rebuilt);
922+
CADEBUG(if (!merger->Param().rec.tpc.rebuildTrackInFit || rebuilt) printf("Finished Fit Track %7d --- OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, OK %d chi2 %f chi2ndf %f\n", iTrk, track.NClusters(), nTrackHits, NTolerated, nTrackHits + NTolerated, track.GetParam().GetQPt(), t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits)));
915923

916924
if (CAMath::Abs(t.QPt()) < 1.e-4f) {
917-
t.QPt() = 1.e-4f;
925+
t.QPt() = CAMath::Copysign(1.e-4f, t.QPt());
918926
}
919927

920928
CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->MergedTracks()[iTrk].Looper()); });
921929

922930
track.SetOK(ok);
923-
track.SetNClustersFitted(nTrackHits);
924931
track.Param() = t;
925932
track.Alpha() = Alpha;
933+
if (!merger->Param().rec.tpc.rebuildTrackInFit || rebuilt) {
934+
track.SetNClustersFitted(nTrackHits);
935+
}
926936

927937
// if (track.OK()) merger->DebugRefitMergedTrack(track);
928938
}

GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ class GPUTPCGMTrackParam
141141
GPUd() bool CheckNumericalQuality(float overrideCovYY = -1.f) const;
142142
GPUd() bool CheckCov() const;
143143

144-
GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, float maxSinPhi, GPUTPCGMMergedTrack& track);
144+
GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, float maxSinPhi, GPUTPCGMMergedTrack& track, bool rebuilt);
145+
GPUd() static void RefitTrack(GPUTPCGMMergedTrack& track, int32_t iTrk, GPUTPCGMMerger* merger, bool rebuilt);
145146
GPUd() void MoveToReference(GPUTPCGMPropagator& prop, const GPUParam& param, float& alpha);
146147
GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector);
147148
GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2);
@@ -200,8 +201,6 @@ class GPUTPCGMTrackParam
200201
}
201202
}
202203

203-
GPUd() static void RefitTrack(GPUTPCGMMergedTrack& track, int32_t iTrk, GPUTPCGMMerger* merger);
204-
205204
GPUdi() void ConstrainSinPhi(float limit = constants::MAX_SIN_PHI)
206205
{
207206
if (mP[2] > limit) {

GPU/GPUTracking/kernels.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, mergedTracks2" "= TPC
5151
o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter)
5252
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map)
5353
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map uint32_t* output)
54-
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode)
54+
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode int32_t rebuilt)
5555
o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB)
5656
o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector)
5757
o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector)

0 commit comments

Comments
 (0)