Skip to content

Commit cae1da0

Browse files
committed
GPU TPC: Slightly augment dynamic buffer for TPC Decoding
1 parent bda2612 commit cae1da0

5 files changed

Lines changed: 2925 additions & 5 deletions

File tree

Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
{
2+
"GPU_rec_tpc": {
3+
"rejectQPtB5": "20",
4+
"hitPickUpFactor": "1",
5+
"hitSearchArea2": "2",
6+
"neighboursSearchArea": "3",
7+
"clusterError2CorrectionY": "1",
8+
"clusterError2CorrectionZ": "1",
9+
"clusterError2AdditionalY": "0",
10+
"clusterError2AdditionalZ": "0",
11+
"clusterError2AdditionalYSeeding": "0.1",
12+
"clusterError2AdditionalZSeeding": "0.15",
13+
"clusterRejectChi2TolleranceY": "1",
14+
"clusterRejectChi2TolleranceZ": "1",
15+
"clusterErrorOccupancyScaler": "0.000995",
16+
"clusterErrorChargeScaler": "20",
17+
"sysClusErrorNormIFCCE": "1",
18+
"sysClusErrorSlopeIFCCE": "0.2",
19+
"sysClusErrorIFCCEZRegion": "-5",
20+
"sysClusErrorslopeIFCCEZ": "0.5",
21+
"sysClusErrorNormIFC": "0.5",
22+
"sysClusErrorSlopeIFC": "0.2",
23+
"sysClusErrorMinDist": "1.5",
24+
"sysClusErrorMaskError": "5",
25+
"sysClusErrorC12Norm": "5.33333e-06",
26+
"sysClusErrorC12Box": "1.1e-05",
27+
"minNClustersTrackSeed": "-1",
28+
"minNClustersFinalTrack": "-1",
29+
"searchWindowDZDR": "2.5",
30+
"trackReferenceX": "1000",
31+
"zsThreshold": "2",
32+
"tubeProtectSigma2": "16",
33+
"tubeProtectMaxSize2": "4",
34+
"tubeProtectMinSize2": "0.25",
35+
"tubeRemoveSigma2": "1.5625",
36+
"tubeRemoveMaxSize2": "6.25",
37+
"tubeExtraProtectMinOccupancy": "1500",
38+
"clustersShiftTimebins": "0",
39+
"clustersShiftTimebinsClusterizer": "0",
40+
"clustersEdgeFixDistance": "0",
41+
"defaultZOffsetOverR": "0.521095",
42+
"PID_EKrangeMin": "0.47",
43+
"PID_EKrangeMax": "0.57",
44+
"PID_EPrangeMin": "0.93",
45+
"PID_EPrangeMax": "1.03",
46+
"PID_EDrangeMin": "1.88",
47+
"PID_EDrangeMax": "1.98",
48+
"PID_ETrangeMin": "2.84",
49+
"PID_ETrangeMax": "2.94",
50+
"PID_sigma": "0.06",
51+
"minTrackdEdxMax": "20",
52+
"minTrackdEdxMax2Tot": "0.67",
53+
"extraClusterErrorEdgeY2": "0.35",
54+
"extraClusterErrorEdgeZ2": "0.15",
55+
"extraClusterErrorSingleY2": "0.04",
56+
"extraClusterErrorSingleZ2": "0.04",
57+
"extraClusterErrorSplitPadSharedSingleY2": "0.03",
58+
"extraClusterErrorFactorSplitPadSharedSingleY2": "3",
59+
"extraClusterErrorSplitTimeSharedSingleZ2": "0.03",
60+
"extraClusterErrorFactorSplitTimeSharedSingleZ2": "3",
61+
"errorsCECrossing[0]": "0",
62+
"errorsCECrossing[1]": "0",
63+
"errorsCECrossing[2]": "0",
64+
"errorsCECrossing[3]": "0",
65+
"errorsCECrossing[4]": "0",
66+
"extrapolationTrackingYRangeUpper": "0.85",
67+
"extrapolationTrackingYRangeLower": "0.85",
68+
"trackFollowingYFactor": "4",
69+
"trackMergerFactor2YS": "2.25",
70+
"trackMergerFactor2ZT": "2.25",
71+
"trackMergerFactor2K": "4",
72+
"trackMergerFactor2General": "12.25",
73+
"rejectEdgeClustersMargin": "0",
74+
"rejectEdgeClustersSigmaMargin": "0",
75+
"trackletMaxSharedFraction": "0.1",
76+
"trackletMinSharedNormFactor": "0",
77+
"maxTimeBinAboveThresholdIn1000Bin": "500",
78+
"maxConsecTimeBinAboveThreshold": "200",
79+
"noisyPadSaturationThreshold": "700",
80+
"occupancyMapTimeBins": "16",
81+
"occupancyMapTimeBinsAverage": "0",
82+
"trackFitCovLimit": "1000",
83+
"addErrorsCECrossing": "0",
84+
"trackMergerMinPartHits": "10",
85+
"trackMergerMinTotalHits": "20",
86+
"mergerCERowLimit": "5",
87+
"mergerLooperQPtB5Limit": "4",
88+
"mergerLooperSecondHorizontalQPtB5Limit": "2",
89+
"trackFollowingMaxRowGap": "4",
90+
"trackFollowingMaxRowGapSeed": "2",
91+
"trackFitMaxRowMissedHard": "10",
92+
"extrapolationTrackingRowRange": "45",
93+
"extrapolationTrackingMinRows": "10",
94+
"extrapolationTrackingMinHits": "8",
95+
"noisyPadsQuickCheck": "1",
96+
"cfQMaxCutoff": "3",
97+
"cfQTotCutoff": "5",
98+
"cfQMaxCutoffSingleTime": "0",
99+
"cfQMaxCutoffSinglePad": "0",
100+
"cfInnerThreshold": "0",
101+
"cfMinSplitNum": "1",
102+
"cfNoiseSuppressionEpsilon": "10",
103+
"cfNoiseSuppressionEpsilonRelative": "76",
104+
"cfEdgeTwoPads": "0",
105+
"nWays": "3",
106+
"trackFitRejectMode": "5",
107+
"rejectIFCLowRadiusCluster": "1",
108+
"dEdxTruncLow": "2",
109+
"dEdxTruncHigh": "77",
110+
"extrapolationTracking": "1",
111+
"disableRefitAttachment": "0",
112+
"rejectionStrategy": "1",
113+
"mergeLoopersAfterburner": "1",
114+
"compressionTypeMask": "7",
115+
"compressionSortOrder": "0",
116+
"sigBitsCharge": "4",
117+
"sigBitsWidth": "3",
118+
"dropLoopers": "0",
119+
"mergerInterpolateErrors": "1",
120+
"mergerInterpolateRejectAlsoOnCurrentPosition": "1",
121+
"mergerNonInterpolateRejectMinNDF": "5",
122+
"mergeCE": "1",
123+
"retryRefit": "1",
124+
"enablePID": "1",
125+
"PID_useNsigma": "1",
126+
"adddEdxSubThresholdClusters": "1",
127+
"dEdxClusterRejectionFlagMask": "4",
128+
"dEdxClusterRejectionFlagMaskAlt": "4",
129+
"rejectEdgeClustersInSeeding": "0",
130+
"rejectEdgeClustersInTrackFit": "0",
131+
"tubeExtraProtectMinRow": "20",
132+
"tubeExtraProtectEdgePads": "2",
133+
"PID_remap[0]": "0",
134+
"PID_remap[1]": "1",
135+
"PID_remap[2]": "2",
136+
"PID_remap[3]": "3",
137+
"PID_remap[4]": "4",
138+
"PID_remap[5]": "5",
139+
"PID_remap[6]": "6",
140+
"PID_remap[7]": "7",
141+
"PID_remap[8]": "8"
142+
},
143+
"GPU_rec": {
144+
"maxTrackQPtB5": "100",
145+
"fwdTPCDigitsAsClusters": "0",
146+
"bz0Pt10MeV": "60",
147+
"fitInProjections": "-1",
148+
"fitPropagateBzOnly": "-1",
149+
"useMatLUT": "0",
150+
"trackingRefitGPUModel": "1",
151+
"enableCyclicGraphWorkarounds": "0"
152+
},
153+
"GPU_proc_param": {
154+
"tpcErrorParamY[0]": "0.06",
155+
"tpcErrorParamY[1]": "0.24",
156+
"tpcErrorParamY[2]": "0.12",
157+
"tpcErrorParamY[3]": "0.1",
158+
"tpcErrorParamZ[0]": "0.06",
159+
"tpcErrorParamZ[1]": "0.24",
160+
"tpcErrorParamZ[2]": "0.15",
161+
"tpcErrorParamZ[3]": "0.1",
162+
"tpcTriggerHandling": "true"
163+
},
164+
"GPU_proc": {
165+
"deviceNum": "-2",
166+
"gpuDeviceOnly": "false",
167+
"globalInitMutex": "false",
168+
"trdNCandidates": "3",
169+
"trdTrackModelO2": "false",
170+
"debugLevel": "0",
171+
"allocDebugLevel": "0",
172+
"debugMask": "262143",
173+
"debugLogSuffix": "",
174+
"debugFileHexFloat": "-1",
175+
"debugCSV": "",
176+
"debugMarkdown": "false",
177+
"serializeGPU": "0",
178+
"recoTaskTiming": "false",
179+
"deterministicGPUReconstruction": "-1",
180+
"showOutputStat": "false",
181+
"runCompressionStatistics": "false",
182+
"resetTimers": "1",
183+
"deviceTimers": "true",
184+
"keepAllMemory": "false",
185+
"keepDisplayMemory": "false",
186+
"disableMemoryReuse": "false",
187+
"memoryAllocationStrategy": "0",
188+
"forceMemoryPoolSize": "1",
189+
"forceHostMemoryPoolSize": "0",
190+
"memoryScalingFactor": "1",
191+
"memoryScalingFuzz": "0",
192+
"tpcInputWithClusterRejection": "0",
193+
"forceMaxMemScalers": "0",
194+
"registerStandaloneInputMemory": "false",
195+
"nHostThreads": "-1",
196+
"ompThreads": "-1",
197+
"inKernelParallel": "2",
198+
"autoAdjustHostThreads": "true",
199+
"nStreams": "8",
200+
"nTPCClustererLanes": "-1",
201+
"overrideClusterizerFragmentLen": "-1",
202+
"delayedOutput": "true",
203+
"mergerSortTracks": "-1",
204+
"alternateBorderSort": "-1",
205+
"tpcCompressionGatherMode": "-1",
206+
"tpcCompressionGatherModeKernel": "-1",
207+
"tpccfGatherKernel": "true",
208+
"doublePipeline": "false",
209+
"doublePipelineClusterizer": "true",
210+
"prefetchTPCpageScan": "0",
211+
"runMC": "false",
212+
"runQA": "0",
213+
"qcRunFraction": "100",
214+
"outputSharedClusterMap": "false",
215+
"disableTPCNoisyPadFilter": "false",
216+
"createO2Output": "2",
217+
"clearO2OutputFromGPU": "false",
218+
"ignoreNonFatalGPUErrors": "false",
219+
"tpcIncreasedMinClustersPerRow": "0",
220+
"noGPUMemoryRegistration": "false",
221+
"o2PropagatorUseGPUField": "true",
222+
"willProvideO2PropagatorLate": "false",
223+
"calibObjectsExtraMemorySize": "10485760",
224+
"fastTransformObjectsMinMemorySize": "419430400",
225+
"lateO2MatLutProvisioningSize": "0",
226+
"throttleAlarms": "false",
227+
"outputSanityCheck": "false",
228+
"mergerSanityCheck": "false",
229+
"clusterizerZSSanityCheck": "false",
230+
"allSanityChecks": "false",
231+
"tpcSingleSector": "-1",
232+
"tpcDownscaledEdx": "0",
233+
"tpcMaxAttachedClustersPerSectorRow": "51000",
234+
"tpcUseOldCPUDecoding": "false",
235+
"tpcApplyCFCutsAtDecoding": "false",
236+
"tpcApplyClusterFilterOnCPU": "0",
237+
"tpcWriteClustersAfterRejection": "false",
238+
"oclPlatformNum": "-1",
239+
"oclCompileFromSources": "false",
240+
"oclOverrideSourceBuildFlags": "",
241+
"hipOverrideAMDEUSperCU": "-1",
242+
"printSettings": "false",
243+
"tpcFreeAllocatedMemoryAfterProcessing": "false",
244+
"debugOnFailure": "0",
245+
"debugOnFailureSignalMask": "4294967295",
246+
"debugOnFailureErrorMask": "18446744073709551615",
247+
"debugOnFailureNoForwardSignal": "false",
248+
"debugOnFailureMaxN": "1",
249+
"debugOnFailureMaxFiles": "0",
250+
"debugOnFailureMaxSize": "0",
251+
"debugOnFailureDirectory": ".",
252+
"memoryStat": "false"
253+
},
254+
"GPU_global": {
255+
"solenoidBzNominalGPU": "-1e+06",
256+
"constBz": "false",
257+
"setMaxTimeBin": "-2",
258+
"overrideNHbfPerTF": "0",
259+
"overrideTPCTimeBinCur": "0",
260+
"deviceType": "CPU",
261+
"forceDeviceType": "true",
262+
"synchronousProcessing": "false",
263+
"dump": "0",
264+
"dumpFirst": "0",
265+
"dumpLast": "-1",
266+
"dumpFolder": "",
267+
"display": "false",
268+
"rundEdx": "-1",
269+
"dEdxSplineTopologyCorrFile": "",
270+
"dEdxCorrFile": "",
271+
"dEdxPolTopologyCorrFile": "",
272+
"dEdxDisableTopologyPol": "false",
273+
"dEdxDisableThresholdMap": "false",
274+
"dEdxDisableGainMap": "false",
275+
"dEdxDisableResidualGainMap": "false",
276+
"dEdxDisableResidualGain": "false",
277+
"dEdxUseFullGainMap": "false",
278+
"transformationFile": "",
279+
"transformationSCFile": "",
280+
"matLUTFile": "",
281+
"gainCalibFile": "",
282+
"gainCalibDisableCCDB": "false",
283+
"thresholdCalibFile": "",
284+
"allocateOutputOnTheFly": "true",
285+
"outputBufferSize": "200000000",
286+
"mutexMemReg": "false",
287+
"printSettings": "0",
288+
"gpuDisplayfilterMacro": "",
289+
"benchmarkMemoryRegistration": "false",
290+
"registerSelectedSegmentIds": "-1",
291+
"disableCalibUpdates": "false",
292+
"partialOutputForNonFatalErrors": "false",
293+
"checkFirstTfOrbit": "false",
294+
"tpcTriggeredMode": "false",
295+
"zsOnTheFlyDigitsFilter": "false",
296+
"dumpBadTFs": "0",
297+
"dumpBadTFMode": "0"
298+
},
299+
"trackTuneParams": {
300+
"tpcCovInnerType": "Disable",
301+
"tpcCovOuterType": "Disable",
302+
"sourceLevelTPC": "true",
303+
"applyWhenReading": "false",
304+
"useTPCInnerCorr": "false",
305+
"useTPCOuterCorr": "false",
306+
"tpcParInner[0]": "0",
307+
"tpcParInner[1]": "0",
308+
"tpcParInner[2]": "0",
309+
"tpcParInner[3]": "0",
310+
"tpcParInner[4]": "0",
311+
"tpcParOuter[0]": "0",
312+
"tpcParOuter[1]": "0",
313+
"tpcParOuter[2]": "0",
314+
"tpcParOuter[3]": "0",
315+
"tpcParOuter[4]": "0",
316+
"tpcCovInner[0]": "0",
317+
"tpcCovInner[1]": "0",
318+
"tpcCovInner[2]": "0",
319+
"tpcCovInner[3]": "0",
320+
"tpcCovInner[4]": "0",
321+
"tpcCovOuter[0]": "0",
322+
"tpcCovOuter[1]": "0",
323+
"tpcCovOuter[2]": "0",
324+
"tpcCovOuter[3]": "0",
325+
"tpcCovOuter[4]": "0",
326+
"tpcCovInnerSlope[0]": "0",
327+
"tpcCovInnerSlope[1]": "0",
328+
"tpcCovInnerSlope[2]": "0",
329+
"tpcCovInnerSlope[3]": "0",
330+
"tpcCovInnerSlope[4]": "0",
331+
"tpcCovOuterSlope[0]": "0",
332+
"tpcCovOuterSlope[1]": "0",
333+
"tpcCovOuterSlope[2]": "0",
334+
"tpcCovOuterSlope[3]": "0",
335+
"tpcCovOuterSlope[4]": "0"
336+
}
337+
}

GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,10 @@ void GPUTPCDecompression::RegisterMemoryAllocation()
118118
void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io)
119119
{
120120
uint32_t maxAttachedClsMargin1 = *std::max_element(mInputGPU.nSliceRowClusters, mInputGPU.nSliceRowClusters + mInputGPU.nSliceRows);
121-
float clsRatio1 = (mInputGPU.nUnattachedClusters > 0 ? float(mInputGPU.nAttachedClusters) / float(mInputGPU.nUnattachedClusters) : 1.0f) * 1.5f;
121+
float clsRatio1 = (mInputGPU.nUnattachedClusters > 0 ? float(mInputGPU.nAttachedClusters) / float(mInputGPU.nUnattachedClusters) : 1.0f) * mRec->MemoryScalers()->tpcDecodingClusterRatioFactor1;
122122
maxAttachedClsMargin1 *= clsRatio1;
123-
uint32_t maxAttachedClsMargin2 = mInputGPU.nSliceRows > 0 ? (mInputGPU.nAttachedClusters / mInputGPU.nSliceRows * 3.5) : 0; // mean #attached cls per SectorRow multiplied by 3.5 (tuned)
124-
mMaxNativeClustersPerBuffer = std::max({maxAttachedClsMargin1, maxAttachedClsMargin2, 1000u}); // take biggest margin, 1000 clusters minimum
125-
mMaxNativeClustersPerBuffer = std::min(mMaxNativeClustersPerBuffer, mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow); // upperbound given by configurable param
123+
uint32_t maxAttachedClsMargin2 = mInputGPU.nSliceRows > 0 ? (mInputGPU.nAttachedClusters / mInputGPU.nSliceRows * mRec->MemoryScalers()->tpcDecodingClusterRatioFactor2) : 0; // mean #attached cls per SectorRow multiplied by tpcDecodingClusterRatioFactor2 (configurable; default 3.5, tuned)
124+
mMaxNativeClustersPerBuffer = std::max({maxAttachedClsMargin1, maxAttachedClsMargin2, 1000u}); // take biggest margin, 1000 clusters minimum
125+
mMaxNativeClustersPerBuffer = std::min(mMaxNativeClustersPerBuffer, mRec->GetProcessingSettings().tpcMaxAttachedClustersPerSectorRow); // upperbound given by configurable param
126+
mMaxNativeClustersPerBuffer += mRec->MemoryScalers()->tpcDecodingSafetyBuffer;
126127
}

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ AddOption(nnCCDBInteractionRate, std::string, "500", "", 0, "Distinguishes betwe
297297
AddHelp("help", 'h')
298298
EndConfig()
299299

300-
// Settings steering the processing of NN Clusterization
300+
// Scaling factors for GPU buffer size estimation
301301
BeginSubConfig(GPUSettingsProcessingScaling, scaling, configStandalone.proc, "SCALING", 0, "Processing settings for neural network clusterizer", proc_scaling)
302302
AddOption(offset, float, 1000., "", 0, "Scaling Factor: offset")
303303
AddOption(hitOffset, float, 20000, "", 0, "Scaling Factor: hitOffset")
@@ -315,6 +315,9 @@ AddOption(tpcMergedTrackPerSectorTrack, float, 1.0, "", 0, "Scaling Factor: tpcM
315315
AddOption(tpcMergedTrackHitPerSectorHit, float, 1.1, "", 0, "Scaling Factor: tpcMergedTrackHitPerSectorHit")
316316
AddOptionArray(tpcCompressedUnattachedHitsBase1024, int32_t, 3, (900, 900, 500), "", 0, "Scaling Factor: tpcCompressedUnattachedHitsBase1024")
317317
AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conservative defaults for larger buffers during TPC processing")
318+
AddOption(tpcDecodingClusterRatioFactor1, float, 1.5, "", 0, "Scaling Factor: for first margin of dynamic buffer allocation for attached clusters in TPC Decoding")
319+
AddOption(tpcDecodingClusterRatioFactor2, float, 3.5, "", 0, "Scaling Factor: for second margin of dynamic buffer allocation for attached clusters in TPC Decoding")
320+
AddOption(tpcDecodingSafetyBuffer, uint16_t, 1000, "", 0, "Scaling Factor: safety cluster buffer to add to dynamic buffer allocation for attached clusters in TPC Decoding")
318321
AddHelp("help", 'h')
319322
EndConfig()
320323

ctf_read_ntf.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1

0 commit comments

Comments
 (0)