Skip to content

Commit 6975c75

Browse files
committed
GPU: Add tuned parameters for NVIDIA Blackwell
1 parent 5fd3f95 commit 6975c75

2 files changed

Lines changed: 119 additions & 115 deletions

File tree

Lines changed: 113 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,113 +1,113 @@
1-
Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING,ADA,OPENCL,RDNA,MI210
2-
,,,,,,,,,,,,,,,
3-
CORE:,,,,,,,,,,,,,,,
4-
WARP_SIZE,0,,64,64,32,32,32,32,32,32,32,32,32,32,64
5-
THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512,512,256,512,512
6-
,,,,,,,,,,,,,,,
7-
LB:,,,,,,,,,,,,,,,
8-
GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,384,256,256,,,
9-
GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]","[256, 2]","[256, 2]","[256, 2]",,,
10-
GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[192, 3]","[192, 3]","[192, 3]",,,
11-
GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,"[640, 1]","[640, 1]","[640, 1]",,,
12-
GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,512,512,512,,,
13-
GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[128, 4]","[192, 2]","[192, 2]",,,
14-
GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,,,,,,
15-
GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,,,,,,
16-
GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,,,,,,
17-
GPUTRDTrackerKernels_o2Version,512,,,,,,,,,,,,,,
18-
GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[64, 2]",128,128,,,
19-
GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[512, 3]","[512, 2]","[512, 2]",,,
20-
GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,
21-
GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,
22-
GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,,,,,,
23-
GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,,,,,,
24-
GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,,,,,,
25-
GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[64, 10]","[64, 8]","[64, 8]",,,
26-
GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,
27-
GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,
28-
GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[1024, 1]","[1024, 1]","[1024, 1]",,,
29-
COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024,1024,,,
30-
GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[64, 4]","[32, 8]","[32, 8]",,,
31-
GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[64, 12]","[128, 4]","[128, 4]",,,
32-
GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 6]","[64, 5]","[64, 5]",,,
33-
GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256,256,,,
34-
GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256,256,,,
35-
GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256,256,,,
36-
GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256,256,,,
37-
GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256,256,,,
38-
GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256,256,,,
39-
GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]","[256, 4]",,,
40-
GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256,256,,,
41-
GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256,256,,,
42-
GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]","[256, 2]",,,
43-
GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192,192,,,
44-
GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256,256,,,
45-
GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256,256,,,
46-
GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256,256,,,
47-
GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[256, 2]","[128, 2]","[128, 2]",,,
48-
GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256,256,,,
49-
GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256,256,,,
50-
GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256,256,,,
51-
GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256,256,,,
52-
GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,,,,,,
53-
GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,,,,,,
54-
GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,,,,,,
55-
GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,,,,,,
56-
GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,,,,,,
57-
GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,,,,,,
58-
GPUTPCGMO2Output_prepare,256,,,,,,,,,,,,,,
59-
GPUTPCGMO2Output_output,256,,,,,,,,,,,,,,
60-
GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512,512,,,
61-
GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]","[512, 1]",,,
62-
GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",,,,,
63-
GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,,,,
64-
GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,,,,
65-
GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,,,,
66-
GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,128,,,,,
67-
GPUTPCCFNoiseSuppression,512,,512,512,,,,,,448,,,,,
68-
GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,384,,,,,
69-
GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,448,,,,,
70-
GPUTPCNNClusterizerKernels,512,,,,,,,,,,,,,,
71-
GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,,,,,,
72-
GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,,,,,,
73-
GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,
74-
GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,
75-
GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,
76-
GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,
77-
GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,
78-
GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,
79-
GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,,,,,
80-
GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,
81-
GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,
82-
GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,
83-
GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,
84-
GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,
85-
GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,
86-
GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,
87-
GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,
88-
GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,
89-
GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,
90-
GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,
91-
GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,
92-
GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,
93-
GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,
94-
GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,
95-
GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256,256,,,
96-
GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256,256,,,
97-
GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256,256,,,
98-
,,,,,,,,,,,,,,,
99-
PAR:,,,,,,,,,,,,,,,
100-
AMD_EUS_PER_CU,0,0,4,4,,,,,,,,,,,
101-
SORT_STARTHITS,1,0,,,,,,,,,,,,,
102-
NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4,4,,,
103-
NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,,,,,,
104-
NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,,,,,,
105-
TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20,20,,,
106-
ALTERNATE_BORDER_SORT,0,0,1,1,,,,,,1,1,1,,,
107-
SORT_BEFORE_FIT,0,0,1,1,,,,,,1,1,1,,,
108-
NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1,1,,,
109-
DEDX_STORAGE_TYPE,"""float""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t""","""uint16_t""",,,
110-
MERGER_INTERPOLATION_ERROR_TYPE,"""float""","""float""","""half""","""half""",,,,,,"""half""","""half""","""half""",,,
111-
COMP_GATHER_KERNEL,0,0,4,4,,,,,,4,4,4,,,
112-
COMP_GATHER_MODE,2,0,3,3,,,,,,3,3,3,,,
113-
CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,,,,,,
1+
Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING,ADA,OPENCL,RDNA,MI210,BLACKWELL
2+
,,,,,,,,,,,,,,,,
3+
CORE:,,,,,,,,,,,,,,,,
4+
WARP_SIZE,0,,64,64,32,32,32,32,32,32,32,32,32,32,64,32
5+
THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512,512,256,512,512,512
6+
,,,,,,,,,,,,,,,,
7+
LB:,,,,,,,,,,,,,,,,
8+
GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,384,256,256,,,,384
9+
GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]","[256, 2]","[256, 2]","[256, 2]",,,,768
10+
GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[192, 3]","[192, 3]","[192, 3]",,,,992
11+
GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,"[640, 1]","[640, 1]","[640, 1]",,,,992
12+
GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,512,512,512,,,,672
13+
GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[128, 4]","[192, 2]","[192, 2]",,,,896
14+
GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,,,,,,,
15+
GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,,,,,,,
16+
GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,,,,,,,
17+
GPUTRDTrackerKernels_o2Version,512,,,,,,,,,,,,,,,
18+
GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[64, 2]",128,128,,,,"[96, 3]"
19+
GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[512, 3]","[512, 2]","[512, 2]",,,,"[512, 2]"
20+
GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,,"[32, 1]"
21+
GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]",,,,"[32, 1]"
22+
GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,,,,,,,
23+
GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,,,,,,,
24+
GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,,,,,,,
25+
GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[64, 10]","[64, 8]","[64, 8]",,,,"[64, 10]"
26+
GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,"""GPUCA_WARP_SIZE"""
27+
GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,"[""GPUCA_WARP_SIZE"", 8]"
28+
GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[1024, 1]","[1024, 1]","[1024, 1]",,,,"[1024, 1]"
29+
COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024,1024,,,,
30+
GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[64, 4]","[32, 8]","[32, 8]",,,,"[64, 8]"
31+
GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[64, 12]","[128, 4]","[128, 4]",,,,"[224, 3]"
32+
GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 6]","[64, 5]","[64, 5]",,,,"[32, 10]"
33+
GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256,256,,,,256
34+
GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256,256,,,,256
35+
GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256,256,,,,256
36+
GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256,256,,,,256
37+
GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256,256,,,,256
38+
GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256,256,,,,256
39+
GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]","[256, 4]",,,,"[256, 4]"
40+
GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256,256,,,,256
41+
GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256,256,,,,256
42+
GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]","[256, 2]",,,,"[256, 2]"
43+
GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192,192,,,,192
44+
GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256,256,,,,"[64, 2]"
45+
GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256,256,,,,256
46+
GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256,256,,,,256
47+
GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[256, 2]","[128, 2]","[128, 2]",,,,"[288, 1]"
48+
GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256,256,,,,256
49+
GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256,256,,,,256
50+
GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256,256,,,,256
51+
GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256,256,,,,256
52+
GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,,,,,,,256
53+
GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,,,,,,,256
54+
GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,,,,,,,256
55+
GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,,,,,,,256
56+
GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,,,,,,,256
57+
GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,,,,,,,256
58+
GPUTPCGMO2Output_prepare,256,,,,,,,,,,,,,,,256
59+
GPUTPCGMO2Output_output,256,,,,,,,,,,,,,,,256
60+
GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512,512,,,,608
61+
GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]","[512, 1]",,,,608
62+
GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",,,,,,"[576, 2]"
63+
GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,,,,,448
64+
GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,,,,,448
65+
GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,,,,,448
66+
GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,128,,,,,,"[128, 5]"
67+
GPUTPCCFNoiseSuppression,512,,512,512,,,,,,448,,,,,,
68+
GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,384,,,,,,384
69+
GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,448,,,,,,"[160, 5]"
70+
GPUTPCNNClusterizerKernels,512,,,,,,,,,,,,,,,
71+
GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,,,,,,,256
72+
GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,,,,,,,256
73+
GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,,
74+
GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,,,,
75+
GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,448
76+
GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,,,,448
77+
GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
78+
GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
79+
GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,,,,,,
80+
GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
81+
GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
82+
GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
83+
GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
84+
GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,,,,
85+
GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
86+
GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
87+
GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
88+
GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
89+
GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,,,,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE"""
90+
GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
91+
GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
92+
GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
93+
GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
94+
GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,,,,
95+
GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256,256,,,,256
96+
GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256,256,,,,256
97+
GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256,256,,,,256
98+
,,,,,,,,,,,,,,,,
99+
PAR:,,,,,,,,,,,,,,,,
100+
AMD_EUS_PER_CU,0,0,4,4,,,,,,,,,,,,0
101+
SORT_STARTHITS,1,0,,,,,,,,,,,,,,1
102+
NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4,4,,,,2
103+
NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,,,,,,,2
104+
NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,,,,,,,1
105+
TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20,20,,,,2
106+
ALTERNATE_BORDER_SORT,0,0,1,1,,,,,,1,1,1,,,,1
107+
SORT_BEFORE_FIT,0,0,1,1,,,,,,1,1,1,,,,1
108+
NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1,1,,,,1
109+
DEDX_STORAGE_TYPE,"""float""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t""","""uint16_t""",,,,"""uint16_t"""
110+
MERGER_INTERPOLATION_ERROR_TYPE,"""float""","""float""","""half""","""half""",,,,,,"""half""","""half""","""half""",,,,"""half"""
111+
COMP_GATHER_KERNEL,0,0,4,4,,,,,,4,4,4,,,,4
112+
COMP_GATHER_MODE,2,0,3,3,,,,,,3,3,3,,,,3
113+
CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,,,,,,,

dependencies/FindO2GPU.cmake

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# or submit itself to any jurisdiction.
1111

1212
# NOTE!!!! - Whenever this file is changed, move it over to alidist/resources
13-
# FindO2GPU.cmake Version 15
13+
# FindO2GPU.cmake Version 16
1414

1515
set(CUDA_COMPUTETARGET_DEFAULT_FULL 80-real 86-real 89-real 120-real 75-virtual)
1616
set(HIP_AMDGPUTARGET_DEFAULT_FULL gfx906;gfx908)
@@ -52,7 +52,11 @@ function(detect_gpu_arch backend) # Detect GPU architecture, optionally filterri
5252
set(CUDA_FIRST_TARGET 86)
5353
message(STATUS "CUDA_COMPUTETARGET not set, defaulting CUDA optimization for architecture ${CUDA_FIRST_TARGET}")
5454
endif()
55-
if(CUDA_FIRST_TARGET GREATER_EQUAL 86)
55+
if(CUDA_FIRST_TARGET GREATER_EQUAL 120)
56+
set(CUDA_TARGET BLACKWELL)
57+
elseif(CUDA_FIRST_TARGET GREATER_EQUAL 89)
58+
set(CUDA_TARGET ADA)
59+
elseif(CUDA_FIRST_TARGET GREATER_EQUAL 86)
5660
set(CUDA_TARGET AMPERE)
5761
elseif(CUDA_FIRST_TARGET GREATER_EQUAL 75)
5862
set(CUDA_TARGET TURING)

0 commit comments

Comments
 (0)