Skip to content

Commit a84a88c

Browse files
committed
GPU: Add tuned parameters for NVIDIA Blackwell
1 parent bc173f4 commit a84a88c

File tree

2 files changed

+119
-115
lines changed

2 files changed

+119
-115
lines changed
Lines changed: 113 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,113 +1,113 @@
1-
Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING
2-
,,,,,,,,,,,
3-
CORE:,,,,,,,,,,,
4-
WARP_SIZE,32,,64,64,32,32,32,32,32,32,32
5-
THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512
6-
,,,,,,,,,,,
7-
LB:,,,,,,,,,,,
8-
GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,384,256
9-
GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]","[256, 2]","[256, 2]"
10-
GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[192, 3]","[192, 3]"
11-
GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,"[640, 1]","[640, 1]"
12-
GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,512,512
13-
GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[128, 4]","[192, 2]"
14-
GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,,
15-
GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,,
16-
GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,,
17-
GPUTRDTrackerKernels_o2Version,512,,,,,,,,,,
18-
GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[64, 2]",128
19-
GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[512, 3]","[512, 2]"
20-
GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]"
21-
GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]"
22-
GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,,
23-
GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,,
24-
GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,,
25-
GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[64, 10]","[64, 8]"
26-
GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE"""
27-
GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE"""
28-
GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[1024, 1]","[1024, 1]"
29-
COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024
30-
GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[64, 4]","[32, 8]"
31-
GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[64, 12]","[128, 4]"
32-
GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 6]","[64, 5]"
33-
GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256
34-
GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256
35-
GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256
36-
GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256
37-
GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256
38-
GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256
39-
GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]"
40-
GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256
41-
GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256
42-
GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]"
43-
GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192
44-
GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256
45-
GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256
46-
GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256
47-
GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[256, 2]","[128, 2]"
48-
GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256
49-
GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256
50-
GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256
51-
GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256
52-
GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,,
53-
GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,,
54-
GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,,
55-
GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,,
56-
GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,,
57-
GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,,
58-
GPUTPCGMO2Output_prepare,256,,,,,,,,,,
59-
GPUTPCGMO2Output_output,256,,,,,,,,,,
60-
GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512
61-
GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]"
62-
GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",
63-
GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,
64-
GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,
65-
GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,
66-
GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,128,
67-
GPUTPCCFNoiseSuppression,512,,512,512,,,,,,448,
68-
GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,384,
69-
GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,448,
70-
GPUTPCNNClusterizerKernels,512,,,,,,,,,,
71-
GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,,
72-
GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,,
73-
GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,
74-
GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,
75-
GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,
76-
GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,
77-
GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,
78-
GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,
79-
GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,
80-
GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,
81-
GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,
82-
GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,
83-
GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,
84-
GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,
85-
GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,
86-
GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,
87-
GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,
88-
GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,
89-
GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,
90-
GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,
91-
GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,
92-
GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,
93-
GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,
94-
GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,
95-
GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256
96-
GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256
97-
GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256
98-
,,,,,,,,,,,
99-
PAR:,,,,,,,,,,,
100-
AMD_EUS_PER_CU,0,0,4,4,,,,,,,
101-
SORT_STARTHITS,1,0,,,,,,,,,
102-
NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4
103-
NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,,
104-
NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,,
105-
TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20
106-
ALTERNATE_BORDER_SORT,0,0,1,1,,,,,,1,1
107-
SORT_BEFORE_FIT,0,0,1,1,,,,,,1,1
108-
NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1
109-
DEDX_STORAGE_TYPE,"""float""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t"""
110-
MERGER_INTERPOLATION_ERROR_TYPE,"""float""","""float""","""half""","""half""",,,,,,"""half""","""half"""
111-
COMP_GATHER_KERNEL,0,0,4,4,,,,,,4,4
112-
COMP_GATHER_MODE,2,0,3,3,,,,,,3,3
113-
CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,,
1+
Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMPERE,TURING,ADA,BLACKWELL
2+
,,,,,,,,,,,,,
3+
CORE:,,,,,,,,,,,,,
4+
WARP_SIZE,32,,64,64,32,32,32,32,32,32,32,32,32
5+
THREAD_COUNT_DEFAULT,256,,256,256,,,,,,512,512,512,512
6+
,,,,,,,,,,,,,
7+
LB:,,,,,,,,,,,,,
8+
GPUTPCCreateTrackingData,256,,"[256, 7]","[192, 2]",,,,,,384,256,256,"[384, 1]"
9+
GPUTPCTrackletConstructor,256,,"[768, 8]","[512, 10]","[256, 2]","[256, 1]","[256, 2]","[1024, 2]","[512, 4]","[256, 2]","[256, 2]","[256, 2]","[384, 9]"
10+
GPUTPCTrackletSelector,256,,"[384, 5]","[192, 10]","[256, 3]","[256, 1]","[256, 3]","[512, 4]","[256, 3]","[192, 3]","[192, 3]","[192, 3]","[960, 8]"
11+
GPUTPCNeighboursFinder,256,,"[192, 8]","[960, 8]",256,256,256,512,256,"[640, 1]","[640, 1]","[640, 1]","[960, 2]"
12+
GPUTPCNeighboursCleaner,256,,"[128, 5]","[384, 9]",256,256,256,256,256,512,512,512,"[96, 3]"
13+
GPUTPCExtrapolationTracking,256,,"[256, 7]","[256, 2]",,,,,,"[128, 4]","[192, 2]","[192, 2]","[928, 6]"
14+
GPUTRDTrackerKernels_gpuVersion,512,,,,,,,,,,,,
15+
GPUTPCCreateOccupancyMap_fill,256,,,,,,,,,,,,
16+
GPUTPCCreateOccupancyMap_fold,256,,,,,,,,,,,,
17+
GPUTRDTrackerKernels_o2Version,512,,,,,,,,,,,,
18+
GPUTPCCompressionKernels_step0attached,256,,"[128, 1]","[64, 2]",,,,,,"[64, 2]",128,128,"[96, 3]"
19+
GPUTPCCompressionKernels_step1unattached,256,,"[512, 2]","[512, 2]",,,,,,"[512, 3]","[512, 2]","[512, 2]","[512, 2]"
20+
GPUTPCDecompressionKernels_step0attached,256,,"[128, 2]","[128, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]","[32, 1]"
21+
GPUTPCDecompressionKernels_step1unattached,256,,"[64, 2]","[64, 2]",,,,,,"[32, 1]","[32, 1]","[32, 1]","[32, 1]"
22+
GPUTPCDecompressionUtilKernels_sortPerSectorRow,256,,,,,,,,,,,,
23+
GPUTPCDecompressionUtilKernels_countFilteredClusters,256,,,,,,,,,,,,
24+
GPUTPCDecompressionUtilKernels_storeFilteredClusters,256,,,,,,,,,,,,
25+
GPUTPCCFDecodeZS,"[128, 4]",,"[64, 4]","[64, 1]",,,,,,"[64, 10]","[64, 8]","[64, 8]","[64, 8]"
26+
GPUTPCCFDecodeZSLink,"""GPUCA_WARP_SIZE""",,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE"""
27+
GPUTPCCFDecodeZSDenseLink,"""GPUCA_WARP_SIZE""",,"[""GPUCA_WARP_SIZE"", 4]","[""GPUCA_WARP_SIZE"", 14]",,,,,,"""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","""GPUCA_WARP_SIZE""","[32, 3]"
28+
GPUTPCCFGather,"[1024, 1]",,"[1024, 5]","[1024, 1]",,,,,,"[1024, 1]","[1024, 1]","[1024, 1]","[1024, 7]"
29+
COMPRESSION_GATHER,1024,,1024,1024,,,,,,1024,1024,1024,1024
30+
GPUTPCGMMergerTrackFit,256,,"[192, 2]","[64, 7]",,,,,,"[64, 4]","[32, 8]","[32, 8]","[256, 1]"
31+
GPUTPCGMMergerFollowLoopers,256,,"[256, 5]","[256, 4]",,,,,,"[64, 12]","[128, 4]","[128, 4]","[128, 1]"
32+
GPUTPCGMMergerSectorRefit,256,,"[64, 4]","[256, 2]",,,,,,"[32, 6]","[64, 5]","[64, 5]","[32, 1]"
33+
GPUTPCGMMergerUnpackResetIds,256,,256,256,,,,,,256,256,256,256
34+
GPUTPCGMMergerUnpackGlobal,256,,256,256,,,,,,256,256,256,256
35+
GPUTPCGMMergerResolve_step0,256,,512,256,,,,,,256,256,256,256
36+
GPUTPCGMMergerResolve_step1,256,,512,256,,,,,,256,256,256,256
37+
GPUTPCGMMergerResolve_step2,256,,512,256,,,,,,256,256,256,256
38+
GPUTPCGMMergerResolve_step3,256,,512,256,,,,,,256,256,256,256
39+
GPUTPCGMMergerResolve_step4,256,,512,256,,,,,,"[256, 4]","[256, 4]","[256, 4]","[256, 4]"
40+
GPUTPCGMMergerClearLinks,256,,256,256,,,,,,256,256,256,256
41+
GPUTPCGMMergerMergeWithinPrepare,256,,256,256,,,,,,256,256,256,256
42+
GPUTPCGMMergerMergeSectorsPrepare,256,,256,256,,,,,,"[256, 2]","[256, 2]","[256, 2]","[256, 2]"
43+
GPUTPCGMMergerMergeBorders_step0,256,,512,256,,,,,,192,192,192,192
44+
GPUTPCGMMergerMergeBorders_step2,256,,512,256,,,,,,"[64, 2]",256,256,"[64, 2]"
45+
GPUTPCGMMergerMergeCE,256,,512,256,,,,,,256,256,256,256
46+
GPUTPCGMMergerLinkExtrapolatedTracks,256,,256,256,,,,,,256,256,256,256
47+
GPUTPCGMMergerCollect,256,,"[768, 1]","[1024, 1]",,,,,,"[256, 2]","[128, 2]","[128, 2]","[256, 1]"
48+
GPUTPCGMMergerSortTracksPrepare,256,,256,256,,,,,,256,256,256,256
49+
GPUTPCGMMergerPrepareForFit_step0,256,,256,256,,,,,,256,256,256,256
50+
GPUTPCGMMergerPrepareForFit_step1,256,,256,256,,,,,,256,256,256,256
51+
GPUTPCGMMergerPrepareForFit_step2,256,,256,256,,,,,,256,256,256,256
52+
GPUTPCGMMergerFinalize_step0,256,,,256,,,,,,,,,
53+
GPUTPCGMMergerFinalize_step1,256,,,256,,,,,,,,,
54+
GPUTPCGMMergerFinalize_step2,256,,,256,,,,,,,,,
55+
GPUTPCGMMergerMergeLoopers_step0,256,,,,,,,,,,,,
56+
GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,,,,
57+
GPUTPCGMMergerMergeLoopers_step2,256,,,,,,,,,,,,
58+
GPUTPCGMO2Output_prepare,256,,,,,,,,,,,,
59+
GPUTPCGMO2Output_output,256,,,,,,,,,,,,
60+
GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512,512,"[96, 10]"
61+
GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]","[512, 1]","[32, 3]"
62+
GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",,,"[64, 2]"
63+
GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,,448
64+
GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,,448
65+
GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,,448
66+
GPUTPCCFPeakFinder,512,,"[512, 9]","[512, 4]",,,,,,128,,,"[128, 6]"
67+
GPUTPCCFNoiseSuppression,512,,512,512,,,,,,448,,,"[448, 6]"
68+
GPUTPCCFDeconvolution,512,,"[512, 5]","[512, 5]",,,,,,384,,,"[384, 5]"
69+
GPUTPCCFClusterizer,512,,"[448, 3]","[512, 2]",,,,,,448,,,"[256, 5]"
70+
GPUTPCNNClusterizerKernels,512,,,,,,,,,,,,
71+
GPUTrackingRefitKernel_mode0asGPU,256,,,,,,,,,,,,
72+
GPUTrackingRefitKernel_mode1asTrackParCov,256,,,,,,,,,,,,
73+
GPUMemClean16,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,
74+
GPUitoa,"[""GPUCA_THREAD_COUNT_DEFAULT"", 1]",,,,,,,,,,,,
75+
GPUTPCCFNoiseSuppression_noiseSuppression,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,"[448, 6]"
76+
GPUTPCCFNoiseSuppression_updatePeaks,"""GPUCA_LB_GPUTPCCFNoiseSuppression""",,,,,,,,,,,,"[448, 1]"
77+
GPUTPCNNClusterizerKernels_runCfClusterizer,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,
78+
GPUTPCNNClusterizerKernels_fillInputNNCPU,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,
79+
GPUTPCNNClusterizerKernels_fillInputNNGPU,1024,,,,,,,,,,,,
80+
GPUTPCNNClusterizerKernels_determineClass1Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,
81+
GPUTPCNNClusterizerKernels_determineClass2Labels,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,
82+
GPUTPCNNClusterizerKernels_publishClass1Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,
83+
GPUTPCNNClusterizerKernels_publishClass2Regression,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,
84+
GPUTPCNNClusterizerKernels_publishDeconvolutionFlags,"""GPUCA_LB_GPUTPCNNClusterizerKernels""",,,,,,,,,,,,
85+
GPUTPCCFStreamCompaction_scanStart,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,"[512, 1]"
86+
GPUTPCCFStreamCompaction_scanUp,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,"[512, 2]"
87+
GPUTPCCFStreamCompaction_scanTop,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,"[512, 3]"
88+
GPUTPCCFStreamCompaction_scanDown,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,"[512, 6]"
89+
GPUTPCCFStreamCompaction_compactDigits,"""GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE""",,,,,,,,,,,,"[512, 9]"
90+
GPUTPCCompressionGatherKernels_unbuffered,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,1024
91+
GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,1024
92+
GPUTPCCompressionGatherKernels_buffered64,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,1024
93+
GPUTPCCompressionGatherKernels_buffered128,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,1024
94+
GPUTPCCompressionGatherKernels_multiBlock,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,,,,,,,,1024
95+
GPUTPCGMMergerFinalize_0,256,,256,,,,,,,256,256,256,256
96+
GPUTPCGMMergerFinalize_1,256,,256,,,,,,,256,256,256,256
97+
GPUTPCGMMergerFinalize_2,256,,256,,,,,,,256,256,256,256
98+
,,,,,,,,,,,,,
99+
PAR:,,,,,,,,,,,,,
100+
AMD_EUS_PER_CU,0,0,4,4,,,,,,,,,
101+
SORT_STARTHITS,1,0,,,,,,,,,,,
102+
NEIGHBOURS_FINDER_MAX_NNEIGHUP,6,0,10,4,,,,,,4,4,4,1
103+
NEIGHBOURS_FINDER_UNROLL_GLOBAL,4,0,4,2,,,,,,,,,1
104+
NEIGHBOURS_FINDER_UNROLL_SHARED,1,0,0,0,,,,,,,,,0
105+
TRACKLET_SELECTOR_HITS_REG_SIZE,12,0,9,27,,,,,,20,20,20,6
106+
ALTERNATE_BORDER_SORT,0,0,1,1,,,,,,1,1,1,1
107+
SORT_BEFORE_FIT,0,0,1,1,,,,,,1,1,1,1
108+
NO_ATOMIC_PRECHECK,0,0,1,1,,,,,,1,1,1,1
109+
DEDX_STORAGE_TYPE,"""float""","""float""","""uint16_t""","""uint16_t""",,,,,,"""uint16_t""","""uint16_t""","""uint16_t""","""uint16_t"""
110+
MERGER_INTERPOLATION_ERROR_TYPE,"""float""","""float""","""half""","""half""",,,,,,"""half""","""half""","""half""","""half"""
111+
COMP_GATHER_KERNEL,0,0,4,4,,,,,,4,4,4,4
112+
COMP_GATHER_MODE,2,0,3,3,,,,,,3,3,3,3
113+
CF_SCAN_WORKGROUP_SIZE,512,0,,,,,,,,,,,

dependencies/FindO2GPU.cmake

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# or submit itself to any jurisdiction.
1111

1212
# NOTE!!!! - Whenever this file is changed, move it over to alidist/resources
13-
# FindO2GPU.cmake Version 14
13+
# FindO2GPU.cmake Version 15
1414

1515
set(CUDA_COMPUTETARGET_DEFAULT_FULL 80-real 86-real 89-real 120-real 75-virtual)
1616
set(HIP_AMDGPUTARGET_DEFAULT_FULL gfx906;gfx908)
@@ -52,7 +52,11 @@ function(detect_gpu_arch backend) # Detect GPU architecture, optionally filterri
5252
set(CUDA_FIRST_TARGET 86)
5353
message(STATUS "CUDA_COMPUTETARGET not set, defaulting CUDA optimization for architecture ${CUDA_FIRST_TARGET}")
5454
endif()
55-
if(CUDA_FIRST_TARGET GREATER_EQUAL 86)
55+
if(CUDA_FIRST_TARGET GREATER_EQUAL 120)
56+
set(CUDA_TARGET BLACKWELL)
57+
elseif(CUDA_FIRST_TARGET GREATER_EQUAL 89)
58+
set(CUDA_TARGET ADA)
59+
elseif(CUDA_FIRST_TARGET GREATER_EQUAL 86)
5660
set(CUDA_TARGET AMPERE)
5761
elseif(CUDA_FIRST_TARGET GREATER_EQUAL 75)
5862
set(CUDA_TARGET TURING)

0 commit comments

Comments
 (0)