@@ -63,8 +63,9 @@ struct ClusterNative {
// Packing scale factors and limits for the cluster fields.
// In this diff view, lines marked '-' are the removed definitions and lines marked '+' are their replacements.
6363 static constexpr int scalePadPacked = 64 ; // < ~60 is needed for 0.1mm precision, but power of two avoids rounding
6464 static constexpr int scaleSigmaTimePacked = 32 ; // 1/32nd of pad/timebin precision for cluster size
6565 static constexpr int scaleSigmaPadPacked = 32 ;
66- static constexpr int scaleSaturatedQTot = 4 ;
67- static constexpr int maxSaturatedQTot = USHRT_MAX * scaleSaturatedQTot;
// Divisor applied by setSaturatedQtot() and multiplier applied by getSaturatedQtot() to a saturated charge.
66+ static constexpr int scaleSaturatedQtot = 8 ;
// Threshold used by isSaturated(): stored qTot values strictly above this (25600) are treated as saturated.
// It is also the value getQtot() reports for a saturated cluster.
67+ static constexpr int maxRegularQtot = 25 * 1024 ;
// Upper clamp for the charge passed to setSaturatedQtot(). At this value the encoded result,
// maxSaturatedQtot / scaleSaturatedQtot + maxRegularQtot, equals USHRT_MAX.
// NOTE(review): this presumes qTot is a 16-bit unsigned field; its declaration is not visible here - confirm.
68+ static constexpr int maxSaturatedQtot = (USHRT_MAX - maxRegularQtot) * scaleSaturatedQtot;
6869
6970 uint32_t timeFlagsPacked; // < Contains the time in the lower 24 bits in a packed format, contains the flags in the
7071 // upper 8 bits
@@ -89,9 +90,7 @@ struct ClusterNative {
// Returns the cluster charge as a 16-bit value.
// Non-saturated clusters return the stored qTot unchanged.
// Saturated clusters (isSaturated() is true): the removed lines returned the decoded saturated charge
// clamped to USHRT_MAX; the added line returns the fixed value maxRegularQtot instead.
8990 GPUd () uint16_t getQtot () const
9091 {
9192 if (isSaturated ()) [[unlikely]] {
92- // Check for overflow, so return type can stay uint16
93- auto sqtot = getSaturatedQtot ();
94- return sqtot <= USHRT_MAX ? sqtot : USHRT_MAX;
// The stored qTot holds the offset/scaled saturated encoding here, so it is not returned directly.
// maxRegularQtot (25 * 1024) fits into the uint16_t return type.
93+ return maxRegularQtot;
9594 }
9695 return qTot;
9796 }
@@ -155,19 +154,19 @@ struct ClusterNative {
155154 sigmaPadPacked = tmp;
156155 }
157156
// Saturation test for the cluster.
// Removed version: saturated when qMax >= 1023.
// Added version: saturated when the stored qTot is strictly greater than maxRegularQtot.
// NOTE(review): because the comparison is strict, qTot == maxRegularQtot counts as regular, while
// setSaturatedQtot() stores exactly maxRegularQtot when its scaled input rounds to 0 - confirm this is intended.
158- GPUd () bool isSaturated () const { return qMax >= 1023 ; }
157+ GPUd () bool isSaturated () const { return qTot > maxRegularQtot ; }
159158
// Stores the charge of a saturated cluster in qTot using a reduced-precision encoding.
// qtot: the charge to store; values above the maximum are clamped to it.
// The charge is divided by the scale factor with rounding to nearest (half the scale is added before
// the integer division). In the added version the result is additionally offset by maxRegularQtot.
160159 GPUd () void setSaturatedQtot (uint32_t qtot)
161160 {
162- if (qtot > maxSaturatedQTot ) {
163- qtot = maxSaturatedQTot ;
161+ if (qtot > maxSaturatedQtot ) {
162+ qtot = maxSaturatedQtot ;
164163 }
165- this ->qTot = (qtot + scaleSaturatedQTot / 2 ) / scaleSaturatedQTot ;
// At the clamp limit the sum below equals USHRT_MAX.
// NOTE(review): for qtot < scaleSaturatedQtot / 2 (i.e. qtot < 4) the scaled term is 0, so qTot becomes
// exactly maxRegularQtot, which isSaturated() (strict '>') does not report as saturated - confirm.
164+ this ->qTot = (( qtot + scaleSaturatedQtot / 2 ) / scaleSaturatedQtot) + maxRegularQtot ;
166165 }
167166
// Decodes the charge written by setSaturatedQtot().
// Removed version: qTot multiplied by the scale factor.
// Added version: maxRegularQtot is subtracted from qTot first, then the result is multiplied by the scale factor.
// The result is a multiple of the scale factor, so it matches the original charge only up to the encoding precision.
// NOTE(review): there is no isSaturated() check here. If qTot is below maxRegularQtot the subtraction is negative
// and the conversion to uint32_t wraps around (assuming qTot is a 16-bit field promoted to int) - presumably
// callers must check isSaturated() first; confirm.
168167 GPUd () uint32_t getSaturatedQtot () const
169168 {
170- return uint32_t (qTot) * scaleSaturatedQTot ;
169+ return uint32_t (qTot - maxRegularQtot ) * scaleSaturatedQtot ;
171170 }
172171
173172 GPUd () void setSaturatedTailLength (uint32_t tail)
0 commit comments