Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 131 additions & 2 deletions DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#ifndef ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H
#define ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H
#ifndef GPUCA_GPUCODE_DEVICE
#include <climits>
#include <cstdint>
#include <cstddef> // for size_t
#include <utility>
Expand Down Expand Up @@ -62,6 +63,8 @@ struct ClusterNative {
static constexpr int scalePadPacked = 64; //< ~60 is needed for 0.1mm precision, but power of two avoids rounding
static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size
static constexpr int scaleSigmaPadPacked = 32;
static constexpr int scaleSaturatedQTot = 4;
static constexpr int maxSaturatedQTot = USHRT_MAX * scaleSaturatedQTot;

uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the
// upper 8 bits
Expand All @@ -83,7 +86,15 @@ struct ClusterNative {
}

GPUd() uint16_t getQmax() const { return qMax; }
GPUd() uint16_t getQtot() const { return qTot; }
GPUd() uint16_t getQtot() const
{
if (isSaturated()) [[unlikely]] {
// Check for overflow, so return type can stay uint16
auto sqtot = getSaturatedQtot();
return sqtot <= USHRT_MAX ? sqtot : USHRT_MAX;
}
return qTot;
}
GPUd() uint8_t getFlags() const { return timeFlagsPacked >> 24; }
GPUd() uint32_t getTimePacked() const { return timeFlagsPacked & 0xFFFFFF; }
GPUd() void setTimePackedFlags(uint32_t timePacked, uint8_t flags)
Expand Down Expand Up @@ -119,7 +130,13 @@ struct ClusterNative {
/// Y = (12.4 - 0.5 * (66 - 1)) * 4.16mm = -83.616mm
GPUd() float getPad() const { return unpackPad(padPacked); }
GPUd() void setPad(float pad) { padPacked = packPad(pad); }
GPUd() float getSigmaTime() const { return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked); }
GPUd() float getSigmaTime() const
{
if (isSaturated()) [[unlikely]] {
return 0;
}
return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked);
}
GPUd() void setSigmaTime(float sigmaTime)
{
uint32_t tmp = sigmaTime * scaleSigmaTimePacked + 0.5;
Expand All @@ -138,6 +155,31 @@ struct ClusterNative {
sigmaPadPacked = tmp;
}

GPUd() bool isSaturated() const { return qMax >= 1023; }

GPUd() void setSaturatedQtot(uint32_t qtot)
{
if (qtot > maxSaturatedQTot) {
qtot = maxSaturatedQTot;
}
this->qTot = (qtot + scaleSaturatedQTot / 2) / scaleSaturatedQTot;
}

GPUd() uint32_t getSaturatedQtot() const
{
return uint32_t(qTot) * scaleSaturatedQTot;
}

GPUd() void setSaturatedTailLength(uint32_t tail)
{
sigmaTimePacked = encodeTailLength(tail);
}

GPUd() uint32_t getSaturatedTailLength() const
{
return decodeTailLength(sigmaTimePacked);
}

GPUd() bool operator<(const ClusterNative& rhs) const
{
if (this->getTimePacked() != rhs.getTimePacked()) {
Expand Down Expand Up @@ -167,6 +209,93 @@ struct ClusterNative {
this->qTot == rhs.qTot &&
this->getFlags() == rhs.getFlags();
}

private:
static constexpr GPUd() uint32_t decodeTailLength(uint8_t code)
{
// Quantize tail length into 8bits.
// Max expected length is 1500 tbs.
// But allow outliers up to 8000 tbs.
//
// Full code layout is:
//
// | Code range | Decoded values | Step | Codes |
// | ---------: | -------------: | ----: | ----: |
// | `0..63` | `0..63` | `1` | `64` |
// | `64..95` | `64..126` | `2` | `32` |
// | `96..127` | `128..252` | `4` | `32` |
// | `128..159` | `256..504` | `8` | `32` |
// | `160..223` | `512..1520` | `16` | `64` |
// | `224..239` | `1552..2032` | `32` | `16` |
// | `240..255` | `2048..8048` | `400` | `16` |
//

if (code < 64) {
return code;
}

if (code < 160) {
uint32_t q = (uint32_t)code - 64u;
uint32_t exponent = (q >> 5) + 1u; // 1, 2, 3
uint32_t mantissa = q & 31u; // 0..31

return (32u + mantissa) << exponent;
}

if (code < 224) {
return 512u + 16u * ((uint32_t)code - 160u);
}

if (code < 240) {
return 1552u + 32u * ((uint32_t)code - 224u);
}

return 2048u + 400u * ((uint32_t)code - 240u);
}

static constexpr GPUd() uint8_t encodeTailLength(uint32_t value)
{
// Saturate above representable range.
if (value >= decodeTailLength(255)) [[unlikely]] {
return 255;
}

// Binary search for the first code whose decoded value >= value.
uint8_t lo = 0;
uint8_t hi = 255;

while (lo < hi) {
uint8_t mid = lo + ((hi - lo) >> 1);
uint32_t decoded = decodeTailLength(mid);

if (decoded < value) {
lo = mid + 1;
} else {
hi = mid;
}
}

// lo is now the first code with decoded >= value.
if (lo == 0) [[unlikely]] {
return 0;
}

uint8_t above_code = lo;
uint8_t below_code = lo - 1;

uint32_t above_value = decodeTailLength(above_code);
uint32_t below_value = decodeTailLength(below_code);

uint32_t above_error = above_value - value;
uint32_t below_error = value - below_value;

// Tie-break downward.
if (below_error <= above_error) {
return below_code;
} else {
return above_code;
}
}
};

// This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to
Expand Down
6 changes: 4 additions & 2 deletions GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,11 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess*
tmpClusters[k] = clustersNative->clusters[i][j][k];
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
GPUTPCCompression::truncateSignificantBitsChargeMax(tmpClusters[k].qMax, param);
GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param);
GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaPadPacked, param);
GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param);
if (!tmpClusters[k].isSaturated()) {
GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param);
GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param);
}
}
}
std::sort(tmpClusters.begin(), tmpClusters.end());
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataCompression/GPUTPCCompression.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class GPUTPCCompression : public GPUProcessor
#endif

static constexpr uint32_t P_MAX_QMAX = 1 << 10;
static constexpr uint32_t P_MAX_QTOT = 5 * 5 * P_MAX_QMAX;
static constexpr uint32_t P_MAX_QTOT = 1 << 16;
static constexpr uint32_t P_MAX_TIME = 1 << 24;
static constexpr uint32_t P_MAX_PAD = 1 << 16;
static constexpr uint32_t P_MAX_SIGMA = 1 << 8;
Expand Down
12 changes: 8 additions & 4 deletions GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,11 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0at
uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked;
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
compressor.truncateSignificantBitsChargeMax(qmax, param);
compressor.truncateSignificantBitsCharge(qtot, param);
compressor.truncateSignificantBitsWidth(sigmapad, param);
compressor.truncateSignificantBitsWidth(sigmatime, param);
if (!orgCl.isSaturated()) {
compressor.truncateSignificantBitsCharge(qtot, param);
compressor.truncateSignificantBitsWidth(sigmatime, param);
}
}
c.qTotA[cidx] = qtot;
c.qMaxA[cidx] = qmax;
Expand Down Expand Up @@ -298,9 +300,11 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked;
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
compressor.truncateSignificantBitsChargeMax(qmax, param);
compressor.truncateSignificantBitsCharge(qtot, param);
compressor.truncateSignificantBitsWidth(sigmapad, param);
compressor.truncateSignificantBitsWidth(sigmatime, param);
if (!orgCl.isSaturated()) {
compressor.truncateSignificantBitsCharge(qtot, param);
compressor.truncateSignificantBitsWidth(sigmatime, param);
}
}
c.qTotU[outidx] = qtot;
c.qMaxU[outidx] = qmax;
Expand Down
14 changes: 7 additions & 7 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -529,11 +529,12 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
const float firstWeight = tail->qTot;
const float firstPad = tail->pad;
const float firstTime = HIPTailTimeMean(*tail);
const float firstTimeVariance = HIPTailTimeVariance(*tail);
float padSum = firstWeight * firstPad;
float padSqSum = firstWeight * firstPad * firstPad;
float timeSum = firstWeight * firstTime;
float timeSqSum = firstWeight * (firstTime * firstTime + firstTimeVariance);

uint32_t tailStart = tail->tailStart;
uint32_t tailEnd = tail->tailEnd;

while (tail->iNext != 0) {

Expand All @@ -542,28 +543,27 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
const float tailWeight = tail->qTot;
const float tailPad = tail->pad;
const float tailTime = HIPTailTimeMean(*tail);
const float tailTimeVariance = HIPTailTimeVariance(*tail);
qMax = CAMath::Max(qMax, tail->qMax);
qTot += tail->qTot;
padSum += tailWeight * tailPad;
padSqSum += tailWeight * tailPad * tailPad;
timeSum += tailWeight * tailTime;
timeSqSum += tailWeight * (tailTime * tailTime + tailTimeVariance);
tailStart = CAMath::Min<uint32_t>(tailStart, tail->tailStart);
tailEnd = CAMath::Max<uint32_t>(tailEnd, tail->tailEnd);
}

const float weightSum = CAMath::Max(qTot, 1.f);
float padMean = padSum / weightSum;
float timeMean = timeSum / weightSum; // TODO: Use timebin of saturated signal instead! Time mean is biased for long tails.
float padSigma = CAMath::Sqrt(CAMath::Max(0.f, padSqSum / weightSum - padMean * padMean));
float timeSigma = CAMath::Sqrt(CAMath::Max(0.f, timeSqSum / weightSum - timeMean * timeMean));

tpc::ClusterNative cn;
cn.qMax = qMax;
cn.qTot = (uint16_t)CAMath::Min(qTot, 65535.f);
cn.setSaturatedQtot(qTot);
cn.setSaturatedTailLength(tailEnd - tailStart);
float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer;
cn.setTimeFlags(clusterTime, 0);
cn.setPad(padMean);
cn.setSigmaTime(timeSigma);
cn.setSigmaPad(padSigma);

if (cn.qMax >= 1023) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,13 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out)

out << "Row: " << i << ": " << N << "\n";
for (const auto& cl : sortedCluster) {
out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n";
uint32_t qTot = cl.qTot;
uint32_t sigmaTime = cl.sigmaTimePacked;
if (cl.isSaturated()) {
qTot = cl.getSaturatedQtot();
sigmaTime = cl.getSaturatedTailLength();
}
out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << sigmaTime << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << qTot << "\n";
}
}
}