Skip to content

Commit 08f8150

Browse files
committed
ITS: add memory stats
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent ded827e commit 08f8150

6 files changed

Lines changed: 160 additions & 74 deletions

File tree

Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h

Lines changed: 123 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -22,109 +22,197 @@
2222
#include <new>
2323
#include <vector>
2424

25+
#if !defined(__HIPCC__) && !defined(__CUDACC__)
26+
#include <format>
27+
#include <string>
28+
#include "Framework/Logger.h"
29+
#endif
2530
#include "ITStracking/ExternalAllocator.h"
26-
27-
#include "GPUCommonLogger.h"
31+
#include "ITStracking/Constants.h"
2832

2933
namespace o2::its
3034
{
3135

36+
// #define BOUNDED_MR_STATS
3237
class BoundedMemoryResource final : public std::pmr::memory_resource
3338
{
3439
public:
3540
class MemoryLimitExceeded final : public std::bad_alloc
3641
{
3742
public:
3843
MemoryLimitExceeded(size_t attempted, size_t used, size_t max)
39-
: mAttempted(attempted), mUsed(used), mMax(max) {}
40-
const char* what() const noexcept final
4144
{
42-
static thread_local char msg[256];
43-
if (mAttempted != 0) {
44-
snprintf(msg, sizeof(msg),
45-
"Reached set memory limit (attempted: %zu, used: %zu, max: %zu)",
46-
mAttempted, mUsed, mMax);
45+
char buf[256];
46+
if (attempted != 0) {
47+
(void)snprintf(buf, sizeof(buf), "Reached set memory limit (attempted: %zu, used: %zu, max: %zu)", attempted, used, max);
4748
} else {
48-
snprintf(msg, sizeof(msg),
49-
"New set maximum below current used (newMax: %zu, used: %zu)",
50-
mMax, mUsed);
49+
(void)snprintf(buf, sizeof(buf), "New set maximum below current used (newMax: %zu, used: %zu)", max, used);
5150
}
52-
return msg;
51+
mMsg = buf;
5352
}
53+
const char* what() const noexcept final { return mMsg.c_str(); }
5454

5555
private:
56-
size_t mAttempted{0}, mUsed{0}, mMax{0};
56+
std::string mMsg;
5757
};
5858

59-
BoundedMemoryResource(size_t maxBytes = std::numeric_limits<size_t>::max(), std::pmr::memory_resource* upstream = std::pmr::get_default_resource())
59+
BoundedMemoryResource(size_t maxBytes = std::numeric_limits<size_t>::max(),
60+
std::pmr::memory_resource* upstream = std::pmr::get_default_resource())
6061
: mMaxMemory(maxBytes), mUpstream(upstream) {}
61-
BoundedMemoryResource(ExternalAllocator* alloc) : mAdaptor(std::make_unique<ExternalAllocatorAdaptor>(alloc)), mUpstream(mAdaptor.get()) {}
62+
63+
BoundedMemoryResource(ExternalAllocator* alloc,
64+
size_t maxBytes = std::numeric_limits<size_t>::max())
65+
: mMaxMemory(maxBytes),
66+
mAdaptor(std::make_unique<ExternalAllocatorAdaptor>(alloc)),
67+
mUpstream(mAdaptor.get()) {}
6268

6369
void* do_allocate(size_t bytes, size_t alignment) final
6470
{
65-
size_t new_used{0}, current_used{mUsedMemory.load(std::memory_order_relaxed)};
71+
size_t new_used{0};
72+
size_t current_used{mUsedMemory.load(std::memory_order_relaxed)};
6673
do {
6774
new_used = current_used + bytes;
68-
if (new_used > mMaxMemory) {
69-
++mCountThrow;
70-
throw MemoryLimitExceeded(new_used, current_used, mMaxMemory);
75+
if (new_used > mMaxMemory.load(std::memory_order_relaxed)) {
76+
mCountThrow.fetch_add(1, std::memory_order_relaxed);
77+
throw MemoryLimitExceeded(new_used, current_used,
78+
mMaxMemory.load(std::memory_order_relaxed));
7179
}
7280
} while (!mUsedMemory.compare_exchange_weak(current_used, new_used,
7381
std::memory_order_acq_rel,
7482
std::memory_order_relaxed));
83+
7584
void* p{nullptr};
7685
try {
7786
p = mUpstream->allocate(bytes, alignment);
7887
} catch (...) {
7988
mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed);
89+
#ifdef BOUNDED_MR_STATS
90+
mStats.upstreamFailures.fetch_add(1, std::memory_order_relaxed);
91+
#endif
8092
throw;
8193
}
94+
95+
#ifdef BOUNDED_MR_STATS
96+
size_t peak = mStats.peak.load(std::memory_order_relaxed);
97+
while (new_used > peak &&
98+
!mStats.peak.compare_exchange_weak(peak, new_used,
99+
std::memory_order_relaxed)) {
100+
}
101+
mStats.live.fetch_add(1, std::memory_order_relaxed);
102+
mStats.nAlloc.fetch_add(1, std::memory_order_relaxed);
103+
mStats.totalAlloc.fetch_add(bytes, std::memory_order_relaxed);
104+
105+
size_t ma = mStats.maxAlign.load(std::memory_order_relaxed);
106+
while (alignment > ma && !mStats.maxAlign.compare_exchange_weak(ma, alignment, std::memory_order_relaxed)) {
107+
}
108+
#endif
82109
return p;
83110
}
84111

85112
void do_deallocate(void* p, size_t bytes, size_t alignment) final
86113
{
87114
mUpstream->deallocate(p, bytes, alignment);
88115
mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed);
116+
#ifdef BOUNDED_MR_STATS
117+
mStats.live.fetch_sub(1, std::memory_order_relaxed);
118+
mStats.nFree.fetch_add(1, std::memory_order_relaxed);
119+
mStats.totalFreed.fetch_add(bytes, std::memory_order_relaxed);
120+
#endif
89121
}
90122

91123
bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final
92124
{
93125
return this == &other;
94126
}
95127

96-
size_t getUsedMemory() const noexcept { return mUsedMemory.load(); }
97-
size_t getMaxMemory() const noexcept { return mMaxMemory; }
128+
[[nodiscard]] size_t getUsedMemory() const noexcept
129+
{
130+
return mUsedMemory.load(std::memory_order_relaxed);
131+
}
132+
[[nodiscard]] size_t getMaxMemory() const noexcept
133+
{
134+
return mMaxMemory.load(std::memory_order_relaxed);
135+
}
136+
[[nodiscard]] size_t getThrowCount() const noexcept
137+
{
138+
return mCountThrow.load(std::memory_order_relaxed);
139+
}
140+
98141
void setMaxMemory(size_t max)
99142
{
100-
if (max == mMaxMemory) {
143+
size_t current = mMaxMemory.load(std::memory_order_relaxed);
144+
if (max == current) {
101145
return;
102146
}
103-
size_t used = mUsedMemory.load(std::memory_order_acquire);
104-
if (used > max) {
105-
++mCountThrow;
106-
throw MemoryLimitExceeded(0, used, max);
147+
for (;;) {
148+
size_t used = mUsedMemory.load(std::memory_order_acquire);
149+
if (used > max) {
150+
mCountThrow.fetch_add(1, std::memory_order_relaxed);
151+
throw MemoryLimitExceeded(0, used, max);
152+
}
153+
if (mMaxMemory.compare_exchange_weak(current, max,
154+
std::memory_order_release,
155+
std::memory_order_relaxed)) {
156+
return;
157+
}
158+
if (current == max) {
159+
return;
160+
}
107161
}
108-
mMaxMemory.store(max, std::memory_order_release);
109162
}
110163

111-
void print() const
164+
#if !defined(__HIPCC__) && !defined(__CUDACC__)
165+
std::string asString() const
112166
{
113-
#if !defined(GPUCA_GPUCODE_DEVICE)
114-
constexpr double GB{1024 * 1024 * 1024};
115-
auto throw_ = mCountThrow.load(std::memory_order_relaxed);
116-
auto used = static_cast<double>(mUsedMemory.load(std::memory_order_relaxed));
117-
LOGP(info, "maxthrow={} maxmem={:.2f} GB used={:.2f} ({:.2f}%)",
118-
throw_, (double)mMaxMemory / GB, used / GB, 100. * used / (double)mMaxMemory);
167+
const auto throw_ = mCountThrow.load(std::memory_order_relaxed);
168+
const auto used = static_cast<double>(mUsedMemory.load(std::memory_order_relaxed));
169+
const auto maxm = mMaxMemory.load(std::memory_order_relaxed);
170+
std::string ret;
171+
if (maxm == std::numeric_limits<size_t>::max()) {
172+
ret += std::format("maxthrow={} maxmem=unbounded used={:.2f} GB", throw_, used / constants::GB);
173+
} else {
174+
ret += std::format("maxthrow={} maxmem={:.2f} GB used={:.2f} GB ({:.2f}%)", throw_, (double)maxm / constants::GB, used / constants::GB, 100.0 * used / (double)maxm);
175+
}
176+
#ifdef BOUNDED_MR_STATS
177+
ret += std::format(" peak={:.2f} GB live={} nAlloc={} nFree={} totalAlloc={:.2f} GB totalFreed={:.2f} GB maxAlign={} upstreamFail={}",
178+
(float)mStats.peak.load(std::memory_order_relaxed) / constants::GB,
179+
mStats.live.load(std::memory_order_relaxed),
180+
mStats.nAlloc.load(std::memory_order_relaxed),
181+
mStats.nFree.load(std::memory_order_relaxed),
182+
(float)mStats.totalAlloc.load(std::memory_order_relaxed) / constants::GB,
183+
(float)mStats.totalFreed.load(std::memory_order_relaxed) / constants::GB,
184+
mStats.maxAlign.load(std::memory_order_relaxed),
185+
mStats.upstreamFailures.load(std::memory_order_relaxed));
119186
#endif
187+
return ret;
120188
}
121189

190+
void print() const
191+
{
192+
LOGP(info, "{}", asString());
193+
}
194+
#endif
195+
122196
private:
123197
std::atomic<size_t> mMaxMemory{std::numeric_limits<size_t>::max()};
124198
std::atomic<size_t> mCountThrow{0};
125199
std::atomic<size_t> mUsedMemory{0};
126200
std::unique_ptr<ExternalAllocatorAdaptor> mAdaptor{nullptr};
127201
std::pmr::memory_resource* mUpstream{nullptr};
202+
203+
#ifdef BOUNDED_MR_STATS
204+
struct Stats {
205+
std::atomic<size_t> peak{0};
206+
std::atomic<size_t> live{0};
207+
std::atomic<size_t> nAlloc{0};
208+
std::atomic<size_t> nFree{0};
209+
std::atomic<size_t> totalAlloc{0};
210+
std::atomic<size_t> totalFreed{0};
211+
std::atomic<size_t> maxAlign{0};
212+
std::atomic<size_t> upstreamFailures{0};
213+
};
214+
Stats mStats{};
215+
#endif
128216
};
129217

130218
template <typename T>

Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
#include <array>
2020
#include <chrono>
21-
#include <cmath>
2221
#include <fstream>
2322
#include <iomanip>
2423
#include <iosfwd>
@@ -54,7 +53,7 @@ class Tracker
5453

5554
void adoptTimeFrame(TimeFrame<NLayers>& tf);
5655

57-
void clustersToTracks(
56+
float clustersToTracks(
5857
const LogFunc& = [](const std::string& s) { std::cout << s << '\n'; },
5958
const LogFunc& = [](const std::string& s) { std::cerr << s << '\n'; });
6059

@@ -78,7 +77,7 @@ class Tracker
7877
void sortTracks();
7978

8079
template <typename... T, typename... F>
81-
float evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args);
80+
float evaluateTask(void (Tracker::*task)(T...), std::string_view taskName, int iteration, const LogFunc& logger, F&&... args);
8281

8382
TrackerTraits<NLayers>* mTraits = nullptr; /// Observer pointer, not owned by this class
8483
TimeFrame<NLayers>* mTimeFrame = nullptr; /// Observer pointer, not owned by this class
@@ -106,7 +105,7 @@ class Tracker
106105

107106
template <int NLayers>
108107
template <typename... T, typename... F>
109-
float Tracker<NLayers>::evaluateTask(void (Tracker<NLayers>::*task)(T...), std::string_view taskName, int iteration, LogFunc logger, F&&... args)
108+
float Tracker<NLayers>::evaluateTask(void (Tracker<NLayers>::*task)(T...), std::string_view taskName, int iteration, const LogFunc& logger, F&&... args)
110109
{
111110
float diff{0.f};
112111

@@ -140,6 +139,10 @@ float Tracker<NLayers>::evaluateTask(void (Tracker<NLayers>::*task)(T...), std::
140139
(this->*task)(std::forward<F>(args)...);
141140
}
142141

142+
if (mTrkParams[iteration].PrintMemory) {
143+
LOGP(info, "iter:{}:{}: {}", iteration, StateNames[mCurState], mMemoryPool->asString());
144+
}
145+
143146
return diff;
144147
}
145148

Detectors/ITSMFT/ITS/tracking/include/ITStracking/Vertexer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ float Vertexer<NLayers>::evaluateTask(void (Vertexer<NLayers>::*task)(T...), std
152152
(this->*task)(std::forward<T>(args)...);
153153
}
154154

155+
if (mVertParams[iteration].PrintMemory) {
156+
LOGP(info, "iter:{}:{}: {}", iteration, StateNames[mCurState], mMemoryPool->asString());
157+
}
158+
155159
return diff;
156160
}
157161

Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ Tracker<NLayers>::Tracker(TrackerTraits<NLayers>* traits) : mTraits(traits)
3434
}
3535

3636
template <int NLayers>
37-
void Tracker<NLayers>::clustersToTracks(const LogFunc& logger, const LogFunc& error)
37+
float Tracker<NLayers>::clustersToTracks(const LogFunc& logger, const LogFunc& error)
3838
{
3939
LogFunc evalLog = [](const std::string&) {};
4040

41-
double total{0};
41+
float total{0};
4242
mTraits->updateTrackingParameters(mTrkParams);
4343

4444
int maxNvertices{-1};
@@ -71,12 +71,13 @@ void Tracker<NLayers>::clustersToTracks(const LogFunc& logger, const LogFunc& er
7171
if (iteration == 3 && mTrkParams[0].DoUPCIteration) {
7272
mTimeFrame->useUPCMask();
7373
}
74-
float timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.};
74+
float timeFrame{0.}, timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.};
7575
size_t nTracklets{0}, nCells{0}, nNeighbours{0};
7676
int nTracks{-static_cast<int>(mTimeFrame->getNumberOfTracks())};
7777
iVertex = std::min(maxNvertices, 0);
7878
logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration));
79-
total += evaluateTask(&Tracker::initialiseTimeFrame, StateNames[mCurState = TFInit], iteration, logger, iteration);
79+
total += timeFrame = evaluateTask(&Tracker::initialiseTimeFrame, StateNames[mCurState = TFInit], iteration, evalLog, iteration);
80+
logger(std::format(" - TimeFrame initialisation completed in {:.2f} ms", timeFrame));
8081
do {
8182
timeTracklets += evaluateTask(&Tracker::computeTracklets, StateNames[mCurState = Trackleting], iteration, evalLog, iteration, iVertex);
8283
nTracklets += mTraits->getTFNumberOfTracklets();
@@ -91,24 +92,18 @@ void Tracker<NLayers>::clustersToTracks(const LogFunc& logger, const LogFunc& er
9192
logger(std::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours));
9293
logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads));
9394
total += timeTracklets + timeCells + timeNeighbours + timeRoads;
94-
if (mTrkParams[iteration].PrintMemory) {
95-
mMemoryPool->print();
96-
}
97-
}
98-
if constexpr (constants::DoTimeBenchmarks) {
99-
logger(std::format("=== TimeSlice {} processing completed in: {:.2f} ms using {} thread(s) ===", mTimeSlice, total, mTraits->getNThreads()));
10095
}
10196
} catch (const BoundedMemoryResource::MemoryLimitExceeded& err) {
10297
handleException(err);
103-
return;
98+
return -1.f;
10499
} catch (const std::bad_alloc& err) {
105100
handleException(err);
106-
return;
101+
return -1.f;
107102
} catch (const std::exception& err) {
108103
error(std::format("Uncaught exception, all bets are off... {}", err.what()));
109104
// clear tracks explicitly since if not fatalising on exception this may contain partial output
110105
mTimeFrame->getTracks().clear();
111-
return;
106+
return -1.f;
112107
}
113108

114109
if (mTimeFrame->hasMCinformation()) {
@@ -120,10 +115,7 @@ void Tracker<NLayers>::clustersToTracks(const LogFunc& logger, const LogFunc& er
120115
++mTimeFrameCounter;
121116
mTotalTime += total;
122117

123-
if (mTrkParams[0].PrintMemory) {
124-
mTimeFrame->printArtefactsMemory();
125-
mMemoryPool->print();
126-
}
118+
return total;
127119
}
128120

129121
template <int NLayers>

0 commit comments

Comments
 (0)