|
22 | 22 | #include <new> |
23 | 23 | #include <vector> |
24 | 24 |
|
| 25 | +#if !defined(__HIPCC__) && !defined(__CUDACC__) |
| 26 | +#include <format> |
| 27 | +#include <string> |
| 28 | +#include "Framework/Logger.h" |
| 29 | +#endif |
25 | 30 | #include "ITStracking/ExternalAllocator.h" |
26 | | - |
27 | | -#include "GPUCommonLogger.h" |
| 31 | +#include "ITStracking/Constants.h" |
28 | 32 |
|
29 | 33 | namespace o2::its |
30 | 34 | { |
31 | 35 |
|
| 36 | +// #define BOUNDED_MR_STATS |
32 | 37 | class BoundedMemoryResource final : public std::pmr::memory_resource |
33 | 38 | { |
34 | 39 | public: |
35 | 40 | class MemoryLimitExceeded final : public std::bad_alloc |
36 | 41 | { |
37 | 42 | public: |
38 | 43 | MemoryLimitExceeded(size_t attempted, size_t used, size_t max) |
39 | | - : mAttempted(attempted), mUsed(used), mMax(max) {} |
40 | | - const char* what() const noexcept final |
41 | 44 | { |
42 | | - static thread_local char msg[256]; |
43 | | - if (mAttempted != 0) { |
44 | | - snprintf(msg, sizeof(msg), |
45 | | - "Reached set memory limit (attempted: %zu, used: %zu, max: %zu)", |
46 | | - mAttempted, mUsed, mMax); |
| 45 | + char buf[256]; |
| 46 | + if (attempted != 0) { |
| 47 | + (void)snprintf(buf, sizeof(buf), "Reached set memory limit (attempted: %zu, used: %zu, max: %zu)", attempted, used, max); |
47 | 48 | } else { |
48 | | - snprintf(msg, sizeof(msg), |
49 | | - "New set maximum below current used (newMax: %zu, used: %zu)", |
50 | | - mMax, mUsed); |
| 49 | + (void)snprintf(buf, sizeof(buf), "New set maximum below current used (newMax: %zu, used: %zu)", max, used); |
51 | 50 | } |
52 | | - return msg; |
| 51 | + mMsg = buf; |
53 | 52 | } |
| 53 | + const char* what() const noexcept final { return mMsg.c_str(); } |
54 | 54 |
|
55 | 55 | private: |
56 | | - size_t mAttempted{0}, mUsed{0}, mMax{0}; |
| 56 | + std::string mMsg; |
57 | 57 | }; |
58 | 58 |
|
59 | | - BoundedMemoryResource(size_t maxBytes = std::numeric_limits<size_t>::max(), std::pmr::memory_resource* upstream = std::pmr::get_default_resource()) |
| 59 | + BoundedMemoryResource(size_t maxBytes = std::numeric_limits<size_t>::max(), |
| 60 | + std::pmr::memory_resource* upstream = std::pmr::get_default_resource()) |
60 | 61 | : mMaxMemory(maxBytes), mUpstream(upstream) {} |
61 | | - BoundedMemoryResource(ExternalAllocator* alloc) : mAdaptor(std::make_unique<ExternalAllocatorAdaptor>(alloc)), mUpstream(mAdaptor.get()) {} |
| 62 | + |
| 63 | + BoundedMemoryResource(ExternalAllocator* alloc, |
| 64 | + size_t maxBytes = std::numeric_limits<size_t>::max()) |
| 65 | + : mMaxMemory(maxBytes), |
| 66 | + mAdaptor(std::make_unique<ExternalAllocatorAdaptor>(alloc)), |
| 67 | + mUpstream(mAdaptor.get()) {} |
62 | 68 |
|
63 | 69 | void* do_allocate(size_t bytes, size_t alignment) final |
64 | 70 | { |
65 | | - size_t new_used{0}, current_used{mUsedMemory.load(std::memory_order_relaxed)}; |
| 71 | + size_t new_used{0}; |
| 72 | + size_t current_used{mUsedMemory.load(std::memory_order_relaxed)}; |
66 | 73 | do { |
67 | 74 | new_used = current_used + bytes; |
68 | | - if (new_used > mMaxMemory) { |
69 | | - ++mCountThrow; |
70 | | - throw MemoryLimitExceeded(new_used, current_used, mMaxMemory); |
| 75 | + if (new_used > mMaxMemory.load(std::memory_order_relaxed)) { |
| 76 | + mCountThrow.fetch_add(1, std::memory_order_relaxed); |
| 77 | + throw MemoryLimitExceeded(new_used, current_used, |
| 78 | + mMaxMemory.load(std::memory_order_relaxed)); |
71 | 79 | } |
72 | 80 | } while (!mUsedMemory.compare_exchange_weak(current_used, new_used, |
73 | 81 | std::memory_order_acq_rel, |
74 | 82 | std::memory_order_relaxed)); |
| 83 | + |
75 | 84 | void* p{nullptr}; |
76 | 85 | try { |
77 | 86 | p = mUpstream->allocate(bytes, alignment); |
78 | 87 | } catch (...) { |
79 | 88 | mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed); |
| 89 | +#ifdef BOUNDED_MR_STATS |
| 90 | + mStats.upstreamFailures.fetch_add(1, std::memory_order_relaxed); |
| 91 | +#endif |
80 | 92 | throw; |
81 | 93 | } |
| 94 | + |
| 95 | +#ifdef BOUNDED_MR_STATS |
| 96 | + size_t peak = mStats.peak.load(std::memory_order_relaxed); |
| 97 | + while (new_used > peak && |
| 98 | + !mStats.peak.compare_exchange_weak(peak, new_used, |
| 99 | + std::memory_order_relaxed)) { |
| 100 | + } |
| 101 | + mStats.live.fetch_add(1, std::memory_order_relaxed); |
| 102 | + mStats.nAlloc.fetch_add(1, std::memory_order_relaxed); |
| 103 | + mStats.totalAlloc.fetch_add(bytes, std::memory_order_relaxed); |
| 104 | + |
| 105 | + size_t ma = mStats.maxAlign.load(std::memory_order_relaxed); |
| 106 | + while (alignment > ma && !mStats.maxAlign.compare_exchange_weak(ma, alignment, std::memory_order_relaxed)) { |
| 107 | + } |
| 108 | +#endif |
82 | 109 | return p; |
83 | 110 | } |
84 | 111 |
|
85 | 112 | void do_deallocate(void* p, size_t bytes, size_t alignment) final |
86 | 113 | { |
87 | 114 | mUpstream->deallocate(p, bytes, alignment); |
88 | 115 | mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed); |
| 116 | +#ifdef BOUNDED_MR_STATS |
| 117 | + mStats.live.fetch_sub(1, std::memory_order_relaxed); |
| 118 | + mStats.nFree.fetch_add(1, std::memory_order_relaxed); |
| 119 | + mStats.totalFreed.fetch_add(bytes, std::memory_order_relaxed); |
| 120 | +#endif |
89 | 121 | } |
90 | 122 |
|
91 | 123 | bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final |
92 | 124 | { |
93 | 125 | return this == &other; |
94 | 126 | } |
95 | 127 |
|
96 | | - size_t getUsedMemory() const noexcept { return mUsedMemory.load(); } |
97 | | - size_t getMaxMemory() const noexcept { return mMaxMemory; } |
| 128 | + [[nodiscard]] size_t getUsedMemory() const noexcept |
| 129 | + { |
| 130 | + return mUsedMemory.load(std::memory_order_relaxed); |
| 131 | + } |
| 132 | + [[nodiscard]] size_t getMaxMemory() const noexcept |
| 133 | + { |
| 134 | + return mMaxMemory.load(std::memory_order_relaxed); |
| 135 | + } |
| 136 | + [[nodiscard]] size_t getThrowCount() const noexcept |
| 137 | + { |
| 138 | + return mCountThrow.load(std::memory_order_relaxed); |
| 139 | + } |
| 140 | + |
98 | 141 | void setMaxMemory(size_t max) |
99 | 142 | { |
100 | | - if (max == mMaxMemory) { |
| 143 | + size_t current = mMaxMemory.load(std::memory_order_relaxed); |
| 144 | + if (max == current) { |
101 | 145 | return; |
102 | 146 | } |
103 | | - size_t used = mUsedMemory.load(std::memory_order_acquire); |
104 | | - if (used > max) { |
105 | | - ++mCountThrow; |
106 | | - throw MemoryLimitExceeded(0, used, max); |
| 147 | + for (;;) { |
| 148 | + size_t used = mUsedMemory.load(std::memory_order_acquire); |
| 149 | + if (used > max) { |
| 150 | + mCountThrow.fetch_add(1, std::memory_order_relaxed); |
| 151 | + throw MemoryLimitExceeded(0, used, max); |
| 152 | + } |
| 153 | + if (mMaxMemory.compare_exchange_weak(current, max, |
| 154 | + std::memory_order_release, |
| 155 | + std::memory_order_relaxed)) { |
| 156 | + return; |
| 157 | + } |
| 158 | + if (current == max) { |
| 159 | + return; |
| 160 | + } |
107 | 161 | } |
108 | | - mMaxMemory.store(max, std::memory_order_release); |
109 | 162 | } |
110 | 163 |
|
111 | | - void print() const |
| 164 | +#if !defined(__HIPCC__) && !defined(__CUDACC__) |
| 165 | + std::string asString() const |
112 | 166 | { |
113 | | -#if !defined(GPUCA_GPUCODE_DEVICE) |
114 | | - constexpr double GB{1024 * 1024 * 1024}; |
115 | | - auto throw_ = mCountThrow.load(std::memory_order_relaxed); |
116 | | - auto used = static_cast<double>(mUsedMemory.load(std::memory_order_relaxed)); |
117 | | - LOGP(info, "maxthrow={} maxmem={:.2f} GB used={:.2f} ({:.2f}%)", |
118 | | - throw_, (double)mMaxMemory / GB, used / GB, 100. * used / (double)mMaxMemory); |
| 167 | + const auto throw_ = mCountThrow.load(std::memory_order_relaxed); |
| 168 | + const auto used = static_cast<double>(mUsedMemory.load(std::memory_order_relaxed)); |
| 169 | + const auto maxm = mMaxMemory.load(std::memory_order_relaxed); |
| 170 | + std::string ret; |
| 171 | + if (maxm == std::numeric_limits<size_t>::max()) { |
| 172 | + ret += std::format("maxthrow={} maxmem=unbounded used={:.2f} GB", throw_, used / constants::GB); |
| 173 | + } else { |
| 174 | + ret += std::format("maxthrow={} maxmem={:.2f} GB used={:.2f} GB ({:.2f}%)", throw_, (double)maxm / constants::GB, used / constants::GB, 100.0 * used / (double)maxm); |
| 175 | + } |
| 176 | +#ifdef BOUNDED_MR_STATS |
| 177 | + ret += std::format(" peak={:.2f} GB live={} nAlloc={} nFree={} totalAlloc={:.2f} GB totalFreed={:.2f} GB maxAlign={} upstreamFail={}", |
| 178 | + (float)mStats.peak.load(std::memory_order_relaxed) / constants::GB, |
| 179 | + mStats.live.load(std::memory_order_relaxed), |
| 180 | + mStats.nAlloc.load(std::memory_order_relaxed), |
| 181 | + mStats.nFree.load(std::memory_order_relaxed), |
| 182 | + (float)mStats.totalAlloc.load(std::memory_order_relaxed) / constants::GB, |
| 183 | + (float)mStats.totalFreed.load(std::memory_order_relaxed) / constants::GB, |
| 184 | + mStats.maxAlign.load(std::memory_order_relaxed), |
| 185 | + mStats.upstreamFailures.load(std::memory_order_relaxed)); |
119 | 186 | #endif |
| 187 | + return ret; |
120 | 188 | } |
121 | 189 |
|
| 190 | + void print() const |
| 191 | + { |
| 192 | + LOGP(info, "{}", asString()); |
| 193 | + } |
| 194 | +#endif |
| 195 | + |
122 | 196 | private: |
123 | 197 | std::atomic<size_t> mMaxMemory{std::numeric_limits<size_t>::max()}; |
124 | 198 | std::atomic<size_t> mCountThrow{0}; |
125 | 199 | std::atomic<size_t> mUsedMemory{0}; |
126 | 200 | std::unique_ptr<ExternalAllocatorAdaptor> mAdaptor{nullptr}; |
127 | 201 | std::pmr::memory_resource* mUpstream{nullptr}; |
| 202 | + |
| 203 | +#ifdef BOUNDED_MR_STATS |
| 204 | + struct Stats { |
| 205 | + std::atomic<size_t> peak{0}; |
| 206 | + std::atomic<size_t> live{0}; |
| 207 | + std::atomic<size_t> nAlloc{0}; |
| 208 | + std::atomic<size_t> nFree{0}; |
| 209 | + std::atomic<size_t> totalAlloc{0}; |
| 210 | + std::atomic<size_t> totalFreed{0}; |
| 211 | + std::atomic<size_t> maxAlign{0}; |
| 212 | + std::atomic<size_t> upstreamFailures{0}; |
| 213 | + }; |
| 214 | + Stats mStats{}; |
| 215 | +#endif |
128 | 216 | }; |
129 | 217 |
|
130 | 218 | template <typename T> |
|
0 commit comments