From 2e93e3e3e7f4c7193fbbcd97a5ebbb0160547b26 Mon Sep 17 00:00:00 2001 From: Sertonix Date: Sat, 14 Mar 2026 20:48:06 +0100 Subject: [PATCH] Add and use readLE/writeLE helpers According to godbolt.org these functions optimize to a simple *(T *)ptr in many cases while ensuring that memory alignment requirements and endianness do not affect the behavior. The unroll pragma is needed for GCC to properly optimize the code. The approach of using bit shifts is also used in multiple other parts of binaryen (e.g. WasmBinaryReader::getInt16) but usage of the helper function doesn't seem to be that easy there. Ref https://github.com/WebAssembly/binaryen/issues/2983 --- src/literal.h | 4 ++- src/shell-interface.h | 26 ++-------------- src/support/bits.h | 52 ++++++++++++++++++++++++++++++++ src/tools/wasm-ctor-eval.cpp | 9 ++---- src/tools/wasm-fuzz-lattices.cpp | 5 +-- src/tools/wasm-fuzz-types.cpp | 3 +- src/wasm-interpreter.h | 10 +++--- src/wasm/literal.cpp | 23 +++++--------- 8 files changed, 77 insertions(+), 55 deletions(-) diff --git a/src/literal.h b/src/literal.h index 9eb27177d75..2387ef733b4 100644 --- a/src/literal.h +++ b/src/literal.h @@ -20,6 +20,7 @@ #include #include +#include "support/bits.h" #include "support/hash.h" #include "support/name.h" #include "support/small_vector.h" @@ -823,7 +824,8 @@ template<> struct hash { return digest; case wasm::Type::v128: uint64_t chunks[2]; - memcpy(&chunks, a.getv128Ptr(), 16); + chunks[0] = wasm::Bits::readLE(a.getv128Ptr()); + chunks[1] = wasm::Bits::readLE(&a.getv128Ptr()[8]); wasm::rehash(digest, chunks[0]); wasm::rehash(digest, chunks[1]); return digest; diff --git a/src/shell-interface.h b/src/shell-interface.h index 9a16499f4e3..6a8763c9c3f 100644 --- a/src/shell-interface.h +++ b/src/shell-interface.h @@ -25,6 +25,7 @@ #include "interpreter/exception.h" #include "ir/module-utils.h" #include "shared-constants.h" +#include "support/bits.h" #include "support/name.h" #include "support/utilities.h" #include 
"wasm-interpreter.h" @@ -33,20 +34,9 @@ namespace wasm { struct ShellExternalInterface : ModuleRunner::ExternalInterface { - // The underlying memory can be accessed through unaligned pointers which - // isn't well-behaved in C++. WebAssembly nonetheless expects it to behave - // properly. Avoid emitting unaligned load/store by checking for alignment - // explicitly, and performing memcpy if unaligned. - // - // The allocated memory tries to have the same alignment as the memory being - // simulated. class Memory { // Use char because it doesn't run afoul of aliasing rules. std::vector memory; - template static bool aligned(const char* address) { - static_assert(!(sizeof(T) & (sizeof(T) - 1)), "must be a power of 2"); - return 0 == (reinterpret_cast(address) & (sizeof(T) - 1)); - } public: Memory() = default; @@ -65,20 +55,10 @@ struct ShellExternalInterface : ModuleRunner::ExternalInterface { } } template void set(size_t address, T value) { - if (aligned(&memory[address])) { - *reinterpret_cast(&memory[address]) = value; - } else { - std::memcpy(&memory[address], &value, sizeof(T)); - } + Bits::writeLE(value, &memory[address]); } template T get(size_t address) { - if (aligned(&memory[address])) { - return *reinterpret_cast(&memory[address]); - } else { - T loaded; - std::memcpy(&loaded, &memory[address], sizeof(T)); - return loaded; - } + return Bits::readLE(&memory[address]); } }; diff --git a/src/support/bits.h b/src/support/bits.h index 9c68a7a15e3..51cee600a31 100644 --- a/src/support/bits.h +++ b/src/support/bits.h @@ -17,8 +17,10 @@ #ifndef wasm_support_bits_h #define wasm_support_bits_h +#include #include #include +#include #include /* @@ -94,6 +96,56 @@ template inline static T rotateRight(T val, U count) { uint32_t log2(uint32_t v); uint32_t pow2(uint32_t v); +template< + typename T, + typename std::enable_if< + std::is_same::value>>:: + value, + bool>::type = true> +void writeLE(T val, void* ptr) { + memcpy(ptr, val.data(), sizeof(T)); +} + 
+template::value, bool>::type = true> +void writeLE(T val, void* ptr) { + auto v = typename std::conditional::value, + typename std::make_unsigned::type, + T>::type(val); + unsigned char* buf = reinterpret_cast(ptr); +#pragma GCC unroll 10 + for (size_t i = 0; i < sizeof(T); ++i) { + buf[i] = v >> (CHAR_BIT * i); + } +} + +template< + typename T, + typename std::enable_if< + std::is_same::value>>:: + value, + bool>::type = true> +T readLE(const void* ptr) { + T v; + memcpy(v.data(), ptr, sizeof(T)); + return v; +} + +template::value, bool>::type = true> +T readLE(const void* ptr) { + using TU = typename std::conditional::value, + typename std::make_unsigned::type, + T>::type; + TU v = 0; + const unsigned char* buf = reinterpret_cast(ptr); +#pragma GCC unroll 10 + for (size_t i = 0; i < sizeof(T); ++i) { + v += (TU)buf[i] << (CHAR_BIT * i); + } + return v; +} + } // namespace wasm::Bits #endif // wasm_support_bits_h diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp index 9b2800a2d84..673925efb5f 100644 --- a/src/tools/wasm-ctor-eval.cpp +++ b/src/tools/wasm-ctor-eval.cpp @@ -32,6 +32,7 @@ #include "ir/memory-utils.h" #include "ir/names.h" #include "pass.h" +#include "support/bits.h" #include "support/colors.h" #include "support/file.h" #include "support/insert_ordered.h" @@ -497,15 +498,11 @@ struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface { } template void doStore(Address address, T value, Name memoryName) { - // Use memcpy to avoid UB if unaligned. - memcpy(getMemory(address, memoryName, sizeof(T)), &value, sizeof(T)); + Bits::writeLE(value, getMemory(address, memoryName, sizeof(T))); } template T doLoad(Address address, Name memoryName) { - // Use memcpy to avoid UB if unaligned. 
- T ret; - memcpy(&ret, getMemory(address, memoryName, sizeof(T)), sizeof(T)); - return ret; + return Bits::readLE(getMemory(address, memoryName, sizeof(T))); } // Clear the state of the operation of applying the interpreter's runtime diff --git a/src/tools/wasm-fuzz-lattices.cpp b/src/tools/wasm-fuzz-lattices.cpp index a6231cd42ea..4d36761c69e 100644 --- a/src/tools/wasm-fuzz-lattices.cpp +++ b/src/tools/wasm-fuzz-lattices.cpp @@ -36,6 +36,7 @@ #include "analysis/reaching-definitions-transfer-function.h" #include "analysis/transfer-function.h" +#include "support/bits.h" #include "support/command-line.h" #include "tools/fuzzing.h" #include "tools/fuzzing/random.h" @@ -995,7 +996,7 @@ struct Fuzzer { // Fewer bytes are needed to generate three random lattices. std::vector funcBytes(128); for (size_t i = 0; i < funcBytes.size(); i += sizeof(uint64_t)) { - *(uint64_t*)(funcBytes.data() + i) = getFuncRand(); + Bits::writeLE(getFuncRand(), funcBytes.data() + i); } Random rand(std::move(funcBytes)); @@ -1030,7 +1031,7 @@ struct Fuzzer { // 4kb of random bytes should be enough for anyone! std::vector bytes(4096); for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) { - *(uint64_t*)(bytes.data() + i) = getRand(); + Bits::writeLE(getRand(), bytes.data() + i); } Module testModule; diff --git a/src/tools/wasm-fuzz-types.cpp b/src/tools/wasm-fuzz-types.cpp index dc04ae96733..6bb3a0c44d4 100644 --- a/src/tools/wasm-fuzz-types.cpp +++ b/src/tools/wasm-fuzz-types.cpp @@ -20,6 +20,7 @@ #include #include +#include "support/bits.h" #include "support/command-line.h" #include "tools/fuzzing/heap-types.h" #include "tools/fuzzing/random.h" @@ -68,7 +69,7 @@ void Fuzzer::run(uint64_t seed) { // 4kb of random bytes should be enough for anyone! 
std::vector bytes(4096); for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) { - *(uint64_t*)(bytes.data() + i) = getRand(); + Bits::writeLE(getRand(), bytes.data() + i); } rand = Random(std::move(bytes)); diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 369745d0b67..b6544ac4224 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -2777,14 +2777,12 @@ class ExpressionRunner : public OverriddenVisitor { case Field::NotPacked: return Literal::makeFromMemory(p, field.type); case Field::i8: { - int8_t i; - memcpy(&i, p, sizeof(i)); - return truncateForPacking(Literal(int32_t(i)), field); + return truncateForPacking(Literal(int32_t(Bits::readLE(p))), + field); } case Field::i16: { - int16_t i; - memcpy(&i, p, sizeof(i)); - return truncateForPacking(Literal(int32_t(i)), field); + return truncateForPacking(Literal(int32_t(Bits::readLE(p))), + field); } case Field::WaitQueue: { WASM_UNREACHABLE("waitqueue not implemented"); diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 1bf14432c90..22c539e682f 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -240,8 +240,7 @@ static void extractBytes(uint8_t (&dest)[16], const LaneArray& lanes) { for (size_t lane_index = 0; lane_index < Lanes; ++lane_index) { uint8_t bits[16]; lanes[lane_index].getBits(bits); - LaneT lane; - memcpy(&lane, bits, sizeof(lane)); + LaneT lane = Bits::readLE(bits); for (size_t offset = 0; offset < lane_width; ++offset) { bytes.at(lane_index * lane_width + offset) = uint8_t(lane >> (8 * offset)); @@ -316,24 +315,16 @@ Literal Literal::makeFromMemory(void* p, Type type) { assert(type.isNumber()); switch (type.getBasic()) { case Type::i32: { - int32_t i; - memcpy(&i, p, sizeof(i)); - return Literal(i); + return Literal(Bits::readLE(p)); } case Type::i64: { - int64_t i; - memcpy(&i, p, sizeof(i)); - return Literal(i); + return Literal(Bits::readLE(p)); } case Type::f32: { - int32_t i; - memcpy(&i, p, sizeof(i)); - return Literal(bit_cast(i)); + 
return Literal(bit_cast(Bits::readLE(p))); } case Type::f64: { - int64_t i; - memcpy(&i, p, sizeof(i)); - return Literal(bit_cast(i)); + return Literal(bit_cast(Bits::readLE(p))); } case Type::v128: { uint8_t bytes[16]; @@ -460,11 +451,11 @@ void Literal::getBits(uint8_t (&buf)[16]) const { switch (type.getBasic()) { case Type::i32: case Type::f32: - memcpy(buf, &i32, sizeof(i32)); + Bits::writeLE(i32, buf); break; case Type::i64: case Type::f64: - memcpy(buf, &i64, sizeof(i64)); + Bits::writeLE(i64, buf); break; case Type::v128: memcpy(buf, &v128, sizeof(v128));