From 283f912f0b0253e5d20c690c6f868ddbdd576ba3 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 25 Feb 2026 12:59:52 -0800 Subject: [PATCH 1/2] Fix atomic ordering EffectAnalyzer previously considered atomic operations to be ordered only with operations that read or write Wasm memories. But they should be ordered with all operations that read or write the machine's memory, which includes things like globals, tables, and the GC heap. Fix the problem. One test is updated now that memory.grow (which is atomic) is ordered with global accesses. --- src/ir/effects.h | 11 +++-- test/lit/passes/O4_disable-bulk-memory.wast | 51 +++++++++++---------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/ir/effects.h b/src/ir/effects.h index 1a7f4af616b..dfcdf0b5fd8 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -206,6 +206,9 @@ class EffectAnalyzer { return mutableGlobalsRead.size() || readsMemory || readsTable || readsMutableStruct || readsArray || isAtomic || calls; } + bool accessesGlobalState() const { + return readsMutableGlobalState() || writesGlobalState(); + } bool hasNonTrapSideEffects() const { return localsWritten.size() > 0 || danglingPop || writesGlobalState() || @@ -292,10 +295,10 @@ class EffectAnalyzer { (danglingPop || other.danglingPop)) { return true; } - // All atomics are sequentially consistent for now, and ordered wrt other - // memory references. - if ((isAtomic && other.accessesMemory()) || - (other.isAtomic && accessesMemory())) { + // We model all atomics as sequentially consistent for now. They are ordered + // wrt other loads and stores from anything, not just memories. 
+ if ((isAtomic && other.accessesGlobalState()) || + (other.isAtomic && accessesGlobalState())) { return true; } for (auto local : localsWritten) { diff --git a/test/lit/passes/O4_disable-bulk-memory.wast b/test/lit/passes/O4_disable-bulk-memory.wast index d75258a9640..89296fea292 100644 --- a/test/lit/passes/O4_disable-bulk-memory.wast +++ b/test/lit/passes/O4_disable-bulk-memory.wast @@ -239,31 +239,32 @@ ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (i32.gt_u - ;; CHECK-NEXT: (local.tee $2 - ;; CHECK-NEXT: (i32.and - ;; CHECK-NEXT: (i32.add - ;; CHECK-NEXT: (i32.add - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: (i32.le_u - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.tee $0 - ;; CHECK-NEXT: (global.get $global$1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.and + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.tee $1 + ;; CHECK-NEXT: (global.get $global$1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.le_u + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const 7) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const -8) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 7) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -8) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.gt_u + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: (i32.shl - ;; CHECK-NEXT: (local.tee $1 + ;; CHECK-NEXT: (local.tee $2 ;; CHECK-NEXT: (memory.size) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 16) @@ -274,14 +275,14 @@ ;; CHECK-NEXT: (i32.lt_s ;; CHECK-NEXT: (memory.grow ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: (local.tee $3 ;; CHECK-NEXT: 
(i32.shr_u ;; CHECK-NEXT: (i32.and ;; CHECK-NEXT: (i32.add ;; CHECK-NEXT: (i32.sub - ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 65535) ;; CHECK-NEXT: ) @@ -291,7 +292,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.gt_s - ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: (local.get $3) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -315,9 +316,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (global.set $global$1 - ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) (func $~lib/allocator/arena/__memory_allocate (; 6 ;) (type $3) (param $0 i32) (result i32) (local $1 i32) From 557b7869a3e5886aa230036b7e3d7e9c56e16ec3 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 25 Feb 2026 17:50:57 -0800 Subject: [PATCH 2/2] updates --- src/ir/effects.h | 43 ++++---- src/wasm.h | 8 ++ test/lit/passes/O4_disable-bulk-memory.wast | 51 +++++---- .../simplify-locals-atomic-effects.wast | 101 ++++++++++++++++++ 4 files changed, 154 insertions(+), 49 deletions(-) create mode 100644 test/lit/passes/simplify-locals-atomic-effects.wast diff --git a/src/ir/effects.h b/src/ir/effects.h index dfcdf0b5fd8..708c04262f7 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -601,24 +601,26 @@ class EffectAnalyzer { } void visitLoad(Load* curr) { parent.readsMemory = true; - parent.isAtomic |= curr->isAtomic(); + parent.isAtomic |= + curr->isAtomic() && parent.module.getMemory(curr->memory)->shared; parent.implicitTrap = true; } void visitStore(Store* curr) { parent.writesMemory = true; - parent.isAtomic |= curr->isAtomic(); + parent.isAtomic |= + curr->isAtomic() && parent.module.getMemory(curr->memory)->shared; parent.implicitTrap = true; } void visitAtomicRMW(AtomicRMW* curr) { parent.readsMemory = true; parent.writesMemory = true; - parent.isAtomic = 
true; + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; parent.implicitTrap = true; } void visitAtomicCmpxchg(AtomicCmpxchg* curr) { parent.readsMemory = true; parent.writesMemory = true; - parent.isAtomic = true; + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; parent.implicitTrap = true; } void visitAtomicWait(AtomicWait* curr) { @@ -743,8 +745,9 @@ class EffectAnalyzer { // memory.size accesses the size of the memory, and thus can be modeled as // reading memory parent.readsMemory = true; - // Atomics are sequentially consistent with memory.size. - parent.isAtomic = true; + // Synchronizes with memory.grow on other threads, but only when operating + // on shared memories. + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; } void visitMemoryGrow(MemoryGrow* curr) { // TODO: find out if calls is necessary here @@ -754,8 +757,9 @@ class EffectAnalyzer { // addresses, and just a read operation in the failure case parent.readsMemory = true; parent.writesMemory = true; - // Atomics are also sequentially consistent with memory.grow. - parent.isAtomic = true; + // Synchronizes with memory.size on other threads, but only when operating + // on shared memories. + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; } void visitRefNull(RefNull* curr) {} void visitRefIsNull(RefIsNull* curr) {} @@ -899,18 +903,8 @@ class EffectAnalyzer { if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - switch (curr->order) { - case MemoryOrder::Unordered: - break; - case MemoryOrder::SeqCst: - // Synchronizes with other threads. - parent.isAtomic = true; - break; - case MemoryOrder::AcqRel: - // Only synchronizes if other threads can read the field. 
- parent.isAtomic = curr->ref->type.getHeapType().isShared(); - break; - } + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitStructSet(StructSet* curr) { if (curr->ref->type.isNull()) { @@ -922,9 +916,8 @@ class EffectAnalyzer { if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - if (curr->order != MemoryOrder::Unordered) { - parent.isAtomic = true; - } + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitStructRMW(StructRMW* curr) { if (curr->ref->type.isNull()) { @@ -972,6 +965,8 @@ class EffectAnalyzer { parent.readsArray = true; // traps when the arg is null or the index out of bounds parent.implicitTrap = true; + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitArraySet(ArraySet* curr) { if (curr->ref->type.isNull()) { @@ -981,6 +976,8 @@ class EffectAnalyzer { parent.writesArray = true; // traps when the arg is null or the index out of bounds parent.implicitTrap = true; + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitArrayLen(ArrayLen* curr) { if (curr->ref->type.isNull()) { diff --git a/src/wasm.h b/src/wasm.h index 3c84e02b984..70a1ba17253 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -1714,6 +1714,8 @@ class StructGet : public SpecificExpression { bool signed_ = false; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; @@ -1727,6 +1729,8 @@ class StructSet : public SpecificExpression { Expression* value; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; @@ -1818,6 +1822,8 @@ class ArrayGet : public SpecificExpression { bool signed_ = false; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; @@ -1831,6 +1837,8 @@ 
class ArraySet : public SpecificExpression { Expression* value; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; diff --git a/test/lit/passes/O4_disable-bulk-memory.wast b/test/lit/passes/O4_disable-bulk-memory.wast index 89296fea292..d75258a9640 100644 --- a/test/lit/passes/O4_disable-bulk-memory.wast +++ b/test/lit/passes/O4_disable-bulk-memory.wast @@ -239,32 +239,31 @@ ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.set $0 - ;; CHECK-NEXT: (i32.and - ;; CHECK-NEXT: (i32.add - ;; CHECK-NEXT: (i32.add - ;; CHECK-NEXT: (local.tee $1 - ;; CHECK-NEXT: (global.get $global$1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: (i32.le_u - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.gt_u + ;; CHECK-NEXT: (local.tee $2 + ;; CHECK-NEXT: (i32.and + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.le_u + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.tee $0 + ;; CHECK-NEXT: (global.get $global$1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 7) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -8) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const 7) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const -8) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (if - ;; CHECK-NEXT: (i32.gt_u - ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: (i32.shl - ;; CHECK-NEXT: (local.tee $2 + ;; CHECK-NEXT: (local.tee $1 ;; CHECK-NEXT: (memory.size) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 16) @@ -275,14 +274,14 @@ ;; CHECK-NEXT: (i32.lt_s ;; CHECK-NEXT: (memory.grow ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: (local.get $1) 
;; CHECK-NEXT: (local.tee $3 ;; CHECK-NEXT: (i32.shr_u ;; CHECK-NEXT: (i32.and ;; CHECK-NEXT: (i32.add ;; CHECK-NEXT: (i32.sub + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 65535) ;; CHECK-NEXT: ) @@ -292,7 +291,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.gt_s - ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: (local.get $3) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -316,9 +315,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (global.set $global$1 - ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) (func $~lib/allocator/arena/__memory_allocate (; 6 ;) (type $3) (param $0 i32) (result i32) (local $1 i32) diff --git a/test/lit/passes/simplify-locals-atomic-effects.wast b/test/lit/passes/simplify-locals-atomic-effects.wast new file mode 100644 index 00000000000..2fcaa88093a --- /dev/null +++ b/test/lit/passes/simplify-locals-atomic-effects.wast @@ -0,0 +1,101 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. 
+;; RUN: wasm-opt %s -all --simplify-locals -S -o - | filecheck %s + +(module + ;; CHECK: (type $shared-struct (shared (struct (field (mut i32))))) + (type $shared-struct (shared (struct (field (mut i32))))) + ;; CHECK: (type $unshared-struct (struct (field (mut i32)))) + (type $unshared-struct (struct (field (mut i32)))) + + ;; CHECK: (memory $shared 1 1 shared) + (memory $shared 1 1 shared) + ;; CHECK: (memory $unshared 1 1) + (memory $unshared 1 1) + + ;; CHECK: (func $memory.size-shared-shared (type $2) (param $shared (ref null $shared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (memory.size $shared) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $shared-struct 0 + ;; CHECK-NEXT: (local.get $shared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + (func $memory.size-shared-shared (param $shared (ref null $shared-struct)) (result i32) + (local $x i32) + ;; memory.size can synchronize with memory.grow on another thread when the + ;; memory is shared. It cannot be moved past a write to a shared struct. + (local.set $x + (memory.size $shared) + ) + (struct.set $shared-struct 0 (local.get $shared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-unshared-shared (type $2) (param $shared (ref null $shared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (memory.size $unshared) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $shared-struct 0 + ;; CHECK-NEXT: (local.get $shared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + (func $memory.size-unshared-shared (param $shared (ref null $shared-struct)) (result i32) + (local $x i32) + ;; Now the memory is unshared. In principle we could reorder because no + ;; other thread could observe the reordering, but we do not distinguish + ;; between accesses to shared and unshared state. TODO. + (local.set $x + (memory.size $unshared) + ) + (struct.set $shared-struct 0 (local.get $shared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-shared-unshared (type $3) (param $unshared (ref null $unshared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (memory.size $shared) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $unshared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + (func $memory.size-shared-unshared (param $unshared (ref null $unshared-struct)) (result i32) + (local $x i32) + ;; Now the memory is shared but the struct is unshared. Again, we could + ;; reorder in principle, but we do not yet. TODO. + (local.set $x + (memory.size $shared) + ) + (struct.set $unshared-struct 0 (local.get $unshared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-unshared-unshared (type $3) (param $unshared (ref null $unshared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (struct.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $unshared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (memory.size $unshared) + ;; CHECK-NEXT: ) + (func $memory.size-unshared-unshared (param $unshared (ref null $unshared-struct)) (result i32) + (local $x i32) + ;; Now both the memory and struct are unshared, so neither can synchronize + ;; with anything and we reorder freely. + (local.set $x + (memory.size $unshared) + ) + (struct.set $unshared-struct 0 (local.get $unshared) (i32.const 0)) + (local.get $x) + ) +)