diff --git a/src/ir/effects.h b/src/ir/effects.h index 1a7f4af616b..708c04262f7 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -206,6 +206,9 @@ class EffectAnalyzer { return mutableGlobalsRead.size() || readsMemory || readsTable || readsMutableStruct || readsArray || isAtomic || calls; } + bool accessesGlobalState() const { + return readsMutableGlobalState() || writesGlobalState(); + } bool hasNonTrapSideEffects() const { return localsWritten.size() > 0 || danglingPop || writesGlobalState() || @@ -292,10 +295,10 @@ class EffectAnalyzer { (danglingPop || other.danglingPop)) { return true; } - // All atomics are sequentially consistent for now, and ordered wrt other - // memory references. - if ((isAtomic && other.accessesMemory()) || - (other.isAtomic && accessesMemory())) { + // We model all atomics as sequentially consistent for now. They are ordered + // wrt other loads and stores from anything, not just memories. + if ((isAtomic && other.accessesGlobalState()) || + (other.isAtomic && accessesGlobalState())) { return true; } for (auto local : localsWritten) { @@ -598,24 +601,26 @@ class EffectAnalyzer { } void visitLoad(Load* curr) { parent.readsMemory = true; - parent.isAtomic |= curr->isAtomic(); + parent.isAtomic |= + curr->isAtomic() && parent.module.getMemory(curr->memory)->shared; parent.implicitTrap = true; } void visitStore(Store* curr) { parent.writesMemory = true; - parent.isAtomic |= curr->isAtomic(); + parent.isAtomic |= + curr->isAtomic() && parent.module.getMemory(curr->memory)->shared; parent.implicitTrap = true; } void visitAtomicRMW(AtomicRMW* curr) { parent.readsMemory = true; parent.writesMemory = true; - parent.isAtomic = true; + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; parent.implicitTrap = true; } void visitAtomicCmpxchg(AtomicCmpxchg* curr) { parent.readsMemory = true; parent.writesMemory = true; - parent.isAtomic = true; + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; parent.implicitTrap 
= true; } void visitAtomicWait(AtomicWait* curr) { @@ -740,8 +745,9 @@ class EffectAnalyzer { // memory.size accesses the size of the memory, and thus can be modeled as // reading memory parent.readsMemory = true; - // Atomics are sequentially consistent with memory.size. - parent.isAtomic = true; + // Synchronizes with memory.grow on other threads, but only when operating + // on shared memories. + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; } void visitMemoryGrow(MemoryGrow* curr) { // TODO: find out if calls is necessary here @@ -751,8 +757,9 @@ class EffectAnalyzer { // addresses, and just a read operation in the failure case parent.readsMemory = true; parent.writesMemory = true; - // Atomics are also sequentially consistent with memory.grow. - parent.isAtomic = true; + // Synchronizes with memory.size on other threads, but only when operating + // on shared memories. + parent.isAtomic = parent.module.getMemory(curr->memory)->shared; } void visitRefNull(RefNull* curr) {} void visitRefIsNull(RefIsNull* curr) {} @@ -896,18 +903,8 @@ class EffectAnalyzer { if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - switch (curr->order) { - case MemoryOrder::Unordered: - break; - case MemoryOrder::SeqCst: - // Synchronizes with other threads. - parent.isAtomic = true; - break; - case MemoryOrder::AcqRel: - // Only synchronizes if other threads can read the field. 
- parent.isAtomic = curr->ref->type.getHeapType().isShared(); - break; - } + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitStructSet(StructSet* curr) { if (curr->ref->type.isNull()) { @@ -919,9 +916,8 @@ class EffectAnalyzer { if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - if (curr->order != MemoryOrder::Unordered) { - parent.isAtomic = true; - } + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitStructRMW(StructRMW* curr) { if (curr->ref->type.isNull()) { @@ -969,6 +965,8 @@ class EffectAnalyzer { parent.readsArray = true; // traps when the arg is null or the index out of bounds parent.implicitTrap = true; + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitArraySet(ArraySet* curr) { if (curr->ref->type.isNull()) { @@ -978,6 +976,8 @@ class EffectAnalyzer { parent.writesArray = true; // traps when the arg is null or the index out of bounds parent.implicitTrap = true; + parent.isAtomic |= + curr->isAtomic() && curr->ref->type.getHeapType().isShared(); } void visitArrayLen(ArrayLen* curr) { if (curr->ref->type.isNull()) { diff --git a/src/wasm.h b/src/wasm.h index 3c84e02b984..70a1ba17253 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -1714,6 +1714,8 @@ class StructGet : public SpecificExpression { bool signed_ = false; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; @@ -1727,6 +1729,8 @@ class StructSet : public SpecificExpression { Expression* value; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; @@ -1818,6 +1822,8 @@ class ArrayGet : public SpecificExpression { bool signed_ = false; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; @@ -1831,6 +1837,8 @@ 
class ArraySet : public SpecificExpression { Expression* value; MemoryOrder order = MemoryOrder::Unordered; + bool isAtomic() const { return order != MemoryOrder::Unordered; } + void finalize(); }; diff --git a/test/lit/passes/simplify-locals-atomic-effects.wast b/test/lit/passes/simplify-locals-atomic-effects.wast new file mode 100644 index 00000000000..2fcaa88093a --- /dev/null +++ b/test/lit/passes/simplify-locals-atomic-effects.wast @@ -0,0 +1,101 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s -all --simplify-locals -S -o - | filecheck %s + +(module + ;; CHECK: (type $shared-struct (shared (struct (field (mut i32))))) + (type $shared-struct (shared (struct (field (mut i32))))) + ;; CHECK: (type $unshared-struct (struct (field (mut i32)))) + (type $unshared-struct (struct (field (mut i32)))) + + ;; CHECK: (memory $shared 1 1 shared) + (memory $shared 1 1 shared) + ;; CHECK: (memory $unshared 1 1) + (memory $unshared 1 1) + + ;; CHECK: (func $memory.size-shared-shared (type $2) (param $shared (ref null $shared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (memory.size $shared) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $shared-struct 0 + ;; CHECK-NEXT: (local.get $shared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + (func $memory.size-shared-shared (param $shared (ref null $shared-struct)) (result i32) + (local $x i32) + ;; memory.size can synchronize with memory.grow on another thread when the + ;; memory is shared. It cannot be moved past a write to a shared struct. 
+ (local.set $x + (memory.size $shared) + ) + (struct.set $shared-struct 0 (local.get $shared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-unshared-shared (type $2) (param $shared (ref null $shared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (memory.size $unshared) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $shared-struct 0 + ;; CHECK-NEXT: (local.get $shared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + (func $memory.size-unshared-shared (param $shared (ref null $shared-struct)) (result i32) + (local $x i32) + ;; Now the memory is unshared. In principle we could reorder because no + ;; other thread could observe the reordering, but we do not distinguish + ;; between accesses to shared and unshared state. TODO. + (local.set $x + (memory.size $unshared) + ) + (struct.set $shared-struct 0 (local.get $shared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-shared-unshared (type $3) (param $unshared (ref null $unshared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (memory.size $shared) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $unshared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + (func $memory.size-shared-unshared (param $unshared (ref null $unshared-struct)) (result i32) + (local $x i32) + ;; Now the memory is shared but the struct is unshared. Again, we could + ;; reorder in principle, but we do not yet. TODO. 
+ (local.set $x + (memory.size $shared) + ) + (struct.set $unshared-struct 0 (local.get $unshared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-unshared-unshared (type $3) (param $unshared (ref null $unshared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (struct.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $unshared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (memory.size $unshared) + ;; CHECK-NEXT: ) + (func $memory.size-unshared-unshared (param $unshared (ref null $unshared-struct)) (result i32) + (local $x i32) + ;; Now both the memory and struct are unshared, so neither can synchronize + ;; with anything and we reorder freely. + (local.set $x + (memory.size $unshared) + ) + (struct.set $unshared-struct 0 (local.get $unshared) (i32.const 0)) + (local.get $x) + ) +)