diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index a50a86ed157..1d46bbf8901 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -120,6 +120,7 @@ 'optimize-instructions_idempotent.wast', 'duplicate-function-elimination_annotations.wast', 'once-reduction_idempotent.wast', + 'local-cse_idempotent.wast', # Not fully implemented. 'waitqueue.wast', ] diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index 79831fb89ab..b17ee43157f 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -118,6 +118,7 @@ #include #include +#include #include #include #include @@ -207,6 +208,22 @@ struct RequestInfoMap : public std::unordered_map { } }; +// Check if a call is to an idempotent function. Note that we do not check for +// an annotation on the call itself - optimizing that would require us to verify +// idempotency on the later one specifically, but that is complicated in this +// pass (we'd end up tracking the first one unnecessarily, hoping the second is +// idempotent). Instead, we look on the called function, which is identical for +// all callers. +// TODO if users want the call annotation, handle that too. +bool isIdempotent(Expression* curr, Module& wasm) { + if (auto* call = curr->dynCast()) { + if (Intrinsics::getAnnotations(wasm.getFunction(call->target)).idempotent) { + return true; + } + } + return false; +} + struct Scanner : public LinearExecutionWalker> { PassOptions& options; @@ -368,6 +385,13 @@ struct Scanner // total size big enough - while isPossible checks conditions that prevent // using an expression at all. bool isPossible(Expression* curr) { + // A call to an idempotent function is optimizable: it may have effects the + // first time, but those do not invalidate the second appearance, and also + // it will return the same value. + if (isIdempotent(curr, *getModule())) { + return true; + } + // We will fully compute effects later, but consider shallow effects at this // early time to ignore things that cannot be optimized later, because we // use a greedy algorithm. Specifically, imagine we see this: @@ -456,9 +480,16 @@ struct Checker // children by the time we get here. effects.visit(curr); + auto idempotent = isIdempotent(curr, *getModule()); + std::vector invalidated; for (auto& kv : activeOriginals) { auto* original = kv.first; + if (idempotent && ExpressionAnalyzer::shallowEqual(curr, original)) { + // |curr| is idempotent, so it does not invalidate later appearances + // of itself. + continue; + } auto& originalInfo = kv.second; if (effects.invalidates(originalInfo.effects)) { invalidated.push_back(original); diff --git a/test/lit/idempotent.wast b/test/lit/idempotent.wast new file mode 100644 index 00000000000..16cc293033d --- /dev/null +++ b/test/lit/idempotent.wast @@ -0,0 +1,33 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: wasm-opt -all %s -S -o - | filecheck %s +;; RUN: wasm-opt -all --roundtrip %s -S -o - | filecheck %s + +;; Test text and binary handling of @binaryen.idempotent. + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (@binaryen.idempotent) + ;; CHECK-NEXT: (func $func-annotation (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (@binaryen.idempotent) + (func $func-annotation + (drop + (i32.const 0) + ) + ) + + ;; CHECK: (func $call (type $0) + ;; CHECK-NEXT: (@binaryen.idempotent) + ;; CHECK-NEXT: (call $call) + ;; CHECK-NEXT: ) + (func $call + (@binaryen.idempotent) + (call $call) + ) +) + diff --git a/test/lit/passes/local-cse_idempotent.wast b/test/lit/passes/local-cse_idempotent.wast new file mode 100644 index 00000000000..a211e7e7d0f --- /dev/null +++ b/test/lit/passes/local-cse_idempotent.wast @@ -0,0 +1,279 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. + +;; RUN: foreach %s %t wasm-opt --local-cse -all -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $1 (func (param i32) (result i32))) + + ;; CHECK: (import "a" "b" (func $import (type $0))) + (import "a" "b" (func $import)) + + ;; CHECK: (global $mutable (mut i32) (i32.const 42)) + (global $mutable (mut i32) (i32.const 42)) + + ;; CHECK: (global $immutable i32 (i32.const 1337)) + (global $immutable i32 (i32.const 1337)) + + ;; CHECK: (@binaryen.idempotent) + ;; CHECK-NEXT: (func $idempotent (type $1) (param $0 i32) (result i32) + ;; CHECK-NEXT: (call $import) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + (@binaryen.idempotent) + (func $idempotent (param i32) (result i32) + ;; This function has side effects, but is marked idempotent. + (call $import) + (i32.const 42) + ) + + ;; CHECK: (func $potent (type $1) (param $0 i32) (result i32) + ;; CHECK-NEXT: (call $import) + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + (func $potent (param i32) (result i32) + ;; As above, but not marked as idempotent. + (call $import) + (i32.const 1337) + ) + + ;; CHECK: (func $yes (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $0 + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $yes + ;; We can optimize here. + (drop + (call $idempotent + (i32.const 10) + ) + ) + (drop + (call $idempotent + (i32.const 10) + ) + ) + ) + + ;; CHECK: (func $no (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $potent + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $potent + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $no + ;; Without idempotency we cannot optimize. + (drop + (call $potent + (i32.const 10) + ) + ) + (drop + (call $potent + (i32.const 10) + ) + ) + ) + + ;; CHECK: (func $different-input (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $different-input + ;; We cannot optimize here. + (drop + (call $idempotent + (i32.const 10) + ) + ) + (drop + (call $idempotent + (i32.const 20) + ) + ) + ) + + ;; CHECK: (func $idem-effects (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $idem-effects + ;; An idempotent function still has effects on the first call. Those effects + ;; can invalidate the global.get here. + (drop + (call $idempotent + (global.get $mutable) + ) + ) + (drop + (call $idempotent + (global.get $mutable) + ) + ) + ) + + ;; CHECK: (func $idem-effects-immutable (type $0) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $0 + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $immutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $idem-effects-immutable + ;; But here we read an immutable value, so we can optimize. + (drop + (call $idempotent + (global.get $immutable) + ) + ) + (drop + (call $idempotent + (global.get $immutable) + ) + ) + ) + + ;; CHECK: (func $idem-effects-interact (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $immutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $immutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $idem-effects-interact + ;; An idempotent function still has effects on the first call. That + ;; prevents optimizing the i32.add, as the first call might alter $mutable. + (drop + (i32.add + (global.get $mutable) + (global.get $mutable) + ) + ) + (drop + (call $idempotent + (global.get $immutable) + ) + ) + (drop + (i32.add + (global.get $mutable) + (global.get $mutable) + ) + ) + (drop + (call $idempotent + (global.get $immutable) + ) + ) + ) + + ;; CHECK: (func $idem-effects-interact-2 (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $immutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $immutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: (global.get $mutable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $idem-effects-interact-2 + ;; As above, but interleaved differently. The first i32.add is now between + ;; the two idempotent calls. Here we could optimize, but do not atm, as the + ;; mutable reads invalidate the first call (only the reverse is true, but + ;; our check is symmetrical atm), and the second call - which we fail to + ;; optimize out - invalidates the later mutable reads. TODO + (drop + (call $idempotent + (global.get $immutable) + ) + ) + (drop + (i32.add + (global.get $mutable) + (global.get $mutable) + ) + ) + (drop + (call $idempotent + (global.get $immutable) + ) + ) + (drop + (i32.add + (global.get $mutable) + (global.get $mutable) + ) + ) + ) +)