Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions crates/test-util/src/wast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -622,16 +622,6 @@ impl WastTest {

#[cfg(target_arch = "x86_64")]
{
let unsupported = [
// externref/reference-types related
// simd-related failures
"misc_testsuite/simd/canonicalize-nan.wast",
];

if unsupported.iter().any(|part| self.path.ends_with(part)) {
return true;
}

// SIMD on Winch requires AVX instructions.
#[cfg(target_arch = "x86_64")]
if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2"))
Expand All @@ -642,6 +632,7 @@ impl WastTest {
"misc_testsuite/int-to-float-splat.wast",
"misc_testsuite/issue6562.wast",
"misc_testsuite/simd/almost-extmul.wast",
"misc_testsuite/simd/canonicalize-nan.wast",
"misc_testsuite/simd/cvt-from-uint.wast",
"misc_testsuite/simd/edge-of-memory.wast",
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
Expand Down
40 changes: 40 additions & 0 deletions tests/disas/winch/x64/f32_add/nan_canon.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = "-Wnan-canonicalization"

;; Single anonymous function that adds its two f32 parameters. Compiled with
;; the `-Wnan-canonicalization` flag from the header, the expected disassembly
;; below follows the `addss` with a self-compare (`ucomiss`), a `jnp` that
;; skips the fix-up, and a `movss` load from a RIP-relative constant.
(module
(func (param f32 f32) (result f32)
local.get 0
local.get 1
f32.add
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x18(%r11), %r11
;; addq $0x20, %r11
;; cmpq %rsp, %r11
;; ja 0x69
;; 1c: movq %rdi, %r14
;; subq $0x20, %rsp
;; movq %rdi, 0x18(%rsp)
;; movq %rsi, 0x10(%rsp)
;; movss %xmm0, 0xc(%rsp)
;; movss %xmm1, 8(%rsp)
;; movss 8(%rsp), %xmm0
;; movss 0xc(%rsp), %xmm1
;; addss %xmm0, %xmm1
;; ucomiss %xmm1, %xmm1
;; jnp 0x5d
;; 55: movss 0x13(%rip), %xmm1
;; movaps %xmm1, %xmm0
;; addq $0x20, %rsp
;; popq %rbp
;; retq
;; 69: ud2
;; 6b: addb %al, (%rax)
;; 6d: addb %al, (%rax)
;; 6f: addb %al, (%rax)
;; 71: addb %al, %al
42 changes: 42 additions & 0 deletions tests/disas/winch/x64/f32x4_add/nan_canon.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = ["-Wnan-canonicalization", "-Ccranelift-has-avx"]

;; Single anonymous function that adds its two v128 parameters lane-wise as
;; f32x4. Compiled with the NaN-canonicalization and AVX flags from the header,
;; the expected disassembly below follows the `vaddps` with a
;; `vcmpunordps` / `vandnps` / `vandps` / `vorps` sequence instead of a branch.
(module
(func (param v128 v128) (result v128)
local.get 0
local.get 1
f32x4.add
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x18(%r11), %r11
;; addq $0x30, %r11
;; cmpq %rsp, %r11
;; ja 0x6c
;; 1c: movq %rdi, %r14
;; subq $0x30, %rsp
;; movq %rdi, 0x28(%rsp)
;; movq %rsi, 0x20(%rsp)
;; movdqu %xmm0, 0x10(%rsp)
;; movdqu %xmm1, (%rsp)
;; movdqu (%rsp), %xmm0
;; movdqu 0x10(%rsp), %xmm1
;; vaddps %xmm0, %xmm1, %xmm1
;; vcmpunordps %xmm1, %xmm1, %xmm15
;; vandnps %xmm1, %xmm15, %xmm1
;; vandps 0x15(%rip), %xmm15, %xmm15
;; vorps %xmm1, %xmm15, %xmm1
;; movdqa %xmm1, %xmm0
;; addq $0x30, %rsp
;; popq %rbp
;; retq
;; 6c: ud2
;; 6e: addb %al, (%rax)
;; 70: addb %al, (%rax)
;; 72: sarb $0, (%rdi)
;; 76: sarb $0, (%rdi)
;; 7a: sarb $0, (%rdi)
43 changes: 43 additions & 0 deletions tests/disas/winch/x64/f64_div/nan_canon.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = "-Wnan-canonicalization"

;; Single anonymous function that divides its first f64 parameter by its
;; second. Compiled with the `-Wnan-canonicalization` flag from the header, the
;; expected disassembly below follows the `divsd` with a self-compare
;; (`ucomisd`), a `jnp` that skips the fix-up, and a `movsd` load from a
;; RIP-relative constant.
(module
(func (param f64 f64) (result f64)
local.get 0
local.get 1
f64.div
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x18(%r11), %r11
;; addq $0x20, %r11
;; cmpq %rsp, %r11
;; ja 0x68
;; 1c: movq %rdi, %r14
;; subq $0x20, %rsp
;; movq %rdi, 0x18(%rsp)
;; movq %rsi, 0x10(%rsp)
;; movsd %xmm0, 8(%rsp)
;; movsd %xmm1, (%rsp)
;; movsd (%rsp), %xmm0
;; movsd 8(%rsp), %xmm1
;; divsd %xmm0, %xmm1
;; ucomisd %xmm1, %xmm1
;; jnp 0x5c
;; 54: movsd 0x14(%rip), %xmm1
;; movaps %xmm1, %xmm0
;; addq $0x20, %rsp
;; popq %rbp
;; retq
;; 68: ud2
;; 6a: addb %al, (%rax)
;; 6c: addb %al, (%rax)
;; 6e: addb %al, (%rax)
;; 70: addb %al, (%rax)
;; 72: addb %al, (%rax)
;; 74: addb %al, (%rax)
;; 76: clc
153 changes: 153 additions & 0 deletions tests/misc_testsuite/canonicalize-nan-scalar.wast
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we also implement this functionality for the non-scalar counterparts? I believe that all vector instructions are implemented in order to respect canonicalization. Once implemented we should be able to fully test simd/canonicalize-nan.wast in x86-64, by removing this line https://github.com/bytecodealliance/wasmtime/blob/main/crates/test-util/src/wast.rs#L614

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok here we go: cdef987 (this PR).

Added maybe_canonicalize_v128_nan to the masm trait and implemented it for x64

I removed the skip and it looks like the tests pass 🎉

Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
;;! nan_canonicalization = true

;; Scalar counterpart to simd/canonicalize-nan.wast.

;; Every export wraps a single floating-point instruction (or a single
;; conversion between reinterprets) so that the assertions following this
;; module can observe that instruction's result with NaN canonicalization
;; enabled by the directive at the top of the file.
(module
;; f32: binary arithmetic (add, sub, mul, div, min, max).
(func (export "f32.add") (param f32 f32) (result f32)
local.get 0
local.get 1
f32.add)
(func (export "f32.sub") (param f32 f32) (result f32)
local.get 0
local.get 1
f32.sub)
(func (export "f32.mul") (param f32 f32) (result f32)
local.get 0
local.get 1
f32.mul)
(func (export "f32.div") (param f32 f32) (result f32)
local.get 0
local.get 1
f32.div)
(func (export "f32.min") (param f32 f32) (result f32)
local.get 0
local.get 1
f32.min)
(func (export "f32.max") (param f32 f32) (result f32)
local.get 0
local.get 1
f32.max)
;; f32: unary operations (sqrt and the four rounding modes).
(func (export "f32.sqrt") (param f32) (result f32)
local.get 0
f32.sqrt)
(func (export "f32.ceil") (param f32) (result f32)
local.get 0
f32.ceil)
(func (export "f32.floor") (param f32) (result f32)
local.get 0
f32.floor)
(func (export "f32.trunc") (param f32) (result f32)
local.get 0
f32.trunc)
(func (export "f32.nearest") (param f32) (result f32)
local.get 0
f32.nearest)

;; f64: binary arithmetic (add, sub, mul, div, min, max).
(func (export "f64.add") (param f64 f64) (result f64)
local.get 0
local.get 1
f64.add)
(func (export "f64.sub") (param f64 f64) (result f64)
local.get 0
local.get 1
f64.sub)
(func (export "f64.mul") (param f64 f64) (result f64)
local.get 0
local.get 1
f64.mul)
(func (export "f64.div") (param f64 f64) (result f64)
local.get 0
local.get 1
f64.div)
(func (export "f64.min") (param f64 f64) (result f64)
local.get 0
local.get 1
f64.min)
(func (export "f64.max") (param f64 f64) (result f64)
local.get 0
local.get 1
f64.max)
;; f64: unary operations (sqrt and the four rounding modes).
(func (export "f64.sqrt") (param f64) (result f64)
local.get 0
f64.sqrt)
(func (export "f64.ceil") (param f64) (result f64)
local.get 0
f64.ceil)
(func (export "f64.floor") (param f64) (result f64)
local.get 0
f64.floor)
(func (export "f64.trunc") (param f64) (result f64)
local.get 0
f64.trunc)
(func (export "f64.nearest") (param f64) (result f64)
local.get 0
f64.nearest)

;; Width conversions. Both take and return raw integer bits (reinterpreted
;; to/from floats around the conversion) so a caller can pass in an arbitrary
;; NaN bit pattern and inspect the exact bits that come out.
(func (export "reinterpret-and-demote") (param i64) (result i32)
local.get 0
f64.reinterpret_i64
f32.demote_f64
i32.reinterpret_f32)
(func (export "reinterpret-and-promote") (param i32) (result i64)
local.get 0
f32.reinterpret_i32
f64.promote_f32
i64.reinterpret_f64)

;; Expose raw bits of 0/0 to verify exact canonical NaN bit patterns.
(func (export "f32.div-nan-bits") (result i32)
f32.const 0
f32.const 0
f32.div
i32.reinterpret_f32)
(func (export "f64.div-nan-bits") (result i64)
f64.const 0
f64.const 0
f64.div
i64.reinterpret_f64)
)

;; Exact bit patterns: canonical f32 NaN = 0x7fc00000, f64 = 0x7ff8000000000000
(assert_return (invoke "f32.div-nan-bits") (i32.const 0x7fc00000))
(assert_return (invoke "f64.div-nan-bits") (i64.const 0x7ff8000000000000))

;; In the float-typed expectations below, `nan:0x400000` (f32) and
;; `nan:0x8000000000000` (f64) are the float-literal spellings of those same
;; canonical patterns: only the most significant mantissa bit is set.

;; NaN-producing operations
(assert_return (invoke "f32.div" (f32.const 0) (f32.const 0)) (f32.const nan:0x400000))
(assert_return (invoke "f64.div" (f64.const 0) (f64.const 0)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f32.sqrt" (f32.const -1)) (f32.const nan:0x400000))
(assert_return (invoke "f64.sqrt" (f64.const -1)) (f64.const nan:0x8000000000000))

;; NaN propagation through f32 arithmetic
(assert_return (invoke "f32.add" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000))
(assert_return (invoke "f32.sub" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000))
(assert_return (invoke "f32.mul" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000))
(assert_return (invoke "f32.min" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000))
(assert_return (invoke "f32.max" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000))

;; NaN propagation through f64 arithmetic
(assert_return (invoke "f64.add" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f64.sub" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f64.mul" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f64.min" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f64.max" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000))

;; Rounding NaN (f32)
(assert_return (invoke "f32.ceil" (f32.const nan)) (f32.const nan:0x400000))
(assert_return (invoke "f32.floor" (f32.const nan)) (f32.const nan:0x400000))
(assert_return (invoke "f32.trunc" (f32.const nan)) (f32.const nan:0x400000))
(assert_return (invoke "f32.nearest" (f32.const nan)) (f32.const nan:0x400000))

;; Rounding NaN (f64)
(assert_return (invoke "f64.ceil" (f64.const nan)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f64.floor" (f64.const nan)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f64.trunc" (f64.const nan)) (f64.const nan:0x8000000000000))
(assert_return (invoke "f64.nearest" (f64.const nan)) (f64.const nan:0x8000000000000))

;; Demote/promote with non-canonical NaN bit patterns
;; Both inputs have the sign bit set, an all-ones exponent and a non-zero
;; mantissa, i.e. they are negative NaNs with arbitrary payloads. The expected
;; outputs are the positive canonical NaN of the destination width.
(assert_return (invoke "reinterpret-and-demote" (i64.const 0xfffefdfccccdcecf)) (i32.const 0x7fc00000))
(assert_return (invoke "reinterpret-and-promote" (i32.const 0xfffefdfc)) (i64.const 0x7ff8000000000000))

;; Normal values pass through unchanged
(assert_return (invoke "f32.add" (f32.const 1) (f32.const 2)) (f32.const 3))
(assert_return (invoke "f64.div" (f64.const 10) (f64.const 2)) (f64.const 5))
(assert_return (invoke "f32.sqrt" (f32.const 4)) (f32.const 2))
42 changes: 41 additions & 1 deletion winch/codegen/src/isa/aarch64/masm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ pub(crate) struct MacroAssembler {
ptr_size: OperandSize,
/// Scratch register scope.
scratch_scope: RegAlloc,
/// Shared flags.
shared_flags: settings::Flags,
}

impl MacroAssembler {
Expand All @@ -71,10 +73,11 @@ impl MacroAssembler {
Ok(Self {
sp_max: 0,
stack_max_use_add: None,
asm: Assembler::new(shared_flags, isa_flags),
asm: Assembler::new(shared_flags.clone(), isa_flags),
sp_offset: 0u32,
ptr_size: ptr_type_from_ptr_size(ptr_size.size()).try_into()?,
scratch_scope: RegAlloc::from(scratch_gpr_bitset(), scratch_fpr_bitset()),
shared_flags,
})
}

Expand Down Expand Up @@ -713,6 +716,43 @@ impl Masm for MacroAssembler {
Ok(())
}

/// Emits code that replaces the scalar float in `reg` with the canonical NaN
/// when it holds a NaN. Emits nothing when NaN canonicalization is disabled
/// in the shared flags.
///
/// `size` selects the scalar width and must be `OperandSize::S32` or
/// `OperandSize::S64`.
///
/// # Errors
///
/// Returns `CodeGenError::unexpected_operand_size` for any other operand
/// size. The size is validated before any instruction is emitted, so a
/// failure never leaves a compare and a branch to an unbound label behind in
/// the buffer.
fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> {
    if !self.shared_flags.enable_nan_canonicalization() {
        return Ok(());
    }

    // Resolve the replacement constant first: this is the only fallible step,
    // and failing here must not leave partially emitted code.
    let canonical_nan = match size {
        OperandSize::S32 => crate::masm::CANONICAL_NAN_F32,
        OperandSize::S64 => crate::masm::CANONICAL_NAN_F64,
        _ => bail!(CodeGenError::unexpected_operand_size()),
    };

    let done_label = self.asm.buffer_mut().get_label();

    // A register compared with itself is unordered only when it holds a NaN.
    // AArch64 `fcmp` reports unordered via the V flag, so `Vc` (V clear)
    // means "not a NaN" and the replacement is skipped.
    self.asm.fcmp(reg.to_reg(), reg.to_reg(), size);
    self.asm.jmp_if(Cond::Vc, done_label);

    // NaN path: overwrite `reg` with the canonical NaN loaded from a constant
    // registered with the assembler.
    let constant = self.asm.add_constant(canonical_nan);
    self.asm.uload(
        inst::AMode::Const { addr: constant },
        reg,
        size,
        TRUSTED_FLAGS,
    );

    self.asm
        .buffer_mut()
        .bind_label(done_label, &mut Default::default());
    Ok(())
}

/// Vector (v128) NaN canonicalization is not implemented by this
/// `MacroAssembler`; both arguments are ignored.
///
/// # Errors
///
/// Always returns `CodeGenError::unimplemented_masm_instruction`.
// NOTE(review): unlike `maybe_canonicalize_nan`, this does not consult the
// NaN-canonicalization flag first, so it fails even when canonicalization is
// disabled — confirm that callers only reach it after a supported vector op.
fn maybe_canonicalize_v128_nan(
&mut self,
_reg: WritableReg,
_lane_size: OperandSize,
) -> Result<()> {
bail!(CodeGenError::unimplemented_masm_instruction())
}

fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> {
match (rhs, lhs, dst) {
(RegImm::Imm(v), rn, rd) => {
Expand Down
Loading
Loading