diff --git a/crates/test-util/src/wast.rs b/crates/test-util/src/wast.rs index a2f3a3304ff3..ee046a824f83 100644 --- a/crates/test-util/src/wast.rs +++ b/crates/test-util/src/wast.rs @@ -491,6 +491,7 @@ impl WastTest { "spec_testsuite/proposals/threads/exports.wast", "spec_testsuite/proposals/threads/memory.wast", "misc_testsuite/memory64/threads.wast", + "misc_testsuite/winch/rmw32_cmpxchg_u_wrap.wast", ]; if unsupported.iter().any(|part| self.path.ends_with(part)) { diff --git a/tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu.wat b/tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu.wat index 2fa2451eae72..a6c3a954e728 100644 --- a/tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu.wat +++ b/tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu.wat @@ -12,7 +12,7 @@ ;; movq 0x18(%r11), %r11 ;; addq $0x20, %r11 ;; cmpq %rsp, %r11 -;; ja 0x6f +;; ja 0x71 ;; 1c: movq %rdi, %r14 ;; subq $0x10, %rsp ;; movq %rdi, 8(%rsp) @@ -22,7 +22,7 @@ ;; movl $0, %edx ;; andl $3, %edx ;; cmpl $0, %edx -;; jne 0x71 +;; jne 0x73 ;; 4d: movl $0, %edx ;; movq 0x30(%r14), %r11 ;; movq (%r11), %rbx @@ -33,8 +33,9 @@ ;; popq %rcx ;; popq %rax ;; lock cmpxchgl %ecx, (%rbx) +;; movl %eax, %eax ;; addq $0x10, %rsp ;; popq %rbp ;; retq -;; 6f: ud2 ;; 71: ud2 +;; 73: ud2 diff --git a/tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu_extend.wat b/tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu_extend.wat new file mode 100644 index 000000000000..1221f51cda28 --- /dev/null +++ b/tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu_extend.wat @@ -0,0 +1,44 @@ +;;! target = "x86_64" +;;! 
test = "winch" + +(module + (memory 1 1 shared) + (func (export "f") (result i64) + i32.const 0 + i64.const 0xDEADBEEF00000000 + i64.const 0x1234 + i64.atomic.rmw32.cmpxchg_u)) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x18(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x76 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $0x1234, %eax +;; movabsq $16045690981097406464, %rcx +;; movl $0, %edx +;; andl $3, %edx +;; cmpl $0, %edx +;; jne 0x78 +;; 52: movl $0, %edx +;; movq 0x30(%r14), %r11 +;; movq (%r11), %rbx +;; movl %edx, %edx +;; addq %rdx, %rbx +;; pushq %rcx +;; pushq %rax +;; popq %rcx +;; popq %rax +;; lock cmpxchgl %ecx, (%rbx) +;; movl %eax, %eax +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 76: ud2 +;; 78: ud2 diff --git a/tests/misc_testsuite/winch/rmw32_cmpxchg_u_wrap.wast b/tests/misc_testsuite/winch/rmw32_cmpxchg_u_wrap.wast new file mode 100644 index 000000000000..3d14edb0ca07 --- /dev/null +++ b/tests/misc_testsuite/winch/rmw32_cmpxchg_u_wrap.wast @@ -0,0 +1,11 @@ +;;! threads = true + +(module + (memory 1 1 shared) + (func (export "f") (result i64) + i32.const 0 + i64.const 0xDEADBEEF00000000 + i64.const 0x1234 + i64.atomic.rmw32.cmpxchg_u)) + +(assert_return (invoke "f") (i64.const 0)) diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index 0d08767192e2..732eea6b431e 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -1416,21 +1416,27 @@ where size: OperandSize, extend: Option>, ) -> Result<()> { - // Emission for this instruction is a bit trickier. The address for the CAS is the 3rd from - // the top of the stack, and we must emit instruction to compute the actual address with - // `emit_compute_heap_address_align_checked`, while we still have access to self. 
However, - // some ISAs have requirements with regard to the registers used for some arguments, so we - // need to pass the context to the masm. To solve this issue, we pop the two first - // arguments from the stack, compute the address, push back the arguments, and hand over - // the control to masm. The implementer of `atomic_cas` can expect to find `expected` and - // `replacement` at the top the context's stack. - - // pop the args + // At this point in the stack we have: + // [ address, expected, replacement ] + // + // Therefore, emission for this instruction is a bit + // trickier. The address for the CAS is the 3rd from the top + // of the stack, and we must emit instructions to compute the + // actual address with + // `emit_compute_heap_address_align_checked`, while we still + // have access to self. However, some ISAs have requirements + // with regard to the registers used for some arguments, so we + // need to pass the context to the masm. To solve this issue, + // we pop the first two arguments from the stack, compute the + // address, push back the arguments, and hand over the control + // to masm. The implementer of `atomic_cas` can expect to find + // `expected` and `replacement` at the top of the context's + // stack. + let replacement = self.context.pop_to_reg(self.masm, None)?; let expected = self.context.pop_to_reg(self.masm, None)?; if let Some(addr) = self.emit_compute_heap_address_align_checked(arg, size)? { - // push back the args self.context.stack.push(expected.into()); self.context.stack.push(replacement.into()); diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 83a0789edbe6..a6d123f3f08a 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -1781,23 +1781,20 @@ impl Masm for MacroAssembler { ) -> Result<()> { // `cmpxchg` expects `expected` to be in the `*a*` register. // reserve rax for the expected argument. 
- let rax = context.reg(regs::rax(), self)?; - let replacement = context.pop_to_reg(self, None)?; + let replacement = + context.without::, _, _>(&[regs::rax()], self, |cx, masm| { + cx.pop_to_reg(masm, None) + })??; - // mark `rax` as allocatable again. - context.free_reg(rax); let expected = context.pop_to_reg(self, Some(regs::rax()))?; self.asm .cmpxchg(addr, replacement.reg, writable!(expected.reg), size, flags); if let Some(extend) = extend { - // We don't need to zero-extend from 32 to 64bits. - if !(extend.from_bits() == 32 && extend.to_bits() == 64) { - self.asm - .movzx_rr(expected.reg, writable!(expected.reg), extend); - } + self.asm + .movzx_rr(expected.reg, writable!(expected.reg), extend); } context.stack.push(expected.into());