Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion cranelift/codegen/meta/src/cdsl/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ pub(crate) struct InstructionBuilder {
operands_in: Option<Vec<Operand>>,
operands_out: Option<Vec<Operand>>,

// See Instruction comments for the meaning of these fields.
// See InstructionContent comments for the meaning of these fields.
is_terminator: bool,
is_branch: bool,
is_call: bool,
Expand Down
34 changes: 34 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,40 @@ fn define_control_flow(
.call()
.branches(),
);

ig.push(
Inst::new(
"dead_load_with_context",
r#"
Load 32 bits from memory at ``load_ptr`` while also keeping ``context``
in a fixed register and reserving a second as scratch space.

This is intended for implementing MMU-triggered jumps as in
epoch-interruption-via-mmu, where the load conditionally triggers a
segfault, which hands off control to a signal handler for further
action. The handler has access to ``context`` (typically the
``VMContext``'s ``vm_store_context``) and can use the second
reserved register to store a temp value, as needed on platforms
where signal handlers cannot push stack frames.

On x64, RDI holds ``context``, and R10 is used as scratch space.
"#,
&formats.binary,
)
.operands_in(vec![
Operand::new("load_ptr", iAddr).with_doc("memory location to load from"),
Operand::new("context", iAddr)
.with_doc("arbitrary address-sized context to pass to signal handler"),
])
// Are we a call? stack_switch calls itself one "as it continues
// execution elsewhere". See reasoning at
// https://github.com/bytecodealliance/wasmtime/pull/9078#issuecomment-2273869774.
.call()
.can_load()
// Don't optimize me out just because I don't def anything. TODO: Can we use side_effects_idempotent()?
.other_side_effects(),
// If `load` is not can_trap(), this isn't either.
);
}

#[inline(never)]
Expand Down
8 changes: 6 additions & 2 deletions cranelift/codegen/src/ir/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,9 +613,13 @@ impl InstructionData {
Self::Ternary {
opcode: Opcode::StackSwitch,
..
}
| Self::Binary {
opcode: Opcode::DeadLoadWithContext,
..
} => {
// `StackSwitch` is not actually a call, but has the .call() side
// effect as it continues execution elsewhere.
// These instructions aren't actually calls, but they have the
// .call() side effect, as they continue execution elsewhere.
CallInfo::NotACall
}
_ => {
Expand Down
11 changes: 10 additions & 1 deletion cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
;; =========================================
;; Stack manipulation.

;; Emits a inline stack probe loop.
;; Emits an inline stack probe loop.
(StackProbeLoop (tmp WritableReg)
(frame_size u32)
(guard_size u32))
Expand Down Expand Up @@ -194,6 +194,9 @@
(offset i64)
(distance RelocDistance))

(DeadLoadWithContext (load_ptr Gpr)
(context Gpr))

;; =========================================
;; Instructions pertaining to atomic memory accesses.

Expand Down Expand Up @@ -1342,6 +1345,12 @@
(rule (return_call_unknown info)
(SideEffectNoResult.Inst (MInst.ReturnCallUnknown info)))

;; Helper for creating `DeadLoadWithContext` instructions.
(decl x64_dead_load_with_context (Gpr Gpr) SideEffectNoResult)
(rule (x64_dead_load_with_context load_ptr context)
(SideEffectNoResult.Inst (MInst.DeadLoadWithContext load_ptr
context)))

;;;; Helpers for emitting stack switches ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl x64_stack_switch_basic (Gpr Gpr Gpr) Gpr)
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,14 @@ pub(crate) fn emit(
sink.bind_label(resume, state.ctrl_plane_mut());
}

Inst::DeadLoadWithContext { .. } => {
// The ISLE has already emitted the dead load. Put the address of
// this instruction aside so we can later distinguish whether a
// segfault is its fault.

            // Search for "let pc_offset = layout.ip_offset as i32;" as a starting point for wiring this up.
}

Inst::JmpKnown { dst } => uncond_jmp(sink, *dst),

Inst::WinchJmpIf { cc, taken } => one_way_jmp(sink, *cc, *taken),
Expand Down
23 changes: 22 additions & 1 deletion cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;
use regalloc2::PRegSet;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
Expand Down Expand Up @@ -96,6 +97,7 @@ impl Inst {
| Inst::Args { .. }
| Inst::Rets { .. }
| Inst::StackSwitchBasic { .. }
| Inst::DeadLoadWithContext { .. }
| Inst::TrapIf { .. }
| Inst::TrapIfAnd { .. }
| Inst::TrapIfOr { .. }
Expand Down Expand Up @@ -156,7 +158,7 @@ impl Inst {
Inst::External { inst }
}

/// Writes the `simm64` immedaite into `dst`.
/// Writes the `simm64` immediate into `dst`.
///
/// Note that if `dst_size` is less than 64-bits then the upper bits of
/// `simm64` will be converted to zero.
Expand Down Expand Up @@ -671,6 +673,12 @@ impl PrettyPrint for Inst {
)
}

Inst::DeadLoadWithContext { load_ptr, context } => {
let load_ptr = pretty_print_reg(**load_ptr, 8);
let context = pretty_print_reg(**context, 8);
format!("dead_load_with_context {load_ptr}, {context}")
}

Inst::JmpKnown { dst } => {
let op = ljustify("jmp".to_string());
let dst = dst.to_string();
Expand Down Expand Up @@ -1045,6 +1053,19 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
collector.reg_clobbers(clobbers);
}

Inst::DeadLoadWithContext { load_ptr, context } => {
// load_ptr is an input param.
collector.reg_use(load_ptr);
        // Demand that context (vmctx) go into RDI.
// TODO: Do I still have to move it, or will regalloc make sure it's there?
collector.reg_fixed_use(context, regs::rdi());
// Reserve r10 as a place for the signal handler to stow the return
// address (which we're overwriting with that of the epoch-ending
        // stub). Picking r10 because it's caller-saved; the choice is
        // otherwise arbitrary.
collector.reg_clobbers(PRegSet::empty().with(regs::gpr_preg(asm::gpr::enc::R10)));
}

Inst::ReturnCallKnown { info } => {
let ReturnCallInfo {
dest, uses, tmp, ..
Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3578,6 +3578,23 @@
(in_payload0 Gpr (put_in_gpr in_payload0)))
(x64_stack_switch_basic store_context_ptr load_context_ptr in_payload0)))

;;;; Rules for `dead_load_with_context` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (dead_load_with_context load_ptr context))
(let (
;; Put vmctx into rdi. TODO: Do we have to shove context in a different reg first? The type system seems happy to allow it without.
;; Actually, I suspect we don't even need to do this move; because we use reg_fixed_use() to constrain the `context` arg of DeadLoadWithContext to RDI, I hypothesize regalloc will insert any MOV needed to make that so. It is, after all, in the business of inserting MOVs for spills.
;; (_ SideEffectNoResult (mov_to_preg (preg_rdi) context))

;; Load from load_ptr to perhaps trigger a trap.
;; TODO: May be able to change x64_load to higher-level "load", from codegen/meta/src/shared/instructions.rs.
(_ Gpr (x64_load $I64 (to_amode (mem_flags_aligned_read_only)
load_ptr
(zero_offset))
(ExtKind.None)))
(_ SideEffectNoResult (x64_dead_load_with_context load_ptr context)))
(output_none)))

;;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;

(rule (lower (get_frame_pointer))
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/x64/pcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ pub(crate) fn check(

Inst::StackSwitchBasic { .. } => Err(PccError::UnimplementedInst),

Inst::DeadLoadWithContext { .. } => Err(PccError::UnimplementedInst),

Inst::LabelAddress { .. } => Err(PccError::UnimplementedInst),

Inst::SequencePoint { .. } => Ok(()),
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isle_prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,11 @@ macro_rules! isle_common_prelude_methods {
MemFlags::trusted()
}

#[inline]
fn mem_flags_aligned_read_only(&mut self) -> MemFlags {
MemFlags::new().with_aligned().with_readonly()
}

#[inline]
fn little_or_native_endian(&mut self, flags: MemFlags) -> Option<MemFlags> {
match flags.explicit_endianness() {
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/machinst/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1685,7 +1685,7 @@ impl<I: VCodeInst> MachBuffer<I> {
});
}

/// Add a patchable call record at the current offset The actual
/// Add a patchable call record at the current offset. The actual
/// call is expected to have been emitted; the VCodeInst trait
/// specifies how to NOP it out, and we carry that information to
/// the finalized Machbuffer.
Expand Down
7 changes: 6 additions & 1 deletion cranelift/codegen/src/prelude.isle
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,15 @@

;; `MemFlags::trusted`
(spec (mem_flags_trusted)
(provide (= result #x0003)))
(provide (= result #x0003))) ;; Shouldn't this be 0001?
(decl pure mem_flags_trusted () MemFlags)
(extern constructor mem_flags_trusted mem_flags_trusted)

(spec (mem_flags_aligned_read_only)
(provide (= result #x0003)))
(decl pure mem_flags_aligned_read_only () MemFlags)
(extern constructor mem_flags_aligned_read_only mem_flags_aligned_read_only)

;; Determine if flags specify little- or native-endian.
(decl little_or_native_endian (MemFlags) MemFlags)
(extern extractor little_or_native_endian little_or_native_endian)
Expand Down
2 changes: 1 addition & 1 deletion cranelift/docs/ir.md
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ slot on the stack for its entire live range. Since the live range of an SSA
value can be quite large, it is sometimes beneficial to split the live range
into smaller parts.

A live range is split by creating new SSA values that are copies or the
A live range is split by creating new SSA values that are copies of the
original value or each other. The copies are created by inserting `copy`,
`spill`, or `fill` instructions, depending on whether the values
are assigned to registers or stack slots.
Expand Down
1 change: 1 addition & 0 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,7 @@ where
Opcode::X86Pmaddubsw => unimplemented!("X86Pmaddubsw"),
Opcode::X86Cvtt2dq => unimplemented!("X86Cvtt2dq"),
Opcode::StackSwitch => unimplemented!("StackSwitch"),
Opcode::DeadLoadWithContext => unimplemented!("DeadLoadWithContext"),

Opcode::TryCall => unimplemented!("TryCall"),
Opcode::TryCallIndirect => unimplemented!("TryCallIndirect"),
Expand Down
7 changes: 7 additions & 0 deletions crates/cli-flags/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ wasmtime_option_group! {
/// Yield when a global epoch counter changes, allowing for async
/// operation without blocking the executor.
pub epoch_interruption: Option<bool>,
/// Use MMU tricks to speed epoch deadline checks.
/// TODO: Document whether this should be used mutually exclusively with
/// epoch_interruption.
pub epoch_interruption_via_mmu: Option<bool>,
/// Maximum stack size, in bytes, that wasm is allowed to consume before a
/// stack overflow is reported.
pub max_wasm_stack: Option<usize>,
Expand Down Expand Up @@ -830,6 +834,9 @@ impl CommonOptions {
if let Some(enable) = self.wasm.epoch_interruption {
config.epoch_interruption(enable);
}
if let Some(enable) = self.wasm.epoch_interruption_via_mmu {
config.epoch_interruption_via_mmu(enable);
}
if let Some(enable) = self.debug.address_map {
config.generate_address_map(enable);
}
Expand Down
56 changes: 55 additions & 1 deletion crates/cranelift/src/func_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ pub struct FuncEnvironment<'module_environment> {
/// any yield.
epoch_deadline_var: cranelift_frontend::Variable,

    // A cached pointer to the epoch interrupt page so we don't have to
    // continually dig it out of the `VMStoreContext`.
epoch_interrupt_page_ptr_var: cranelift_frontend::Variable,

/// A cached pointer to the per-Engine epoch counter, when
/// performing epoch-based interruption. Initialized in the
/// function prologue. We prefer to use a variable here rather
Expand Down Expand Up @@ -264,6 +268,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
fuel_var: Variable::reserved_value(),
epoch_deadline_var: Variable::reserved_value(),
epoch_ptr_var: Variable::reserved_value(),
epoch_interrupt_page_ptr_var: Variable::reserved_value(),

// Start with at least one fuel being consumed because even empty
// functions should consume at least some fuel.
Expand All @@ -284,6 +289,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
self.isa.pointer_type()
}

/// Retrieves the VMContext, creating it first if necessary.
pub(crate) fn vmctx(&mut self, func: &mut Function) -> ir::GlobalValue {
self.vmctx.unwrap_or_else(|| {
let vmctx = func.create_global_value(ir::GlobalValueData::VMContext);
Expand All @@ -310,6 +316,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
})
}

/// Codegens a reference to the VMContext and returns it as well.
pub(crate) fn vmctx_val(&mut self, pos: &mut FuncCursor<'_>) -> ir::Value {
let pointer_type = self.pointer_type();
let vmctx = self.vmctx(&mut pos.func);
Expand Down Expand Up @@ -380,7 +387,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
ptr
}

/// Get the `*mut VMStoreContext` value for our `VMContext`.
/// Codegens and returns the `*mut VMStoreContext` value for our `VMContext`.
fn get_vmstore_context_ptr(&mut self, builder: &mut FunctionBuilder) -> ir::Value {
let global = self.get_vmstore_context_ptr_global(&mut builder.func);
builder.ins().global_value(self.pointer_type(), global)
Expand Down Expand Up @@ -641,6 +648,42 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
self.epoch_check_full(builder, cur_epoch_value, continuation_block);
}

/// Codegens what needs to go at the top of a function to support
/// epoch_interrupt_via_mmu.
fn epoch_mmu_function_entry(&mut self, builder: &mut FunctionBuilder<'_>) {
debug_assert!(self.epoch_interrupt_page_ptr_var.is_reserved_value());
self.epoch_interrupt_page_ptr_var = builder.declare_var(self.pointer_type());

// Cache ptr to interrupt page in a local (and hopefully a register, at
// the discretion of regalloc), rather than digging it out of the
// `VMStoreContext` every time.
let vmstore_ctx = self.get_vmstore_context_ptr(builder);
let epoch_interrupt_page_ptr = builder.ins().load(
self.pointer_type(),
ir::MemFlags::trusted(),
vmstore_ctx,
ir::immediates::Offset32::new(
self.offsets.ptr.vmstore_context_epoch_interrupt_page_ptr() as i32,
),
);
builder.def_var(self.epoch_interrupt_page_ptr_var, epoch_interrupt_page_ptr);

self.epoch_mmu_interruption_check(epoch_interrupt_page_ptr, builder);
}

/// Codegens a dead load from the epoch interrupt page, which causes a trap
/// if an interrupt is due.
fn epoch_mmu_interruption_check(
&mut self,
epoch_interrupt_page_ptr: ir::Value,
builder: &mut FunctionBuilder<'_>,
) {
let vmctx = self.vmctx_val(&mut builder.cursor());
let _ = builder
.ins()
.dead_load_with_context(epoch_interrupt_page_ptr, vmctx);
}

#[cfg(feature = "wmemcheck")]
fn hook_malloc_exit(&mut self, builder: &mut FunctionBuilder, retvals: &[ir::Value]) {
let check_malloc = self.builtin_functions.check_malloc(builder.func);
Expand Down Expand Up @@ -3755,6 +3798,13 @@ impl FuncEnvironment<'_> {
self.epoch_check(builder);
}

// If we're using MMU-based epoch detection, provoke an interrupt if
// it's time.
if self.tunables.epoch_interruption_via_mmu {
let page_ptr = builder.use_var(self.epoch_interrupt_page_ptr_var);
self.epoch_mmu_interruption_check(page_ptr, builder);
}

Ok(())
}

Expand Down Expand Up @@ -3820,6 +3870,10 @@ impl FuncEnvironment<'_> {
self.epoch_function_entry(builder);
}

if self.tunables.epoch_interruption_via_mmu {
self.epoch_mmu_function_entry(builder);
}

#[cfg(feature = "wmemcheck")]
if self.compiler.wmemcheck {
let func_name = self.current_func_name(builder);
Expand Down
Loading