From 509c85b7aafe266a0b72cce5c5dd62ea55f063ff Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 1/7] mem/analyze: fix some latent issues in merging, caused by ZSTs. --- src/mem/analyze.rs | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/mem/analyze.rs b/src/mem/analyze.rs index d9390e34..8c5ea9c1 100644 --- a/src/mem/analyze.rs +++ b/src/mem/analyze.rs @@ -169,7 +169,18 @@ impl AccessMerger<'_> { // Decompose the "smaller" and/or "less strict" side (`b`) first. match b.kind { // `Dead`s are always ignored. - DataHappKind::Dead => return MergeResult::ok(a), + DataHappKind::Dead + if { + // HACK(eddyb) see similar comment below, but also the comment + // above is invalidated by this condition - the issue is that + // only an unused offset of `0` is a true noop, otherwise + // there is a dead `qptr.offset` instruction which still + // needs a field to reference. + b_offset_in_a == 0 + } => + { + return MergeResult::ok(a); + } DataHappKind::Disjoint(b_entries) if { @@ -392,12 +403,26 @@ impl AccessMerger<'_> { .range(( Bound::Unbounded, b.max_size.map_or(Bound::Unbounded, |b_max_size| { - Bound::Excluded(b_offset_in_a.checked_add(b_max_size).unwrap()) + // HACK(eddyb) the unconditional `insert` below, at + // `b_offset_in_a`, can overwrite an existing entry + // if the ZST case isn't correctly handled. + if b_max_size == 0 { + Bound::Included(b_offset_in_a) + } else { + Bound::Excluded(b_offset_in_a.checked_add(b_max_size).unwrap()) + } }), )) .rev() - .take_while(|(a_sub_offset, a_sub_happ)| { + .take_while(|&(&a_sub_offset, a_sub_happ)| { a_sub_happ.max_size.is_none_or(|a_sub_max_size| { + // HACK(eddyb) the unconditional `insert` below, at + // `b_offset_in_a`, can overwrite an existing entry + // if the ZST case isn't correctly handled. + if b.max_size == Some(0) && a_sub_offset == b_offset_in_a { + return true; + } + a_sub_offset.checked_add(a_sub_max_size).unwrap() > b_offset_in_a }) }); From ccc1e2ad837305b17443dd9f00f6ccc718fc1a80 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 2/7] mem/analyze: don't panic on oddball `Const` pointers. --- src/mem/analyze.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/mem/analyze.rs b/src/mem/analyze.rs index 8c5ea9c1..43574a0d 100644 --- a/src/mem/analyze.rs +++ b/src/mem/analyze.rs @@ -903,8 +903,21 @@ impl<'a> GatherAccesses<'a> { ConstKind::PtrToGlobalVar(gv) => { this.global_var_accesses.entry(gv).or_default() } - // FIXME(eddyb) may be relevant? - _ => unreachable!(), + // FIXME(eddyb) attach on the `Const` by replacing + // it with a copy that also has an extra attribute, + // or actually support by adding the accesses attribute + // in the same manner (if it makes sense to do so). + _ => { + accesses_or_err_attrs_to_attach.push(( + AttrTarget::Node(node), + Err(AnalysisError(Diag::bug([ + "unsupported pointer constant `".into(), + ct.into(), + "`".into(), + ]))), + )); + return; + } }, Value::Var(ptr) => match func_def_body.at(ptr).decl().kind() { VarKind::RegionInput { region, input_idx } From f0a403f18d2ffc2499c2a951c1a149bcd89eae06 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 3/7] mem: add an immediate `offset` to `Load`/`Store` ops. --- src/mem/analyze.rs | 64 +++++++- src/mem/mod.rs | 20 ++- src/print/mod.rs | 35 +++-- src/qptr/lift.rs | 374 +++++++++++++++++++++++---------------------- src/qptr/lower.rs | 47 +++++- src/transform.rs | 4 +- src/visit.rs | 4 +- 7 files changed, 334 insertions(+), 214 deletions(-) diff --git a/src/mem/analyze.rs b/src/mem/analyze.rs index 43574a0d..eb275ac1 100644 --- a/src/mem/analyze.rs +++ b/src/mem/analyze.rs @@ -1209,14 +1209,14 @@ impl<'a> GatherAccesses<'a> { ), ); } - DataInstKind::Mem(op @ (MemOp::Load | MemOp::Store)) => { + DataInstKind::Mem(op @ (MemOp::Load { offset } | MemOp::Store { offset })) => { // HACK(eddyb) `_` will match multiple variants soon. #[allow(clippy::match_wildcard_for_single_variants)] let (op_name, access_type) = match op { - MemOp::Load => { + MemOp::Load { .. } => { ("Load", func_def_body.at(data_inst_def.outputs[0]).decl().ty) } - MemOp::Store => { + MemOp::Store { .. } => { ("Store", func_def_body.at(data_inst_def.inputs[1]).type_of(&cx)) } _ => unreachable!(), @@ -1228,7 +1228,9 @@ impl<'a> GatherAccesses<'a> { .layout_of(access_type) .map_err(|LayoutError(e)| AnalysisError(e)) .and_then(|layout| match layout { - TypeLayout::Handle(shapes::Handle::Opaque(ty)) => { + TypeLayout::Handle(shapes::Handle::Opaque(ty)) + if offset.is_none() => + { Ok(MemAccesses::Handles(shapes::Handle::Opaque(ty))) } TypeLayout::Handle(shapes::Handle::Buffer(..)) => { @@ -1237,6 +1239,11 @@ impl<'a> GatherAccesses<'a> { ) .into()]))) } + TypeLayout::Handle(_) => Err(AnalysisError(Diag::bug([format!( + "{op_name} {{ offset: {offset:?} }}: \ + cannot offset in handle memory" + ) + .into()]))), TypeLayout::HandleArray(..) => { Err(AnalysisError(Diag::bug([format!( "{op_name}: cannot access whole HandleArray" @@ -1251,10 +1258,51 @@ impl<'a> GatherAccesses<'a> { ) .into()]))) } - TypeLayout::Concrete(concrete) => Ok(MemAccesses::Data(DataHapp { - max_size: Some(concrete.mem_layout.fixed_base.size), - kind: DataHappKind::Direct(access_type), - })), + TypeLayout::Concrete(concrete) => { + let happ = DataHapp { + max_size: Some(concrete.mem_layout.fixed_base.size), + kind: DataHappKind::Direct(access_type), + }; + + // FIXME(eddyb) deduplicate this with + // `QPtrOp::Offset` above. + let offset = offset + .map(|offset| u32::try_from(offset.get())) + .transpose() + .ok() + .ok_or_else(|| { + AnalysisError(Diag::bug([format!( + "{op_name} {{ offset: {offset:?} }}: \ + negative offset" + ) + .into()])) + })?; + + let Some(offset) = offset else { + return Ok(MemAccesses::Data(happ)); + }; + + Ok(MemAccesses::Data(DataHapp { + max_size: happ + .max_size + .map(|max_size| { + offset.checked_add(max_size).ok_or_else(|| { + AnalysisError(Diag::bug([format!( + "{op_name} {{ offset: {offset} }}: \ + size overflow ({offset}+{max_size})" + ) + .into()])) + }) + }) + .transpose()?, + // FIXME(eddyb) allocating `Rc>` + // to represent the one-element case, seems + // quite wasteful when it's likely consumed. + kind: DataHappKind::Disjoint(Rc::new( + [(offset, happ)].into(), + )), + })) + } }), ); } diff --git a/src/mem/mod.rs b/src/mem/mod.rs index f52208a5..1078749e 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -12,7 +12,7 @@ use crate::{OrdAssertEq, Type}; use std::collections::BTreeMap; -use std::num::NonZeroU32; +use std::num::{NonZeroI32, NonZeroU32}; use std::rc::Rc; // NOTE(eddyb) all the modules are declared here, but they're documented "inside" @@ -137,15 +137,25 @@ pub enum MemOp { // a `SpvInst` - once fn-local variables are lowered, this should go there. FuncLocalVar(shapes::MemLayout), - /// Read a single value from a pointer (`inputs[0]`). + /// Read a single value from a pointer (`inputs[0]`) at `offset`. // // FIXME(eddyb) limit this to data - and scalars, maybe vectors at most. - Load, + Load { + // FIXME(eddyb) make this an `enum`, with another variant encoding some + // GEP-like (aka SPIR-V `OpAccessChain`) "field path", and/or even one + // (or more) stride(s) to allow direct array indexing at access time. + offset: Option, + }, - /// Write a single value (`inputs[1]`) to a pointer (`inputs[0]`). + /// Write a single value (`inputs[1]`) to a pointer (`inputs[0]`) at `offset`. // // FIXME(eddyb) limit this to data - and scalars, maybe vectors at most. - Store, + Store { + // FIXME(eddyb) make this an `enum`, with another variant encoding some + // GEP-like (aka SPIR-V `OpAccessChain`) "field path", and/or even one + // (or more) stride(s) to allow direct array indexing at access time. + offset: Option, + }, // // FIXME(eddyb) implement more ops (e.g. copies, atomics). } diff --git a/src/print/mod.rs b/src/print/mod.rs index dafb022f..1580b9af 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -3892,18 +3892,35 @@ impl FuncAt<'_, DataInst> { ) } - MemOp::Load => { + &MemOp::Load { offset } => { assert_eq!(inputs.len(), 1); - ("load", [inputs[0].print(printer)].into_iter().collect()) + let mut ptr = inputs[0].print(printer); + if let Some(offset) = offset { + ptr = pretty::Fragment::new([ + ptr, + if offset.get() < 0 { " - " } else { " + " }.into(), + printer + .numeric_literal_style() + .apply(offset.abs().to_string()) + .into(), + ]); + } + ("load", [ptr].into_iter().collect()) } - MemOp::Store => { + &MemOp::Store { offset } => { assert_eq!(inputs.len(), 2); - ( - "store", - [inputs[0].print(printer), inputs[1].print(printer)] - .into_iter() - .collect(), - ) + let mut ptr = inputs[0].print(printer); + if let Some(offset) = offset { + ptr = pretty::Fragment::new([ + ptr, + if offset.get() < 0 { " - " } else { " + " }.into(), + printer + .numeric_literal_style() + .apply(offset.abs().to_string()) + .into(), + ]); + } + ("store", [ptr, inputs[1].print(printer)].into_iter().collect()) } }; diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs index 68381b6c..b0588506 100644 --- a/src/qptr/lift.rs +++ b/src/qptr/lift.rs @@ -541,104 +541,41 @@ impl LiftToSpvPtrInstsInFunc<'_> { &DataInstKind::QPtr(QPtrOp::Offset(offset)) => { let base_ptr = data_inst_def.inputs[0]; let (addr_space, layout) = type_of_val_as_spv_ptr_with_layout(base_ptr)?; - let mut layout = match layout { - TypeLayout::Handle(_) | TypeLayout::HandleArray(..) => { - return Err(LiftError(Diag::bug(["cannot offset Handles".into()]))); - } - TypeLayout::Concrete(mem_layout) => mem_layout, - }; - let mut offset = u32::try_from(offset) - .ok() - .ok_or_else(|| LiftError(Diag::bug(["negative offset".into()])))?; - let mut access_chain_inputs: SmallVec<_> = [base_ptr].into_iter().collect(); - // FIXME(eddyb) deduplicate with access chain loop for Load/Store. - while offset > 0 { - let idx = { - // HACK(eddyb) supporting ZSTs would be a pain because - // they can "fit" in weird ways, e.g. given 3 offsets - // A, B, C (before/between/after a pair of fields), - // `B..B` is included in both `A..B` and `B..C`. - let allow_zst = false; - let offset_range = if allow_zst { - offset..offset - } else { - offset..offset.saturating_add(1) - }; - let mut component_indices = - layout.components.find_components_containing(offset_range); - match (component_indices.next(), component_indices.next()) { - (None, _) => { - // FIXME(eddyb) this could include the chosen indices, - // and maybe the current type and/or layout. - return Err(LiftError(Diag::bug([format!( - "offset {offset} not found in type layout, \ - after {} access chain indices", - access_chain_inputs.len() - 1 - ) - .into()]))); - } - (Some(idx), Some(_)) => { - // FIXME(eddyb) !!! this can also be illegal overlap - if allow_zst { - return Err(LiftError(Diag::bug([ - "ambiguity due to ZSTs in type layout".into(), - ]))); - } - // HACK(eddyb) letting illegal overlap through - idx - } - (Some(idx), None) => idx, - } - }; - - let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { - LiftError(Diag::bug([ - format!("{idx} not representable as a positive s32").into() - ])) - })?; - access_chain_inputs - .push(Value::Const(cx.intern(scalar::Const::from_u32(idx_as_i32 as u32)))); - - match &layout.components { - Components::Scalar => unreachable!(), - Components::Elements { stride, elem, .. } => { - offset %= stride.get(); - layout = elem.clone(); - } - Components::Fields { offsets, layouts } => { - offset -= offsets[idx]; - layout = layouts[idx].clone(); - } + let maybe_access_chain = self.maybe_adjust_pointer_for_offset_or_access( + base_ptr, + addr_space, + layout.clone(), + offset, + None, + )?; + let (new_output_ty, data_inst_def) = match maybe_access_chain { + Some((access_chain_output_ptr_type, mut access_chain_data_inst_def)) => { + access_chain_data_inst_def.outputs.push(data_inst_def.outputs[0]); + (access_chain_output_ptr_type, access_chain_data_inst_def) } - } + None => { + self.deferred_ptr_noops.insert( + data_inst_def.outputs[0], + DeferredPtrNoop { + output_pointer: base_ptr, + output_pointer_addr_space: addr_space, + output_pointee_layout: layout, + parent_region: self.parent_region.unwrap(), + }, + ); + + // FIXME(eddyb) avoid the repeated call to `type_of_val`, + // maybe don't even replace the `QPtrOp::Offset` instruction? + let mut data_inst_def = data_inst_def.clone(); + data_inst_def.kind = QPtrOp::Offset(0).into(); + (type_of_val(base_ptr), data_inst_def) + } + }; - let mut data_inst_def = data_inst_def.clone(); - if access_chain_inputs.len() == 1 { - self.deferred_ptr_noops.insert( - data_inst_def.outputs[0], - DeferredPtrNoop { - output_pointer: base_ptr, - output_pointer_addr_space: addr_space, - output_pointee_layout: TypeLayout::Concrete(layout), - parent_region: self.parent_region.unwrap(), - }, - ); + let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); + output_decl.ty = new_output_ty; - // FIXME(eddyb) avoid the repeated call to `type_of_val`, - // maybe don't even replace the `QPtrOp::Offset` instruction? - data_inst_def.kind = QPtrOp::Offset(0).into(); - let new_output_ty = type_of_val(base_ptr); - let output_decl = - func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); - output_decl.ty = new_output_ty; - } else { - data_inst_def.kind = DataInstKind::SpvInst(wk.OpAccessChain.into()); - data_inst_def.inputs = access_chain_inputs; - let output_decl = - func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); - output_decl.ty = self.lifter.spv_ptr_type(addr_space, layout.original_type); - } data_inst_def } DataInstKind::QPtr(QPtrOp::DynOffset { stride, index_bounds }) => { @@ -666,7 +603,7 @@ impl LiftToSpvPtrInstsInFunc<'_> { break; } - // FIXME(eddyb) deduplicate with `maybe_adjust_pointer_for_access`. + // FIXME(eddyb) deduplicate with `maybe_adjust_pointer_for_offset_or_access`. let idx = { // FIXME(eddyb) there might be a better way to // estimate a relevant offset range for the array, @@ -677,7 +614,7 @@ impl LiftToSpvPtrInstsInFunc<'_> { .and_then(|index_bounds| u32::try_from(index_bounds.end).ok()) .unwrap_or(0); let offset_range = - 0..min_expected_len.checked_add(stride.get()).unwrap_or(0); + 0..min_expected_len.checked_mul(stride.get()).unwrap_or(0); let mut component_indices = layout.components.find_components_containing(offset_range); match (component_indices.next(), component_indices.next()) { @@ -719,12 +656,12 @@ impl LiftToSpvPtrInstsInFunc<'_> { output_decl.ty = self.lifter.spv_ptr_type(addr_space, layout.original_type); data_inst_def } - DataInstKind::Mem(op @ (MemOp::Load | MemOp::Store)) => { + DataInstKind::Mem(op @ (MemOp::Load { offset } | MemOp::Store { offset })) => { // HACK(eddyb) `_` will match multiple variants soon. #[allow(clippy::match_wildcard_for_single_variants)] let (spv_opcode, access_type) = match op { - MemOp::Load => (wk.OpLoad, func.at(data_inst_def.outputs[0]).decl().ty), - MemOp::Store => (wk.OpStore, type_of_val(data_inst_def.inputs[1])), + MemOp::Load { .. } => (wk.OpLoad, func.at(data_inst_def.outputs[0]).decl().ty), + MemOp::Store { .. } => (wk.OpStore, type_of_val(data_inst_def.inputs[1])), _ => unreachable!(), }; @@ -733,15 +670,15 @@ impl LiftToSpvPtrInstsInFunc<'_> { let input_idx = 0; let ptr = data_inst_def.inputs[input_idx]; let (addr_space, pointee_layout) = type_of_val_as_spv_ptr_with_layout(ptr)?; - self.maybe_adjust_pointer_for_access(ptr, pointee_layout, access_type)? - .map(|access_chain_data_inst_def| { - ( - input_idx, - self.lifter.spv_ptr_type(addr_space, access_type), - access_chain_data_inst_def, - ) - }) - .into_iter() + self.maybe_adjust_pointer_for_offset_or_access( + ptr, + addr_space, + pointee_layout, + offset.map_or(0, |o| o.get()), + Some(access_type), + )? + .map(|access_chain| (input_idx, access_chain)) + .into_iter() }; let mut new_data_inst_def = DataInstDef { @@ -750,7 +687,7 @@ impl LiftToSpvPtrInstsInFunc<'_> { }; // FIXME(eddyb) written in a more general style for future deduplication. - for (input_idx, access_chain_output_ptr_type, mut access_chain_data_inst_def) in + for (input_idx, (access_chain_output_ptr_type, mut access_chain_data_inst_def)) in maybe_ajustment { // HACK(eddyb) account for `deferred_ptr_noops` interactions. @@ -817,19 +754,16 @@ impl LiftToSpvPtrInstsInFunc<'_> { let (input_ptr_addr_space, input_pointee_layout) = type_of_val_as_spv_ptr_with_layout(input_ptr)?; - if let Some(access_chain_data_inst_def) = self - .maybe_adjust_pointer_for_access( + if let Some(access_chain) = self + .maybe_adjust_pointer_for_offset_or_access( input_ptr, + input_ptr_addr_space, input_pointee_layout, - expected_pointee_type, + 0, + Some(expected_pointee_type), )? { - to_spv_ptr_input_adjustments.push(( - input_idx, - self.lifter - .spv_ptr_type(input_ptr_addr_space, expected_pointee_type), - access_chain_data_inst_def, - )); + to_spv_ptr_input_adjustments.push((input_idx, access_chain)); } } Attr::QPtr(QPtrAttr::FromSpvPtrOutput { addr_space, pointee }) => { @@ -849,7 +783,7 @@ impl LiftToSpvPtrInstsInFunc<'_> { let func = func_at_data_inst.reborrow().at(()); // FIXME(eddyb) deduplicate with `Load`/`Store`. - for (input_idx, access_chain_output_ptr_type, mut access_chain_data_inst_def) in + for (input_idx, (access_chain_output_ptr_type, mut access_chain_data_inst_def)) in to_spv_ptr_input_adjustments { // HACK(eddyb) account for `deferred_ptr_noops` interactions. @@ -895,26 +829,43 @@ impl LiftToSpvPtrInstsInFunc<'_> { Ok(Transformed::Changed(replacement_data_inst_def)) } - /// If necessary, construct an `OpAccessChain` instruction to turn `ptr` - /// (pointing to a type with `pointee_layout`) into a pointer to `access_type` - /// (which can then be used with e.g. `OpLoad`/`OpStore`). + /// If necessary, construct an `OpAccessChain` instruction to offset `ptr` + /// (pointing to a type with `pointee_layout`) by `offset`, and (optionally) + /// turn it into a pointer to `access_type` (for e.g. `OpLoad`/`OpStore`). /// /// **Note**: the returned instruction has empty `outputs`, the caller must - /// add one (of type `self.lifter.spv_ptr_type(addr_space, access_type)`). + /// add one, using the type returend alongside the instruction. // - // FIXME(eddyb) customize errors, to tell apart Load/Store/ToSpvPtrInput. - fn maybe_adjust_pointer_for_access( + // FIXME(eddyb) customize errors, to tell apart Offset/Load/Store/ToSpvPtrInput. + fn maybe_adjust_pointer_for_offset_or_access( &self, ptr: Value, + addr_space: AddrSpace, mut pointee_layout: TypeLayout, - access_type: Type, - ) -> Result, LiftError> { + offset: i32, + access_type: Option, + ) -> Result, LiftError> { let wk = self.lifter.wk; - let access_layout = self.lifter.layout_of(access_type)?; + let mk_access_chain = |access_chain_inputs: SmallVec<_>, final_pointee_type| { + if access_chain_inputs.len() > 1 { + Some(( + self.lifter.spv_ptr_type(addr_space, final_pointee_type), + DataInstDef { + attrs: Default::default(), + kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), + inputs: access_chain_inputs, + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + }, + )) + } else { + None + } + }; + + let access_layout = access_type.map(|ty| self.lifter.layout_of(ty)).transpose()?; - // The access type might be merely a prefix of the pointee type, - // requiring injecting an extra `OpAccessChain` to "dig in". let mut access_chain_inputs: SmallVec<_> = [ptr].into_iter().collect(); if let TypeLayout::HandleArray(handle, _) = pointee_layout { @@ -922,96 +873,149 @@ impl LiftToSpvPtrInstsInFunc<'_> { .push(Value::Const(self.lifter.cx.intern(scalar::Const::from_u32(0)))); pointee_layout = TypeLayout::Handle(handle); } - match (pointee_layout, access_layout) { + let (mut pointee_layout, access_layout) = match (pointee_layout, access_layout) { (TypeLayout::HandleArray(..), _) => unreachable!(), // All the illegal cases are here to keep the rest tidier. - (_, TypeLayout::Handle(shapes::Handle::Buffer(..))) => { + (_, Some(TypeLayout::Handle(shapes::Handle::Buffer(..)))) => { return Err(LiftError(Diag::bug(["cannot access whole Buffer".into()]))); } - (_, TypeLayout::HandleArray(..)) => { + (_, Some(TypeLayout::HandleArray(..))) => { return Err(LiftError(Diag::bug(["cannot access whole HandleArray".into()]))); } - (_, TypeLayout::Concrete(access_layout)) + (_, Some(TypeLayout::Concrete(access_layout))) if access_layout.mem_layout.dyn_unit_stride.is_some() => { return Err(LiftError(Diag::bug(["cannot access unsized type".into()]))); } + (TypeLayout::Handle(_), Some(_)) if offset != 0 => { + return Err(LiftError(Diag::bug(["cannot offset Handles for access".into()]))); + } + (TypeLayout::Handle(_), None) => { + // FIXME(eddyb) this disallows even a noop offset on a handle pointer. + return Err(LiftError(Diag::bug(["cannot offset Handles".into()]))); + } (TypeLayout::Handle(shapes::Handle::Buffer(..)), _) => { - return Err(LiftError(Diag::bug(["cannot access into Buffer".into()]))); + return Err(LiftError(Diag::bug(["cannot offset/access into Buffer".into()]))); } - (TypeLayout::Handle(_), TypeLayout::Concrete(_)) => { + (TypeLayout::Handle(_), Some(TypeLayout::Concrete(_))) => { return Err(LiftError(Diag::bug(["cannot access Handle as memory".into()]))); } - (TypeLayout::Concrete(_), TypeLayout::Handle(_)) => { + (TypeLayout::Concrete(_), Some(TypeLayout::Handle(_))) => { return Err(LiftError(Diag::bug(["cannot access memory as Handle".into()]))); } ( TypeLayout::Handle(shapes::Handle::Opaque(pointee_handle_type)), - TypeLayout::Handle(shapes::Handle::Opaque(access_handle_type)), + Some(TypeLayout::Handle(shapes::Handle::Opaque(access_handle_type))), ) => { + assert_eq!(offset, 0); + if pointee_handle_type != access_handle_type { return Err(LiftError(Diag::bug([ "(opaque handle) pointer vs access type mismatch".into(), ]))); } + + return Ok(mk_access_chain(access_chain_inputs, pointee_handle_type)); } - (TypeLayout::Concrete(mut pointee_layout), TypeLayout::Concrete(access_layout)) => { - // FIXME(eddyb) deduplicate with access chain loop for Offset. - while pointee_layout.original_type != access_layout.original_type { - let idx = { - let offset_range = 0..access_layout.mem_layout.fixed_base.size; - let mut component_indices = - pointee_layout.components.find_components_containing(offset_range); - match (component_indices.next(), component_indices.next()) { - (None, _) => { - return Err(LiftError(Diag::bug([ - "accessed type not found in pointee type layout".into(), - ]))); - } - // FIXME(eddyb) obsolete this case entirely, - // by removing stores of ZSTs, and replacing - // loads of ZSTs with `OpUndef` constants. - (Some(_), Some(_)) => { - return Err(LiftError(Diag::bug([ - "ambiguity due to ZSTs in pointee type layout".into(), - ]))); - } - (Some(idx), None) => idx, - } - }; + (TypeLayout::Concrete(pointee_layout), Some(TypeLayout::Concrete(access_layout))) => { + (pointee_layout, Some(access_layout)) + } + (TypeLayout::Concrete(pointee_layout), None) => (pointee_layout, None), + }; - let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { - LiftError(Diag::bug([ - format!("{idx} not representable as a positive s32").into() - ])) + let mut offset = u32::try_from(offset) + .ok() + .ok_or_else(|| LiftError(Diag::bug(["negative offset".into()])))?; + + // FIXME(eddyb) deduplicate with access chain loop for Offset. + loop { + let done = offset == 0 + && access_layout.as_ref().is_none_or(|access_layout| { + pointee_layout.original_type == access_layout.original_type + }); + if done { + break; + } + + let idx = { + let min_component_size = match &access_layout { + Some(access_layout) => access_layout.mem_layout.fixed_base.size, + None => { + // HACK(eddyb) supporting ZSTs would be a pain because + // they can "fit" in weird ways, e.g. given 3 offsets + // A, B, C (before/between/after a pair of fields), + // `B..B` is included in both `A..B` and `B..C`. + let allow_zst = false; + if allow_zst { 0 } else { 1 } + } + }; + + let offset_range = offset..offset.saturating_add(min_component_size); + let mut component_indices = + pointee_layout.components.find_components_containing(offset_range.clone()); + + let idx = component_indices + .next() + .or_else(|| { + // HACK(eddyb) when dealing with a lone ZST, the search can + // fail because it expects at least one byte, so we retry + // with an empty range instead. + // FIXME(eddyb) this can still fail if there's another + // component that ends where the ZST is, maybe we need + // some way to filter for specifically such a ZST. + if access_layout.is_none() && offset_range.len() == 1 { + component_indices = pointee_layout + .components + .find_components_containing(offset..offset); + component_indices.next() + } else { + None + } + }) + .ok_or_else(|| { + // FIXME(eddyb) this could include the chosen indices, + // and maybe the current type and/or layout. + LiftError(Diag::bug([format!( + "offsets {offset_range:?} not found in pointee type layout, \ + after {} access chain indices", + access_chain_inputs.len() - 1 + ) + .into()])) })?; - access_chain_inputs.push(Value::Const( - self.lifter.cx.intern(scalar::Const::from_u32(idx_as_i32 as u32)), - )); - pointee_layout = match &pointee_layout.components { - Components::Scalar => unreachable!(), - Components::Elements { elem, .. } => elem.clone(), - Components::Fields { layouts, .. } => layouts[idx].clone(), - }; + if component_indices.next().is_some() { + return Err(LiftError(Diag::bug([ + "ambiguity due to ZSTs in pointee type layout".into(), + ]))); + } + + idx + }; + + let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { + LiftError(Diag::bug([format!("{idx} not representable as a positive s32").into()])) + })?; + access_chain_inputs.push(Value::Const( + self.lifter.cx.intern(scalar::Const::from_u32(idx_as_i32 as u32)), + )); + + match &pointee_layout.components { + Components::Scalar => unreachable!(), + Components::Elements { stride, elem, .. } => { + offset %= stride.get(); + pointee_layout = elem.clone(); + } + Components::Fields { offsets, layouts } => { + offset -= offsets[idx]; + pointee_layout = layouts[idx].clone(); } } } - Ok(if access_chain_inputs.len() > 1 { - Some(DataInstDef { - attrs: Default::default(), - kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), - inputs: access_chain_inputs, - child_regions: [].into_iter().collect(), - outputs: [].into_iter().collect(), - }) - } else { - None - }) + Ok(mk_access_chain(access_chain_inputs, pointee_layout.original_type)) } /// Apply rewrites implied by `deferred_ptr_noops` to `values`. diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs index 4eb97d2d..0d37c4c1 100644 --- a/src/qptr/lower.rs +++ b/src/qptr/lower.rs @@ -7,12 +7,12 @@ use crate::transform::{InnerInPlaceTransform, Transformed, Transformer}; use crate::{ AddrSpace, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, DataInstKind, Diag, FuncDecl, GlobalVarDecl, Node, NodeKind, OrdAssertEq, Region, Type, - TypeKind, TypeOrConst, Value, VarDecl, spv, + TypeKind, TypeOrConst, Value, VarDecl, VarKind, spv, }; use itertools::{Either, Itertools as _}; use smallvec::SmallVec; use std::cell::Cell; -use std::num::NonZeroU32; +use std::num::{NonZeroI32, NonZeroU32}; use std::rc::Rc; // HACK(eddyb) sharing layout code with other modules. @@ -409,6 +409,20 @@ impl LowerFromSpvPtrInstsInFunc<'_> { // FIXME(eddyb) wasteful clone? (needed due to borrowing issues) let outputs = data_inst_def.outputs.clone(); + // Map `ptr` to its base & offset, if it points to a `QPtrOp::Offset`. + let ptr_to_base_ptr_and_offset = |ptr| { + if let Value::Var(ptr) = ptr + && let VarKind::NodeOutput { node: ptr_inst, output_idx: 0 } = + func.at(ptr).decl().kind() + { + let ptr_inst_def = func.at(ptr_inst).def(); + if let DataInstKind::QPtr(QPtrOp::Offset(ptr_offset)) = ptr_inst_def.kind { + return Some((ptr_inst_def.inputs[0], NonZeroI32::new(ptr_offset))); + } + } + None + }; + let replacement_kind_and_inputs = if spv_inst.opcode == wk.OpVariable { assert!(data_inst_def.inputs.len() <= 1); let (_, var_data_type) = @@ -430,14 +444,25 @@ impl LowerFromSpvPtrInstsInFunc<'_> { return Ok(Transformed::Unchanged); } assert_eq!(data_inst_def.inputs.len(), 1); - (MemOp::Load.into(), data_inst_def.inputs.clone()) + + let ptr = data_inst_def.inputs[0]; + + let (ptr, offset) = ptr_to_base_ptr_and_offset(ptr).unwrap_or((ptr, None)); + + (MemOp::Load { offset }.into(), [ptr].into_iter().collect()) } else if spv_inst.opcode == wk.OpStore { // FIXME(eddyb) support memory operands somehow. if !spv_inst.imms.is_empty() { return Ok(Transformed::Unchanged); } assert_eq!(data_inst_def.inputs.len(), 2); - (MemOp::Store.into(), data_inst_def.inputs.clone()) + + let ptr = data_inst_def.inputs[0]; + let value = data_inst_def.inputs[1]; + + let (ptr, offset) = ptr_to_base_ptr_and_offset(ptr).unwrap_or((ptr, None)); + + (MemOp::Store { offset }.into(), [ptr, value].into_iter().collect()) } else if spv_inst.opcode == wk.OpArrayLength { let field_idx = match spv_inst.imms[..] { [spv::Imm::Short(_, field_idx)] => field_idx, @@ -528,14 +553,26 @@ impl LowerFromSpvPtrInstsInFunc<'_> { self.lowerer.layout_of(base_pointee_type)? }; + let mut ptr = base_ptr; let mut steps = self.try_lower_access_chain(access_chain_base_layout, &data_inst_def.inputs[1..])?; + + // Fold a previous `Offset` into an initial offset step, where possible. + if let Some(QPtrChainStep { op: QPtrOp::Offset(first_offset), dyn_idx: None }) = + steps.first_mut() + && let Some((ptr_base_ptr, ptr_offset)) = ptr_to_base_ptr_and_offset(ptr) + && let Some(new_first_offset) = + first_offset.checked_add(ptr_offset.map_or(0, |o| o.get())) + { + ptr = ptr_base_ptr; + *first_offset = new_first_offset; + } + // HACK(eddyb) noop cases should probably not use any `DataInst`s at all, // but that would require the ability to replace all uses of a `Value`. let final_step = steps.pop().unwrap_or(QPtrChainStep { op: QPtrOp::Offset(0), dyn_idx: None }); - let mut ptr = base_ptr; for step in steps { let func = func_at_data_inst.reborrow().at(()); diff --git a/src/transform.rs b/src/transform.rs index d6ed049f..f4a04bd4 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -638,7 +638,9 @@ impl InnerInPlaceTransform for FuncAtMut<'_, Node> { | NodeKind::ExitInvocation(cf::ExitInvocationKind::SpvInst(_)) | DataInstKind::Scalar(_) | DataInstKind::Vector(_) - | DataInstKind::Mem(MemOp::FuncLocalVar(_) | MemOp::Load | MemOp::Store) + | DataInstKind::Mem( + MemOp::FuncLocalVar(_) | MemOp::Load { .. } | MemOp::Store { .. }, + ) | DataInstKind::QPtr( QPtrOp::HandleArrayIndex | QPtrOp::BufferData diff --git a/src/visit.rs b/src/visit.rs index 2dbabd71..596f7964 100644 --- a/src/visit.rs +++ b/src/visit.rs @@ -482,7 +482,9 @@ impl<'a> FuncAt<'a, Node> { | NodeKind::ExitInvocation(cf::ExitInvocationKind::SpvInst(_)) | DataInstKind::Scalar(_) | DataInstKind::Vector(_) - | DataInstKind::Mem(MemOp::FuncLocalVar(_) | MemOp::Load | MemOp::Store) + | DataInstKind::Mem( + MemOp::FuncLocalVar(_) | MemOp::Load { .. } | MemOp::Store { .. }, + ) | DataInstKind::QPtr( QPtrOp::HandleArrayIndex | QPtrOp::BufferData From 27ba458b6f762ebc0e4643d99455cd9123dcb5ac Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 2 Oct 2025 12:11:06 +0300 Subject: [PATCH 4/7] WIP: Add an immediate `offset` to `ConstKind::PtrToGlobalVar`. --- src/lib.rs | 12 ++++- src/mem/analyze.rs | 108 ++++++++++++++++++++++++------------------- src/print/mod.rs | 20 ++++++-- src/qptr/lift.rs | 18 ++++++-- src/qptr/lower.rs | 2 +- src/spv/canonical.rs | 2 +- src/spv/lift.rs | 17 ++++--- src/spv/lower.rs | 10 ++-- src/transform.rs | 6 +-- src/visit.rs | 4 +- 10 files changed, 127 insertions(+), 72 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index beecdc7d..103e276e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -176,6 +176,7 @@ pub mod vector; use smallvec::SmallVec; use std::borrow::Cow; use std::collections::BTreeSet; +use std::num::NonZeroU32; use std::rc::Rc; // HACK(eddyb) work around the lack of `FxIndex{Map,Set}` type aliases elsewhere. @@ -669,7 +670,16 @@ pub enum ConstKind { // there's still the need to rename "global variable" post-`Var`-refactor, // and last but not least, `PtrToFunc` needs `SPV_INTEL_function_pointers`, // an OpenCL-only extension Intel came up with for their own SPIR-V tooling. - PtrToGlobalVar(GlobalVar), + PtrToGlobalVar { + global_var: GlobalVar, + + // FIXME(eddyb) try using this feature in more places. + // FIXME(eddyb) make this an `enum`, with another variant encoding some + // GEP-like (aka SPIR-V `OpAccessChain`) "field path". + // FIXME(eddyb) consider some kind of "capability slicing" replacement, + // which could limit the usable range of the resulting pointer. + offset: Option, + }, PtrToFunc(Func), // HACK(eddyb) this is a fallback case that should become increasingly rare diff --git a/src/mem/analyze.rs b/src/mem/analyze.rs index eb275ac1..8ab24d96 100644 --- a/src/mem/analyze.rs +++ b/src/mem/analyze.rs @@ -897,11 +897,53 @@ impl<'a> GatherAccesses<'a> { let per_output_accesses = node_to_per_output_accesses.remove(&node).unwrap_or_default(); let node_def = func_def_body.at(node).def(); - let mut generate_accesses = |this: &mut Self, ptr: Value, new_accesses| { + let offset_accesses = |accesses, offset: u32| { + let happ = match accesses { + MemAccesses::Handles(_) => { + return Err(AnalysisError(Diag::bug([format!( + "Offset({offset}): cannot offset in handle memory" + ) + .into()]))); + } + MemAccesses::Data(happ) => happ, + }; + + // FIXME(eddyb) these should be normalized + // (e.g. constant-folded) out of existence, + // but while they exist, they should be noops. + if offset == 0 { + return Ok(MemAccesses::Data(happ)); + } + + Ok(MemAccesses::Data(DataHapp { + max_size: happ + .max_size + .map(|max_size| { + offset.checked_add(max_size).ok_or_else(|| { + AnalysisError(Diag::bug([format!( + "Offset({offset}): size overflow ({offset}+{max_size})" + ) + .into()])) + }) + }) + .transpose()?, + // FIXME(eddyb) allocating `Rc>` + // to represent the one-element case, seems + // quite wasteful when it's likely consumed. + kind: DataHappKind::Disjoint(Rc::new([(offset, happ)].into())), + })) + }; + let mut generate_accesses = |this: &mut Self, ptr: Value, mut new_accesses| { let slot = match ptr { Value::Const(ct) => match cx[ct].kind { - ConstKind::PtrToGlobalVar(gv) => { - this.global_var_accesses.entry(gv).or_default() + ConstKind::PtrToGlobalVar { global_var, offset } => { + if let Some(offset) = offset + && let Ok(accesses) = new_accesses + { + new_accesses = offset_accesses(accesses, offset.get()); + } + + this.global_var_accesses.entry(global_var).or_default() } // FIXME(eddyb) attach on the `Const` by replacing // it with a copy that also has an extra attribute, @@ -1099,51 +1141,21 @@ impl<'a> GatherAccesses<'a> { generate_accesses( self, data_inst_def.inputs[0], - output_accesses.unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))).and_then( - |accesses| { - let happ = match accesses { - MemAccesses::Handles(_) => { - return Err(AnalysisError(Diag::bug([format!( - "Offset({offset}): cannot offset in handle memory" - ) - .into()]))); - } - MemAccesses::Data(happ) => happ, - }; - let offset = u32::try_from(offset).ok().ok_or_else(|| { - AnalysisError(Diag::bug([format!( - "Offset({offset}): negative offset" - ) - .into()])) - })?; - - // FIXME(eddyb) these should be normalized - // (e.g. constant-folded) out of existence, - // but while they exist, they should be noops. - if offset == 0 { - return Ok(MemAccesses::Data(happ)); - } - - Ok(MemAccesses::Data(DataHapp { - max_size: happ - .max_size - .map(|max_size| { - offset.checked_add(max_size).ok_or_else(|| { - AnalysisError(Diag::bug([format!( - "Offset({offset}): size overflow \ - ({offset}+{max_size})" - ) - .into()])) - }) - }) - .transpose()?, - // FIXME(eddyb) allocating `Rc>` - // to represent the one-element case, seems - // quite wasteful when it's likely consumed. - kind: DataHappKind::Disjoint(Rc::new([(offset, happ)].into())), - })) - }, - ), + u32::try_from(offset) + .ok() + .ok_or_else(|| { + AnalysisError(Diag::bug([format!( + "Offset({offset}): negative offset" + ) + .into()])) + }) + .and_then(|offset| { + offset_accesses( + output_accesses + .unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD)))?, + offset, + ) + }), ); } DataInstKind::QPtr(QPtrOp::DynOffset { stride, index_bounds }) => { diff --git a/src/print/mod.rs b/src/print/mod.rs index 1580b9af..e8540176 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -561,9 +561,9 @@ impl<'a> Visitor<'a> for Plan<'a> { fn visit_const_def(&mut self, ct_def: &'a ConstDef) { // HACK(eddyb) the type of a `PtrToGlobalVar` is never printed, skip it. - if let ConstKind::PtrToGlobalVar(gv) = ct_def.kind { + if let ConstKind::PtrToGlobalVar { global_var, offset: _ } = ct_def.kind { self.visit_attr_set_use(ct_def.attrs); - self.visit_global_var_use(gv); + self.visit_global_var_use(global_var); } else { ct_def.inner_visit_with(self); } @@ -3202,8 +3202,18 @@ impl Print for ConstDef { ty.print(printer), pretty::join_comma_sep("(", ct.elems().map(|elem| print_scalar(elem, false)), ")"), ]), - &ConstKind::PtrToGlobalVar(gv) => { - pretty::Fragment::new(["&".into(), gv.print(printer)]) + &ConstKind::PtrToGlobalVar { global_var, offset } => { + let ptr = pretty::Fragment::new(["&".into(), global_var.print(printer)]); + match offset { + Some(offset) => pretty::Fragment::new([ + "(".into(), + ptr, + " + ".into(), + printer.numeric_literal_style().apply(offset.to_string()).into(), + ")".into(), + ]), + None => ptr, + } } &ConstKind::PtrToFunc(func) => pretty::Fragment::new(["&".into(), func.print(printer)]), @@ -4098,7 +4108,7 @@ impl FuncAt<'_, DataInst> { match &printer.cx[ct].kind { ConstKind::Undef | ConstKind::Vector(_) - | ConstKind::PtrToGlobalVar(_) + | ConstKind::PtrToGlobalVar { .. } | ConstKind::PtrToFunc(_) | ConstKind::SpvInst { .. } => {} diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs index b0588506..a79e7e27 100644 --- a/src/qptr/lift.rs +++ b/src/qptr/lift.rs @@ -1066,10 +1066,22 @@ impl Transformer for LiftToSpvPtrInstsInFunc<'_> { fn transform_const_use(&mut self, ct: Const) -> Transformed { // FIXME(eddyb) maybe cache this remap (in `LiftToSpvPtrs`, globally). let ct_def = &self.lifter.cx[ct]; - if let ConstKind::PtrToGlobalVar(gv) = ct_def.kind { + if let ConstKind::PtrToGlobalVar { global_var, offset } = ct_def.kind { + let mut attrs = ct_def.attrs; + let mut ty = ct_def.ty; + + // TODO(eddyb) implement. + if let Some(offset) = offset { + attrs.push_diag( + &self.lifter.cx, + Diag::bug([format!("NYI: global var immediate offset ({offset})").into()]), + ); + } else { + ty = self.global_vars[global_var].type_of_ptr_to; + } Transformed::Changed(self.lifter.cx.intern(ConstDef { - attrs: ct_def.attrs, - ty: self.global_vars[gv].type_of_ptr_to, + attrs, + ty, kind: ct_def.kind.clone(), })) } else { diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs index 0d37c4c1..a29205db 100644 --- a/src/qptr/lower.rs +++ b/src/qptr/lower.rs @@ -225,7 +225,7 @@ impl Transformer for EraseSpvPtrs<'_> { fn transform_const_use(&mut self, ct: Const) -> Transformed { // FIXME(eddyb) maybe cache this remap (in `LowerFromSpvPtrs`, globally). let ct_def = &self.lowerer.cx[ct]; - if let ConstKind::PtrToGlobalVar(_) = ct_def.kind { + if let ConstKind::PtrToGlobalVar { .. } = ct_def.kind { Transformed::Changed(self.lowerer.cx.intern(ConstDef { attrs: ct_def.attrs, ty: self.lowerer.qptr_type(), diff --git a/src/spv/canonical.rs b/src/spv/canonical.rs index 079bb15e..130d03b5 100644 --- a/src/spv/canonical.rs +++ b/src/spv/canonical.rs @@ -416,7 +416,7 @@ impl spv::Inst { ct.elems().map(|elem| cx.intern(elem)).collect(), )), - ConstKind::PtrToGlobalVar(_) + ConstKind::PtrToGlobalVar { .. } | ConstKind::PtrToFunc(_) | ConstKind::SpvInst { .. } | ConstKind::SpvStringLiteralForExtInst(_) => None, diff --git a/src/spv/lift.rs b/src/spv/lift.rs index f94dced6..9840e995 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -184,7 +184,7 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { ConstKind::Undef | ConstKind::Scalar(_) | ConstKind::Vector(_) - | ConstKind::PtrToGlobalVar(_) + | ConstKind::PtrToGlobalVar { .. } | ConstKind::PtrToFunc(_) | ConstKind::SpvInst { .. } => { self.visit_const_def(ct_def); @@ -1296,8 +1296,8 @@ impl LazyInst<'_, '_> { Global::Const(ct) => { let ct_def = &cx[ct]; match ct_def.kind { - ConstKind::PtrToGlobalVar(gv) => { - let gv_decl = &module.global_vars[gv]; + ConstKind::PtrToGlobalVar { global_var, offset: _ } => { + let gv_decl = &module.global_vars[global_var]; let import = match gv_decl.def { DeclDef::Imported(import) => Some(import), DeclDef::Present(_) => None, @@ -1451,10 +1451,15 @@ impl LazyInst<'_, '_> { unreachable!("should've been handled as canonical") } - Err(&ConstKind::PtrToGlobalVar(gv)) => { + Err(&ConstKind::PtrToGlobalVar { global_var, offset }) => { + assert_eq!( + offset, None, + "immediate offsets should be legalized away before lifting" + ); + assert!(ct_def.attrs == AttrSet::default()); - let gv_decl = &module.global_vars[gv]; + let gv_decl = &module.global_vars[global_var]; assert!(ct_def.ty == gv_decl.type_of_ptr_to); @@ -1715,7 +1720,7 @@ impl Module { let ptr_to_global_var = cx.intern(ConstDef { attrs: AttrSet::default(), ty: type_of_ptr_to_global_var, - kind: ConstKind::PtrToGlobalVar(gv), + kind: ConstKind::PtrToGlobalVar { global_var: gv, offset: None }, }); Global::Const(ptr_to_global_var) }; diff --git a/src/spv/lower.rs b/src/spv/lower.rs index 0a42db1b..6ef88921 100644 --- a/src/spv/lower.rs +++ b/src/spv/lower.rs @@ -769,7 +769,7 @@ impl Module { let ptr_to_global_var = cx.intern(ConstDef { attrs: AttrSet::default(), ty: type_of_ptr_to_global_var, - kind: ConstKind::PtrToGlobalVar(global_var), + kind: ConstKind::PtrToGlobalVar { global_var, offset: None }, }); id_defs.insert(global_var_id, IdDef::Const(ptr_to_global_var)); @@ -2016,7 +2016,9 @@ impl Module { Export::Linkage { name, target_id } => { let exportee = match id_defs.get(&target_id) { Some(id_def @ &IdDef::Const(ct)) => match cx[ct].kind { - ConstKind::PtrToGlobalVar(gv) => Ok(Exportee::GlobalVar(gv)), + ConstKind::PtrToGlobalVar { global_var, offset: None } => { + Ok(Exportee::GlobalVar(global_var)) + } _ => Err(id_def.descr(&cx)), }, Some(&IdDef::Func(func)) => Ok(Exportee::Func(func)), @@ -2045,7 +2047,9 @@ impl Module { .into_iter() .map(|id| match id_defs.get(&id) { Some(id_def @ &IdDef::Const(ct)) => match cx[ct].kind { - ConstKind::PtrToGlobalVar(gv) => Ok(gv), + ConstKind::PtrToGlobalVar { global_var, offset: None } => { + Ok(global_var) + } _ => Err(id_def.descr(&cx)), }, Some(id_def) => Err(id_def.descr(&cx)), diff --git a/src/transform.rs b/src/transform.rs index f4a04bd4..59004b44 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -480,9 +480,9 @@ impl InnerTransform for ConstDef { | ConstKind::Vector(_) | ConstKind::SpvStringLiteralForExtInst(_) => Transformed::Unchanged, - ConstKind::PtrToGlobalVar(gv) => transform!({ - gv -> transformer.transform_global_var_use(*gv), - } => ConstKind::PtrToGlobalVar(gv)), + ConstKind::PtrToGlobalVar { global_var, offset } => transform!({ + global_var -> transformer.transform_global_var_use(*global_var), + } => ConstKind::PtrToGlobalVar { global_var, offset: *offset }), ConstKind::PtrToFunc(func) => transform!({ func -> transformer.transform_func_use(*func), diff --git a/src/visit.rs b/src/visit.rs index 596f7964..2a6dc1a3 100644 --- a/src/visit.rs +++ b/src/visit.rs @@ -351,7 +351,9 @@ impl InnerVisit for ConstDef { | ConstKind::Vector(_) | ConstKind::SpvStringLiteralForExtInst(_) => {} - &ConstKind::PtrToGlobalVar(gv) => visitor.visit_global_var_use(gv), + &ConstKind::PtrToGlobalVar { global_var, offset: _ } => { + visitor.visit_global_var_use(global_var); + } &ConstKind::PtrToFunc(func) => visitor.visit_func_use(func), ConstKind::SpvInst { spv_inst_and_const_inputs } => { let (_spv_inst, const_inputs) = &**spv_inst_and_const_inputs; From 2685ad6def1c825707bb5468875b0c9978a6b74e Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 5/7] qptr/lift: emit `mem.{load,store}`s with no offset, instead of SPIR-V `OpLoad`/`OpStore`. --- src/qptr/lift.rs | 18 ++++++++++-------- src/spv/canonical.rs | 13 ++++++++++++- src/spv/lift.rs | 2 ++ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs index a79e7e27..286739d0 100644 --- a/src/qptr/lift.rs +++ b/src/qptr/lift.rs @@ -659,9 +659,13 @@ impl LiftToSpvPtrInstsInFunc<'_> { DataInstKind::Mem(op @ (MemOp::Load { offset } | MemOp::Store { offset })) => { // HACK(eddyb) `_` will match multiple variants soon. #[allow(clippy::match_wildcard_for_single_variants)] - let (spv_opcode, access_type) = match op { - MemOp::Load { .. } => (wk.OpLoad, func.at(data_inst_def.outputs[0]).decl().ty), - MemOp::Store { .. } => (wk.OpStore, type_of_val(data_inst_def.inputs[1])), + let (access_op, access_type) = match op { + MemOp::Load { .. } => { + (MemOp::Load { offset: None }, func.at(data_inst_def.outputs[0]).decl().ty) + } + MemOp::Store { .. } => { + (MemOp::Store { offset: None }, type_of_val(data_inst_def.inputs[1])) + } _ => unreachable!(), }; @@ -681,10 +685,8 @@ impl LiftToSpvPtrInstsInFunc<'_> { .into_iter() }; - let mut new_data_inst_def = DataInstDef { - kind: DataInstKind::SpvInst(spv_opcode.into()), - ..data_inst_def.clone() - }; + let mut new_data_inst_def = + DataInstDef { kind: access_op.into(), ..data_inst_def.clone() }; // FIXME(eddyb) written in a more general style for future deduplication. for (input_idx, (access_chain_output_ptr_type, mut access_chain_data_inst_def)) in @@ -831,7 +833,7 @@ impl LiftToSpvPtrInstsInFunc<'_> { /// If necessary, construct an `OpAccessChain` instruction to offset `ptr` /// (pointing to a type with `pointee_layout`) by `offset`, and (optionally) - /// turn it into a pointer to `access_type` (for e.g. `OpLoad`/`OpStore`). + /// turn it into a pointer to `access_type`. /// /// **Note**: the returned instruction has empty `outputs`, the caller must /// add one, using the type returend alongside the instruction. diff --git a/src/spv/canonical.rs b/src/spv/canonical.rs index 130d03b5..c7348cd0 100644 --- a/src/spv/canonical.rs +++ b/src/spv/canonical.rs @@ -7,6 +7,7 @@ // // FIXME(eddyb) should interning attempts check/apply these canonicalizations? +use crate::mem::MemOp; use crate::spv::{self, spec}; use crate::{Const, ConstKind, Context, NodeKind, Type, TypeKind, TypeOrConst, scalar, vector}; use itertools::Itertools; @@ -564,11 +565,21 @@ impl spv::Inst { } }), + // FIXME(eddyb) consider lowering `OpLoad`/`OpStore` to `mem.{load,store}` + // as well, not just this lifting (but that'd require more changes). + NodeKind::Mem(op) => { + let wk = &spec::Spec::get().well_known; + match op { + MemOp::Load { offset: None } => Some(wk.OpLoad.into()), + MemOp::Store { offset: None } => Some(wk.OpStore.into()), + _ => None, + } + } + NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation(_) | NodeKind::FuncCall(_) - | NodeKind::Mem(_) | NodeKind::QPtr(_) | NodeKind::ThunkBind(_) | NodeKind::SpvInst(..) diff --git a/src/spv/lift.rs b/src/spv/lift.rs index 9840e995..1136c98e 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -2,6 +2,7 @@ use crate::cf::{self, SelectionKind}; use crate::func_at::FuncAt; +use crate::mem::MemOp; use crate::spv::{self, spec}; use crate::visit::{InnerVisit, Visitor}; use crate::{ @@ -258,6 +259,7 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { | NodeKind::ExitInvocation(_) | DataInstKind::Scalar(_) | DataInstKind::Vector(_) + | DataInstKind::Mem(MemOp::Load { offset: None } | MemOp::Store { offset: None }) | DataInstKind::FuncCall(_) | DataInstKind::ThunkBind(_) | DataInstKind::SpvInst(_) => {} From 91ab2985858af715c943b7c9678ce371cd529262 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 6/7] qptr/lower: more aggressively strip `Offset(0)` and remove unused instructions. --- src/qptr/lower.rs | 213 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 175 insertions(+), 38 deletions(-) diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs index a29205db..00f35a97 100644 --- a/src/qptr/lower.rs +++ b/src/qptr/lower.rs @@ -5,13 +5,15 @@ use crate::mem::{MemOp, shapes}; use crate::qptr::{QPtrAttr, QPtrOp}; use crate::transform::{InnerInPlaceTransform, Transformed, Transformer}; use crate::{ - AddrSpace, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, - DataInstKind, Diag, FuncDecl, GlobalVarDecl, Node, NodeKind, OrdAssertEq, Region, Type, - TypeKind, TypeOrConst, Value, VarDecl, VarKind, spv, + AddrSpace, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, + DataInstKind, DeclDef, Diag, EntityOrientedDenseMap, FuncDecl, GlobalVarDecl, Node, NodeDef, + NodeKind, OrdAssertEq, Region, Type, TypeKind, TypeOrConst, Value, Var, VarDecl, VarKind, spv, }; use itertools::{Either, Itertools as _}; +use rustc_hash::FxHashMap; use smallvec::SmallVec; use std::cell::Cell; +use std::mem; use std::num::{NonZeroI32, NonZeroU32}; use std::rc::Rc; @@ -143,8 +145,14 @@ impl<'a> LowerFromSpvPtrs<'a> { // separately - so `LowerFromSpvPtrInstsInFunc` will leave all value defs // (including replaced instructions!) with unchanged `OpTypePointer` // types, that only `EraseSpvPtrs`, later, replaces with `QPtr`. - LowerFromSpvPtrInstsInFunc { lowerer: self, parent_region: None } - .in_place_transform_func_decl(func_decl); + LowerFromSpvPtrInstsInFunc { + lowerer: self, + parent_region: None, + var_use_counts: Default::default(), + remove_inst_if_dead_output_with_parent_region: Default::default(), + noop_offsets_to_base_ptr: Default::default(), + } + .in_place_transform_func_decl(func_decl); EraseSpvPtrs { lowerer: self }.in_place_transform_func_decl(func_decl); } @@ -241,6 +249,18 @@ struct LowerFromSpvPtrInstsInFunc<'a> { lowerer: &'a LowerFromSpvPtrs<'a>, parent_region: Option, + + var_use_counts: EntityOrientedDenseMap, + + // HACK(eddyb) this acts as a "queue" for `qptr` outputs of instructions, + // which may end up dead because they're unused (either unused originally, + // in SPIR-V, or because of offset folding). + remove_inst_if_dead_output_with_parent_region: Vec<(Var, Region)>, + + // FIXME(eddyb) this is redundant with a few other things and only here + // because it needs to be available from `transform_value`, which doesn't + // have access to a `FuncAt` to look up anything. + noop_offsets_to_base_ptr: FxHashMap, } /// One `QPtr`->`QPtr` step used in the lowering of `Op*AccessChain`. @@ -386,7 +406,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { } fn try_lower_data_inst_def( - &self, + &mut self, mut func_at_data_inst: FuncAtMut<'_, DataInst>, ) -> Result, LowerError> { let cx = &self.lowerer.cx; @@ -399,7 +419,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { // FIXME(eddyb) is this a good convention? let func = func_at_data_inst_frozen.at(()); - let mut attrs = data_inst_def.attrs; + let attrs = data_inst_def.attrs; let spv_inst = match &data_inst_def.kind { DataInstKind::SpvInst(spv_inst) => spv_inst, @@ -409,18 +429,26 @@ impl LowerFromSpvPtrInstsInFunc<'_> { // FIXME(eddyb) wasteful clone? (needed due to borrowing issues) let outputs = data_inst_def.outputs.clone(); - // Map `ptr` to its base & offset, if it points to a `QPtrOp::Offset`. - let ptr_to_base_ptr_and_offset = |ptr| { - if let Value::Var(ptr) = ptr - && let VarKind::NodeOutput { node: ptr_inst, output_idx: 0 } = - func.at(ptr).decl().kind() - { - let ptr_inst_def = func.at(ptr_inst).def(); - if let DataInstKind::QPtr(QPtrOp::Offset(ptr_offset)) = ptr_inst_def.kind { - return Some((ptr_inst_def.inputs[0], NonZeroI32::new(ptr_offset))); - } + // Flatten `QPtrOp::Offset`s behind `ptr` into a base pointer and offset. + let flatten_offsets = |mut ptr| { + let mut offset = None::; + loop { + (ptr, offset) = if let Value::Var(ptr) = ptr + && let VarKind::NodeOutput { node: ptr_inst, output_idx: 0 } = + func.at(ptr).decl().kind() + && let NodeDef { + kind: DataInstKind::QPtr(QPtrOp::Offset(ptr_offset)), + inputs, + .. + } = func.at(ptr_inst).def() + && let Some(new_offset) = ptr_offset.checked_add(offset.map_or(0, |o| o.get())) + { + (inputs[0], NonZeroI32::new(new_offset)) + } else { + break; + }; } - None + (ptr, offset) }; let replacement_kind_and_inputs = if spv_inst.opcode == wk.OpVariable { @@ -447,7 +475,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { let ptr = data_inst_def.inputs[0]; - let (ptr, offset) = ptr_to_base_ptr_and_offset(ptr).unwrap_or((ptr, None)); + let (ptr, offset) = flatten_offsets(ptr); (MemOp::Load { offset }.into(), [ptr].into_iter().collect()) } else if spv_inst.opcode == wk.OpStore { @@ -460,7 +488,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { let ptr = data_inst_def.inputs[0]; let value = data_inst_def.inputs[1]; - let (ptr, offset) = ptr_to_base_ptr_and_offset(ptr).unwrap_or((ptr, None)); + let (ptr, offset) = flatten_offsets(ptr); (MemOp::Store { offset }.into(), [ptr, value].into_iter().collect()) } else if spv_inst.opcode == wk.OpArrayLength { @@ -560,12 +588,14 @@ impl LowerFromSpvPtrInstsInFunc<'_> { // Fold a previous `Offset` into an initial offset step, where possible. if let Some(QPtrChainStep { op: QPtrOp::Offset(first_offset), dyn_idx: None }) = steps.first_mut() - && let Some((ptr_base_ptr, ptr_offset)) = ptr_to_base_ptr_and_offset(ptr) - && let Some(new_first_offset) = - first_offset.checked_add(ptr_offset.map_or(0, |o| o.get())) { - ptr = ptr_base_ptr; - *first_offset = new_first_offset; + let (ptr_base_ptr, ptr_offset) = flatten_offsets(ptr); + if let Some(new_first_offset) = + first_offset.checked_add(ptr_offset.map_or(0, |o| o.get())) + { + ptr = ptr_base_ptr; + *first_offset = new_first_offset; + } } // HACK(eddyb) noop cases should probably not use any `DataInst`s at all, @@ -611,6 +641,15 @@ impl LowerFromSpvPtrInstsInFunc<'_> { func.nodes, ); + // HACK(eddyb) account for traversal never seeing this, + // while still needing value replacement and/or use tracking. + func.at(step_data_inst).inner_in_place_transform_with(self); + + // HACK(eddyb) this tracking is kind of ad-hoc but should + // easily cover everything we care about for now. + self.remove_inst_if_dead_output_with_parent_region + .push((step_output_var, self.parent_region.unwrap())); + ptr = Value::Var(step_output_var); } final_step.into_data_inst_kind_and_inputs(ptr) @@ -620,15 +659,9 @@ impl LowerFromSpvPtrInstsInFunc<'_> { if self.lowerer.as_spv_ptr_type(func.at(input).type_of(cx)).is_some() && self.lowerer.as_spv_ptr_type(func.at(outputs[0]).decl().ty).is_some() { - // HACK(eddyb) noop cases should not use any `DataInst`s at all, - // but that would require the ability to replace all uses of a `Value`. - let noop_step = QPtrChainStep { op: QPtrOp::Offset(0), dyn_idx: None }; - - // HACK(eddyb) since we're not removing the `DataInst` entirely, - // at least get rid of its attributes to clearly mark it as synthetic. - attrs = AttrSet::default(); - - noop_step.into_data_inst_kind_and_inputs(input) + // HACK(eddyb) this will end added to `noop_offsets_to_base_ptr`, + // which should replace all uses of this bitcast with its input. + (QPtrOp::Offset(0).into(), data_inst_def.inputs.clone()) } else { return Ok(Transformed::Unchanged); } @@ -710,9 +743,51 @@ impl LowerFromSpvPtrInstsInFunc<'_> { func_at_data_inst.def().attrs = cx.intern(attrs); } } + + // FIXME(eddyb) these are only this whacky because an `u32` is being + // encoded as `Option` for (dense) map entry reasons. + fn add_value_uses(&mut self, values: &[Value]) { + for &v in values { + if let Value::Var(v) = v { + let count = self.var_use_counts.entry(v); + *count = Some( + NonZeroU32::new(count.map_or(0, |c| c.get()).checked_add(1).unwrap()).unwrap(), + ); + } + } + } + fn remove_value_uses(&mut self, values: &[Value]) { + for &v in values { + if let Value::Var(v) = v { + let count = self.var_use_counts.entry(v); + *count = NonZeroU32::new(count.unwrap().get() - 1); + } + } + } } impl Transformer for LowerFromSpvPtrInstsInFunc<'_> { + // NOTE(eddyb) it's important that this only gets invoked on already lowered + // `Value`s, so we can rely on e.g. `noop_offsets_to_base_ptr` being filled. + fn transform_value_use(&mut self, v: &Value) -> Transformed { + let mut v = *v; + + let transformed = match v { + Value::Var(v) => self + .noop_offsets_to_base_ptr + .get(&v) + .copied() + .map_or(Transformed::Unchanged, Transformed::Changed), + + Value::Const(_) => Transformed::Unchanged, + }; + + transformed.apply_to(&mut v); + self.add_value_uses(&[v]); + + transformed + } + fn in_place_transform_region_def(&mut self, mut func_at_region: FuncAtMut<'_, Region>) { let outer_region = self.parent_region.replace(func_at_region.position); func_at_region.inner_in_place_transform_with(self); @@ -720,14 +795,76 @@ impl Transformer for LowerFromSpvPtrInstsInFunc<'_> { } fn in_place_transform_node_def(&mut self, mut func_at_node: FuncAtMut<'_, Node>) { - func_at_node.reborrow().inner_in_place_transform_with(self); - match self.try_lower_data_inst_def(func_at_node.reborrow()) { Ok(Transformed::Changed(new_def)) => { - *func_at_node.def() = new_def; + // HACK(eddyb) this tracking is kind of ad-hoc but should + // easily cover everything we care about for now. + if let DataInstKind::QPtr( + op @ (QPtrOp::HandleArrayIndex + | QPtrOp::BufferData + | QPtrOp::BufferDynLen { .. } + | QPtrOp::Offset(_) + | QPtrOp::DynOffset { .. }), + ) = &new_def.kind + { + self.remove_inst_if_dead_output_with_parent_region.push(( + func_at_node.reborrow().def().outputs[0], + self.parent_region.unwrap(), + )); + + if let QPtrOp::Offset(0) = op { + let mut base_ptr = new_def.inputs[0]; + if let Value::Var(base_ptr_var) = base_ptr + && let Some(&base_ptr_base_ptr) = + self.noop_offsets_to_base_ptr.get(&base_ptr_var) + { + base_ptr = base_ptr_base_ptr; + } + self.noop_offsets_to_base_ptr + .insert(func_at_node.reborrow().def().outputs[0], base_ptr); + } + } + + *func_at_node.reborrow().def() = new_def; } result @ (Ok(Transformed::Unchanged) | Err(_)) => { - self.add_fallback_attrs_to_data_inst_def(func_at_node, result.err()); + self.add_fallback_attrs_to_data_inst_def(func_at_node.reborrow(), result.err()); + } + } + + // NOTE(eddyb) this is done last so that `transform_value_use` only sees + // the lowered `Value`s, not the original ones. + func_at_node.inner_in_place_transform_with(self); + } + + fn in_place_transform_func_decl(&mut self, func_decl: &mut FuncDecl) { + func_decl.inner_in_place_transform_with(self); + + // Apply all `remove_inst_if_dead_output_with_parent_region` removals, that are truly unused. + if let DeclDef::Present(func_def_body) = &mut func_decl.def { + let remove_inst_if_dead_output_with_parent_region = + mem::take(&mut self.remove_inst_if_dead_output_with_parent_region); + // NOTE(eddyb) reverse order is important, as each removal can reduce + // use counts of an earlier definition, allowing further removal. + for (output_var, parent_region) in + remove_inst_if_dead_output_with_parent_region.into_iter().rev() + { + let is_used = self.var_use_counts.get(output_var).is_some(); + if !is_used { + let inst = func_def_body.at(output_var).decl().def_parent.right().unwrap(); + + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `regions` and `nodes` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. + func_def_body.regions[parent_region] + .children + .remove(inst, &mut func_def_body.nodes); + + self.remove_value_uses(&func_def_body.at(inst).def().inputs); + } } } } From 1a5d85f028b5dea03045f013c4613c82a74ff26e Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 7/7] Preserve all `#[mem.accesses]` in `mem::analyze` and use them in `qptr::lift`. --- src/mem/analyze.rs | 477 +++++++++++++++------------ src/mem/layout.rs | 96 ++++-- src/mem/mod.rs | 3 + src/qptr/lift.rs | 782 ++++++++++++++++++++++++--------------------- 4 files changed, 765 insertions(+), 593 deletions(-) diff --git a/src/mem/analyze.rs b/src/mem/analyze.rs index 8ab24d96..a8d290a2 100644 --- a/src/mem/analyze.rs +++ b/src/mem/analyze.rs @@ -9,10 +9,9 @@ use crate::visit::{InnerVisit, Visitor}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstKind, Context, DataInstKind, DeclDef, Diag, ExportKey, Exportee, Func, FxIndexMap, GlobalVar, Module, Node, NodeKind, OrdAssertEq, Type, - TypeKind, Value, Var, VarKind, + TypeKind, Value, Var, }; -use itertools::{Either, Itertools as _}; -use rustc_hash::FxHashMap; +use itertools::Either; use smallvec::SmallVec; use std::mem; use std::num::NonZeroU32; @@ -556,37 +555,36 @@ impl MemTypeLayout { } { - // FIXME(eddyb) should `DataHapp` track a `min_size` as well? - // FIXME(eddyb) duplicated below. - let min_happ_offset_range = - happ_offset..happ_offset.saturating_add(happ.max_size.unwrap_or(0)); + // FIXME(eddyb) should `DataHapp` have have an `.extent()` method? + let happ_extent = Extent { start: 0, end: happ.max_size }.saturating_add(happ_offset); // "Fast reject" based on size alone (expected w/ multiple attempts). - if self.mem_layout.dyn_unit_stride.is_none() - && (self.mem_layout.fixed_base.size < min_happ_offset_range.end - || happ.max_size.is_none()) - { + // FIXME(eddyb) should `MemTypeLayout` have have an `.extent()` method? + let extent = Extent { + start: 0, + end: (self.mem_layout.dyn_unit_stride.is_none()) + .then_some(self.mem_layout.fixed_base.size), + }; + if !extent.includes(&happ_extent) { return false; } } let any_component_supports = |happ_offset: u32, happ: &DataHapp| { - // FIXME(eddyb) should `DataHapp` track a `min_size` as well? - // FIXME(eddyb) duplicated above. - let min_happ_offset_range = - happ_offset..happ_offset.saturating_add(happ.max_size.unwrap_or(0)); + // FIXME(eddyb) should `DataHapp` have have an `.extent()` method? + let happ_extent = Extent { start: 0, end: happ.max_size }.saturating_add(happ_offset); // FIXME(eddyb) `find_components_containing` is linear today but // could be made logarithmic (via binary search). - self.components.find_components_containing(min_happ_offset_range).any(|idx| match &self - .components - { - Components::Scalar => unreachable!(), - Components::Elements { stride, elem, .. } => { - elem.supports_happ_at_offset(happ_offset % stride.get(), happ) - } - Components::Fields { offsets, layouts, .. } => { - layouts[idx].supports_happ_at_offset(happ_offset - offsets[idx], happ) + self.components.find_components_containing(happ_extent).any(|idx| { + match &self.components { + Components::Scalar => unreachable!(), + Components::Elements { stride, elem, .. } => { + elem.supports_happ_at_offset(happ_offset % stride.get(), happ) + } + Components::Fields { offsets, layouts, .. } => { + layouts[idx].supports_happ_at_offset(happ_offset - offsets[idx], happ) + } } }) }; @@ -682,6 +680,7 @@ impl MemTypeLayout { enum AttrTarget { Var(Var), Node(Node), + Func, } struct FuncGatherAccessesResults { @@ -699,7 +698,7 @@ pub struct GatherAccesses<'a> { cx: Rc, layout_cache: LayoutCache<'a>, - global_var_accesses: FxIndexMap>>, + global_var_accesses: FxIndexMap>, func_states: FxIndexMap, } @@ -726,7 +725,7 @@ impl<'a> GatherAccesses<'a> { ExportKey::SpvEntryPoint { imms: _, interface_global_vars } => { for &gv in interface_global_vars { self.global_var_accesses.entry(gv).or_insert_with(|| { - Some(Ok(match module.global_vars[gv].shape { + Ok(match module.global_vars[gv].shape { Some(shapes::GlobalVarShape::Handles { handle, .. }) => { MemAccesses::Handles(match handle { shapes::Handle::Opaque(ty) => shapes::Handle::Opaque(ty), @@ -737,7 +736,7 @@ impl<'a> GatherAccesses<'a> { }) } _ => MemAccesses::Data(DataHapp::DEAD), - })) + }) }); } } @@ -746,23 +745,21 @@ impl<'a> GatherAccesses<'a> { // Analysis over, write all attributes back to the module. for (gv, accesses) in self.global_var_accesses { - if let Some(accesses) = accesses { - let global_var_def = &mut module.global_vars[gv]; - match accesses { - Ok(accesses) => { - // FIXME(eddyb) deduplicate attribute manipulation. - global_var_def.attrs = self.cx.intern(AttrSetDef { - attrs: self.cx[global_var_def.attrs] - .attrs - .iter() - .cloned() - .chain([Attr::Mem(MemAttr::Accesses(OrdAssertEq(accesses)))]) - .collect(), - }); - } - Err(AnalysisError(e)) => { - global_var_def.attrs.push_diag(&self.cx, e); - } + let global_var_def = &mut module.global_vars[gv]; + match accesses { + Ok(accesses) => { + // FIXME(eddyb) deduplicate attribute manipulation. + global_var_def.attrs = self.cx.intern(AttrSetDef { + attrs: self.cx[global_var_def.attrs] + .attrs + .iter() + .cloned() + .chain([Attr::Mem(MemAttr::Accesses(OrdAssertEq(accesses)))]) + .collect(), + }); + } + Err(AnalysisError(e)) => { + global_var_def.attrs.push_diag(&self.cx, e); } } } @@ -799,7 +796,8 @@ impl<'a> GatherAccesses<'a> { } } - let func_def_body = match &mut module.funcs[func].def { + let func_decl = &mut module.funcs[func]; + let func_def_body = match &mut func_decl.def { DeclDef::Present(func_def_body) => func_def_body, DeclDef::Imported(_) => continue, }; @@ -812,6 +810,11 @@ impl<'a> GatherAccesses<'a> { &mut func_def_body.at_mut(node).def().attrs } + AttrTarget::Func => { + assert!(accesses.is_err()); + + &mut func_decl.attrs + } }; match accesses { Ok(accesses) => { @@ -861,20 +864,40 @@ impl<'a> GatherAccesses<'a> { let is_qptr = |ty: Type| matches!(cx[ty].kind, TypeKind::QPtr); let func_decl = &module.funcs[func]; - let mut param_accesses: SmallVec<[_; 2]> = - (0..func_decl.params.len()).map(|_| None).collect(); let mut accesses_or_err_attrs_to_attach = vec![]; - let func_def_body = match &module.funcs[func].def { + // FIXME(eddyb) should such a "small vec/int map" be a proper type? + fn small_vec_from_position_value_pairs( + entries: impl IntoIterator, + ) -> SmallVec<[Option; N]> + where + [Option; N]: smallvec::Array>, + { + let mut slots = SmallVec::new(); + for (i, x) in entries { + if i >= slots.len() { + slots.extend((slots.len()..=i).map(|_| None)); + } + slots[i] = Some(x); + } + slots + } + + let func_def_body = match &func_decl.def { DeclDef::Present(func_def_body) => func_def_body, DeclDef::Imported(_) => { - for (param, param_accesses) in func_decl.params.iter().zip(&mut param_accesses) { - if is_qptr(param.ty) { - *param_accesses = Some(Err(AnalysisError(Diag::bug([ - "pointer param of imported func".into(), - ])))); - } - } + let param_accesses = small_vec_from_position_value_pairs( + func_decl.params.iter().enumerate().filter(|(_, param)| is_qptr(param.ty)).map( + |(i, _)| { + ( + i, + Err(AnalysisError(Diag::bug([ + "pointer param of imported func".into() + ]))), + ) + }, + ), + ); return FuncGatherAccessesResults { param_accesses, accesses_or_err_attrs_to_attach, @@ -882,8 +905,11 @@ impl<'a> GatherAccesses<'a> { } }; - let mut node_to_per_output_accesses: FxHashMap<_, SmallVec<[Option<_>; 2]>> = - FxHashMap::default(); + // HACK(eddyb) this could be `FxHashMap`, except it's iterated at the end + // to generate errors for outputs of nodes that somehow weren't visited, + // or inputs of regions (i.e. loop bodies, which don't support pointers), + // and while *technically* not a hazard, it's better to be deterministic. + let mut var_accesses = FxIndexMap::default(); // HACK(eddyb) reversing a post-order traversal to get RPO, which for // structured control-flow means outside-in/top-down (just like pre-order), @@ -893,10 +919,27 @@ impl<'a> GatherAccesses<'a> { before: |_| {}, after: |node| post_order_nodes.push(node), }); - for node in post_order_nodes.into_iter().rev() { - let per_output_accesses = node_to_per_output_accesses.remove(&node).unwrap_or_default(); - + for &node in post_order_nodes.iter().rev() { let node_def = func_def_body.at(node).def(); + + // FIXME(eddyb) consider avoiding this collection step. + let per_output_accesses = small_vec_from_position_value_pairs::<_, 1>( + node_def + .outputs + .iter() + .enumerate() + .filter_map(|(i, &v)| Some((i, var_accesses.swap_remove(&v)?))), + ); + + // Always attach attributes to `qptr`-typed outputs, + // on top of propagating them from uses to definitions. + for (&output_var, accesses) in node_def.outputs.iter().zip(&per_output_accesses) { + if let Some(accesses) = accesses { + accesses_or_err_attrs_to_attach + .push((AttrTarget::Var(output_var), Clone::clone(accesses))); + } + } + let offset_accesses = |accesses, offset: u32| { let happ = match accesses { MemAccesses::Handles(_) => { @@ -933,18 +976,29 @@ impl<'a> GatherAccesses<'a> { kind: DataHappKind::Disjoint(Rc::new([(offset, happ)].into())), })) }; - let mut generate_accesses = |this: &mut Self, ptr: Value, mut new_accesses| { - let slot = match ptr { + let mut generate_accesses = |this: &mut Self, ptr: Value, new_accesses| { + // HACK(eddyb) in order to handle the different `Entry` types + // (inevitable due to different key types for different maps), + // the `.or_insert_with(|| new_accesses.take().unwrap())` pattern + // is used to distinguish "newly inserted" vs "needs merge". + let mut new_accesses = Some(new_accesses); + + let accesses = match ptr { Value::Const(ct) => match cx[ct].kind { + // TODO(eddyb) implement `offset: Some(_)` by analogy + // to `qptr.offset` on the `offset: None` case. ConstKind::PtrToGlobalVar { global_var, offset } => { if let Some(offset) = offset - && let Ok(accesses) = new_accesses + && let Some(Ok(accesses)) = new_accesses { - new_accesses = offset_accesses(accesses, offset.get()); + new_accesses = Some(offset_accesses(accesses, offset.get())); } - this.global_var_accesses.entry(global_var).or_default() + this.global_var_accesses + .entry(global_var) + .or_insert_with(|| new_accesses.take().unwrap()) } + // FIXME(eddyb) attach on the `Const` by replacing // it with a copy that also has an extra attribute, // or actually support by adding the accesses attribute @@ -961,53 +1015,35 @@ impl<'a> GatherAccesses<'a> { return; } }, - Value::Var(ptr) => match func_def_body.at(ptr).decl().kind() { - VarKind::RegionInput { region, input_idx } - if region == func_def_body.body => - { - &mut param_accesses[input_idx as usize] - } - VarKind::RegionInput { .. } => { - // FIXME(eddyb) don't throw away `new_accesses`. - accesses_or_err_attrs_to_attach.push(( - AttrTarget::Var(ptr), - Err(AnalysisError(Diag::bug(["unsupported φ".into()]))), - )); - return; - } - VarKind::NodeOutput { node: ptr_node, output_idx } => { - let i = output_idx as usize; - let slots = node_to_per_output_accesses.entry(ptr_node).or_default(); - if i >= slots.len() { - slots.extend((slots.len()..=i).map(|_| None)); - } - &mut slots[i] - } - }, + Value::Var(ptr) => { + var_accesses.entry(ptr).or_insert_with(|| new_accesses.take().unwrap()) + } }; - *slot = Some(match slot.take() { - Some(old) => old.and_then(|old| { + + // HACK(eddyb) also see the comment on `new_accesses`. + if let Some(new_accesses) = new_accesses { + // HACK(eddyb) using a placeholder to get by-value access. + let old_accesses = mem::replace(accesses, Err(AnalysisError(Diag::bug([])))); + *accesses = old_accesses.and_then(|old_accesses| { AccessMerger { layout_cache: &this.layout_cache } - .merge(old, new_accesses?) + .merge(old_accesses, new_accesses?) .into_result() - }), - None => new_accesses, - }); + }); + } }; match &node_def.kind { NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation { .. } => { - for (&output_var, accesses) in node_def.outputs.iter().zip(&per_output_accesses) + for (&output_var, accesses) in node_def.outputs.iter().zip(per_output_accesses) { - if let Some(_accesses) = accesses { - // FIXME(eddyb) don't throw away `accesses`. + // HACK(eddyb) `accesses` was already attached earlier. + if accesses.is_some() { accesses_or_err_attrs_to_attach.push(( AttrTarget::Var(output_var), - Err(AnalysisError(Diag::bug(["unsupported φ".into()]))), + Err(AnalysisError(Diag::bug(["unsupported dynamic qptr".into()]))), )); } } - continue; } @@ -1034,41 +1070,29 @@ impl<'a> GatherAccesses<'a> { DataInstKind::Scalar(_) | DataInstKind::Vector(_) => {} - &DataInstKind::FuncCall(callee) => { - match self.gather_accesses_in_func(module, callee) { - FuncGatherAccessesState::Complete(callee_results) => { - for (&arg, param_accesses) in - data_inst_def.inputs.iter().zip(&callee_results.param_accesses) - { - if let Some(param_accesses) = param_accesses { - generate_accesses(self, arg, param_accesses.clone()); - } + &DataInstKind::FuncCall(callee) => match self + .gather_accesses_in_func(module, callee) + { + FuncGatherAccessesState::Complete(callee_results) => { + for (&arg, param_accesses) in + data_inst_def.inputs.iter().zip(&callee_results.param_accesses) + { + if let Some(param_accesses) = param_accesses { + generate_accesses(self, arg, param_accesses.clone()); } } - FuncGatherAccessesState::InProgress => { - accesses_or_err_attrs_to_attach.push(( - AttrTarget::Node(node), - Err(AnalysisError(Diag::bug( - ["unsupported recursive call".into()], - ))), - )); - } - }; - // HACK(eddyb) multi-output instructions don't exist pre-disaggregate. - if (data_inst_def.outputs.iter().at_most_one().ok().unwrap()) - .is_some_and(|&o| is_qptr(func_def_body.at(o).decl().ty)) - && let Some(accesses) = output_accesses - { - accesses_or_err_attrs_to_attach - .push((AttrTarget::Var(data_inst_def.outputs[0]), accesses)); } - } - - DataInstKind::Mem(MemOp::FuncLocalVar(_)) => { - if let Some(accesses) = output_accesses { - accesses_or_err_attrs_to_attach - .push((AttrTarget::Var(data_inst_def.outputs[0]), accesses)); + FuncGatherAccessesState::InProgress => { + accesses_or_err_attrs_to_attach.push(( + AttrTarget::Node(node), + Err(AnalysisError(Diag::bug(["unsupported recursive call".into()]))), + )); } + }, + + DataInstKind::Mem(MemOp::FuncLocalVar(_mem_layout)) => { + // FIXME(eddyb) merge/intersect `mem.accesses` from uses, + // with the inherent size/align (given by `_mem_layout`)? } DataInstKind::QPtr(QPtrOp::HandleArrayIndex) => { generate_accesses( @@ -1335,91 +1359,134 @@ impl<'a> GatherAccesses<'a> { } DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => { - let mut has_from_spv_ptr_output_attr = false; for attr in &cx[data_inst_def.attrs].attrs { - match *attr { - Attr::QPtr(QPtrAttr::ToSpvPtrInput { input_idx, pointee }) => { - let ty = pointee.0; - generate_accesses( - self, - data_inst_def.inputs[input_idx as usize], - self.layout_cache - .layout_of(ty) - .map_err(|LayoutError(e)| AnalysisError(e)) - .and_then(|layout| match layout { - TypeLayout::Handle(handle) => { - let handle = match handle { - shapes::Handle::Opaque(ty) => { - shapes::Handle::Opaque(ty) - } - // NOTE(eddyb) this error is important, - // as the `Block` annotation on the - // buffer type means the type is *not* - // usable anywhere inside buffer data, - // since it would conflict with our - // own `Block`-annotated wrapper. - shapes::Handle::Buffer(..) => { - return Err(AnalysisError(Diag::bug([ - "ToSpvPtrInput: \ - whole Buffer ambiguous \ - (handle vs buffer data)" - .into(), - ]))); - } - }; - Ok(MemAccesses::Handles(handle)) - } - // NOTE(eddyb) because we can't represent - // the original type, in the same way we - // use `DataHappKind::StrictlyTyped` - // for non-handles, we can't guarantee - // a generated type that matches the - // desired `pointee` type. - TypeLayout::HandleArray(..) => { - Err(AnalysisError(Diag::bug([ - "ToSpvPtrInput: whole handle array \ - unrepresentable" - .into(), - ]))) - } - TypeLayout::Concrete(concrete) => { - Ok(MemAccesses::Data(DataHapp { - max_size: if concrete - .mem_layout - .dyn_unit_stride - .is_some() - { - None - } else { - Some(concrete.mem_layout.fixed_base.size) - }, - kind: DataHappKind::StrictlyTyped(ty), - })) - } - }), - ); - } - Attr::QPtr(QPtrAttr::FromSpvPtrOutput { - addr_space: _, - pointee: _, - }) => { - has_from_spv_ptr_output_attr = true; - } - _ => {} + if let Attr::QPtr(QPtrAttr::ToSpvPtrInput { input_idx, pointee }) = *attr { + let ty = pointee.0; + generate_accesses( + self, + data_inst_def.inputs[input_idx as usize], + self.layout_cache + .layout_of(ty) + .map_err(|LayoutError(e)| AnalysisError(e)) + .and_then(|layout| match layout { + TypeLayout::Handle(handle) => { + let handle = match handle { + shapes::Handle::Opaque(ty) => { + shapes::Handle::Opaque(ty) + } + // NOTE(eddyb) this error is important, + // as the `Block` annotation on the + // buffer type means the type is *not* + // usable anywhere inside buffer data, + // since it would conflict with our + // own `Block`-annotated wrapper. + shapes::Handle::Buffer(..) => { + return Err(AnalysisError(Diag::bug([ + "ToSpvPtrInput: \ + whole Buffer ambiguous \ + (handle vs buffer data)" + .into(), + ]))); + } + }; + Ok(MemAccesses::Handles(handle)) + } + // NOTE(eddyb) because we can't represent + // the original type, in the same way we + // use `DataHappKind::StrictlyTyped` + // for non-handles, we can't guarantee + // a generated type that matches the + // desired `pointee` type. + TypeLayout::HandleArray(..) => { + Err(AnalysisError(Diag::bug([ + "ToSpvPtrInput: whole handle array unrepresentable" + .into(), + ]))) + } + TypeLayout::Concrete(concrete) => { + Ok(MemAccesses::Data(DataHapp { + max_size: (concrete + .mem_layout + .dyn_unit_stride + .is_none()) + .then_some(concrete.mem_layout.fixed_base.size), + kind: DataHappKind::StrictlyTyped(ty), + })) + } + }), + ); } } + } + } + } - if has_from_spv_ptr_output_attr { - // FIXME(eddyb) merge with `FromSpvPtrOutput`'s `pointee`. - if let Some(accesses) = output_accesses { - accesses_or_err_attrs_to_attach - .push((AttrTarget::Var(data_inst_def.outputs[0]), accesses)); + let param_accesses = small_vec_from_position_value_pairs( + func_def_body + .at_body() + .def() + .inputs + .iter() + .enumerate() + .filter_map(|(i, &v)| Some((i, var_accesses.swap_remove(&v)?))), + ); + + if !var_accesses.is_empty() { + // HACK(eddyb) this extra traversal only exists in case the reason + // for leftover `var_accesses` entries is just the visit order, + // however unlikely that is (compared to the even worse case). + for &node in &post_order_nodes { + let node_def = func_def_body.at(node).def(); + for &output_var in &node_def.outputs { + if let Some(accesses) = var_accesses.swap_remove(&output_var) { + let diag = match accesses { + Ok(accesses) => Diag::bug([ + "extra mem.accesses contributions ignored (visited too late): " + .into(), + accesses.into(), + ]), + Err(AnalysisError(mut diag)) => { + diag.message.insert( + 0, + "extra mem.accesses-related errors (visited too late): ".into(), + ); + diag + } + }; + accesses_or_err_attrs_to_attach + .push((AttrTarget::Var(output_var), Err(AnalysisError(diag)))); + } + } + for ®ion in &node_def.child_regions { + for &input_var in &func_def_body.at(region).def().inputs { + if let Some(accesses) = var_accesses.swap_remove(&input_var) { + accesses_or_err_attrs_to_attach.extend([ + (AttrTarget::Var(input_var), accesses), + ( + AttrTarget::Var(input_var), + Err(AnalysisError(Diag::bug([ + "unsupported dynamic qptr".into() + ]))), + ), + ]); } } } } } + // FIXME(eddyb) this should ideally be detected by a SPIR-T verifier. + if !var_accesses.is_empty() { + accesses_or_err_attrs_to_attach.push(( + AttrTarget::Func, + Err(AnalysisError(Diag::bug([format!( + "{} `qptr` values are used, but their definitions were never visited", + var_accesses.len() + ) + .into()]))), + )); + } + FuncGatherAccessesResults { param_accesses, accesses_or_err_attrs_to_attach } } } diff --git a/src/mem/layout.rs b/src/mem/layout.rs index 1a4721c4..acdb4b92 100644 --- a/src/mem/layout.rs +++ b/src/mem/layout.rs @@ -99,23 +99,68 @@ pub(crate) enum Components { }, } +// FIXME(eddyb) review this, especially wrt using it in more places. +#[derive(Copy, Clone, PartialEq, Eq)] +pub(crate) struct Extent { + pub(crate) start: T, + // FIXME(eddyb) would `Option>` be clearer? + pub(crate) end: Option, +} + +// HACK(eddyb) comparison helper for `Option` so that `Some(_) < None`. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +enum InfOr { + Fin(T), + Inf, +} + +impl From> for Extent { + fn from(range: Range) -> Self { + Self { start: range.start, end: Some(range.end) } + } +} + +impl Extent { + pub(crate) fn includes(&self, other: &Self) -> bool { + self.start <= other.start + && other.end.as_ref().map_or(InfOr::Inf, InfOr::Fin) + <= self.end.as_ref().map_or(InfOr::Inf, InfOr::Fin) + } +} + +impl Extent { + pub(crate) fn checked_add(self, delta: u32) -> Option { + Some(Self { + start: self.start.checked_add(delta)?, + end: self.end.map(|end| end.checked_add(delta).ok_or(())).transpose().ok()?, + }) + } + + // FIXME(eddyb) this pattern was already used in a few places, is it reasonable? + pub(crate) fn saturating_add(self, delta: u32) -> Self { + Self { + start: self.start.saturating_add(delta), + end: self.end.map(|end| end.saturating_add(delta)), + } + } +} + impl Components { - /// Return all components (by index), which completely contain `offset_range`. + /// Return all components (by index), which completely contain `extent`. /// - /// If `offset_range` is zero-sized (`offset_range.start == offset_range.end`), - /// this can return multiple components, with at most one ever being non-ZST. + /// If `extent` is zero-sized (`Some(extent.start) == extent.end`), this + /// can return multiple components, with at most one ever being non-ZST. // // FIXME(eddyb) be more aggressive in pruning ZSTs so this can be simpler. pub(crate) fn find_components_containing( &self, - // FIXME(eddyb) consider renaming such offset ranges to "extent". - offset_range: Range, + extent: Extent, ) -> impl Iterator + '_ { - match self { + let candidate_component_indices_with_extent = match self { Components::Scalar => Either::Left(None.into_iter()), Components::Elements { stride, elem, fixed_len } => { Either::Left( - Some(offset_range.start / stride.get()) + Some(extent.start / stride.get()) .and_then(|elem_idx| { let elem_idx_vs_len = fixed_len .map_or(Ordering::Less, |fixed_len| elem_idx.cmp(&fixed_len.get())); @@ -127,11 +172,11 @@ impl Components { Ordering::Greater => return None, }; - let elem_start = elem_idx * stride.get(); - Some((elem_idx, elem_start..elem_start.checked_add(elem_size)?)) + Some(( + usize::try_from(elem_idx).ok()?, + Extent::from(0..elem_size).checked_add(elem_idx * stride.get())?, + )) }) - .filter(|(_, elem_range)| offset_range.end <= elem_range.end) - .and_then(|(elem_idx, _)| usize::try_from(elem_idx).ok()) .into_iter(), ) } @@ -143,27 +188,20 @@ impl Components { .iter() .zip(layouts) .map(|(&field_offset, field)| { - // HACK(eddyb) really need a maybe-open-ended range type. - if field.mem_layout.dyn_unit_stride.is_some() { - Err(field_offset..) - } else { - Ok(field_offset - ..field_offset - .checked_add(field.mem_layout.fixed_base.size) - .unwrap()) - } - }) - .enumerate() - .filter(move |(_, field_range)| match field_range { - Ok(field_range) => { - field_range.start <= offset_range.start - && offset_range.end <= field_range.end + Extent { + start: 0, + end: (field.mem_layout.dyn_unit_stride.is_none()) + .then_some(field.mem_layout.fixed_base.size), } - Err(field_range) => field_range.start <= offset_range.start, + .checked_add(field_offset) + .unwrap() }) - .map(|(field_idx, _)| field_idx), + .enumerate(), ), - } + }; + candidate_component_indices_with_extent + .filter(move |(_, component_extent)| component_extent.includes(&extent)) + .map(|(component_idx, _)| component_idx) } } diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 1078749e..433291ad 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -29,6 +29,9 @@ pub use layout::LayoutConfig; pub enum MemAttr { /// When applied to a `GlobalVar` or `FuncLocalVar`, this tracks all possible /// access patterns its memory may be subjected to (see [`MemAccesses`]). + // + // FIXME(eddyb) either document that `qptr` offsetting ops also get this + // attribute applied to them, or undo that change entirely. Accesses(OrdAssertEq), } diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs index 286739d0..63e079f2 100644 --- a/src/qptr/lift.rs +++ b/src/qptr/lift.rs @@ -1,6 +1,6 @@ //! [`QPtr`](crate::TypeKind::QPtr) lifting (e.g. to SPIR-V). -use crate::func_at::FuncAtMut; +use crate::func_at::{FuncAt, FuncAtMut}; use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp, shapes}; use crate::qptr::{QPtrAttr, QPtrOp}; use crate::transform::{InnerInPlaceTransform, InnerTransform, Transformed, Transformer}; @@ -74,14 +74,15 @@ impl<'a> LiftToSpvPtrs<'a> { } } - fn find_mem_accesses_attr(&self, attrs: AttrSet) -> Result<&MemAccesses, LiftError> { - self.cx[attrs] - .attrs - .iter() - .find_map(|attr| match attr { - Attr::Mem(MemAttr::Accesses(accesses)) => Some(&accesses.0), - _ => None, - }) + fn find_mem_accesses_attr(&self, attrs: AttrSet) -> Option<&MemAccesses> { + self.cx[attrs].attrs.iter().find_map(|attr| match attr { + Attr::Mem(MemAttr::Accesses(accesses)) => Some(&accesses.0), + _ => None, + }) + } + + fn require_mem_accesses_attr(&self, attrs: AttrSet) -> Result<&MemAccesses, LiftError> { + self.find_mem_accesses_attr(attrs) .ok_or_else(|| LiftError(Diag::bug(["missing `mem.accesses` attribute".into()]))) } @@ -102,7 +103,7 @@ impl<'a> LiftToSpvPtrs<'a> { ) -> Result<(Type, AddrSpace), LiftError> { let wk = self.wk; - let mem_accesses = self.find_mem_accesses_attr(global_var_decl.attrs)?; + let mem_accesses = self.require_mem_accesses_attr(global_var_decl.attrs)?; let shape = global_var_decl.shape.ok_or_else(|| LiftError(Diag::bug(["missing shape".into()])))?; @@ -375,6 +376,31 @@ struct DeferredPtrNoop { } impl LiftToSpvPtrInstsInFunc<'_> { + // FIXME(eddyb) maybe all this data should be packaged up together in a + // type with fields like those of `DeferredPtrNoop` (or even more). + fn type_of_val_as_spv_ptr_with_layout( + &self, + func_at_value: FuncAt<'_, Value>, + ) -> Result<(AddrSpace, TypeLayout), LiftError> { + let v = func_at_value.position; + + if let Value::Var(v) = v + && let Some(ptr_noop) = self.deferred_ptr_noops.get(&v) + { + return Ok(( + ptr_noop.output_pointer_addr_space, + ptr_noop.output_pointee_layout.clone(), + )); + } + + let (addr_space, pointee_type) = self + .lifter + .as_spv_ptr_type(func_at_value.type_of(&self.lifter.cx)) + .ok_or_else(|| LiftError(Diag::bug(["pointer input not an `OpTypePointer`".into()])))?; + + Ok((addr_space, self.lifter.layout_of(pointee_type)?)) + } + fn try_lift_data_inst_def( &mut self, mut func_at_data_inst: FuncAtMut<'_, DataInst>, @@ -387,25 +413,31 @@ impl LiftToSpvPtrInstsInFunc<'_> { let data_inst_def = func_at_data_inst_frozen.def(); let func = func_at_data_inst_frozen.at(()); let type_of_val = |v: Value| func.at(v).type_of(cx); - // FIXME(eddyb) maybe all this data should be packaged up together in a - // type with fields like those of `DeferredPtrNoop` (or even more). - let type_of_val_as_spv_ptr_with_layout = |v: Value| { - if let Value::Var(var) = v - && let Some(ptr_noop) = self.deferred_ptr_noops.get(&var) - { - return Ok(( - ptr_noop.output_pointer_addr_space, - ptr_noop.output_pointee_layout.clone(), - )); - } - let (addr_space, pointee_type) = - self.lifter.as_spv_ptr_type(type_of_val(v)).ok_or_else(|| { - LiftError(Diag::bug(["pointer input not an `OpTypePointer`".into()])) - })?; + // FIXME(eddyb) this should be a method on some sort of "cursor" type. + let insert_aux_data_inst = + |this: &mut Self, func: FuncAtMut<'_, ()>, mut aux_data_inst_def: DataInstDef| { + // HACK(eddyb) account for `deferred_ptr_noops` interactions. + this.resolve_deferred_ptr_noop_uses(&mut aux_data_inst_def.inputs); + this.add_value_uses(&aux_data_inst_def.inputs); + + let aux_data_inst = func.nodes.define(cx, aux_data_inst_def.into()); + + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `regions` and `nodes` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. + func.regions[this.parent_region.unwrap()].children.insert_before( + aux_data_inst, + data_inst, + func.nodes, + ); + + aux_data_inst + }; - Ok((addr_space, self.lifter.layout_of(pointee_type)?)) - }; let replacement_data_inst_def = match &data_inst_def.kind { NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation(_) => { return Ok(Transformed::Unchanged); @@ -425,12 +457,12 @@ impl LiftToSpvPtrInstsInFunc<'_> { } DataInstKind::Mem(MemOp::FuncLocalVar(_mem_layout)) => { - let mem_accesses = self + let output_mem_accesses = self .lifter - .find_mem_accesses_attr(func.at(data_inst_def.outputs[0]).decl().attrs)?; + .require_mem_accesses_attr(func.at(data_inst_def.outputs[0]).decl().attrs)?; // FIXME(eddyb) validate against `mem_layout`! - let pointee_type = self.lifter.pointee_type_for_accesses(mem_accesses)?; + let pointee_type = self.lifter.pointee_type_for_accesses(output_mem_accesses)?; let mut data_inst_def = data_inst_def.clone(); data_inst_def.kind = DataInstKind::SpvInst(spv::Inst { @@ -445,7 +477,7 @@ impl LiftToSpvPtrInstsInFunc<'_> { } DataInstKind::QPtr(QPtrOp::HandleArrayIndex) => { let (addr_space, layout) = - type_of_val_as_spv_ptr_with_layout(data_inst_def.inputs[0])?; + self.type_of_val_as_spv_ptr_with_layout(func.at(data_inst_def.inputs[0]))?; let handle = match layout { // FIXME(eddyb) standardize variant order in enum/match. TypeLayout::HandleArray(handle, _) => handle, @@ -466,12 +498,14 @@ impl LiftToSpvPtrInstsInFunc<'_> { let mut data_inst_def = data_inst_def.clone(); data_inst_def.kind = DataInstKind::SpvInst(wk.OpAccessChain.into()); let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); + output_decl.attrs = self.lifter.strip_mem_accesses_attr(output_decl.attrs); output_decl.ty = self.lifter.spv_ptr_type(addr_space, handle_type); data_inst_def } DataInstKind::QPtr(QPtrOp::BufferData) => { let buf_ptr = data_inst_def.inputs[0]; - let (addr_space, buf_layout) = type_of_val_as_spv_ptr_with_layout(buf_ptr)?; + let (addr_space, buf_layout) = + self.type_of_val_as_spv_ptr_with_layout(func.at(buf_ptr))?; let buf_data_layout = match buf_layout { TypeLayout::Handle(shapes::Handle::Buffer(_, buf)) => TypeLayout::Concrete(buf), @@ -500,7 +534,7 @@ impl LiftToSpvPtrInstsInFunc<'_> { } &DataInstKind::QPtr(QPtrOp::BufferDynLen { fixed_base_size, dyn_unit_stride }) => { let buf_ptr = data_inst_def.inputs[0]; - let (_, buf_layout) = type_of_val_as_spv_ptr_with_layout(buf_ptr)?; + let (_, buf_layout) = self.type_of_val_as_spv_ptr_with_layout(func.at(buf_ptr))?; let buf_data_layout = match buf_layout { TypeLayout::Handle(shapes::Handle::Buffer(_, buf)) => buf, @@ -539,124 +573,132 @@ impl LiftToSpvPtrInstsInFunc<'_> { } } &DataInstKind::QPtr(QPtrOp::Offset(offset)) => { - let base_ptr = data_inst_def.inputs[0]; - let (addr_space, layout) = type_of_val_as_spv_ptr_with_layout(base_ptr)?; - - let maybe_access_chain = self.maybe_adjust_pointer_for_offset_or_access( - base_ptr, - addr_space, - layout.clone(), - offset, - None, - )?; - let (new_output_ty, data_inst_def) = match maybe_access_chain { - Some((access_chain_output_ptr_type, mut access_chain_data_inst_def)) => { - access_chain_data_inst_def.outputs.push(data_inst_def.outputs[0]); - (access_chain_output_ptr_type, access_chain_data_inst_def) - } - None => { - self.deferred_ptr_noops.insert( - data_inst_def.outputs[0], - DeferredPtrNoop { - output_pointer: base_ptr, - output_pointer_addr_space: addr_space, - output_pointee_layout: layout, - parent_region: self.parent_region.unwrap(), - }, - ); - - // FIXME(eddyb) avoid the repeated call to `type_of_val`, - // maybe don't even replace the `QPtrOp::Offset` instruction? - let mut data_inst_def = data_inst_def.clone(); - data_inst_def.kind = QPtrOp::Offset(0).into(); - (type_of_val(base_ptr), data_inst_def) - } - }; + let mut data_inst_def = data_inst_def.clone(); - let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); + let output_mem_accesses = self + .lifter + .find_mem_accesses_attr(func.at(data_inst_def.outputs[0]).decl().attrs) + .unwrap_or(&MemAccesses::Data(DataHapp::DEAD)); + + let mut func = func_at_data_inst.reborrow().at(()); + let (output_pointer, (output_pointer_addr_space, output_pointee_layout)) = self + .adjust_pointer_for_offset_and_accesses( + data_inst_def.inputs[0], + offset, + output_mem_accesses, + func.reborrow(), + insert_aux_data_inst, + )?; + // FIXME(eddyb) not being able to reuse the original `DataInst` + // is a bit ridiculous, but correctly doing that would complicate + // `adjust_pointer_for_offset_and_accesses` in general. + self.deferred_ptr_noops.insert( + data_inst_def.outputs[0], + DeferredPtrNoop { + output_pointer, + output_pointer_addr_space, + output_pointee_layout, + parent_region: self.parent_region.unwrap(), + }, + ); + // FIXME(eddyb) avoid the repeated call to `type_of_val`, + // maybe don't even replace the `QPtrOp::Offset` instruction? + data_inst_def.kind = QPtrOp::Offset(0).into(); + let new_output_ty = func.reborrow().freeze().at(output_pointer).type_of(cx); + let output_decl = func.at(data_inst_def.outputs[0]).decl(); output_decl.ty = new_output_ty; - data_inst_def } DataInstKind::QPtr(QPtrOp::DynOffset { stride, index_bounds }) => { - let base_ptr = data_inst_def.inputs[0]; - let (addr_space, layout) = type_of_val_as_spv_ptr_with_layout(base_ptr)?; - let mut layout = match layout { - TypeLayout::Handle(_) | TypeLayout::HandleArray(..) => { - return Err(LiftError(Diag::bug(["cannot offset Handles".into()]))); - } - TypeLayout::Concrete(mem_layout) => mem_layout, - }; + let (stride, index_bounds) = (*stride, index_bounds.clone()); + let data_inst_def = data_inst_def.clone(); - let mut access_chain_inputs: SmallVec<_> = [base_ptr].into_iter().collect(); - loop { - if let Components::Elements { stride: layout_stride, elem, fixed_len } = - &layout.components - && layout_stride == stride - && Ok(index_bounds.clone()) - == fixed_len - .map(|len| i32::try_from(len.get()).map(|len| 0..len)) - .transpose() - { - access_chain_inputs.push(data_inst_def.inputs[1]); - layout = elem.clone(); - break; - } + let output_mem_accesses = self + .lifter + .find_mem_accesses_attr(func.at(data_inst_def.outputs[0]).decl().attrs) + .unwrap_or(&MemAccesses::Data(DataHapp::DEAD)); + + let strided_mem_accesses = MemAccesses::Data(DataHapp { + // FIXME(eddyb) there might be a better way to estimate the + // relevant extent for the array, maybe assume length >= 1 + // so the minimum range is always `0..stride`? + max_size: index_bounds.clone().map(|index_bounds| { + u32::try_from(index_bounds.end) + .ok() + .unwrap_or(0) + .checked_mul(stride.get()) + .unwrap_or(0) + }), + kind: DataHappKind::Repeated { + // FIXME(eddyb) allocating `Rc` a bit wasteful here. + element: Rc::new(DataHapp::DEAD), - // FIXME(eddyb) deduplicate with `maybe_adjust_pointer_for_offset_or_access`. - let idx = { - // FIXME(eddyb) there might be a better way to - // estimate a relevant offset range for the array, - // maybe assume length >= 1 so the minimum range - // is always `0..stride`? - let min_expected_len = index_bounds - .clone() - .and_then(|index_bounds| u32::try_from(index_bounds.end).ok()) - .unwrap_or(0); - let offset_range = - 0..min_expected_len.checked_mul(stride.get()).unwrap_or(0); - let mut component_indices = - layout.components.find_components_containing(offset_range); - match (component_indices.next(), component_indices.next()) { - (None, _) => { - return Err(LiftError(Diag::bug([ - "matching array not found in pointee type layout".into(), - ]))); - } - // FIXME(eddyb) obsolete this case entirely, - // by removing stores of ZSTs, and replacing - // loads of ZSTs with `OpUndef` constants. - (Some(_), Some(_)) => { - return Err(LiftError(Diag::bug([ - "ambiguity due to ZSTs in pointee type layout".into(), - ]))); - } - (Some(idx), None) => idx, + stride, + }, + }); + + let (array_ptr, (array_addr_space, array_layout)) = self + .adjust_pointer_for_offset_and_accesses( + data_inst_def.inputs[0], + 0, + &strided_mem_accesses, + func_at_data_inst.reborrow().at(()), + insert_aux_data_inst, + )?; + + let (elem_layout, array_index_multiplier) = match array_layout { + TypeLayout::Concrete(array_layout) => match &array_layout.components { + Components::Elements { stride: layout_stride, elem, fixed_len } + if stride.get().is_multiple_of(layout_stride.get()) + && Ok(index_bounds.clone()) + == fixed_len + .map(|len| i32::try_from(len.get()).map(|len| 0..len)) + .transpose() => + { + (elem.clone(), stride.get() / layout_stride.get()) } - }; + _ => { + return Err(LiftError(Diag::bug([ + "matching array not found in pointee type layout".into(), + ]))); + } + }, + _ => unreachable!(), + }; - let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { - LiftError(Diag::bug([ - format!("{idx} not representable as a positive s32").into() - ])) - })?; - access_chain_inputs - .push(Value::Const(cx.intern(scalar::Const::from_u32(idx_as_i32 as u32)))); - - layout = match &layout.components { - Components::Scalar => unreachable!(), - Components::Elements { elem, .. } => elem.clone(), - Components::Fields { layouts, .. } => layouts[idx].clone(), - }; + let array_index = if array_index_multiplier == 1 { + data_inst_def.inputs[1] + } else { + // FIXME(eddyb) implement + return Err(LiftError(Diag::bug([ + "unimplemented stride factor (index multiplier)".into(), + ]))); + }; + + let accesses_bounded_intra_elem = match output_mem_accesses { + &MemAccesses::Data(DataHapp { max_size: Some(max_size), .. }) => { + max_size <= elem_layout.mem_layout.fixed_base.size + } + _ => false, + }; + if !accesses_bounded_intra_elem { + // FIXME(eddyb) should this change the choice of pointer + // representation, or at least leave `QPtrOp::DynIndex` + // behind unchanged? } - let mut data_inst_def = data_inst_def.clone(); + + let mut data_inst_def = data_inst_def; data_inst_def.kind = DataInstKind::SpvInst(wk.OpAccessChain.into()); - data_inst_def.inputs = access_chain_inputs; + data_inst_def.inputs = [array_ptr, array_index].into_iter().collect(); let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); - output_decl.ty = self.lifter.spv_ptr_type(addr_space, layout.original_type); + output_decl.attrs = self.lifter.strip_mem_accesses_attr(output_decl.attrs); + output_decl.ty = + self.lifter.spv_ptr_type(array_addr_space, elem_layout.original_type); data_inst_def } DataInstKind::Mem(op @ (MemOp::Load { offset } | MemOp::Store { offset })) => { + let mut data_inst_def = data_inst_def.clone(); + // HACK(eddyb) `_` will match multiple variants soon. #[allow(clippy::match_wildcard_for_single_variants)] let (access_op, access_type) = match op { @@ -669,64 +711,54 @@ impl LiftToSpvPtrInstsInFunc<'_> { _ => unreachable!(), }; - // FIXME(eddyb) written in a more general style for future deduplication. - let maybe_ajustment = { - let input_idx = 0; - let ptr = data_inst_def.inputs[input_idx]; - let (addr_space, pointee_layout) = type_of_val_as_spv_ptr_with_layout(ptr)?; - self.maybe_adjust_pointer_for_offset_or_access( - ptr, - addr_space, - pointee_layout, - offset.map_or(0, |o| o.get()), - Some(access_type), - )? - .map(|access_chain| (input_idx, access_chain)) - .into_iter() + // FIXME(eddyb) this is awkward (or at least its needs DRY-ing) + // because only an approximation is needed, most checks are + // done by `adjust_pointer_for_offset_and_accesses`. + let access_mem_accesses = match self.lifter.layout_of(access_type)? { + TypeLayout::HandleArray(..) => { + return Err(LiftError(Diag::bug([ + "cannot access whole HandleArray".into() + ]))); + } + TypeLayout::Handle(shapes::Handle::Opaque(ty)) => { + MemAccesses::Handles(shapes::Handle::Opaque(ty)) + } + TypeLayout::Handle(shapes::Handle::Buffer(as_, _)) => { + MemAccesses::Handles(shapes::Handle::Buffer(as_, DataHapp::DEAD)) + } + TypeLayout::Concrete(concrete) => MemAccesses::Data(DataHapp { + max_size: (concrete.mem_layout.dyn_unit_stride.is_none()) + .then_some(concrete.mem_layout.fixed_base.size), + kind: DataHappKind::Direct(concrete.original_type), + }), }; - let mut new_data_inst_def = - DataInstDef { kind: access_op.into(), ..data_inst_def.clone() }; - - // FIXME(eddyb) written in a more general style for future deduplication. - for (input_idx, (access_chain_output_ptr_type, mut access_chain_data_inst_def)) in - maybe_ajustment - { - // HACK(eddyb) account for `deferred_ptr_noops` interactions. - self.resolve_deferred_ptr_noop_uses(&mut access_chain_data_inst_def.inputs); - self.add_value_uses(&access_chain_data_inst_def.inputs); - - let func = func_at_data_inst.reborrow().at(()); - - let access_chain_data_inst = - func.nodes.define(cx, access_chain_data_inst_def.into()); - let access_chain_output_var = func.vars.define( - cx, - VarDecl { - attrs: Default::default(), - ty: access_chain_output_ptr_type, - def_parent: Either::Right(access_chain_data_inst), - def_idx: 0, - }, - ); - func.nodes[access_chain_data_inst].outputs.push(access_chain_output_var); + let (adjusted_ptr, (_, adjusted_pointee_layout)) = self + .adjust_pointer_for_offset_and_accesses( + data_inst_def.inputs[0], + offset.map_or(0, |o| o.get()), + &access_mem_accesses, + func_at_data_inst.reborrow().at(()), + insert_aux_data_inst, + )?; - // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, - // due to the need to borrow `regions` and `nodes` - // at the same time - perhaps some kind of `FuncAtMut` position - // types for "where a list is in a parent entity" could be used - // to make this more ergonomic, although the potential need for - // an actual list entity of its own, should be considered. - func.regions[self.parent_region.unwrap()].children.insert_before( - access_chain_data_inst, - data_inst, - func.nodes, - ); + // FIXME(eddyb) implement at least same-size bitcasting + // (more generally, accesses should be {de,re}composed). + match adjusted_pointee_layout { + TypeLayout::Handle(shapes::Handle::Opaque(ty)) if ty == access_type => {} + TypeLayout::Concrete(concrete) if concrete.original_type == access_type => {} - new_data_inst_def.inputs[input_idx] = Value::Var(access_chain_output_var); + _ => { + return Err(LiftError(Diag::bug([ + "expected access type not found in pointee type layout".into(), + ]))); + } } - new_data_inst_def + data_inst_def.kind = access_op.into(); + data_inst_def.inputs[0] = adjusted_ptr; + + data_inst_def } &DataInstKind::ThunkBind(_) => { @@ -741,118 +773,124 @@ impl LiftToSpvPtrInstsInFunc<'_> { } DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => { - let mut to_spv_ptr_input_adjustments = vec![]; - let mut from_spv_ptr_output = None; + let mut changed_data_inst_def = None; + for attr in &cx[data_inst_def.attrs].attrs { + let attr = match attr { + Attr::QPtr(attr) => attr, + _ => continue, + }; + + let data_inst_def = changed_data_inst_def + .get_or_insert_with(|| func_at_data_inst.reborrow().def().clone()); + match *attr { - Attr::QPtr(QPtrAttr::ToSpvPtrInput { - input_idx, - pointee: expected_pointee_type, - }) => { + QPtrAttr::ToSpvPtrInput { input_idx, pointee: expected_pointee_type } => { let input_idx = usize::try_from(input_idx).unwrap(); let expected_pointee_type = expected_pointee_type.0; let input_ptr = data_inst_def.inputs[input_idx]; - let (input_ptr_addr_space, input_pointee_layout) = - type_of_val_as_spv_ptr_with_layout(input_ptr)?; - if let Some(access_chain) = self - .maybe_adjust_pointer_for_offset_or_access( + // FIXME(eddyb) this is awkward (or at least its needs DRY-ing) + // because only an approximation is needed, most checks are + // done by `adjust_pointer_for_offset_and_accesses`. + let expected_pointee_layout = + self.lifter.layout_of(expected_pointee_type)?; + let expected_mem_accesses = match &expected_pointee_layout { + TypeLayout::HandleArray(..) => { + return Err(LiftError(Diag::bug([ + "cannot access whole HandleArray".into(), + ]))); + } + &TypeLayout::Handle(shapes::Handle::Opaque(ty)) => { + MemAccesses::Handles(shapes::Handle::Opaque(ty)) + } + &TypeLayout::Handle(shapes::Handle::Buffer(as_, _)) => { + MemAccesses::Handles(shapes::Handle::Buffer( + as_, + DataHapp::DEAD, + )) + } + TypeLayout::Concrete(concrete) => MemAccesses::Data(DataHapp { + max_size: (concrete.mem_layout.dyn_unit_stride.is_none()) + .then_some(concrete.mem_layout.fixed_base.size), + kind: DataHappKind::StrictlyTyped(concrete.original_type), + }), + }; + + let (adjusted_ptr, (_, adjusted_pointee_layout)) = self + .adjust_pointer_for_offset_and_accesses( input_ptr, - input_ptr_addr_space, - input_pointee_layout, 0, - Some(expected_pointee_type), - )? - { - to_spv_ptr_input_adjustments.push((input_idx, access_chain)); + &expected_mem_accesses, + func_at_data_inst.reborrow().at(()), + insert_aux_data_inst, + )?; + match (adjusted_pointee_layout, expected_pointee_layout) { + ( + TypeLayout::Handle(shapes::Handle::Opaque(a)), + TypeLayout::Handle(shapes::Handle::Opaque(b)), + ) if a == b => {} + (TypeLayout::Concrete(a), TypeLayout::Concrete(b)) + if a.original_type == b.original_type => {} + + _ => { + return Err(LiftError(Diag::bug([ + "ToSpvPtrInput: expected type not found in pointee type layout" + .into(), + ]))); + } } + data_inst_def.inputs[input_idx] = adjusted_ptr; } - Attr::QPtr(QPtrAttr::FromSpvPtrOutput { addr_space, pointee }) => { - assert!(from_spv_ptr_output.is_none()); - from_spv_ptr_output = Some((addr_space.0, pointee.0)); + QPtrAttr::FromSpvPtrOutput { addr_space, pointee } => { + let output_decl = + func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); + output_decl.ty = self.lifter.spv_ptr_type(addr_space.0, pointee.0); } - _ => {} } } - if to_spv_ptr_input_adjustments.is_empty() && from_spv_ptr_output.is_none() { - return Ok(Transformed::Unchanged); - } - - let mut new_data_inst_def = data_inst_def.clone(); - - let func = func_at_data_inst.reborrow().at(()); - - // FIXME(eddyb) deduplicate with `Load`/`Store`. - for (input_idx, (access_chain_output_ptr_type, mut access_chain_data_inst_def)) in - to_spv_ptr_input_adjustments - { - // HACK(eddyb) account for `deferred_ptr_noops` interactions. - self.resolve_deferred_ptr_noop_uses(&mut access_chain_data_inst_def.inputs); - self.add_value_uses(&access_chain_data_inst_def.inputs); - - let access_chain_data_inst = - func.nodes.define(cx, access_chain_data_inst_def.into()); - let access_chain_output_var = func.vars.define( - cx, - VarDecl { - attrs: Default::default(), - ty: access_chain_output_ptr_type, - def_parent: Either::Right(access_chain_data_inst), - def_idx: 0, - }, - ); - func.nodes[access_chain_data_inst].outputs.push(access_chain_output_var); - - // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, - // due to the need to borrow `regions` and `nodes` - // at the same time - perhaps some kind of `FuncAtMut` position - // types for "where a list is in a parent entity" could be used - // to make this more ergonomic, although the potential need for - // an actual list entity of its own, should be considered. - func.regions[self.parent_region.unwrap()].children.insert_before( - access_chain_data_inst, - data_inst, - func.nodes, - ); - - new_data_inst_def.inputs[input_idx] = Value::Var(access_chain_output_var); - } - - if let Some((addr_space, pointee_type)) = from_spv_ptr_output { - func.vars[new_data_inst_def.outputs[0]].ty = - self.lifter.spv_ptr_type(addr_space, pointee_type); - } - - new_data_inst_def + return Ok( + changed_data_inst_def.map_or(Transformed::Unchanged, Transformed::Changed) + ); } }; Ok(Transformed::Changed(replacement_data_inst_def)) } - /// If necessary, construct an `OpAccessChain` instruction to offset `ptr` - /// (pointing to a type with `pointee_layout`) by `offset`, and (optionally) - /// turn it into a pointer to `access_type`. - /// - /// **Note**: the returned instruction has empty `outputs`, the caller must - /// add one, using the type returend alongside the instruction. + /// Derive a pointer from `ptr` which simultaneously accounts for `offset` + /// and compatibility with `target_accesses`, by introducing new instructions + /// (e.g. `OpAccessChain`) if needed (via `insert_aux_data_inst`). // // FIXME(eddyb) customize errors, to tell apart Offset/Load/Store/ToSpvPtrInput. - fn maybe_adjust_pointer_for_offset_or_access( - &self, + // FIXME(eddyb) the returned `(AddrSpace, TypeLayout)` describes the returned + // pointer, i.e. it's a cached copy of `as_spv_ptr_type(type_of(final_ptr))`, + // ideally it would be wrapped in some `struct` that disambiguates it. + // + // FIXME(eddyb) consider undoing all of this work, and relying on a more + // flexible pointer representation, instead. + fn adjust_pointer_for_offset_and_accesses( + &mut self, ptr: Value, - addr_space: AddrSpace, - mut pointee_layout: TypeLayout, offset: i32, - access_type: Option, - ) -> Result, LiftError> { + target_accesses: &MemAccesses, + + // FIXME(eddyb) bundle these into some kind of "cursor" type. + mut func: FuncAtMut<'_, ()>, + mut insert_aux_data_inst: impl FnMut(&mut Self, FuncAtMut<'_, ()>, DataInstDef) -> DataInst, + ) -> Result<(Value, (AddrSpace, TypeLayout)), LiftError> { let wk = self.lifter.wk; + let cx = &self.lifter.cx; + + let (addr_space, mut pointee_layout) = + self.type_of_val_as_spv_ptr_with_layout(func.reborrow().freeze().at(ptr))?; - let mk_access_chain = |access_chain_inputs: SmallVec<_>, final_pointee_type| { + let mut mk_access_chain = |access_chain_inputs: SmallVec<_>, final_pointee_type| { if access_chain_inputs.len() > 1 { - Some(( - self.lifter.spv_ptr_type(addr_space, final_pointee_type), + let node = insert_aux_data_inst( + self, + func.reborrow(), DataInstDef { attrs: Default::default(), kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), @@ -860,149 +898,172 @@ impl LiftToSpvPtrInstsInFunc<'_> { child_regions: [].into_iter().collect(), outputs: [].into_iter().collect(), }, - )) + ); + + let output_var = func.vars.define( + cx, + VarDecl { + attrs: Default::default(), + ty: self.lifter.spv_ptr_type(addr_space, final_pointee_type), + def_parent: Either::Right(node), + def_idx: 0, + }, + ); + func.nodes[node].outputs.push(output_var); + + Value::Var(output_var) } else { - None + ptr } }; - let access_layout = access_type.map(|ty| self.lifter.layout_of(ty)).transpose()?; - let mut access_chain_inputs: SmallVec<_> = [ptr].into_iter().collect(); if let TypeLayout::HandleArray(handle, _) = pointee_layout { - access_chain_inputs - .push(Value::Const(self.lifter.cx.intern(scalar::Const::from_u32(0)))); + access_chain_inputs.push(Value::Const(cx.intern(scalar::Const::from_u32(0)))); pointee_layout = TypeLayout::Handle(handle); } - let (mut pointee_layout, access_layout) = match (pointee_layout, access_layout) { + let (mut pointee_layout, target_happ) = match (pointee_layout, target_accesses) { (TypeLayout::HandleArray(..), _) => unreachable!(), // All the illegal cases are here to keep the rest tidier. - (_, Some(TypeLayout::Handle(shapes::Handle::Buffer(..)))) => { + (_, MemAccesses::Handles(shapes::Handle::Buffer(..))) => { return Err(LiftError(Diag::bug(["cannot access whole Buffer".into()]))); } - (_, Some(TypeLayout::HandleArray(..))) => { - return Err(LiftError(Diag::bug(["cannot access whole HandleArray".into()]))); - } - (_, Some(TypeLayout::Concrete(access_layout))) - if access_layout.mem_layout.dyn_unit_stride.is_some() => - { - return Err(LiftError(Diag::bug(["cannot access unsized type".into()]))); - } - (TypeLayout::Handle(_), Some(_)) if offset != 0 => { - return Err(LiftError(Diag::bug(["cannot offset Handles for access".into()]))); - } - (TypeLayout::Handle(_), None) => { - // FIXME(eddyb) this disallows even a noop offset on a handle pointer. + (TypeLayout::Handle(_), _) if offset != 0 => { return Err(LiftError(Diag::bug(["cannot offset Handles".into()]))); } (TypeLayout::Handle(shapes::Handle::Buffer(..)), _) => { return Err(LiftError(Diag::bug(["cannot offset/access into Buffer".into()]))); } - (TypeLayout::Handle(_), Some(TypeLayout::Concrete(_))) => { + (TypeLayout::Handle(_), MemAccesses::Data(_)) => { return Err(LiftError(Diag::bug(["cannot access Handle as memory".into()]))); } - (TypeLayout::Concrete(_), Some(TypeLayout::Handle(_))) => { + (TypeLayout::Concrete(_), MemAccesses::Handles(_)) => { return Err(LiftError(Diag::bug(["cannot access memory as Handle".into()]))); } ( TypeLayout::Handle(shapes::Handle::Opaque(pointee_handle_type)), - Some(TypeLayout::Handle(shapes::Handle::Opaque(access_handle_type))), + &MemAccesses::Handles(shapes::Handle::Opaque(access_handle_type)), ) => { assert_eq!(offset, 0); if pointee_handle_type != access_handle_type { return Err(LiftError(Diag::bug([ - "(opaque handle) pointer vs access type mismatch".into(), + "(opaque handle) pointer vs access type mismatch (".into(), + pointee_handle_type.into(), + " vs ".into(), + access_handle_type.into(), + ")".into(), ]))); } - return Ok(mk_access_chain(access_chain_inputs, pointee_handle_type)); + return Ok(( + mk_access_chain(access_chain_inputs, pointee_handle_type), + (addr_space, TypeLayout::Handle(shapes::Handle::Opaque(pointee_handle_type))), + )); } - (TypeLayout::Concrete(pointee_layout), Some(TypeLayout::Concrete(access_layout))) => { - (pointee_layout, Some(access_layout)) + (TypeLayout::Concrete(pointee_layout), MemAccesses::Data(data_happ)) => { + (pointee_layout, data_happ) } - (TypeLayout::Concrete(pointee_layout), None) => (pointee_layout, None), }; let mut offset = u32::try_from(offset) .ok() .ok_or_else(|| LiftError(Diag::bug(["negative offset".into()])))?; - // FIXME(eddyb) deduplicate with access chain loop for Offset. loop { - let done = offset == 0 - && access_layout.as_ref().is_none_or(|access_layout| { - pointee_layout.original_type == access_layout.original_type - }); - if done { + // FIXME(eddyb) should `DataHapp` have have an `.extent()` method? + let target_extent = + Extent { start: 0, end: target_happ.max_size }.saturating_add(offset); + + // FIXME(eddyb) should `MemTypeLayout` have have an `.extent()` method? + let pointee_extent = Extent { + start: 0, + end: (pointee_layout.mem_layout.dyn_unit_stride.is_none()) + .then_some(pointee_layout.mem_layout.fixed_base.size), + }; + + // FIXME(eddyb) how much of this is actually useful given the + // `if offset == 0 { break; }` tied to running out of leaves? + let is_compatible = offset == 0 && pointee_extent.includes(&target_extent) && { + match target_happ.kind { + DataHappKind::Dead | DataHappKind::Disjoint(_) => true, + DataHappKind::StrictlyTyped(target_ty) => { + pointee_layout.original_type == target_ty + } + DataHappKind::Direct(target_ty) => { + // NOTE(eddyb) in theory, non-atomic accesses understood + // by SPIR-T natively (mostly `mem.{load,store}`) only + // need to cover the extent of the access, as long as + // the types involved are plain bits (scalars/vectors). + // + // FIXME(eddyb) take advantage of this by implementing + // scalar merge/auto-bitcast in `mem::analyze`+`qptr::lift`. + let can_bitcast = |ty: Type| { + matches!(cx[ty].kind, TypeKind::Scalar(_) | TypeKind::Vector(_)) + }; + pointee_layout.original_type == target_ty + || can_bitcast(pointee_layout.original_type) && can_bitcast(target_ty) + } + DataHappKind::Repeated { stride: target_stride, .. } => { + match pointee_layout.components { + Components::Elements { stride, .. } => { + // FIXME(eddyb) take advantage of this by implementing + // stride factoring in `mem::analyze`+`qptr::lift`. + target_stride.get().is_multiple_of(stride.get()) + } + _ => false, + } + } + } + }; + + // Only stop descending into the pointee type when it already fits + // `target_happ` exactly (i.e. can only get worse, not better). + if is_compatible && pointee_extent == target_extent { break; } - let idx = { - let min_component_size = match &access_layout { - Some(access_layout) => access_layout.mem_layout.fixed_base.size, - None => { - // HACK(eddyb) supporting ZSTs would be a pain because - // they can "fit" in weird ways, e.g. given 3 offsets - // A, B, C (before/between/after a pair of fields), - // `B..B` is included in both `A..B` and `B..C`. - let allow_zst = false; - if allow_zst { 0 } else { 1 } + let mut component_indices = + pointee_layout.components.find_components_containing(target_extent); + let idx = match (component_indices.next(), component_indices.next()) { + (None, _) => { + // While none of the components fully contain `target_extent`, + // there's a good chance the pointer is already compatible + // with `target_happ` (and the only reason to keep going + // would be to find smaller types that remain compatible). + if is_compatible { + break; } - }; - let offset_range = offset..offset.saturating_add(min_component_size); - let mut component_indices = - pointee_layout.components.find_components_containing(offset_range.clone()); - - let idx = component_indices - .next() - .or_else(|| { - // HACK(eddyb) when dealing with a lone ZST, the search can - // fail because it expects at least one byte, so we retry - // with an empty range instead. - // FIXME(eddyb) this can still fail if there's another - // component that ends where the ZST is, maybe we need - // some way to filter for specifically such a ZST. - if access_layout.is_none() && offset_range.len() == 1 { - component_indices = pointee_layout - .components - .find_components_containing(offset..offset); - component_indices.next() - } else { - None - } - }) - .ok_or_else(|| { - // FIXME(eddyb) this could include the chosen indices, - // and maybe the current type and/or layout. - LiftError(Diag::bug([format!( - "offsets {offset_range:?} not found in pointee type layout, \ - after {} access chain indices", - access_chain_inputs.len() - 1 - ) - .into()])) - })?; - - if component_indices.next().is_some() { + // FIXME(eddyb) this could include the chosen indices, + // and maybe the current type and/or layout. + return Err(LiftError(Diag::bug([format!( + "offsets {:?}..{:?} not found in pointee type layout, \ + after {} access chain indices", + target_extent.start, + target_extent.end, + access_chain_inputs.len() - 1 + ) + .into()]))); + } + (Some(_), Some(_)) => { return Err(LiftError(Diag::bug([ "ambiguity due to ZSTs in pointee type layout".into(), ]))); } - - idx + (Some(idx), None) => idx, }; + drop(component_indices); let idx_as_i32 = i32::try_from(idx).ok().ok_or_else(|| { LiftError(Diag::bug([format!("{idx} not representable as a positive s32").into()])) })?; - access_chain_inputs.push(Value::Const( - self.lifter.cx.intern(scalar::Const::from_u32(idx_as_i32 as u32)), - )); + access_chain_inputs + .push(Value::Const(cx.intern(scalar::Const::from_u32(idx_as_i32 as u32)))); match &pointee_layout.components { Components::Scalar => unreachable!(), @@ -1017,7 +1078,10 @@ impl LiftToSpvPtrInstsInFunc<'_> { } } - Ok(mk_access_chain(access_chain_inputs, pointee_layout.original_type)) + Ok(( + mk_access_chain(access_chain_inputs, pointee_layout.original_type), + (addr_space, TypeLayout::Concrete(pointee_layout)), + )) } /// Apply rewrites implied by `deferred_ptr_noops` to `values`.