diff --git a/README.md b/README.md index 6628eaae..25f29469 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ fn main() -> @location(0) i32 { #[spv.Decoration.Location(Location: 0)] global_var GV0 in spv.StorageClass.Output: s32 -func F0() -> spv.OpTypeVoid { +func F0() { (_: s32, _: s32, v0: s32) = loop(v1: s32 <- 1s32, v2: s32 <- 1s32, _: s32 <- undef: s32) { v3 = s.lt(v2, 10s32): bool (v4: s32, v5: s32) = if v3 { diff --git a/examples/spv-lower-link-qptr-lift.rs b/examples/spv-lower-link-qptr-lift.rs index f71238bb..e987a40c 100644 --- a/examples/spv-lower-link-qptr-lift.rs +++ b/examples/spv-lower-link-qptr-lift.rs @@ -83,7 +83,7 @@ fn main() -> std::io::Result<()> { let layout_config = &spirt::mem::LayoutConfig { abstract_bool_size_align: (1, 1), logical_ptr_size_align: (4, 4), - ..spirt::mem::LayoutConfig::VULKAN_SCALAR_LAYOUT + ..spirt::mem::LayoutConfig::VULKAN_SCALAR_LAYOUT_LE }; eprint_duration(|| { diff --git a/src/cf/structurize.rs b/src/cf/structurize.rs index 082956cf..3cb2bbaf 100644 --- a/src/cf/structurize.rs +++ b/src/cf/structurize.rs @@ -600,15 +600,7 @@ impl<'a> Structurizer<'a> { const_true, const_false, - func_ret_types: { - let is_void = match &cx[func_decl.ret_type].kind { - TypeKind::SpvInst { spv_inst, .. } => { - spv_inst.opcode == crate::spv::spec::Spec::get().well_known.OpTypeVoid - } - _ => false, - }; - if is_void { &[][..] } else { std::slice::from_ref(&func_decl.ret_type) } - }, + func_ret_types: &func_decl.ret_types, func_def_body, loop_header_to_exit_targets, diff --git a/src/cf/unstructured.rs b/src/cf/unstructured.rs index fec3a3be..7cd4d19f 100644 --- a/src/cf/unstructured.rs +++ b/src/cf/unstructured.rs @@ -61,7 +61,8 @@ pub struct ControlFlowGraph { pub enum ControlTarget { Region(Region), - /// Leave the current function (returning `target_inputs`, if any). + /// Leave the current function, returning some number of [`Value`]s, as per + /// the function's signature (`ret_types` in [`FuncDecl`](crate::FuncDecl)). 
// // FIXME(eddyb) now that this is used through `NodeKind::ThunkBind`, // it should probably be more like `break` or some kind of "leave scope". diff --git a/src/lib.rs b/src/lib.rs index 103e276e..6d7c8bd7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -572,10 +572,12 @@ pub enum TypeKind { // TODO(eddyb) reconsider name? add signature? etc. Thunk, + // FIXME(eddyb) consider wrapping all of these in an `Rc` like `ConstKind`. SpvInst { spv_inst: spv::Inst, // FIXME(eddyb) find a better name. type_and_const_inputs: SmallVec<[TypeOrConst; 2]>, + value_lowering: spv::ValueLowering, }, /// The type of a [`ConstKind::SpvStringLiteralForExtInst`] constant, i.e. @@ -620,10 +622,12 @@ impl Type { } } -/// Interned handle for a [`ConstDef`](crate::ConstDef) (a constant value). +/// Interned handle for a [`ConstDef`](crate::ConstDef) (a constant [`Value`](crate::Value)). pub use context::Const; -/// Definition for a [`Const`]: a constant value. +/// Definition for a [`Const`]: a constant [`Value`]. +/// +/// See [`Value`] docs for limitations on the types of values, including [`Const`]s. // // FIXME(eddyb) maybe special-case some basic consts like integer literals. #[derive(PartialEq, Eq, Hash)] @@ -782,10 +786,31 @@ pub enum AddrSpace { } /// The body of a [`GlobalVar`] definition. +// +// FIXME(eddyb) make "interface variables" go through imports, not definitions. #[derive(Clone)] pub struct GlobalVarDefBody { - /// If `Some`, the global variable will start out with the specified value. - pub initializer: Option, + pub initializer: Option, +} + +/// Initial contents for a [`GlobalVar`] definition. +// +// FIXME(eddyb) add special cases for undef/zeroed/etc. +// FIXME(eddyb) consider renaming this to `ConstData` or `ConstBlob`? +#[derive(Clone)] +pub enum GlobalVarInit { + /// Single valid (constant) value (see [`Value`] docs for valid types). + // + // FIXME(eddyb) does this need to be its own case at all? 
+ Direct(Const), + + /// SPIR-V "aggregate" (`OpTypeStruct`/`OpTypeArray`), represented as its + /// non-aggregate leaves (i.e. it's disaggregated, as per [`Value`] docs). + SpvAggregate { ty: Type, leaves: SmallVec<[Const; 4]> }, + + /// Explicitly laid out constant data, using [`mem::const_data::ConstData`] + /// to efficiently mix concrete bytes with symbolic ([`Const`]) values. + Data(mem::const_data::ConstData), } /// Entity handle for a [`FuncDecl`](crate::FuncDecl) (a function). @@ -796,7 +821,7 @@ pub use context::Func; pub struct FuncDecl { pub attrs: AttrSet, - pub ret_type: Type, + pub ret_types: SmallVec<[Type; 2]>, pub params: SmallVec<[FuncParam; 2]>, @@ -964,7 +989,19 @@ pub struct NodeDef { /// child [`Region`]: /// * when this is a `Select`: the case that was chosen /// * when this is a `Loop`: the last iteration of the body - // TODO(eddyb) include former `DataInst`s in above docs. + /// + // + // FIXME(eddyb) recombine with these `DataInstDef` docs: + // + /// Types (and attributes) for all the outputs of this instruction. + /// + /// That is, `vars[outputs[i]].ty` is the type of the [`VarKind::DataInstOutput`] + /// with `output_idx == i` (see also [`Value`] documentation). + /// + /// Most instructions have `0` or `1` outputs, with the notable exception + /// of SPIR-V instructions which originally produced SPIR-V "aggregates" + /// (`OpTypeStruct`/`OpTypeArray`) before [`spv::lower`] decomposed them + /// (in the general case, [`spv::InstLowering`] tracks original types). pub outputs: SmallVec<[Var; 2]>, } @@ -1030,10 +1067,11 @@ pub enum NodeKind { ThunkBind(cf::unstructured::ControlTarget), // FIXME(eddyb) should this have `#[from]`? - SpvInst(spv::Inst), + SpvInst(spv::Inst, spv::InstLowering), SpvExtInst { ext_set: InternedStr, inst: u32, + lowering: spv::InstLowering, }, } @@ -1097,6 +1135,32 @@ pub enum VarKind { NodeOutput { node: Node, output_idx: u32 }, } +/// Use of a value, either constant or defined earlier in the same function. 
+/// +/// Each `Value` can only have one of these types: +/// * [`scalar`] (`bool`, integer, and floating-point), i.e. [`TypeKind::Scalar`] +/// * vectors (small array of [`scalar`]s) +/// * these are *not* traditional SIMD vectors, but more a form of "compression" +/// (i.e. vector ops often applying the equivalent scalar op per-component), +/// and sometimes also mandated by specs (e.g. some Vulkan `BuiltIn` types) +/// * matrices (small array of vectors) +/// * less fundamental than vectors, may be treated like arrays in the future +/// * pointers and by-value (but still opaque) resource handles +/// * SPIR-V has both opaque resource handles that behave much like pointers, +/// even physical ones (e.g. ray-tracing `OpTypeAccelerationStructureKHR`s), +/// and others that are only loaded from memory just before using them as +/// operands (e.g. images/samplers), and such mismatches in indirection may +/// result in SPIR-T making further distinctions here in the future +/// +/// Notably, "aggregate" types (SPIR-V `OpTypeStruct`/`OpTypeArray`) are excluded, +/// so they have to be (recursively) disaggregated into their constituents, and +/// passed around as separate `Value`s (see also [`DataInstDef`] docs). +/// * SPIR-V inherited "by-value aggregates" from LLVM, which supports them under +/// the name "FCA" ("first-class aggregates"), but other IRs (and LLVM passes) +/// avoid them because of their (negative) impact on analyses and transforms, +/// with their main vestigial purpose being to encode multiple return values +/// from functions, which can be done more directly in other IRs (and SPIR-T) +// // FIXME(eddyb) add a `context::PackedEither` using the sign of s/u32/i32/ // interned/entity, and use it to be more compact than `Either`. 
#[derive(Copy, Clone, PartialEq, Eq, Hash)] diff --git a/src/mem/analyze.rs b/src/mem/analyze.rs index a8d290a2..1437d675 100644 --- a/src/mem/analyze.rs +++ b/src/mem/analyze.rs @@ -923,7 +923,7 @@ impl<'a> GatherAccesses<'a> { let node_def = func_def_body.at(node).def(); // FIXME(eddyb) consider avoiding this collection step. - let per_output_accesses = small_vec_from_position_value_pairs::<_, 1>( + let mut per_output_accesses = small_vec_from_position_value_pairs::<_, 1>( node_def .outputs .iter() @@ -1053,13 +1053,19 @@ impl<'a> GatherAccesses<'a> { | DataInstKind::Mem(_) | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) + | DataInstKind::SpvInst(..) | DataInstKind::SpvExtInst { .. } => {} } - // HACK(eddyb) multi-output instructions don't exist pre-disaggregate. - assert!(per_output_accesses.len() <= 1); - let output_accesses = per_output_accesses.into_iter().next().flatten(); + // HACK(eddyb) this may be a bit wasteful, but it avoids + // complicating accessing `per_output_accesses` below, and + // most instructions should only have at most two outputs. + { + let expected = node_def.outputs.len(); + if per_output_accesses.len() < expected { + per_output_accesses.extend((per_output_accesses.len()..expected).map(|_| None)); + } + } // FIXME(eddyb) merge with `match &node_def.kind` above. let data_inst_def = node_def; @@ -1095,10 +1101,12 @@ impl<'a> GatherAccesses<'a> { // with the inherent size/align (given by `_mem_layout`)? 
} DataInstKind::QPtr(QPtrOp::HandleArrayIndex) => { + assert_eq!(per_output_accesses.len(), 1); generate_accesses( self, data_inst_def.inputs[0], - output_accesses + per_output_accesses[0] + .take() .unwrap_or_else(|| { Err(AnalysisError(Diag::bug([ "HandleArrayIndex: unknown element".into() @@ -1113,11 +1121,14 @@ impl<'a> GatherAccesses<'a> { ); } DataInstKind::QPtr(QPtrOp::BufferData) => { + assert_eq!(per_output_accesses.len(), 1); generate_accesses( self, data_inst_def.inputs[0], - output_accesses.unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))).and_then( - |accesses| { + per_output_accesses[0] + .take() + .unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))) + .and_then(|accesses| { let happ = match accesses { MemAccesses::Handles(_) => { return Err(AnalysisError(Diag::bug([ @@ -1130,8 +1141,7 @@ impl<'a> GatherAccesses<'a> { AddrSpace::Handles, happ, ))) - }, - ), + }), ); } &DataInstKind::QPtr(QPtrOp::BufferDynLen { fixed_base_size, dyn_unit_stride }) => { @@ -1162,6 +1172,7 @@ impl<'a> GatherAccesses<'a> { ); } &DataInstKind::QPtr(QPtrOp::Offset(offset)) => { + assert_eq!(per_output_accesses.len(), 1); generate_accesses( self, data_inst_def.inputs[0], @@ -1175,7 +1186,8 @@ impl<'a> GatherAccesses<'a> { }) .and_then(|offset| { offset_accesses( - output_accesses + per_output_accesses[0] + .take() .unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD)))?, offset, ) @@ -1183,11 +1195,14 @@ impl<'a> GatherAccesses<'a> { ); } DataInstKind::QPtr(QPtrOp::DynOffset { stride, index_bounds }) => { + assert_eq!(per_output_accesses.len(), 1); generate_accesses( self, data_inst_def.inputs[0], - output_accesses.unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))).and_then( - |accesses| { + per_output_accesses[0] + .take() + .unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))) + .and_then(|accesses| { let happ = match accesses { MemAccesses::Handles(_) => { return Err(AnalysisError(Diag::bug([ @@ -1241,8 +1256,7 @@ impl<'a> GatherAccesses<'a> { stride: *stride, }, })) - }, - ), + }), ); } 
DataInstKind::Mem(op @ (MemOp::Load { offset } | MemOp::Store { offset })) => { @@ -1358,7 +1372,7 @@ impl<'a> GatherAccesses<'a> { } } - DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => { + DataInstKind::SpvInst(..) | DataInstKind::SpvExtInst { .. } => { for attr in &cx[data_inst_def.attrs].attrs { if let Attr::QPtr(QPtrAttr::ToSpvPtrInput { input_idx, pointee }) = *attr { let ty = pointee.0; diff --git a/src/mem/const_data.rs b/src/mem/const_data.rs new file mode 100644 index 00000000..4fb7f0bc --- /dev/null +++ b/src/mem/const_data.rs @@ -0,0 +1,307 @@ +//! Constant data efficiently mixing concrete bytes with symbolic values. + +use crate::scalar; +use itertools::Itertools; +use smallvec::SmallVec; +use std::collections::BTreeMap; +use std::iter; +use std::num::NonZeroU32; +use std::ops::Range; + +/// Constant data "blob" or "chunk", where each byte can be part of: +/// - uninitialized areas (e.g. SPIR-V `OpUndef`) +/// - concrete data (i.e. `u8` values) +/// - symbolic values of type `V` (spanning some number of bytes) +/// +/// This is similar to (and inspired by), [`rustc`'s `mir::interpret::Allocation`]( +/// https://rustc-dev-guide.rust-lang.org/const-eval/interpret.html#memory), +/// which only has abstract pointers as symbolic values, encoded as "relocations" +/// (i.e. concrete data contains the respective offset for each abstract pointer, +/// whereas here the symbolic values are completely disjoint with concrete data). +#[derive(Clone)] +pub struct ConstData { + /// The bit `init[i / 64] & (1 << (i % 64))` is set iff byte offset `i` is + /// initialized, either with concrete data, or as part of a symbolic value. + // + // FIXME(eddyb) come up with a centralized "bitset"/"bitvec" instead. + init: Box<[u64]>, + + /// Concrete data bytes, with each byte only used when `init` indicates + /// it is initialized *and* no symbolic value overlaps it. Unused bytes can + /// have any values in `bytes`, as they're guaranteed to be always ignored. 
+ data: Box<[u8]>, + + /// Non-overlapping set of symbolic `V` values, forming an "overlay" on top + /// of the concrete data bytes, with `syms[offset] = (size, value)` + /// indicating bytes `offset..(offset + size)` are occupied by `value`. + syms: BTreeMap, + + /// Largest symbolic value size, i.e. `syms.values().map(|(size, _)| size).max()`. + // + // FIXME(eddyb) this is only needed to help with scanning overlaps in `syms`, + // and because there is no inherent limit on the size of symbolic values. + max_sym_size: NonZeroU32, +} + +/// One uniform "slice" of a `ConstData` (*not* mixing value categories). +#[derive(Clone)] +pub enum Part<'a, V> { + Uninit { + size: NonZeroU32, + }, + Bytes(&'a [u8]), + Symbolic { + size: NonZeroU32, + /// This is only the full `value` if `maybe_partial_slice == 0..size`. + maybe_partial_slice: Range, + value: V, + }, +} + +impl Part<'_, V> { + // FIXME(eddyb) should there just be a common `size` field? + pub fn size(&self) -> NonZeroU32 { + match *self { + Part::Uninit { size } | Part::Symbolic { size, .. } => size, + Part::Bytes(bytes) => NonZeroU32::new(bytes.len().try_into().unwrap()).unwrap(), + } + } +} + +/// Error type for write operations, emitted when they would otherwise cause a +/// partial overwrite of a symbolic value, if allowed to take effect. +#[derive(Debug)] +pub struct PartialSymbolicOverlap { + pub offsets: Range, +} + +// FIXME(eddyb) come up with a nicer abstraction for bitvecs, or use a crate. +fn bitrange_word_chunks(range: Range) -> (Range, impl Iterator>) { + // HACK(eddyb) `/ 64` and `% 64` work directly for inclusive positions, + // but it's more useful to be able to use `Range` in general. 
+ let (first, last) = (range.start, range.end - 1); + let words = (first / 64)..((last / 64) + 1); + ( + (words.start as usize)..(words.end as usize), + words.map(move |i| { + let [first_in_word, last_in_word] = [0, 63] + .map(|offset_in_word| ((i * 64 + offset_in_word).clamp(first, last) % 64) as u8); + first_in_word..(last_in_word + 1) + }), + ) +} + +impl ConstData { + pub fn new(size: u32) -> Self { + let size = size as usize; + Self { + init: vec![0; size.div_ceil(64)].into_boxed_slice(), + data: vec![0; size].into_boxed_slice(), + syms: BTreeMap::new(), + max_sym_size: NonZeroU32::new(1).unwrap(), + } + } + + pub fn size(&self) -> u32 { + self.data.len() as u32 + } + + // HACK(eddyb) only needed for `visit`. + pub fn used_symbolic_values(&self) -> impl Iterator { + self.syms.values().map(|(_, v)| v) + } + + // HACK(eddyb) only needed for `transform`. + pub fn used_symbolic_values_mut(&mut self) -> impl Iterator { + self.syms.values_mut().map(|(_, v)| v) + } + + pub fn read(&self, range: Range) -> impl Iterator> { + // HACK(eddyb) trigger bounds-checking panics. + let _ = &self.data[(range.start as usize)..(range.end as usize)]; + + // HACK(eddyb) the range has to be extended backwards, because a partial + // overlap could exist, i.e. `range.start` being in the middle of a value, + // but then irrelevant values have to be ignored. 
+ let mut syms = self + .syms + .range(range.start.saturating_sub(self.max_sym_size.get() - 1)..range.end) + .map(|(&offset, (size, value))| (offset..(offset + size.get()), value.clone())) + .peekable(); + while let Some((sym_range, _)) = syms.peek() { + if sym_range.end > range.start { + break; + } + syms.next().unwrap(); + } + + let mut part_start = range.start; + iter::from_fn(move || { + if part_start >= range.end { + return None; + } + let next_sym_range = syms.peek().cloned().map_or(range.end..range.end, |(r, _)| r); + + let max_part_end = if next_sym_range.contains(&part_start) { + next_sym_range.end + } else { + next_sym_range.start + }; + // FIXME(eddyb) come up with a nicer abstraction for bitvecs, or use a crate. + let (part_is_init, part_size) = { + let (words, word_bitslices) = bitrange_word_chunks(part_start..max_part_end); + let mut init_runs = self.init[words] + .iter() + .zip_eq(word_bitslices) + .flat_map(|(&word, word_bitslice)| { + let mut remaining_word = + (word & (!0 >> (64 - word_bitslice.end))) >> word_bitslice.start; + let mut remaining_bit_len = word_bitslice.len() as u32; + + iter::from_fn(move || { + if remaining_bit_len == 0 { + return None; + } + let is_set = (remaining_word & 1) != 0; + let run_len = if is_set { + remaining_word.trailing_ones() + } else { + remaining_word.trailing_zeros().min(remaining_bit_len) + }; + // HACK(eddyb) work around overlong shifts. 
+ remaining_word >>= 1; + remaining_word >>= run_len - 1; + remaining_bit_len -= run_len; + Some((is_set, NonZeroU32::new(run_len).unwrap())) + }) + }) + .coalesce(|(a, a_run), (b, b_run)| { + if a == b { + Ok((a, a_run.checked_add(b_run.get()).unwrap())) + } else { + Err(((a, a_run), (b, b_run))) + } + }); + + init_runs.next().unwrap() + }; + + let part_end = part_start + part_size.get(); + let part = if !part_is_init { + Part::Uninit { size: part_size } + } else if next_sym_range.contains(&part_start) { + let (sym_range, value) = syms.next().unwrap(); + // HACK(eddyb) ensure slicing is caused by `range`, *not* `init`. + assert_eq!( + part_start..part_end, + sym_range.start.clamp(range.start, range.end) + ..sym_range.end.clamp(range.start, range.end) + ); + Part::Symbolic { + size: NonZeroU32::new(sym_range.len() as u32).unwrap(), + maybe_partial_slice: (part_start - sym_range.start) + ..(part_end - sym_range.start), + value, + } + } else { + Part::Bytes(&self.data[(part_start as usize)..(part_end as usize)]) + }; + part_start = part_end; + Some(part) + }) + } + + /// Helper for `write_bytes` and `write_symbolic`, which only modifies `self` + /// (removing fully overwritten symbolic values, and setting `init` bits), + /// when it can guarantee it will return `Ok(())` (i.e. after error checks). + fn try_init(&mut self, range: Range) -> Result<(), PartialSymbolicOverlap> { + // HACK(eddyb) trigger bounds-checking panics. + let _ = &self.data[(range.start as usize)..(range.end as usize)]; + + // HACK(eddyb) the range has to be extended backwards, because a partial + // overlap could exist, i.e. `range.start` being in the middle of a value, + // but then irrelevant values have to be ignored. 
+ let syms_ranges = self + .syms + .range(range.start.saturating_sub(self.max_sym_size.get() - 1)..range.end) + .map(|(&offset, &(size, _))| offset..(offset + size.get())); + + // FIXME(eddyb) this is a bit inefficient but we don't have + // cursors, so we have to buffer the `BTreeMap` keys here. + let mut fully_overwritten_sym_offsets = SmallVec::<[u32; 16]>::new(); + for sym_range in syms_ranges { + let overlap = sym_range.start.clamp(range.start, range.end) + ..sym_range.end.clamp(range.start, range.end); + if overlap.is_empty() { + continue; + } + if overlap == sym_range { + fully_overwritten_sym_offsets.push(sym_range.start); + } else { + return Err(PartialSymbolicOverlap { offsets: overlap }); + } + } + for offset in fully_overwritten_sym_offsets { + self.syms.remove(&offset); + } + + // FIXME(eddyb) come up with a nicer abstraction for bitvecs, or use a crate. + { + let (words, word_bitslices) = bitrange_word_chunks(range); + for (word, word_bitslice) in self.init[words].iter_mut().zip(word_bitslices) { + *word |= (!0 << word_bitslice.start) & (!0 >> (64 - word_bitslice.end)); + } + } + + Ok(()) + } + + // HACK(eddyb) returns the written range (should it be just a byte length?). + pub fn write_bytes( + &mut self, + offset: u32, + bytes: &[u8], + ) -> Result, PartialSymbolicOverlap> { + let range = offset..(offset + u32::try_from(bytes.len()).unwrap()); + self.try_init(range.clone())?; + self.data[(range.start as usize)..(range.end as usize)].copy_from_slice(bytes); + Ok(range) + } + + // FIXME(eddyb) should this take an offset range instead? + pub fn write_symbolic( + &mut self, + offset: u32, + size: NonZeroU32, + value: V, + ) -> Result<(), PartialSymbolicOverlap> { + let range = offset..(offset + size.get()); + self.try_init(range.clone())?; + self.syms.insert(offset, (size, value)); + self.max_sym_size = self.max_sym_size.max(size); + Ok(()) + } + + // HACK(eddyb) returns the written range (should it be just a byte length?). 
+ pub fn write_scalar( + &mut self, + offset: u32, + scalar: scalar::Const, + layout_config: &crate::mem::LayoutConfig, + ) -> Result, PartialSymbolicOverlap> { + let byte_len = match scalar.ty() { + scalar::Type::Bool => layout_config.abstract_bool_size_align.0, + scalar::Type::SInt(_) | scalar::Type::UInt(_) | scalar::Type::Float(_) => { + let bit_width = scalar.ty().bit_width(); + assert_eq!(bit_width % 8, 0); + bit_width / 8 + } + }; + let mut bytes = scalar.bits().to_le_bytes(); + let bytes = &mut bytes[..byte_len as usize]; + if layout_config.is_big_endian { + bytes.reverse(); + } + self.write_bytes(offset, bytes) + } +} diff --git a/src/mem/layout.rs b/src/mem/layout.rs index acdb4b92..56812d55 100644 --- a/src/mem/layout.rs +++ b/src/mem/layout.rs @@ -17,6 +17,8 @@ use std::rc::Rc; // // FIXME(eddyb) use proper newtypes (and log2 for align!). pub struct LayoutConfig { + pub is_big_endian: bool, + pub ignore_legacy_align: bool, pub min_aggregate_legacy_align: u32, @@ -46,19 +48,21 @@ pub struct LayoutConfig { } impl LayoutConfig { - pub const VULKAN_SCALAR_LAYOUT: Self = Self { + pub const VULKAN_SCALAR_LAYOUT_LE: Self = Self { + is_big_endian: false, + ignore_legacy_align: true, min_aggregate_legacy_align: 1, abstract_bool_size_align: (1, 1), logical_ptr_size_align: (1, 1), }; - pub const VULKAN_STANDARD_LAYOUT: Self = - Self { ignore_legacy_align: false, ..Self::VULKAN_SCALAR_LAYOUT }; + pub const VULKAN_STANDARD_LAYOUT_LE: Self = + Self { ignore_legacy_align: false, ..Self::VULKAN_SCALAR_LAYOUT_LE }; // FIXME(eddyb) is this even useful? 
(all the storage classes that have any // kind of alignment requirements, require explicit offsets) - pub const VULKAN_EXTENDED_ALIGN_UBO_LAYOUT: Self = - Self { min_aggregate_legacy_align: 16, ..Self::VULKAN_STANDARD_LAYOUT }; + pub const VULKAN_EXTENDED_ALIGN_UBO_LAYOUT_LE: Self = + Self { min_aggregate_legacy_align: 16, ..Self::VULKAN_STANDARD_LAYOUT_LE }; } pub(crate) struct LayoutError(pub(crate) Diag); @@ -99,6 +103,80 @@ pub(crate) enum Components { }, } +impl MemTypeLayout { + /// Recursively expand `MemTypeLayout`s into their components, at every level + /// for which `recurse_into` returns `true`. `each_leaf` is called for each + /// leaf (scalar or `recurse_into` returned `false`) component, and includes + /// its offset (starting at `base_offset`). + /// + /// Because each array element has its own offset, each array element will + /// be separately flattened, such that the entire array will be covered. + /// + /// `Err` may be returned in some cases (e.g. offset overflows, dynamic arrays), + /// in which case the sequence of leaves `each_leaf` produced can be considered + /// incomplete and shouldn't be used. + pub(crate) fn deeply_flatten_if( + &self, + base_offset: i32, + recurse_into: &impl Fn(&Self) -> bool, + each_leaf: &mut impl FnMut(i32, &Self) -> Result<(), LayoutError>, + ) -> Result<(), LayoutError> { + match &self.components { + Components::Scalar => each_leaf(base_offset, self), + _ if !recurse_into(self) => each_leaf(base_offset, self), + + Components::Elements { stride, elem, fixed_len } => { + let len = fixed_len.ok_or_else(|| { + LayoutError(Diag::err([ + "dynamically sized type `".into(), + self.original_type.into(), + "` cannot be flattened into a finite sequence of leaves".into(), + ])) + })?; + + for i in 0..len.get() { + let offset = i32::try_from(i) + .ok() + .and_then(|i| { + // HACK(eddyb) don't claim an overflow for `0 * stride` + // even if `stride` doesn't fit in `i32`. 
+ if i == 0 { + Some(base_offset) + } else { + let stride = i32::try_from(stride.get()).ok()?; + base_offset.checked_add(i.checked_mul(stride)?) + } + }) + .ok_or_else(|| { + LayoutError(Diag::bug([format!( + "`{base_offset} + {i} * {stride}` overflowed `s32`" + ) + .into()])) + })?; + elem.deeply_flatten_if(offset, recurse_into, each_leaf)?; + } + Ok(()) + } + + Components::Fields { offsets, layouts } => { + for (&field_offset, field) in offsets.iter().zip(layouts) { + let offset = i32::try_from(field_offset) + .ok() + .and_then(|field_offset| base_offset.checked_add(field_offset)) + .ok_or_else(|| { + LayoutError(Diag::bug([format!( + "`{base_offset} + {field_offset}` overflowed `s32`" + ) + .into()])) + })?; + field.deeply_flatten_if(offset, recurse_into, each_leaf)?; + } + Ok(()) + } + } + } +} + // FIXME(eddyb) review this, especially wrt using it in more places. #[derive(Copy, Clone, PartialEq, Eq)] pub(crate) struct Extent { @@ -210,7 +288,7 @@ pub(crate) struct LayoutCache<'a> { cx: Rc, wk: &'static spv::spec::WellKnown, - config: &'a LayoutConfig, + pub(crate) config: &'a LayoutConfig, cache: RefCell>, } @@ -397,7 +475,7 @@ impl<'a> LayoutCache<'a> { TypeKind::Thunk => { return Err(LayoutError(Diag::bug(["`layout_of(thunk)`".into()]))); } - TypeKind::SpvInst { spv_inst, type_and_const_inputs } => { + TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. } => { (spv_inst, type_and_const_inputs) } TypeKind::SpvStringLiteralForExtInst => { diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 433291ad..c5e926e5 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -18,6 +18,7 @@ use std::rc::Rc; // NOTE(eddyb) all the modules are declared here, but they're documented "inside" // (i.e. using inner doc comments). pub mod analyze; +pub mod const_data; // FIXME(eddyb) make this public? 
pub(crate) mod layout; pub mod shapes; diff --git a/src/print/mod.rs b/src/print/mod.rs index e8540176..a6f0f8d4 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -29,9 +29,10 @@ use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, DataInstKind, DbgSrcLoc, DeclDef, Diag, DiagLevel, DiagMsgPart, EntityOrientedDenseMap, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam, - FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, InternedStr, - Module, ModuleDebugInfo, ModuleDialect, Node, NodeDef, NodeKind, OrdAssertEq, Region, - RegionDef, Type, TypeDef, TypeKind, TypeOrConst, Value, Var, VarDecl, scalar, spv, vector, + FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDecl, GlobalVarDefBody, GlobalVarInit, Import, + InternedStr, Module, ModuleDebugInfo, ModuleDialect, Node, NodeDef, NodeKind, OrdAssertEq, + Region, RegionDef, Type, TypeDef, TypeKind, TypeOrConst, Value, Var, VarDecl, scalar, spv, + vector, }; use arrayvec::ArrayVec; use itertools::Either; @@ -576,7 +577,7 @@ impl<'a> Visitor<'a> for Plan<'a> { let wk = &spv::spec::Spec::get().well_known; match &self.cx[gv_decl.type_of_ptr_to].kind { - TypeKind::SpvInst { spv_inst, type_and_const_inputs } + TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. } if spv_inst.opcode == wk.OpTypePointer => { match type_and_const_inputs[..] { @@ -1522,13 +1523,17 @@ impl Printer<'_> { fn error_style(&self) -> pretty::Styles { pretty::Styles::color(pretty::palettes::simple::MAGENTA) } + // HACK(eddyb) only used in hex dumps of `ConstData` initializer currently. + fn non_deemphasized_comment_style(&self) -> pretty::Styles { + pretty::Styles::color(pretty::palettes::simple::DARK_GRAY) + } fn comment_style(&self) -> pretty::Styles { pretty::Styles { color_opacity: Some(0.3), size: Some(-4), // FIXME(eddyb) this looks wrong for some reason? 
// subscript: true, - ..pretty::Styles::color(pretty::palettes::simple::DARK_GRAY) + ..self.non_deemphasized_comment_style() } } fn named_argument_label_style(&self) -> pretty::Styles { @@ -3067,15 +3072,16 @@ impl Print for TypeDef { TypeKind::Thunk => printer.imperative_keyword_style().apply("thunk").into(), - TypeKind::SpvInst { spv_inst, type_and_const_inputs } => printer.pretty_spv_inst( - printer.spv_op_style(), - spv_inst.opcode, - &spv_inst.imms, - type_and_const_inputs.iter().map(|&ty_or_ct| match ty_or_ct { - TypeOrConst::Type(ty) => ty.print(printer), - TypeOrConst::Const(ct) => ct.print(printer), - }), - ), + TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. } => printer + .pretty_spv_inst( + printer.spv_op_style(), + spv_inst.opcode, + &spv_inst.imms, + type_and_const_inputs.iter().map(|&ty_or_ct| match ty_or_ct { + TypeOrConst::Type(ty) => ty.print(printer), + TypeOrConst::Const(ct) => ct.print(printer), + }), + ), TypeKind::SpvStringLiteralForExtInst => pretty::Fragment::new([ printer.error_style().apply("type_of").into(), "(".into(), @@ -3363,7 +3369,7 @@ impl Print for GlobalVarDecl { printer.pretty_type_ascription_suffix(ty) } }, - TypeKind::SpvInst { spv_inst, type_and_const_inputs } + TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. } if spv_inst.opcode == wk.OpTypePointer => { match type_and_const_inputs[..] { @@ -3392,7 +3398,7 @@ impl Print for GlobalVarDecl { DeclDef::Present(GlobalVarDefBody { initializer }) => { // FIXME(eddyb) `global_varX in AS: T = Y` feels a bit wonky for // the initializer, but it's cleaner than obvious alternatives. 
- initializer.map(|initializer| initializer.print(printer)) + initializer.as_ref().map(|initializer| initializer.print(printer)) } }; let body = maybe_rhs.map(|rhs| pretty::Fragment::new(["= ".into(), rhs])); @@ -3416,10 +3422,174 @@ impl Print for AddrSpace { } } +impl Print for GlobalVarInit { + type Output = pretty::Fragment; + fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { + match self { + GlobalVarInit::Direct(ct) => ct.print(printer), + // FIXME(eddyb) should this be recursive? + GlobalVarInit::SpvAggregate { ty, leaves } => pretty::Fragment::new([ + pretty::join_comma_sep("(", leaves.iter().map(|v| v.print(printer)), ")"), + printer.pretty_type_ascription_suffix(*ty), + ]), + GlobalVarInit::Data(data) => { + let mut next_offset = 0; + let mut parts_with_offsets = data + .read(0..data.size()) + .map(|part| { + let offset = next_offset; + next_offset += part.size().get(); + (offset, part) + }) + .filter_map(|(offset, part)| { + use crate::mem::const_data::Part; + let part = match part { + // Hiding the `undef` parts as it's arguably the default. + Part::Uninit { .. } => return None, + + // FIXME(eddyb) come up with a better printing strategy? + // (integrate at least `Uninit`, maybe `Symbolic` like MIR?) 
+ Part::Bytes(bytes) => { + const CHUNK_SIZE: u32 = 16; + let first_chunk = { + let start_in_chunk = offset % CHUNK_SIZE; + (start_in_chunk > 0).then(|| { + let len_in_chunk = (CHUNK_SIZE - start_in_chunk) as usize; + &bytes[..len_in_chunk.min(bytes.len())] + }) + }; + let after_first_chunk = + &bytes[first_chunk.map_or(0, |chunk| chunk.len())..]; + + let is_single_line = offset / CHUNK_SIZE + == (offset + part.size().get() - 1) / CHUNK_SIZE; + let chunks = + first_chunk + .map(|chunk| (CHUNK_SIZE - chunk.len() as u32, chunk, 0)) + .into_iter() + .chain(after_first_chunk.chunks(16).map(|chunk| { + (0, chunk, CHUNK_SIZE - chunk.len() as u32) + })); + + let lines = chunks.map(|(pre_gap, chunk, post_gap)| { + let bytes = ((0..pre_gap).map(|_| None)) + .chain(chunk.iter().copied().map(Some)) + .chain((0..post_gap).map(|_| None)); + + // FIXME(eddyb) consider address line prefixes, + // using inline comments (e.g. `/* 01f0 */`). + let mut hex = String::new(); + let mut ascii = + if is_single_line { " /* " } else { " // " }.to_string(); + for byte_or_gap in bytes.clone() { + if byte_or_gap.is_none() && is_single_line { + continue; + } + if !hex.is_empty() { + hex += " "; + } + match byte_or_gap { + Some(byte) => { + write!(hex, "{byte:02x}").unwrap(); + ascii.push( + (byte < 128) + .then_some(byte as char) + .filter(|c| c.is_ascii_graphic()) + .unwrap_or('.'), + ); + } + None => { + hex += " "; + ascii += " "; + } + } + } + if is_single_line { + ascii += " */"; + } + + pretty::Fragment::new( + (!is_single_line) + .then_some(pretty::Node::ForceLineSeparation) + .into_iter() + .chain([ + printer.numeric_literal_style().apply(hex), + printer + .non_deemphasized_comment_style() + .apply(ascii), + ]), + ) + }); + + pretty::Fragment::new([ + printer.declarative_keyword_style().apply("data"), + "(".into(), + pretty::Node::InlineOrIndentedBlock(vec![ + pretty::Fragment::new(lines), + ]), + ")".into(), + ]) + } + + Part::Symbolic { size, maybe_partial_slice, value } => { + 
assert_eq!(maybe_partial_slice, 0..size.get()); + + value.print(printer) + } + }; + Some((offset, part)) + }); + + match (parts_with_offsets.next(), parts_with_offsets.next()) { + (Some((0, whole_part)), None) => whole_part, + (first, second) => { + let parts_with_offsets = + [first, second].into_iter().flatten().chain(parts_with_offsets); + + pretty::join_comma_sep( + "{", + parts_with_offsets + .map(|(offset, part)| { + pretty::Fragment::new([ + printer + .numeric_literal_style() + .apply(format!("{offset}")) + .into(), + " => ".into(), + part, + ]) + }) + .map(|entry| { + pretty::Fragment::new([ + pretty::Node::ForceLineSeparation.into(), + entry, + ]) + }), + "}", + ) + } + } + } + } + } +} + impl Print for FuncDecl { type Output = AttrsAndDef; fn print(&self, printer: &Printer<'_>) -> AttrsAndDef { - let Self { attrs, ret_type, params, def } = self; + let Self { attrs, ret_types, params, def } = self; + + let sig_ret = if !ret_types.is_empty() { + let mut ret_types = ret_types.iter().map(|ty| ty.print(printer)); + let ret_type = if ret_types.len() == 1 { + ret_types.next().unwrap() + } else { + pretty::join_comma_sep("(", ret_types, ")") + }; + pretty::Fragment::new([" -> ".into(), ret_type]) + } else { + pretty::Fragment::default() + }; let params = match def { DeclDef::Imported(_) => Either::Left( @@ -3431,11 +3601,7 @@ impl Print for FuncDecl { } }; - let sig = pretty::Fragment::new([ - pretty::join_comma_sep("(", params, ")"), - " -> ".into(), - ret_type.print(printer), - ]); + let sig = pretty::Fragment::new([pretty::join_comma_sep("(", params, ")"), sig_ret]); let def_without_name = match def { DeclDef::Imported(import) => { @@ -3521,20 +3687,24 @@ impl<'a> FuncAt<'a, Either> { let func = self.at(()); let vars = def_parent .either(|region| &func.at(region).def().inputs, |node| &func.at(node).def().outputs); - vars.iter().map(move |&var| func.at(var).print_with_def_parent(printer, def_parent)) + vars.iter().map(move |&var| { + 
func.at(var).decl().print(printer).insert_name_before_def( + func.at(var).print_name_with_def_parent(printer, def_parent), + ) + }) } } impl FuncAt<'_, Var> { - fn print_with_def_parent( + fn print_name_with_def_parent( &self, printer: &Printer<'_>, expected_def_parent: Either, ) -> pretty::Fragment { - let VarDecl { attrs, ty, def_parent, def_idx } = *self.decl(); + let VarDecl { attrs: _, ty: _, def_parent, def_idx } = *self.decl(); let var = self.position; - let mut name = Use::Var(var).print_as_def(printer); + let name = Use::Var(var).print_as_def(printer); let valid_attachment_or_err = if def_parent != expected_def_parent { Err("/* BUG (attached elsewhere) */".into()) @@ -3554,16 +3724,25 @@ impl FuncAt<'_, Var> { Ok(()) } }; - if let Err(msg) = valid_attachment_or_err { - name = - pretty::Fragment::new([printer.error_style().apply(msg).into(), " ".into(), name]); + + match valid_attachment_or_err { + Ok(()) => name, + Err(msg) => { + pretty::Fragment::new([printer.error_style().apply(msg).into(), " ".into(), name]) + } } + } +} + +impl Print for VarDecl { + type Output = AttrsAndDef; + fn print(&self, printer: &Printer<'_>) -> AttrsAndDef { + let Self { attrs, ty, def_parent: _, def_idx: _ } = *self; AttrsAndDef { attrs: attrs.print(printer), def_without_name: printer.pretty_type_ascription_suffix(ty), } - .insert_name_before_def(name) } } @@ -3761,7 +3940,7 @@ impl Print for FuncAt<'_, Node> { | DataInstKind::Mem(_) | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) + | DataInstKind::SpvInst(..) | DataInstKind::SpvExtInst { .. } => { // FIXME(eddyb) `outputs_header` is wastefully built even in // this case (though ideally the logic would just be shared). 
@@ -3779,6 +3958,21 @@ impl Print for FuncAt<'_, Node> { } } +impl Print for spv::ReaggregatedIdOperand<'_, Value> { + type Output = pretty::Fragment; + fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { + match *self { + Self::Direct(v) => v.print(printer), + // FIXME(eddyb) should this be recursive? it's not on the + // output side, and we largely don't care about nesting. + Self::Aggregate { ty, leaves } => pretty::Fragment::new([ + pretty::join_comma_sep("(", leaves.iter().map(|v| v.print(printer)), ")"), + printer.pretty_type_ascription_suffix(ty), + ]), + } + } +} + impl FuncAt<'_, DataInst> { fn print_data_inst(&self, printer: &Printer<'_>) -> pretty::Fragment { let DataInstDef { attrs, kind, inputs, child_regions, outputs } = self.def(); @@ -3787,17 +3981,30 @@ impl FuncAt<'_, DataInst> { let attrs = attrs.print(printer); - // HACK(eddyb) multi-output instructions don't exist pre-disaggregate. - let output_type = if !outputs.is_empty() { - assert_eq!(outputs.len(), 1); - Some(self.at(outputs[0]).decl().ty) - } else { - None - }; + // NOTE(eddyb) the LHS types and the ascryption type don't have to line up, + // all the edge cases (likely only single-leaf aggregates) are handled + // by comparing the types being printed (and showing both if not redundant). + let mut show_outputs_lhs = !outputs.is_empty(); - let mut output_use_to_print_as_lhs = output_type.map(|_| Use::Var(outputs[0])); + let mut output_type_for_ascription_suffix = match kind { + NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation(_) => { + unreachable!() + } - let mut output_type_to_print = output_type; + DataInstKind::Scalar(_) + | DataInstKind::Vector(_) + | DataInstKind::FuncCall(_) + | DataInstKind::Mem(_) + | DataInstKind::QPtr(_) + | DataInstKind::ThunkBind(_) => None, + DataInstKind::SpvInst(_, lowering) | DataInstKind::SpvExtInst { lowering, .. } => { + lowering.disaggregated_output + } + } + .or_else(|| match outputs[..] 
{ + [o] => Some(self.at(o).decl().ty), + _ => None, + }); // FIXME(eddyb) should this be a method on `scalar::Op` instead? let print_scalar = |op: scalar::Op| { @@ -3811,7 +4018,7 @@ impl FuncAt<'_, DataInst> { ]) }; - let def_without_type = match kind { + let def_without_types = match kind { NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation(_) => { unreachable!() } @@ -4044,38 +4251,43 @@ impl FuncAt<'_, DataInst> { ), ]), - DataInstKind::SpvInst(inst) => printer.pretty_spv_inst( + DataInstKind::SpvInst(inst, lowering) => printer.pretty_spv_inst( printer.spv_op_style(), inst.opcode, &inst.imms, - inputs.iter().map(|v| v.print(printer)), + lowering.reaggreate_inputs(inputs).map(|o| o.print(printer)), ), - &DataInstKind::SpvExtInst { ext_set, inst } => { + DataInstKind::SpvExtInst { ext_set, inst, lowering } => { let spv_spec = spv::spec::Spec::get(); let wk = &spv_spec.well_known; + // HACK(eddyb) prevent accidentally using non-reaggregated `inputs`. + let inputs = lowering.reaggreate_inputs(inputs); + // HACK(eddyb) hide `OpTypeVoid` types, as they're effectively // the default, and not meaningful *even if* the resulting // value is "used" in a kind of "untyped token" way. - output_type_to_print = output_type_to_print.filter(|&ty| { - let is_void = match &printer.cx[ty].kind { - TypeKind::SpvInst { spv_inst, .. } => spv_inst.opcode == wk.OpTypeVoid, - _ => false, - }; - !is_void - }); + output_type_for_ascription_suffix = + output_type_for_ascription_suffix.filter(|&ty| { + let is_void = match &printer.cx[ty].kind { + TypeKind::SpvInst { spv_inst, .. } => spv_inst.opcode == wk.OpTypeVoid, + _ => false, + }; + !is_void + }); // HACK(eddyb) only keep around untyped outputs if they're used. 
- if output_type_to_print.is_none() { - output_use_to_print_as_lhs = output_use_to_print_as_lhs.filter(|output_use| { + if output_type_for_ascription_suffix.is_none() { + show_outputs_lhs = show_outputs_lhs && { + assert_eq!(outputs.len(), 1); printer .use_styles - .get(output_use) + .get(&Use::Var(outputs[0])) .is_some_and(|style| !matches!(style, UseStyle::Inline)) - }); + }; } // FIXME(eddyb) this may get expensive, cache it? - let ext_set_name = &printer.cx[ext_set]; + let ext_set_name = &printer.cx[*ext_set]; let lowercase_ext_set_name = ext_set_name.to_ascii_lowercase(); let (ext_set_alias, known_inst_desc) = (spv_spec .get_ext_inst_set_by_lowercase_name(&lowercase_ext_set_name)) @@ -4085,7 +4297,7 @@ impl FuncAt<'_, DataInst> { .map_or((&None, None), |ext_inst_set| { // FIXME(eddyb) check that these aliases are unique // across the entire output before using them! - (&ext_inst_set.short_alias, ext_inst_set.instructions.get(&inst)) + (&ext_inst_set.short_alias, ext_inst_set.instructions.get(inst)) }); // FIXME(eddyb) extract and separate out the version? 
@@ -4103,8 +4315,8 @@ impl FuncAt<'_, DataInst> { Str(&'a str), U32(u32), } - let pseudo_imm_from_value = |v: Value| { - if let Value::Const(ct) = v { + let pseudo_imm_from_input = |v: spv::ReaggregatedIdOperand<'_, Value>| { + if let spv::ReaggregatedIdOperand::Direct(Value::Const(ct)) = v { match &printer.cx[ct].kind { ConstKind::Undef | ConstKind::Vector(_) @@ -4126,10 +4338,8 @@ impl FuncAt<'_, DataInst> { }; let debuginfo_with_pseudo_imm_inputs: Option> = known_inst_desc - .filter(|inst_desc| { - inst_desc.is_debuginfo && output_use_to_print_as_lhs.is_none() - }) - .and_then(|_| inputs.iter().copied().map(pseudo_imm_from_value).collect()); + .filter(|inst_desc| inst_desc.is_debuginfo && !show_outputs_lhs) + .and_then(|_| inputs.clone().map(pseudo_imm_from_input).collect()); let printing_debuginfo_as_comment = debuginfo_with_pseudo_imm_inputs.is_some(); let [spv_base_style, string_literal_style, numeric_literal_style] = @@ -4209,9 +4419,9 @@ impl FuncAt<'_, DataInst> { } else { pretty::join_comma_sep( "(", - inputs.iter().zip(operand_names).map(|(&input, name)| { + inputs.zip(operand_names).map(|(input, name)| { // HACK(eddyb) no need to wrap strings in `OpString(...)`. - let printed_input = match pseudo_imm_from_value(input) { + let printed_input = match pseudo_imm_from_input(input) { Some(PseudoImm::Str(s)) => printer.pretty_string_literal(s), _ => input.print(printer), }; @@ -4241,8 +4451,8 @@ impl FuncAt<'_, DataInst> { }; let def_without_name = pretty::Fragment::new([ - def_without_type, - output_type_to_print + def_without_types, + output_type_for_ascription_suffix .map(|ty| printer.pretty_type_ascription_suffix(ty)) .unwrap_or_default(), ]); @@ -4253,11 +4463,42 @@ impl FuncAt<'_, DataInst> { def_without_name, ]); + // NOTE(eddyb) adding a type to a single output on the LHS can only + // be needed when *a different type* was shown via type ascription. 
+ let sole_output_needs_lhs_type = outputs.len() == 1 + && output_type_for_ascription_suffix + .is_some_and(|ty| self.at(outputs[0]).decl().ty != ty); + + let outputs_lhs = show_outputs_lhs.then(|| { + // FIXME(eddyb) `_names` convention not used elsewhere, reconsider? + let output_names = outputs.iter().map(|&output_var| { + self.at(output_var) + .print_name_with_def_parent(printer, Either::Right(self.position)) + }); + + if let [output_var] = outputs[..] + && !sole_output_needs_lhs_type + { + let output_name = output_names.exactly_one().ok().unwrap(); + return AttrsAndDef { + attrs: self.at(output_var).decl().attrs.print(printer), + def_without_name: pretty::Fragment::default(), + } + .insert_name_before_def(output_name); + } + + pretty::join_comma_sep( + "(", + output_names.zip_eq(outputs).map(|(output_name, &output_var)| { + self.at(output_var).decl().print(printer).insert_name_before_def(output_name) + }), + ")", + ) + }); + AttrsAndDef { attrs, def_without_name }.insert_name_before_def( - output_use_to_print_as_lhs - .map(|output_use| { - pretty::Fragment::new([output_use.print_as_def(printer), " = ".into()]) - }) + outputs_lhs + .map(|outputs_lhs| pretty::Fragment::new([outputs_lhs, " = ".into()])) .unwrap_or_default(), ) } diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs index 63e079f2..0e3f17f8 100644 --- a/src/qptr/lift.rs +++ b/src/qptr/lift.rs @@ -1,14 +1,14 @@ //! [`QPtr`](crate::TypeKind::QPtr) lifting (e.g. to SPIR-V). 
use crate::func_at::{FuncAt, FuncAtMut}; -use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp, shapes}; +use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp, const_data, shapes}; use crate::qptr::{QPtrAttr, QPtrOp}; use crate::transform::{InnerInPlaceTransform, InnerTransform, Transformed, Transformer}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, DataInstKind, DeclDef, Diag, DiagLevel, EntityDefs, EntityOrientedDenseMap, Func, - FuncDecl, FxIndexMap, GlobalVar, GlobalVarDecl, Module, Node, NodeKind, Region, Type, TypeDef, - TypeKind, TypeOrConst, Value, Var, VarDecl, scalar, spv, + FuncDecl, FxIndexMap, GlobalVar, GlobalVarDecl, GlobalVarInit, Module, Node, NodeKind, Region, + Type, TypeDef, TypeKind, TypeOrConst, Value, Var, VarDecl, scalar, spv, vector, }; use itertools::Either; use smallvec::SmallVec; @@ -41,20 +41,232 @@ impl<'a> LiftToSpvPtrs<'a> { } pub fn lift_global_var(&self, global_var_decl: &mut GlobalVarDecl) { - match self.spv_ptr_type_and_addr_space_for_global_var(global_var_decl) { - Ok((spv_ptr_type, addr_space)) => { + // HACK(eddyb) only change any fields of `global_var_decl` on success. 
+ let lift_result = self + .spv_pointee_type_and_addr_space_for_global_var(global_var_decl) + .and_then(|(spv_pointee_type, addr_space)| { + let maybe_init = match &mut global_var_decl.def { + DeclDef::Imported(_) => None, + DeclDef::Present(global_var_def_body) => { + global_var_def_body.initializer.as_mut() + } + }; + + let maybe_init_and_lifted_init = maybe_init + .map(|init| { + let lifted_init = self.try_lift_global_var_init(init, spv_pointee_type)?; + Ok((init, lifted_init)) + }) + .transpose()?; + global_var_decl.attrs = self.strip_mem_accesses_attr(global_var_decl.attrs); - global_var_decl.type_of_ptr_to = spv_ptr_type; + global_var_decl.type_of_ptr_to = self.spv_ptr_type(addr_space, spv_pointee_type); global_var_decl.addr_space = addr_space; global_var_decl.shape = None; - } + + if let Some((init, lifted_init)) = maybe_init_and_lifted_init { + *init = lifted_init; + } + + Ok(()) + }); + match lift_result { + Ok(()) => {} Err(LiftError(e)) => { global_var_decl.attrs.push_diag(&self.cx, e); } } - // FIXME(eddyb) if globals have initializers pointing at other globals, - // here is where they might get fixed up, but that usage is illegal so - // likely needs to get legalized on `qptr`s, before here. + } + fn try_lift_global_var_init( + &self, + global_var_init: &GlobalVarInit, + ty: Type, + ) -> Result { + let data = match global_var_init { + &GlobalVarInit::Direct(ct) => return Ok(GlobalVarInit::Direct(ct)), + + // FIXME(eddyb) there is no need for this to clone, but also this + // should be rare (only an error case?). + GlobalVarInit::SpvAggregate { .. } => { + return Ok(global_var_init.clone()); + } + + GlobalVarInit::Data(data) => data, + }; + let layout = match self.layout_of(ty)? { + // FIXME(eddyb) consider bad interactions with "interface blocks"? + TypeLayout::Handle(_) | TypeLayout::HandleArray(..) 
=> { + return Err(LiftError(Diag::bug(["handles should not have initializers".into()]))); + } + TypeLayout::Concrete(layout) => layout, + }; + + // Whether `candidate_layout` is an aggregate (to recurse into). + let is_aggregate = |candidate_layout: &MemTypeLayout| { + matches!( + &self.cx[candidate_layout.original_type].kind, + TypeKind::SpvInst { value_lowering: spv::ValueLowering::Disaggregate(_), .. } + ) + }; + + let mut leaf_values = SmallVec::new(); + let result = layout.deeply_flatten_if(0, &is_aggregate, &mut |leaf_offset, leaf| { + let leaf_offset = u32::try_from(leaf_offset).ok().ok_or_else(|| { + LayoutError(Diag::bug( + [format!("negative layout leaf offset {leaf_offset}").into()], + )) + })?; + + let leaf_size = NonZeroU32::new(leaf.mem_layout.fixed_base.size).ok_or_else(|| { + LayoutError(Diag::bug([ + format!("zero-sized initializer leaf at offset {leaf_offset}, with type `") + .into(), + leaf.original_type.into(), + "`".into(), + ])) + })?; + + // FIXME(eddyb) avoid out-of-bounds panics with malformed layouts + // (and/or guarantee certain invariants in layouts that didn't error). + let mut leaf_parts = data.read(leaf_offset..(leaf_offset + leaf_size.get())); + + let leaf_part = leaf_parts.next().unwrap(); + let is_single_whole_part = leaf_parts.next().is_none() + && match &leaf_part { + const_data::Part::Uninit { .. } | const_data::Part::Bytes(_) => true, + const_data::Part::Symbolic { size, maybe_partial_slice, value: _ } => { + maybe_partial_slice == &(0..size.get()) + } + }; + if !is_single_whole_part { + // FIXME(eddyb) needs a better error (or even partial support?). + return Err(LayoutError(Diag::bug([ + format!("NYI: initializer leaf at offset {leaf_offset}, with type `").into(), + leaf.original_type.into(), + "`, straddles an undef and/or symbolic boundary".into(), + ]))); + } + + let leaf_value = match leaf_part { + const_data::Part::Uninit { .. 
} => self.cx.intern(ConstDef { + attrs: Default::default(), + ty: leaf.original_type, + kind: ConstKind::Undef, + }), + const_data::Part::Bytes(bytes) => { + let mut total_read_scalar_size = 0; + let mut read_next_scalar = |leaf_scalar_type: scalar::Type| { + let byte_len = match leaf_scalar_type { + scalar::Type::Bool => { + self.layout_cache.config.abstract_bool_size_align.0 + } + scalar::Type::SInt(_) + | scalar::Type::UInt(_) + | scalar::Type::Float(_) => { + let bit_width = leaf_scalar_type.bit_width(); + assert_eq!(bit_width % 8, 0); + bit_width / 8 + } + } as usize; + + let mut copied_bytes = [0; 16]; + copied_bytes[..byte_len] + .copy_from_slice(&bytes[total_read_scalar_size..][..byte_len]); + if self.layout_cache.config.is_big_endian { + copied_bytes[..byte_len].reverse(); + } + let bits = u128::from_le_bytes(copied_bytes); + + let leaf_scalar = scalar::Const::try_from_bits(leaf_scalar_type, bits) + .ok_or_else(|| { + // HACK(eddyb) only `bool` should be able to fail this, + // everything else uses whole bytes (enforced above). + assert!(matches!(leaf_scalar_type, scalar::Type::Bool)); + // FIXME(eddyb) needs a better error, esp. for `bool`. + LayoutError(Diag::bug([ + format!( + "initializer leaf at offset {}, with type `", + leaf_offset + (total_read_scalar_size as u32) + ) + .into(), + leaf.original_type.into(), + format!("`, has invalid value {bits}").into(), + ])) + })?; + + total_read_scalar_size += byte_len; + + Ok(leaf_scalar) + }; + + let leaf_const_kind = match self.cx[leaf.original_type].kind { + TypeKind::Scalar(ty) => read_next_scalar(ty)?.into(), + TypeKind::Vector(ty) => { + // HACK(eddyb) buffering elems due to `Result`. 
+ let elems: SmallVec<[_; 4]> = (0..ty.elem_count.get()) + .map(|_| read_next_scalar(ty.elem)) + .collect::>()?; + vector::Const::from_elems(ty, elems).into() + } + _ => { + return Err(LayoutError(Diag::bug([ + format!( + "NYI: initializer leaf at offset {leaf_offset}, with type `" + ) + .into(), + leaf.original_type.into(), + format!("`, made of bytes ({bytes:?})").into(), + ]))); + } + }; + + assert_eq!(total_read_scalar_size, bytes.len()); + + self.cx.intern(ConstDef { + attrs: Default::default(), + ty: leaf.original_type, + kind: leaf_const_kind, + }) + } + const_data::Part::Symbolic { value, .. } => value, + }; + + let expected_ty = leaf.original_type; + let found_ty = self.cx[leaf_value].ty; + if expected_ty != found_ty { + return Err(LayoutError(Diag::bug([ + "initializer leaf type mismatch: expected `".into(), + expected_ty.into(), + "`, found `".into(), + found_ty.into(), + "` typed value `".into(), + leaf_value.into(), + "`".into(), + ]))); + } + + leaf_values.push(leaf_value); + + Ok(()) + }); + result.map_err(|LayoutError(e)| LiftError(e))?; + + let expected_leaf_count = self.cx[layout.original_type].disaggregated_leaf_count(); + let found_leaf_count = leaf_values.len(); + if expected_leaf_count != found_leaf_count { + return Err(LiftError(Diag::bug([format!( + "initializer leaf count mismatch: expected {expected_leaf_count} leaves, \ + found {found_leaf_count} leaves" + ) + .into()]))); + } + + Ok(if is_aggregate(&layout) { + GlobalVarInit::SpvAggregate { ty, leaves: leaf_values } + } else { + assert_eq!(leaf_values.len(), 1); + GlobalVarInit::Direct(leaf_values.pop().unwrap()) + }) } pub fn lift_all_funcs(&self, module: &mut Module, funcs: impl IntoIterator) { @@ -97,7 +309,7 @@ impl<'a> LiftToSpvPtrs<'a> { }) } - fn spv_ptr_type_and_addr_space_for_global_var( + fn spv_pointee_type_and_addr_space_for_global_var( &self, global_var_decl: &GlobalVarDecl, ) -> Result<(Type, AddrSpace), LiftError> { @@ -107,48 +319,25 @@ impl<'a> LiftToSpvPtrs<'a> { let 
shape = global_var_decl.shape.ok_or_else(|| LiftError(Diag::bug(["missing shape".into()])))?; - let (storage_class, pointee_type) = match (global_var_decl.addr_space, shape) { - (AddrSpace::Handles, shapes::GlobalVarShape::Handles { handle, fixed_count }) => { - let (storage_class, handle_type) = match handle { - shapes::Handle::Opaque(ty) => { - if self.pointee_type_for_accesses(mem_accesses)? != ty { - return Err(LiftError(Diag::bug([ - "mismatched opaque handle types in `mem.accesses` vs `shape`" - .into(), - ]))); - } - (wk.UniformConstant, ty) - } - // FIXME(eddyb) validate accesses against `buf` and/or expand - // the type to make sure it has the right size. - shapes::Handle::Buffer(AddrSpace::SpvStorageClass(storage_class), _buf) => { - (storage_class, self.pointee_type_for_accesses(mem_accesses)?) + let pointee_type = self.pointee_type_for_shape_and_accesses(shape, mem_accesses)?; + let storage_class = match (global_var_decl.addr_space, shape) { + (AddrSpace::Handles, shapes::GlobalVarShape::Handles { handle, fixed_count: _ }) => { + match handle { + shapes::Handle::Opaque(_) => wk.UniformConstant, + shapes::Handle::Buffer(AddrSpace::SpvStorageClass(storage_class), _) => { + storage_class } shapes::Handle::Buffer(AddrSpace::Handles, _) => { return Err(LiftError(Diag::bug([ "invalid `AddrSpace::Handles` in `Handle::Buffer`".into(), ]))); } - }; - ( - storage_class, - if fixed_count == Some(NonZeroU32::new(1).unwrap()) { - handle_type - } else { - self.spv_op_type_array(handle_type, fixed_count.map(|c| c.get()), None)? - }, - ) + } } - // FIXME(eddyb) validate accesses against `layout` and/or expand - // the type to make sure it has the right size. 
- ( - AddrSpace::SpvStorageClass(storage_class), - shapes::GlobalVarShape::UntypedData(_layout), - ) => (storage_class, self.pointee_type_for_accesses(mem_accesses)?), ( AddrSpace::SpvStorageClass(storage_class), - shapes::GlobalVarShape::TypedInterface(ty), - ) => (storage_class, ty), + shapes::GlobalVarShape::UntypedData(_) | shapes::GlobalVarShape::TypedInterface(_), + ) => storage_class, ( AddrSpace::Handles, @@ -159,7 +348,7 @@ impl<'a> LiftToSpvPtrs<'a> { } }; let addr_space = AddrSpace::SpvStorageClass(storage_class); - Ok((self.spv_ptr_type(addr_space, pointee_type), addr_space)) + Ok((pointee_type, addr_space)) } /// Returns `Some` iff `ty` is a SPIR-V `OpTypePointer`. @@ -167,7 +356,7 @@ impl<'a> LiftToSpvPtrs<'a> { // FIXME(eddyb) deduplicate with `qptr::lower`. fn as_spv_ptr_type(&self, ty: Type) -> Option<(AddrSpace, Type)> { match &self.cx[ty].kind { - TypeKind::SpvInst { spv_inst, type_and_const_inputs } + TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. } if spv_inst.opcode == self.wk.OpTypePointer => { let sc = match spv_inst.imms[..] 
{ @@ -191,60 +380,130 @@ impl<'a> LiftToSpvPtrs<'a> { AddrSpace::Handles => unreachable!(), AddrSpace::SpvStorageClass(storage_class) => storage_class, }; - self.cx.intern(TypeKind::SpvInst { - spv_inst: spv::Inst { + self.cx.intern( + spv::Inst { opcode: wk.OpTypePointer, imms: [spv::Imm::Short(wk.StorageClass, storage_class)].into_iter().collect(), - }, - type_and_const_inputs: [TypeOrConst::Type(pointee_type)].into_iter().collect(), - }) + } + .into_canonical_type_with( + &self.cx, + [TypeOrConst::Type(pointee_type)].into_iter().collect(), + ), + ) } - fn pointee_type_for_accesses(&self, accesses: &MemAccesses) -> Result { + fn pointee_type_for_shape_and_accesses( + &self, + shape: shapes::GlobalVarShape, + accesses: &MemAccesses, + ) -> Result { let wk = self.wk; - match accesses { - &MemAccesses::Handles(shapes::Handle::Opaque(ty)) => Ok(ty), - MemAccesses::Handles(shapes::Handle::Buffer(_, data_happ)) => { - let attr_spv_decorate_block = Attr::SpvAnnotation(spv::Inst { - opcode: wk.OpDecorate, - imms: [spv::Imm::Short(wk.Decoration, wk.Block)].into_iter().collect(), - }); - match &data_happ.kind { - DataHappKind::Dead => self.spv_op_type_struct([], [attr_spv_decorate_block]), - DataHappKind::Disjoint(fields) => self.spv_op_type_struct( - fields.iter().map(|(&field_offset, field_happ)| { - Ok((field_offset, self.pointee_type_for_data_happ(field_happ)?)) - }), - [attr_spv_decorate_block], - ), - DataHappKind::StrictlyTyped(_) - | DataHappKind::Direct(_) - | DataHappKind::Repeated { .. 
} => self.spv_op_type_struct( - [Ok((0, self.pointee_type_for_data_happ(data_happ)?))], - [attr_spv_decorate_block], - ), + match (shape, accesses) { + ( + shapes::GlobalVarShape::Handles { handle, fixed_count }, + MemAccesses::Handles(handle_accesses), + ) => { + let handle_type = match (handle, handle_accesses) { + (shapes::Handle::Opaque(ty), &shapes::Handle::Opaque(access_ty)) => { + if access_ty != ty { + return Err(LiftError(Diag::bug([ + "mismatched opaque handle types in `mem.accesses` vs `shape`" + .into(), + ]))); + } + ty + } + (shapes::Handle::Buffer(_, buf), shapes::Handle::Buffer(_, data_happ)) => { + let max_size_allowed_by_shape = + buf.dyn_unit_stride.is_none().then_some(buf.fixed_base.size); + let attr_spv_decorate_block = Attr::SpvAnnotation(spv::Inst { + opcode: wk.OpDecorate, + imms: [spv::Imm::Short(wk.Decoration, wk.Block)].into_iter().collect(), + }); + match &data_happ.kind { + DataHappKind::Dead => { + self.spv_op_type_struct([], [attr_spv_decorate_block])? + } + DataHappKind::Disjoint(fields) => self.spv_op_type_struct( + fields.iter().map(|(&field_offset, field_happ)| { + Ok(( + field_offset, + self.pointee_type_for_data_happ( + field_happ, + max_size_allowed_by_shape + .and_then(|max| max.checked_sub(field_offset)), + )?, + )) + }), + [attr_spv_decorate_block], + )?, + DataHappKind::StrictlyTyped(_) + | DataHappKind::Direct(_) + | DataHappKind::Repeated { .. 
} => self.spv_op_type_struct( + [Ok(( + 0, + self.pointee_type_for_data_happ( + data_happ, + max_size_allowed_by_shape, + )?, + ))], + [attr_spv_decorate_block], + )?, + } + } + _ => { + return Err(LiftError(Diag::bug([ + "mismatched `mem.accesses` and `shape`".into(), + ]))); + } + }; + if fixed_count == Some(NonZeroU32::new(1).unwrap()) { + Ok(handle_type) + } else { + self.spv_op_type_array(handle_type, fixed_count.map(|c| c.get()), None) } } - MemAccesses::Data(happ) => self.pointee_type_for_data_happ(happ), + (shapes::GlobalVarShape::UntypedData(layout), MemAccesses::Data(happ)) => { + self.pointee_type_for_data_happ(happ, Some(layout.size)) + } + + // FIXME(eddyb) validate against accesses? (maybe in `mem::analyze`?) + (shapes::GlobalVarShape::TypedInterface(ty), _) => Ok(ty), + + _ => Err(LiftError(Diag::bug(["mismatched `mem.accesses` and `shape`".into()]))), } } - fn pointee_type_for_data_happ(&self, happ: &DataHapp) -> Result { + fn pointee_type_for_data_happ( + &self, + happ: &DataHapp, + // HACK(eddyb) `mem::analyze` should be merging shape and accesses itself. + // FIXME(eddyb) this isn't actually used to validate anything, only as + // a fallback for now (i.e. to avoid spurious `OpTypeRuntimeArray`s). 
+ max_size_allowed_by_shape: Option, + ) -> Result { match &happ.kind { DataHappKind::Dead => self.spv_op_type_struct([], []), &DataHappKind::StrictlyTyped(ty) | &DataHappKind::Direct(ty) => Ok(ty), DataHappKind::Disjoint(fields) => self.spv_op_type_struct( fields.iter().map(|(&field_offset, field_happ)| { - Ok((field_offset, self.pointee_type_for_data_happ(field_happ)?)) + Ok(( + field_offset, + self.pointee_type_for_data_happ( + field_happ, + max_size_allowed_by_shape.and_then(|max| max.checked_sub(field_offset)), + )?, + )) }), [], ), DataHappKind::Repeated { element, stride } => { - let element_type = self.pointee_type_for_data_happ(element)?; + let element_type = self.pointee_type_for_data_happ(element, None)?; let fixed_len = happ .max_size + .or(max_size_allowed_by_shape) .map(|size| { if !size.is_multiple_of(stride.get()) { return Err(LiftError(Diag::bug([format!( @@ -288,15 +547,18 @@ impl<'a> LiftToSpvPtrs<'a> { Ok(self.cx.intern(TypeDef { attrs: stride_attrs.unwrap_or_default(), - kind: TypeKind::SpvInst { - spv_inst: spv_opcode.into(), - type_and_const_inputs: [TypeOrConst::Type(element_type)] - .into_iter() - .chain(fixed_len.map(|len| { + kind: spv::Inst::from(spv_opcode).into_canonical_type_with( + &self.cx, + [ + Some(TypeOrConst::Type(element_type)), + fixed_len.map(|len| { TypeOrConst::Const(self.cx.intern(scalar::Const::from_u32(len))) - })) - .collect(), - }, + }), + ] + .into_iter() + .flatten() + .collect(), + ), })) } @@ -328,7 +590,8 @@ impl<'a> LiftToSpvPtrs<'a> { attrs.attrs.extend(extra_attrs); Ok(self.cx.intern(TypeDef { attrs: self.cx.intern(attrs), - kind: TypeKind::SpvInst { spv_inst: wk.OpTypeStruct.into(), type_and_const_inputs }, + kind: spv::Inst::from(wk.OpTypeStruct) + .into_canonical_type_with(&self.cx, type_and_const_inputs), })) } @@ -456,19 +719,25 @@ impl LiftToSpvPtrInstsInFunc<'_> { return Ok(Transformed::Unchanged); } - DataInstKind::Mem(MemOp::FuncLocalVar(_mem_layout)) => { + 
&DataInstKind::Mem(MemOp::FuncLocalVar(mem_layout)) => { let output_mem_accesses = self .lifter .require_mem_accesses_attr(func.at(data_inst_def.outputs[0]).decl().attrs)?; - // FIXME(eddyb) validate against `mem_layout`! - let pointee_type = self.lifter.pointee_type_for_accesses(output_mem_accesses)?; + // HACK(eddyb) reusing the same functionality meant for globals. + let pointee_type = self.lifter.pointee_type_for_shape_and_accesses( + shapes::GlobalVarShape::UntypedData(mem_layout), + output_mem_accesses, + )?; let mut data_inst_def = data_inst_def.clone(); - data_inst_def.kind = DataInstKind::SpvInst(spv::Inst { - opcode: wk.OpVariable, - imms: [spv::Imm::Short(wk.StorageClass, wk.Function)].into_iter().collect(), - }); + data_inst_def.kind = DataInstKind::SpvInst( + spv::Inst { + opcode: wk.OpVariable, + imms: [spv::Imm::Short(wk.StorageClass, wk.Function)].into_iter().collect(), + }, + spv::InstLowering::default(), + ); let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); output_decl.attrs = self.lifter.strip_mem_accesses_attr(output_decl.attrs); output_decl.ty = @@ -496,7 +765,8 @@ impl LiftToSpvPtrInstsInFunc<'_> { }; let mut data_inst_def = data_inst_def.clone(); - data_inst_def.kind = DataInstKind::SpvInst(wk.OpAccessChain.into()); + data_inst_def.kind = + DataInstKind::SpvInst(wk.OpAccessChain.into(), spv::InstLowering::default()); let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); output_decl.attrs = self.lifter.strip_mem_accesses_attr(output_decl.attrs); output_decl.ty = self.lifter.spv_ptr_type(addr_space, handle_type); @@ -565,10 +835,15 @@ impl LiftToSpvPtrInstsInFunc<'_> { }; DataInstDef { - kind: DataInstKind::SpvInst(spv::Inst { - opcode: wk.OpArrayLength, - imms: [spv::Imm::Short(wk.LiteralInteger, field_idx)].into_iter().collect(), - }), + kind: DataInstKind::SpvInst( + spv::Inst { + opcode: wk.OpArrayLength, + imms: [spv::Imm::Short(wk.LiteralInteger, field_idx)] + .into_iter() + 
.collect(), + }, + spv::InstLowering::default(), + ), ..data_inst_def.clone() } } @@ -688,7 +963,8 @@ impl LiftToSpvPtrInstsInFunc<'_> { } let mut data_inst_def = data_inst_def; - data_inst_def.kind = DataInstKind::SpvInst(wk.OpAccessChain.into()); + data_inst_def.kind = + DataInstKind::SpvInst(wk.OpAccessChain.into(), spv::InstLowering::default()); data_inst_def.inputs = [array_ptr, array_index].into_iter().collect(); let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); output_decl.attrs = self.lifter.strip_mem_accesses_attr(output_decl.attrs); @@ -772,7 +1048,9 @@ impl LiftToSpvPtrInstsInFunc<'_> { return Ok(Transformed::Unchanged); } - DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => { + DataInstKind::SpvInst(_, lowering) | DataInstKind::SpvExtInst { lowering, .. } => { + let lowering_disaggregated_output = lowering.disaggregated_output; + let mut changed_data_inst_def = None; for attr in &cx[data_inst_def.attrs].attrs { @@ -844,6 +1122,9 @@ impl LiftToSpvPtrInstsInFunc<'_> { data_inst_def.inputs[input_idx] = adjusted_ptr; } QPtrAttr::FromSpvPtrOutput { addr_space, pointee } => { + assert!(lowering_disaggregated_output.is_none()); + + assert_eq!(data_inst_def.outputs.len(), 1); let output_decl = func_at_data_inst.reborrow().at(data_inst_def.outputs[0]).decl(); output_decl.ty = self.lifter.spv_ptr_type(addr_space.0, pointee.0); @@ -893,7 +1174,10 @@ impl LiftToSpvPtrInstsInFunc<'_> { func.reborrow(), DataInstDef { attrs: Default::default(), - kind: DataInstKind::SpvInst(wk.OpAccessChain.into()), + kind: DataInstKind::SpvInst( + wk.OpAccessChain.into(), + spv::InstLowering::default(), + ), inputs: access_chain_inputs, child_regions: [].into_iter().collect(), outputs: [].into_iter().collect(), diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs index 00f35a97..a3ef0301 100644 --- a/src/qptr/lower.rs +++ b/src/qptr/lower.rs @@ -1,15 +1,16 @@ //! [`QPtr`](crate::TypeKind::QPtr) lowering (e.g. from SPIR-V). 
use crate::func_at::FuncAtMut; -use crate::mem::{MemOp, shapes}; +use crate::mem::{MemOp, const_data, shapes}; use crate::qptr::{QPtrAttr, QPtrOp}; use crate::transform::{InnerInPlaceTransform, Transformed, Transformer}; use crate::{ - AddrSpace, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, - DataInstKind, DeclDef, Diag, EntityOrientedDenseMap, FuncDecl, GlobalVarDecl, Node, NodeDef, - NodeKind, OrdAssertEq, Region, Type, TypeKind, TypeOrConst, Value, Var, VarDecl, VarKind, spv, + AddrSpace, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, + DataInstKind, DeclDef, Diag, EntityOrientedDenseMap, FuncDecl, GlobalVarDecl, GlobalVarInit, + Node, NodeDef, NodeKind, OrdAssertEq, Region, Type, TypeKind, TypeOrConst, Value, Var, VarDecl, + VarKind, scalar, spv, }; -use itertools::{Either, Itertools as _}; +use itertools::Either; use rustc_hash::FxHashMap; use smallvec::SmallVec; use std::cell::Cell; @@ -53,7 +54,29 @@ impl<'a> LowerFromSpvPtrs<'a> { shapes::Handle::Buffer(addr_space, buf.mem_layout) } }; - let mut shape_result = self.layout_of(pointee_type).and_then(|layout| { + let addr_space_requires_typed_interface = match global_var_decl.addr_space { + // These SPIR-V Storage Classes are defined to require + // exact types, either because they're `BuiltIn`s, or + // for "interface matching" between pipeline stages. + AddrSpace::SpvStorageClass(sc) => [ + wk.Input, + wk.Output, + wk.IncomingRayPayloadKHR, + wk.IncomingCallableDataKHR, + wk.HitAttributeKHR, + wk.RayPayloadKHR, + wk.CallableDataKHR, + ] + .contains(&sc), + + AddrSpace::Handles => false, + }; + let layout_result = self.layout_of(pointee_type); + let concrete_mem_layout = layout_result.as_ref().ok().and_then(|layout| match layout { + TypeLayout::Handle(_) | TypeLayout::HandleArray(..) 
=> None, + TypeLayout::Concrete(concrete) => Some(concrete.mem_layout), + }); + let mut shape_result = layout_result.and_then(|layout| { Ok(match layout { TypeLayout::Handle(handle) => shapes::GlobalVarShape::Handles { handle: handle_layout_to_handle(handle), @@ -71,26 +94,10 @@ impl<'a> LowerFromSpvPtrs<'a> { "`".into(), ]))); } - match global_var_decl.addr_space { - // These SPIR-V Storage Classes are defined to require - // exact types, either because they're `BuiltIn`s, or - // for "interface matching" between pipeline stages. - AddrSpace::SpvStorageClass(sc) - if [ - wk.Input, - wk.Output, - wk.IncomingRayPayloadKHR, - wk.IncomingCallableDataKHR, - wk.HitAttributeKHR, - wk.RayPayloadKHR, - wk.CallableDataKHR, - ] - .contains(&sc) => - { - shapes::GlobalVarShape::TypedInterface(pointee_type) - } - - _ => shapes::GlobalVarShape::UntypedData(concrete.mem_layout.fixed_base), + if addr_space_requires_typed_interface { + shapes::GlobalVarShape::TypedInterface(pointee_type) + } else { + shapes::GlobalVarShape::UntypedData(concrete.mem_layout.fixed_base) } } }) @@ -126,19 +133,240 @@ impl<'a> LowerFromSpvPtrs<'a> { global_var_decl.addr_space = AddrSpace::Handles; } } + + // HACK(eddyb) the interactions with `shape_result` are a bit too ad-hoc, + // but they help testing for now (until Rust-GPU is more accurate). + if let DeclDef::Present(global_var_def_body) = &mut global_var_decl.def { + let lowered_init = global_var_def_body.initializer.as_ref().and_then(|init| { + self.try_lower_global_var_init(init) + .map_err(|LowerError(e)| { + if shape_result.is_ok() { + shape_result = Err(LowerError(e)); + } else { + global_var_decl.attrs.push_diag(&self.cx, e); + } + }) + .ok() + }); + if let Some(init) = lowered_init { + // HACK(eddyb) recover the shape from the initializer. 
+ match (&shape_result, concrete_mem_layout, &init) { + (Err(_), Some(mem_layout), GlobalVarInit::Data(data)) + if !addr_space_requires_typed_interface + && mem_layout.dyn_unit_stride.is_some() + && mem_layout.fixed_base.size <= data.size() => + { + let mut fixed_layout = mem_layout.fixed_base; + fixed_layout.size = data.size(); + shape_result = Ok(shapes::GlobalVarShape::UntypedData(fixed_layout)); + } + _ => {} + } + if shape_result.is_ok() { + global_var_def_body.initializer = Some(init); + } + } + } + + // HACK(eddyb) in case anything goes wrong, we want to keep `OpTypePointer`. + let original_type_of_ptr_to = global_var_decl.type_of_ptr_to; + + EraseSpvPtrs { lowerer: self }.in_place_transform_global_var_decl(global_var_decl); + match shape_result { Ok(shape) => { global_var_decl.shape = Some(shape); - - // HACK(eddyb) this should handle shallow `QPtr` in the initializer, but - // typed initializers should be replaced with miri/linker-style ones. - EraseSpvPtrs { lowerer: self }.in_place_transform_global_var_decl(global_var_decl); } Err(LowerError(e)) => { global_var_decl.attrs.push_diag(&self.cx, e); + + // HACK(eddyb) effectively undoes `EraseSpvPtrs` for one field. + global_var_decl.type_of_ptr_to = original_type_of_ptr_to; } } } + fn try_lower_global_var_init( + &self, + global_var_init: &GlobalVarInit, + ) -> Result { + let (aggregate_type, aggregate_leaves) = match global_var_init { + &GlobalVarInit::Direct(ct) => return Ok(GlobalVarInit::Direct(ct)), + + GlobalVarInit::Data(_) => { + return Err(LowerError(Diag::bug([ + "unexpected `GlobalVarInit::Data` (already lowered?)".into(), + ]))); + } + + GlobalVarInit::SpvAggregate { ty, leaves } => (*ty, leaves), + }; + let aggregate_layout = match self.layout_of(aggregate_type)? { + // FIXME(eddyb) consider bad interactions with "interface blocks"? + TypeLayout::Handle(_) | TypeLayout::HandleArray(..) 
=> { + return Err(LowerError(Diag::bug(["handles are not aggregates".into()]))); + } + TypeLayout::Concrete(layout) => layout, + }; + + let mut leaf_values = aggregate_leaves.iter().copied(); + let mut data = const_data::ConstData::new(aggregate_layout.mem_layout.fixed_base.size); + let result = aggregate_layout.deeply_flatten_if( + 0, + // Whether `candidate_layout` is an aggregate (to recurse into). + &|candidate_layout| { + matches!( + &self.cx[candidate_layout.original_type].kind, + TypeKind::SpvInst { value_lowering: spv::ValueLowering::Disaggregate(_), .. } + ) + }, + &mut |leaf_offset, leaf| { + let leaf_offset = u32::try_from(leaf_offset).ok().ok_or_else(|| { + LayoutError(Diag::bug([format!( + "negative initializer leaf offset {leaf_offset}" + ) + .into()])) + })?; + + let leaf_value = leaf_values.next().ok_or_else(|| { + LayoutError(Diag::bug(["fewer initializer leaves than layout".into()])) + })?; + let leaf_value_def = &self.cx[leaf_value]; + + // FIXME(eddyb) should this compare only size/shape? + let expected_ty = leaf.original_type; + let found_ty = leaf_value_def.ty; + if expected_ty != found_ty { + return Err(LayoutError(Diag::bug([ + "initializer leaf type mismatch: expected `".into(), + expected_ty.into(), + "`, found `".into(), + found_ty.into(), + "` typed value `".into(), + leaf_value.into(), + "`".into(), + ]))); + } + + let leaf_size = + NonZeroU32::new(leaf.mem_layout.fixed_base.size).ok_or_else(|| { + LayoutError(Diag::bug([ + format!( + "zero-sized initializer leaf at offset {leaf_offset}, with value `" + ) + .into(), + leaf_value.into(), + "`".into(), + ])) + })?; + + self.try_write_to_const_data_at(&mut data, leaf_offset, leaf_size, leaf_value) + }, + ); + result.map_err(|LayoutError(e)| LowerError(e))?; + + if leaf_values.next().is_some() { + return Err(LowerError(Diag::bug(["more initializer leaves than layout".into()]))); + } + + Ok(GlobalVarInit::Data(data)) + } + // FIXME(eddyb) move this to a more general `ConstData` helper. 
+ fn try_write_to_const_data_at( + &self, + data: &mut const_data::ConstData, + offset: u32, + size: NonZeroU32, + ct: Const, + ) -> Result<(), LayoutError> { + // HACK(eddyb) strip bitcasts as long as the input and output size match. + let (ct, ct_def) = { + let (mut ct, mut ct_def) = (ct, &self.cx[ct]); + while let ConstKind::SpvInst { spv_inst_and_const_inputs } = &ct_def.kind { + let (spv_inst, const_inputs) = &**spv_inst_and_const_inputs; + + if let (&[input], &[spv::Imm::Short(_, op)]) = + (&const_inputs[..], &spv_inst.imms[..]) + && spv_inst.opcode == self.wk.OpSpecConstantOp + && op == u32::from(self.wk.OpBitcast.as_u16()) + { + let input_def = &self.cx[input]; + let input_size = + self.layout_cache.layout_of(input_def.ty).ok().and_then(|layout| { + match layout { + TypeLayout::Concrete(layout) + if layout.mem_layout.dyn_unit_stride.is_none() => + { + NonZeroU32::new(layout.mem_layout.fixed_base.size) + } + _ => None, + } + }); + if input_size == Some(size) { + (ct, ct_def) = (input, input_def); + continue; + } + } + break; + } + (ct, ct_def) + }; + + let err_to_diag = |err| { + let const_data::PartialSymbolicOverlap { offsets } = err; + LayoutError(Diag::bug([ + format!("initializer leaf at offset {offset}, with value `").into(), + ct.into(), + format!("`, overlaps with leaf at offsets {offsets:?} (invalid layout?)").into(), + ])) + }; + + let mut total_written_range = offset..offset; + + // HACK(eddyb) helper shared by `Scalar` and `Vector`. + let mut write_next_scalar = |leaf_scalar: scalar::Const| { + // FIXME(eddyb) try harder to avoid panicking due to out-of-bounds + // offsets caused by e.g. malformed layouts (and/or guarantee certain + // invariants for types that didn't error during layout computation). 
+ let written_range = data + .write_scalar(total_written_range.end, leaf_scalar, self.layout_cache.config) + .map_err(err_to_diag)?; + total_written_range.end = written_range.end; + Ok(()) + }; + + match &ct_def.kind { + // HACK(eddyb) Rust-GPU still uses `undef` + // w/ custom attributes for some error cases, + // so care must be taken until that's deemed + // incorrect (if at all). + // FIXME(eddyb) handle this elsewhere, too. + ConstKind::Undef if ct_def.attrs == AttrSet::default() => { + return Ok(()); + } + + &ConstKind::Scalar(leaf_scalar) => { + write_next_scalar(leaf_scalar)?; + } + + ConstKind::Vector(leaf_vector) => { + for elem in leaf_vector.elems() { + write_next_scalar(elem)?; + } + } + + // FIXME(eddyb) try harder to avoid panicking due to out-of-bounds + // offsets caused by e.g. malformed layouts (and/or guarantee certain + // invariants for types that didn't error during layout computation). + _ => { + data.write_symbolic(offset, size, ct).map_err(err_to_diag)?; + total_written_range.end += size.get(); + } + } + + assert_eq!(total_written_range, offset..(offset + size.get())); + + Ok(()) + } pub fn lower_func(&self, func_decl: &mut FuncDecl) { // HACK(eddyb) two-step to avoid having to record the original types @@ -165,7 +393,7 @@ impl<'a> LowerFromSpvPtrs<'a> { // (!!! may cause bad interactions with storage class inference `Generic` abuse) fn as_spv_ptr_type(&self, ty: Type) -> Option<(AddrSpace, Type)> { match &self.cx[ty].kind { - TypeKind::SpvInst { spv_inst, type_and_const_inputs } + TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. } if spv_inst.opcode == self.wk.OpTypePointer => { let sc = match spv_inst.imms[..] 
{ @@ -421,17 +649,22 @@ impl LowerFromSpvPtrInstsInFunc<'_> { let attrs = data_inst_def.attrs; - let spv_inst = match &data_inst_def.kind { - DataInstKind::SpvInst(spv_inst) => spv_inst, + let (spv_inst, spv_inst_lowering) = match &data_inst_def.kind { + DataInstKind::SpvInst(spv_inst, lowering) => (spv_inst, lowering), _ => return Ok(Transformed::Unchanged), }; // FIXME(eddyb) wasteful clone? (needed due to borrowing issues) let outputs = data_inst_def.outputs.clone(); + // HACK(eddyb) this is for easy bailing/asserting. + let disaggregated_output_or_inputs_during_lowering = + spv_inst_lowering.disaggregated_output.is_some() + || !spv_inst_lowering.disaggregated_inputs.is_empty(); + // Flatten `QPtrOp::Offset`s behind `ptr` into a base pointer and offset. let flatten_offsets = |mut ptr| { - let mut offset = None::; + let mut offset = 0; loop { (ptr, offset) = if let Value::Var(ptr) = ptr && let VarKind::NodeOutput { node: ptr_inst, output_idx: 0 } = @@ -441,9 +674,9 @@ impl LowerFromSpvPtrInstsInFunc<'_> { inputs, .. } = func.at(ptr_inst).def() - && let Some(new_offset) = ptr_offset.checked_add(offset.map_or(0, |o| o.get())) + && let Some(new_offset) = ptr_offset.checked_add(offset) { - (inputs[0], NonZeroI32::new(new_offset)) + (inputs[0], new_offset) } else { break; }; @@ -451,12 +684,31 @@ impl LowerFromSpvPtrInstsInFunc<'_> { (ptr, offset) }; + // NOTE(eddyb) the ordering of some checks below is not purely aesthetic, + // if the types are invalid there could e.g. be disaggregation where it + // should never otherwise appear, so type checks should precede them. + let replacement_kind_and_inputs = if spv_inst.opcode == wk.OpVariable { - assert!(data_inst_def.inputs.len() <= 1); + // HACK(eddyb) only needed because of potentially invalid SPIR-V. 
+ let output_type = spv_inst_lowering + .disaggregated_output + .unwrap_or_else(|| func.at(outputs[0]).decl().ty); let (_, var_data_type) = - self.lowerer.as_spv_ptr_type(func.at(outputs[0]).decl().ty).ok_or_else(|| { + self.lowerer.as_spv_ptr_type(output_type).ok_or_else(|| { LowerError(Diag::bug(["output type not an `OpTypePointer`".into()])) })?; + + assert!(spv_inst_lowering.disaggregated_output.is_none()); + + // FIXME(eddyb) this can be happen due to the optional initializer. + // FIXME(eddyb) lower the initializer to store(s) just after variables. + if !spv_inst_lowering.disaggregated_inputs.is_empty() { + return Ok(Transformed::Unchanged); + } + + assert_eq!(outputs.len(), 1); + assert!(data_inst_def.inputs.len() <= 1); + match self.lowerer.layout_of(var_data_type)? { TypeLayout::Concrete(concrete) if concrete.mem_layout.dyn_unit_stride.is_none() => { ( @@ -466,32 +718,15 @@ impl LowerFromSpvPtrInstsInFunc<'_> { } _ => return Ok(Transformed::Unchanged), } - } else if spv_inst.opcode == wk.OpLoad { - // FIXME(eddyb) support memory operands somehow. - if !spv_inst.imms.is_empty() { - return Ok(Transformed::Unchanged); - } - assert_eq!(data_inst_def.inputs.len(), 1); - - let ptr = data_inst_def.inputs[0]; - - let (ptr, offset) = flatten_offsets(ptr); - - (MemOp::Load { offset }.into(), [ptr].into_iter().collect()) - } else if spv_inst.opcode == wk.OpStore { - // FIXME(eddyb) support memory operands somehow. 
- if !spv_inst.imms.is_empty() { - return Ok(Transformed::Unchanged); + } else if spv_inst.opcode == wk.OpArrayLength { + if disaggregated_output_or_inputs_during_lowering { + return Err(LowerError(Diag::bug([format!( + "unexpected aggregate types in `{}`", + spv_inst.opcode.name() + ) + .into()]))); } - assert_eq!(data_inst_def.inputs.len(), 2); - let ptr = data_inst_def.inputs[0]; - let value = data_inst_def.inputs[1]; - - let (ptr, offset) = flatten_offsets(ptr); - - (MemOp::Store { offset }.into(), [ptr, value].into_iter().collect()) - } else if spv_inst.opcode == wk.OpArrayLength { let field_idx = match spv_inst.imms[..] { [spv::Imm::Short(_, field_idx)] => field_idx, _ => unreachable!(), @@ -561,6 +796,14 @@ impl LowerFromSpvPtrInstsInFunc<'_> { ] .contains(&spv_inst.opcode) { + if disaggregated_output_or_inputs_during_lowering { + return Err(LowerError(Diag::bug([format!( + "unexpected aggregate types in `{}`", + spv_inst.opcode.name() + ) + .into()]))); + } + // FIXME(eddyb) avoid erasing the "inbounds" qualifier. let base_ptr = data_inst_def.inputs[0]; let (_, base_pointee_type) = @@ -572,11 +815,12 @@ impl LowerFromSpvPtrInstsInFunc<'_> { // a `OpTypeRuntimeArray`, with the original type as the element type. let access_chain_base_layout = if [wk.OpPtrAccessChain, wk.OpInBoundsPtrAccessChain].contains(&spv_inst.opcode) { - self.lowerer.layout_of(cx.intern(TypeKind::SpvInst { - spv_inst: wk.OpTypeRuntimeArray.into(), - type_and_const_inputs: + self.lowerer.layout_of(cx.intern( + spv::Inst::from(wk.OpTypeRuntimeArray).into_canonical_type_with( + cx, [TypeOrConst::Type(base_pointee_type)].into_iter().collect(), - }))? + ), + ))? } else { self.lowerer.layout_of(base_pointee_type)? 
}; @@ -590,9 +834,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { steps.first_mut() { let (ptr_base_ptr, ptr_offset) = flatten_offsets(ptr); - if let Some(new_first_offset) = - first_offset.checked_add(ptr_offset.map_or(0, |o| o.get())) - { + if let Some(new_first_offset) = first_offset.checked_add(ptr_offset) { ptr = ptr_base_ptr; *first_offset = new_first_offset; } @@ -610,7 +852,9 @@ impl LowerFromSpvPtrInstsInFunc<'_> { let step_data_inst = func.nodes.define( cx, DataInstDef { - attrs: Default::default(), + // FIXME(eddyb) filter attributes into debuginfo and + // semantic, and understand the semantic ones. + attrs, kind, inputs, child_regions: [].into_iter().collect(), @@ -653,7 +897,454 @@ impl LowerFromSpvPtrInstsInFunc<'_> { ptr = Value::Var(step_output_var); } final_step.into_data_inst_kind_and_inputs(ptr) + } else if [wk.OpLoad, wk.OpStore].contains(&spv_inst.opcode) { + let ptr = data_inst_def.inputs[0]; + + // HACK(eddyb) only needed because of potentially invalid SPIR-V. + let type_of_ptr = match &spv_inst_lowering.disaggregated_inputs[..] { + [(range, _), ..] 
if range.start == 0 => None, + _ => Some(func.at(ptr).type_of(cx)), + }; + let (_, pointee_type) = type_of_ptr + .and_then(|type_of_ptr| self.lowerer.as_spv_ptr_type(type_of_ptr)) + .ok_or_else(|| { + LowerError(Diag::bug(["pointer input not an `OpTypePointer`".into()])) + })?; + + #[derive(Copy, Clone)] + enum Access { + Load { output: Var }, + Store(Value), + } + + impl Access { + fn to_data_inst_def(self, attrs: AttrSet, ptr: Value, offset: i32) -> DataInstDef { + let offset = NonZeroI32::new(offset); + match self { + Access::Load { output } => DataInstDef { + attrs, + kind: MemOp::Load { offset }.into(), + inputs: [ptr].into_iter().collect(), + child_regions: [].into_iter().collect(), + outputs: [output].into_iter().collect(), + }, + Access::Store(value) => DataInstDef { + attrs, + kind: MemOp::Store { offset }.into(), + inputs: [ptr, value].into_iter().collect(), + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + }, + } + } + } + + enum Accesses> { + Single(Access), + AggregateLeaves { aggregate_type: Type, leaf_accesses: LLA }, + } + + let accesses = if spv_inst.opcode == wk.OpLoad { + assert!(spv_inst_lowering.disaggregated_inputs.is_empty()); + assert_eq!(data_inst_def.inputs.len(), 1); + + match spv_inst_lowering.disaggregated_output { + None => Accesses::Single(Access::Load { output: outputs[0] }), + Some(aggregate_type) => Accesses::AggregateLeaves { + aggregate_type, + leaf_accesses: Either::Left( + outputs.iter().map(|&output| Access::Load { output }), + ), + }, + } + } else { + assert!(spv_inst_lowering.disaggregated_output.is_none()); + + match spv_inst_lowering.disaggregated_inputs[..] 
{ + [] => { + assert_eq!(data_inst_def.inputs.len(), 2); + + Accesses::Single(Access::Store(data_inst_def.inputs[1])) + } + [(ref range, aggregate_type)] => { + assert_eq!(*range, 1..u32::try_from(data_inst_def.inputs.len()).unwrap()); + + Accesses::AggregateLeaves { + aggregate_type, + leaf_accesses: Either::Right( + data_inst_def.inputs[1..].iter().map(|&v| Access::Store(v)), + ), + } + } + _ => unreachable!(), + } + }; + + let type_of_access = |access| match access { + Access::Load { output } => func.at(output).decl().ty, + Access::Store(value) => func.at(value).type_of(cx), + }; + + let original_access_type = match accesses { + Accesses::Single(access) => type_of_access(access), + Accesses::AggregateLeaves { aggregate_type, .. } => aggregate_type, + }; + + if pointee_type != original_access_type { + return Err(LowerError(Diag::bug([ + "access type different from pointee type".into() + ]))); + } + + let (ptr, base_offset) = flatten_offsets(ptr); + + // FIXME(eddyb) support memory operands somehow. + if !spv_inst.imms.is_empty() { + return Ok(Transformed::Unchanged); + } + + // FIXME(eddyb) consider skipping `undef` leaf stores (and even + // treating a non-aggregate `undef` store as a single leaf), + // but that might be too much of an "implicit optimization" here. + match accesses { + Accesses::Single(access) => { + return Ok(Transformed::Changed(access.to_data_inst_def( + attrs, + ptr, + base_offset, + ))); + } + + // If this is an aggregate `OpLoad`/`OpStore`, we should generate + // one instruction per leaf, instead. + Accesses::AggregateLeaves { aggregate_type: _, mut leaf_accesses } => { + // FIXME(eddyb) this may need to automatically generate an + // intermediary `QPtrOp::BufferData` when accessing buffers. + let mem_data_layout = match self.lowerer.layout_of(pointee_type)? 
{ + TypeLayout::Concrete(mem) => mem, + _ => { + return Err(LowerError(Diag::bug([ + "by-value aggregate type without memory layout: ".into(), + pointee_type.into(), + ]))); + } + }; + + // HACK(eddyb) we have to buffer the details of the new + // instructions because we're iterating over the original + // one, and can't allocate the new `DataInst`s as we go. + let mut leaf_accesses_with_offsets = SmallVec::<[_; 4]>::new(); + mem_data_layout + .deeply_flatten_if( + base_offset, + // Whether `candidate_layout` is an aggregate (to recurse into). + &|candidate_layout| matches!( + &cx[candidate_layout.original_type].kind, + TypeKind::SpvInst { value_lowering: spv::ValueLowering::Disaggregate(_), .. } + ), + &mut |leaf_offset, leaf| { + let leaf_access = leaf_accesses.next().ok_or_else(|| { + LayoutError(Diag::bug([ + "`spv::lower` and `mem::layout` disagree on aggregate leaves of ".into(), + pointee_type.into(), + ])) + })?; + let leaf_type = type_of_access(leaf_access); + if leaf_type != leaf.original_type { + return Err(LayoutError(Diag::bug([ + "aggregate leaf mismatch: `".into(), + leaf_type.into(), + "` vs `".into(), + leaf.original_type.into(), + "`".into() + ]))); + } + leaf_accesses_with_offsets.push((leaf_access, leaf_offset)); + Ok(()) + }, + ) + .map_err(|LayoutError(err)| LowerError(err))?; + + if leaf_accesses.next().is_some() { + return Err(LowerError(Diag::bug([ + "`spv::lower` and `mem::layout` disagree on aggregate leaves of " + .into(), + pointee_type.into(), + ]))); + } + + let mut func = func_at_data_inst.reborrow().at(()); + + // This is the point of no return: we're inserting several + // new instructions, and removing the original one entirely. + for (leaf_access, leaf_offset) in leaf_accesses_with_offsets { + // FIXME(eddyb) filter attributes into debuginfo and + // semantic, and understand the semantic ones. 
+ let leaf_attrs = attrs; + + let leaf_data_inst = func.nodes.define( + cx, + leaf_access.to_data_inst_def(leaf_attrs, ptr, leaf_offset).into(), + ); + + // HACK(eddyb) attach any output vars to the new node. + for (output_idx, &output_var) in + func.nodes[leaf_data_inst].outputs.iter().enumerate() + { + let output_var_decl = &mut func.vars[output_var]; + output_var_decl.def_parent = Either::Right(leaf_data_inst); + output_var_decl.def_idx = output_idx.try_into().unwrap(); + } + + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `regions` and `nodes` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. + func.regions[self.parent_region.unwrap()].children.insert_before( + leaf_data_inst, + data_inst, + func.nodes, + ); + + // HACK(eddyb) account for traversal never seeing this, + // while still needing value replacement and/or use tracking. + func.reborrow().at(leaf_data_inst).inner_in_place_transform_with(self); + } + + func.regions[self.parent_region.unwrap()] + .children + .remove(data_inst, func.nodes); + + // HACK(eddyb) no good "tombstone" for the original def. 
+ return Ok(Transformed::Changed(DataInstDef { + attrs: AttrSet::default(), + kind: DataInstKind::SpvInst(wk.OpNop.into(), spv::InstLowering::default()), + inputs: [].into_iter().collect(), + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + })); + } + } + } else if spv_inst.opcode == wk.OpCopyMemory { + if disaggregated_output_or_inputs_during_lowering { + return Err(LowerError(Diag::bug([format!( + "unexpected aggregate types in `{}`", + spv_inst.opcode.name() + ) + .into()]))); + } + + assert_eq!(data_inst_def.inputs.len(), 2); + + let dst_ptr = data_inst_def.inputs[0]; + let src_ptr = data_inst_def.inputs[1]; + + let (_, dst_pointee_type) = + self.lowerer.as_spv_ptr_type(func.at(dst_ptr).type_of(cx)).ok_or_else(|| { + LowerError(Diag::bug([ + "destination pointer input not an `OpTypePointer`".into() + ])) + })?; + let (_, src_pointee_type) = + self.lowerer.as_spv_ptr_type(func.at(src_ptr).type_of(cx)).ok_or_else(|| { + LowerError(Diag::bug(["source pointer input not an `OpTypePointer`".into()])) + })?; + + if dst_pointee_type != src_pointee_type { + return Err(LowerError(Diag::bug([ + "copy destination pointee type different from source pointee type".into(), + ]))); + } + + // FIXME(eddyb) this may need to automatically generate an + // intermediary `QPtrOp::BufferData` when accessing buffers. + let mem_data_layout = match self.lowerer.layout_of(src_pointee_type)? { + TypeLayout::Concrete(mem) => mem, + _ => { + return Err(LowerError(Diag::bug([ + "`OpCopyMemory` of data with non-memory type: ".into(), + src_pointee_type.into(), + ]))); + } + }; + + let (dst_ptr, dst_base_offset) = flatten_offsets(dst_ptr); + let (src_ptr, src_base_offset) = flatten_offsets(src_ptr); + + // FIXME(eddyb) support memory operands somehow. 
+ if !spv_inst.imms.is_empty() { + return Ok(Transformed::Unchanged); + } + + // HACK(eddyb) this is speculative, so we just give up if we hit + // some situation we don't currently support - ideally, there would + // be an *untyped* `qptr.copy`, but that is harder to support overall. + // HACK(eddyb) this is a `try {...}`-like use of a closure. + let try_gather_leaf_offsets_and_types = || { + struct UnsupportedLargeArray; + let recurse_into_layout = |layout: &MemTypeLayout| { + let aggregate_shape = match &cx[layout.original_type].kind { + TypeKind::SpvInst { + value_lowering: spv::ValueLowering::Disaggregate(aggregate_shape), + .. + } => aggregate_shape, + _ => return Ok(false), + }; + match *aggregate_shape { + spv::AggregateShape::Struct { .. } => Ok(true), + + // HACK(eddyb) 16 leaves allows for a 4x4 matrix, even + // when represented as e.g. `[f32; 16]` or `[[f32; 4]; 4]` + // (this comparison gets more complex when accounting + // for vectors, e.g. `[f32x4; 4]`, which is only 4 leaves), + // but ideally most types accepted here will be even + // smaller arrays (which could've e.g. been structs). + // FIXME(eddyb) larger arrays should lower to loops that + // copy a small number of leaves per iteration, or even + // some general-purpose `qptr.copy`, to avoid generating + // amounts of IR that scale with the array length, which + // (unlike struct fields) can be arbitrarily large. + spv::AggregateShape::Array { total_leaf_count, .. } => { + if total_leaf_count <= 16 { + Ok(true) + } else { + Err(UnsupportedLargeArray) + } + } + } + }; + + // HACK(eddyb) buffering the details of the instructions we'll + // be generating, because we don't know ahead of time whether we + // even want to expand the `OpCopyMemory`, at all. 
+ let mut leaf_offsets_and_types = SmallVec::<[_; 8]>::new(); + mem_data_layout + .deeply_flatten_if( + 0, + &|candidate_layout| recurse_into_layout(candidate_layout).unwrap_or(false), + &mut |leaf_offset, leaf| { + // FIMXE(eddyb) ideally this would not be computed twice. + recurse_into_layout(leaf).map_err(|UnsupportedLargeArray| { + // HACK(eddyb) not an error, just stopping traversal. + LayoutError(Diag::bug([])) + })?; + + // HACK(eddyb) `deeply_flatten_if` takes a base offset, + // but we have two, so we need our own overflow checks. + if dst_base_offset.checked_add(leaf_offset).is_none() + || src_base_offset.checked_add(leaf_offset).is_none() + { + // HACK(eddyb) not an error, just stopping traversal. + return Err(LayoutError(Diag::bug([]))); + } + + leaf_offsets_and_types.push((leaf_offset, leaf.original_type)); + + Ok(()) + }, + ) + .ok()?; + Some(leaf_offsets_and_types) + }; + let leaf_offsets_and_types = match try_gather_leaf_offsets_and_types() { + Some(leaf_offsets_and_types) => leaf_offsets_and_types, + None => return Ok(Transformed::Unchanged), + }; + + let mut func = func_at_data_inst.reborrow().at(()); + + // This is the point of no return: we're inserting several + // new instructions, and removing the original one entirely. + for (leaf_offset, leaf_type) in leaf_offsets_and_types { + let leaf_load_data_inst = func.nodes.define( + cx, + DataInstDef { + // FIXME(eddyb) filter attributes into debuginfo and + // semantic, and understand the semantic ones. 
+ attrs, + kind: MemOp::Load { + offset: NonZeroI32::new( + src_base_offset.checked_add(leaf_offset).unwrap(), + ), + } + .into(), + inputs: [src_ptr].into_iter().collect(), + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + } + .into(), + ); + let leaf_load_output_var = func.vars.define( + cx, + VarDecl { + attrs: Default::default(), + ty: leaf_type, + def_parent: Either::Right(leaf_load_data_inst), + def_idx: 0, + }, + ); + func.nodes[leaf_load_data_inst].outputs.push(leaf_load_output_var); + + let leaf_store_data_inst = func.nodes.define( + cx, + DataInstDef { + // FIXME(eddyb) filter attributes into debuginfo and + // semantic, and understand the semantic ones. + attrs, + kind: MemOp::Store { + offset: NonZeroI32::new( + dst_base_offset.checked_add(leaf_offset).unwrap(), + ), + } + .into(), + inputs: [dst_ptr, Value::Var(leaf_load_output_var)].into_iter().collect(), + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + } + .into(), + ); + + // HACK(eddyb) can't really use helpers like `FuncAtMut::def`, + // due to the need to borrow `regions` and `nodes` + // at the same time - perhaps some kind of `FuncAtMut` position + // types for "where a list is in a parent entity" could be used + // to make this more ergonomic, although the potential need for + // an actual list entity of its own, should be considered. + let parent_region_children = + &mut func.regions[self.parent_region.unwrap()].children; + parent_region_children.insert_before(leaf_load_data_inst, data_inst, func.nodes); + parent_region_children.insert_before(leaf_store_data_inst, data_inst, func.nodes); + + // HACK(eddyb) account for traversal never seeing these, + // while still needing value replacement and/or use tracking. 
+ func.reborrow().at(leaf_load_data_inst).inner_in_place_transform_with(self); + func.reborrow().at(leaf_store_data_inst).inner_in_place_transform_with(self); + } + + func.regions[self.parent_region.unwrap()].children.remove(data_inst, func.nodes); + + // HACK(eddyb) no good "tombstone" for the original def. + return Ok(Transformed::Changed(DataInstDef { + attrs: AttrSet::default(), + kind: DataInstKind::SpvInst(wk.OpNop.into(), spv::InstLowering::default()), + inputs: [].into_iter().collect(), + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + })); } else if spv_inst.opcode == wk.OpBitcast { + if disaggregated_output_or_inputs_during_lowering { + return Err(LowerError(Diag::bug([format!( + "unexpected aggregate types in `{}`", + spv_inst.opcode.name() + ) + .into()]))); + } + + assert_eq!(outputs.len(), 1); + assert_eq!(data_inst_def.inputs.len(), 1); + let input = data_inst_def.inputs[0]; // Pointer-to-pointer casts are noops on `qptr`. if self.lowerer.as_spv_ptr_type(func.at(input).type_of(cx)).is_some() @@ -692,7 +1383,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { // FIXME(eddyb) is this a good convention? let func = func_at_data_inst_frozen.at(()); - match data_inst_def.kind { + let spv_inst_lowering = match &data_inst_def.kind { // Known semantics, no need to preserve SPIR-V pointer information. NodeKind::Select(_) | NodeKind::Loop { .. } @@ -704,12 +1395,18 @@ impl LowerFromSpvPtrInstsInFunc<'_> { | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_) => return, - DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => {} - } + DataInstKind::SpvInst(_, lowering) | DataInstKind::SpvExtInst { lowering, .. 
} => { + lowering + } + }; let mut old_and_new_attrs = None; let get_old_attrs = || AttrSetDef { attrs: cx[data_inst_def.attrs].attrs.clone() }; + if let Some(LowerError(e)) = extra_error { + old_and_new_attrs.get_or_insert_with(get_old_attrs).push_diag(e); + } + for (input_idx, &v) in data_inst_def.inputs.iter().enumerate() { if let Some((_, pointee)) = self.lowerer.as_spv_ptr_type(func.at(v).type_of(cx)) { old_and_new_attrs.get_or_insert_with(get_old_attrs).attrs.insert( @@ -721,22 +1418,28 @@ impl LowerFromSpvPtrInstsInFunc<'_> { ); } } - // HACK(eddyb) multi-output instructions don't exist pre-disaggregate. - if let Some(&output_var) = data_inst_def.outputs.iter().at_most_one().ok().unwrap() - && let Some((addr_space, pointee)) = + for (output_idx, &output_var) in data_inst_def.outputs.iter().enumerate() { + if let Some((addr_space, pointee)) = self.lowerer.as_spv_ptr_type(func.at(output_var).decl().ty) - { - old_and_new_attrs.get_or_insert_with(get_old_attrs).attrs.insert( - QPtrAttr::FromSpvPtrOutput { - addr_space: OrdAssertEq(addr_space), - pointee: OrdAssertEq(pointee), + { + // FIXME(eddyb) make this impossible by lowering all instructions + // that may produce aggregates with pointer leaves. 
+ if output_idx != 0 || spv_inst_lowering.disaggregated_output.is_some() { + old_and_new_attrs.get_or_insert_with(get_old_attrs).push_diag(Diag::bug([ + format!("unsupported pointer as aggregate leaf (output #{output_idx})") + .into(), + ])); + continue; } - .into(), - ); - } - if let Some(LowerError(e)) = extra_error { - old_and_new_attrs.get_or_insert_with(get_old_attrs).push_diag(e); + old_and_new_attrs.get_or_insert_with(get_old_attrs).attrs.insert( + QPtrAttr::FromSpvPtrOutput { + addr_space: OrdAssertEq(addr_space), + pointee: OrdAssertEq(pointee), + } + .into(), + ); + } } if let Some(attrs) = old_and_new_attrs { @@ -764,28 +1467,30 @@ impl LowerFromSpvPtrInstsInFunc<'_> { } } } + + // HACK(eddyb) this is a helper *only* for `transform_value_use` and + // `in_place_transform_node_def`, and should not be used elsewhere. + fn apply_value_replacements(&self, mut value: Value) -> Value { + while let Value::Var(var) = value { + value = if let Some(&base_ptr) = self.noop_offsets_to_base_ptr.get(&var) { + base_ptr + } else { + break; + }; + } + value + } } impl Transformer for LowerFromSpvPtrInstsInFunc<'_> { // NOTE(eddyb) it's important that this only gets invoked on already lowered // `Value`s, so we can rely on e.g. `noop_offsets_to_base_ptr` being filled. 
fn transform_value_use(&mut self, v: &Value) -> Transformed { - let mut v = *v; + let new_v = self.apply_value_replacements(*v); - let transformed = match v { - Value::Var(v) => self - .noop_offsets_to_base_ptr - .get(&v) - .copied() - .map_or(Transformed::Unchanged, Transformed::Changed), + self.add_value_uses(&[new_v]); - Value::Const(_) => Transformed::Unchanged, - }; - - transformed.apply_to(&mut v); - self.add_value_uses(&[v]); - - transformed + if *v == new_v { Transformed::Unchanged } else { Transformed::Changed(new_v) } } fn in_place_transform_region_def(&mut self, mut func_at_region: FuncAtMut<'_, Region>) { @@ -813,13 +1518,7 @@ impl Transformer for LowerFromSpvPtrInstsInFunc<'_> { )); if let QPtrOp::Offset(0) = op { - let mut base_ptr = new_def.inputs[0]; - if let Value::Var(base_ptr_var) = base_ptr - && let Some(&base_ptr_base_ptr) = - self.noop_offsets_to_base_ptr.get(&base_ptr_var) - { - base_ptr = base_ptr_base_ptr; - } + let base_ptr = self.apply_value_replacements(new_def.inputs[0]); self.noop_offsets_to_base_ptr .insert(func_at_node.reborrow().def().outputs[0], base_ptr); } @@ -838,6 +1537,102 @@ impl Transformer for LowerFromSpvPtrInstsInFunc<'_> { } fn in_place_transform_func_decl(&mut self, func_decl: &mut FuncDecl) { + // HACK(eddyb) separately pre-process all `OpVariable`s with initializers, + // as the `OpStore`s needed to initialize them, have to be injected + // *after* the last `OpVariable` + if let DeclDef::Present(func_def_body) = &mut func_decl.def { + let last_func_local_var = func_def_body + .at_body() + .at_children() + .into_iter() + .take_while(|func_at_node| match &func_at_node.def().kind { + DataInstKind::SpvInst(spv_inst, _) => { + spv_inst.opcode == self.lowerer.wk.OpVariable + } + _ => false, + }) + .map(|func_at_node| func_at_node.position) + .last(); + + // FIXME(eddyb) a cursor abstraction would be clearer. 
+ let mut insert_after = last_func_local_var; + + let body = func_def_body.body; + let mut func_at_body_children = func_def_body.at_mut_body().at_children().into_iter(); + while let Some(func_at_node) = func_at_body_children.next() { + let node = func_at_node.position; + let func = func_at_node.at(()); + + let node_def = &mut *func.nodes[node]; + let spv_inst_lowering = match &mut node_def.kind { + DataInstKind::SpvInst(spv_inst, lowering) + if spv_inst.opcode == self.lowerer.wk.OpVariable => + { + lowering + } + _ => break, + }; + + let Some(local_var_ptr) = (spv_inst_lowering.disaggregated_output.is_none()) + .then(|| { + assert!(node_def.outputs.len() == 1); + node_def.outputs[0] + }) + .filter(|&output_var| { + self.lowerer.as_spv_ptr_type(func.vars[output_var].ty).is_some() + }) + .map(Value::Var) + else { + continue; + }; + + // FIXME(eddyb) filter attributes into debuginfo and + // semantic, and understand the semantic ones. + let init_attrs = node_def.attrs; + let mut init_inputs = mem::take(&mut node_def.inputs); + let mut init_input_lowering = + mem::take(&mut spv_inst_lowering.disaggregated_inputs); + + init_inputs.insert(0, local_var_ptr); + match &mut init_input_lowering[..] { + [] => { + if init_inputs.len() == 1 { + continue; + } + } + [(old_range, _)] => { + let new_range = 1..u32::try_from(init_inputs.len()).unwrap(); + assert_eq!(*old_range, 0..(new_range.end - 1)); + *old_range = new_range; + } + _ => unreachable!(), + } + + let store_inst = func.nodes.define( + &self.lowerer.cx, + DataInstDef { + attrs: init_attrs, + kind: DataInstKind::SpvInst( + self.lowerer.wk.OpStore.into(), + spv::InstLowering { + disaggregated_output: None, + disaggregated_inputs: init_input_lowering, + }, + ), + inputs: init_inputs, + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + } + .into(), + ); + + // FIXME(eddyb) a cursor abstraction would be clearer. 
+ let insert_after = insert_after.as_mut().unwrap(); + func.regions[body].children.insert_after(store_inst, *insert_after, func.nodes); + *insert_after = store_inst; + } + } + func_decl.inner_in_place_transform_with(self); // Apply all `remove_inst_if_dead_output_with_parent_region` removals, that are truly unused. diff --git a/src/qptr/mod.rs b/src/qptr/mod.rs index ce675ebe..41392123 100644 --- a/src/qptr/mod.rs +++ b/src/qptr/mod.rs @@ -30,7 +30,7 @@ pub enum QPtrAttr { // FIXME(eddyb) reduce usage by modeling more of SPIR-V inside SPIR-T. ToSpvPtrInput { input_idx: u32, pointee: OrdAssertEq }, - /// When applied to a `DataInst` with a `QPtr`-typed output value, + /// When applied to a `DataInst` with a single `QPtr`-typed output value, /// this describes the original `OpTypePointer` produced by an unknown /// SPIR-V instruction (likely creating it, without deriving from an input). /// diff --git a/src/spv/canonical.rs b/src/spv/canonical.rs index c7348cd0..85f47a9e 100644 --- a/src/spv/canonical.rs +++ b/src/spv/canonical.rs @@ -75,6 +75,7 @@ def_mappable_ops! { } const { OpUndef, + OpConstantNull, OpConstantFalse, OpConstantTrue, OpConstant, @@ -266,7 +267,23 @@ impl spv::Inst { // FIXME(eddyb) automate bidirectional mappings more (although the need // for conditional, i.e. "partial", mappings, adds a lot of complexity). 
- pub(super) fn as_canonical_type( + pub fn into_canonical_type_with( + self, + cx: &Context, + type_and_const_inputs: SmallVec<[TypeOrConst; 2]>, + ) -> TypeKind { + let value_lowering = match spv::AggregateShape::compute(cx, &self, &type_and_const_inputs) { + Some(aggregate_shape) => spv::ValueLowering::Disaggregate(aggregate_shape), + None => spv::ValueLowering::Direct, + }; + if let Some(type_kind) = self.as_canonical_non_spv_type(cx, &type_and_const_inputs) { + assert!(value_lowering == spv::ValueLowering::Direct); + type_kind + } else { + TypeKind::SpvInst { spv_inst: self, type_and_const_inputs, value_lowering } + } + } + fn as_canonical_non_spv_type( &self, cx: &Context, type_and_const_inputs: &[TypeOrConst], @@ -360,6 +377,12 @@ impl spv::Inst { mo.OpUndef == self.opcode } + // HACK(eddyb) this only exists as a helper for `spv::lower`. + pub(super) fn lower_const_by_distributing_to_aggregate_leaves(&self) -> bool { + let mo = MappableOps::get(); + [mo.OpUndef, mo.OpConstantNull].contains(&self.opcode) + } + // FIXME(eddyb) automate bidirectional mappings more (although the need // for conditional, i.e. "partial", mappings, adds a lot of complexity). 
pub(super) fn as_canonical_const( @@ -381,6 +404,27 @@ impl spv::Inst { (_, []) if opcode == mo.OpConstant => { Some(scalar::Const::try_decode_from_spv_imms(ty.as_scalar(cx)?, imms)?.into()) } + + ([], []) if opcode == mo.OpConstantNull => { + let null_scalar = |ty: scalar::Type| { + if ty.bit_width() > 128 { + return None; + } + Some(scalar::Const::from_bits(ty, 0)) + }; + match cx[ty].kind { + TypeKind::Scalar(ty) => Some(null_scalar(ty)?.into()), + TypeKind::Vector(ty) => { + let elem = null_scalar(ty.elem)?; + Some( + vector::Const::from_elems(ty, (0..ty.elem_count.get()).map(|_| elem)) + .into(), + ) + } + _ => None, + } + } + _ if opcode == wk.OpConstantComposite => { let ty = ty.as_vector(cx)?; let elems = (const_inputs.len() == usize::from(ty.elem_count.get()) @@ -388,6 +432,7 @@ impl spv::Inst { .then(|| const_inputs.iter().map(|ct| *ct.as_scalar(cx).unwrap()))?; Some(vector::Const::from_elems(ty, elems).into()) } + _ => None, } } diff --git a/src/spv/lift.rs b/src/spv/lift.rs index 1136c98e..16d53a41 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -7,19 +7,31 @@ use crate::spv::{self, spec}; use crate::visit::{InnerVisit, Visitor}; use crate::{ AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, - DataInstKind, DbgSrcLoc, DeclDef, ExportKey, Exportee, Func, FuncDecl, FuncParam, FxIndexMap, - FxIndexSet, GlobalVar, GlobalVarDefBody, Import, Module, ModuleDebugInfo, ModuleDialect, Node, - NodeDef, NodeKind, OrdAssertEq, Region, Type, TypeDef, TypeKind, TypeOrConst, Value, Var, - VarDecl, VarKind, scalar, + DataInstKind, DbgSrcLoc, DeclDef, EntityOrientedDenseMap, ExportKey, Exportee, Func, FuncDecl, + FuncDefBody, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDefBody, GlobalVarInit, + Import, Module, ModuleDebugInfo, ModuleDialect, Node, NodeDef, NodeKind, OrdAssertEq, Region, + Type, TypeDef, TypeKind, TypeOrConst, Value, Var, VarDecl, VarKind, scalar, }; -use itertools::Itertools; +use itertools::Itertools 
as _; use rustc_hash::FxHashMap; use smallvec::SmallVec; -use std::collections::{BTreeMap, BTreeSet}; -use std::num::NonZeroU32; +use std::collections::BTreeMap; +use std::num::NonZeroUsize; +use std::ops::Range; use std::path::Path; +use std::rc::Rc; use std::{io, iter, mem}; +// HACK(eddyb) getting around the lack of a `Step` impl on `spv::Id` (`NonZeroU32`). +trait IdRangeExt { + fn iter(&self) -> iter::Map, fn(u32) -> spv::Id>; +} +impl IdRangeExt for Range { + fn iter(&self) -> iter::Map, fn(u32) -> spv::Id> { + (self.start.get()..self.end.get()).map(|i| spv::Id::new(i).unwrap()) + } +} + impl spv::Dialect { fn capability_insts(&self) -> impl Iterator + '_ { let wk = &spec::Spec::get().well_known; @@ -76,33 +88,39 @@ impl spv::ModuleDebugInfo { } } -impl FuncDecl { - fn spv_func_type(&self, cx: &Context) -> Type { - let wk = &spec::Spec::get().well_known; - - cx.intern(TypeDef { - attrs: AttrSet::default(), - kind: TypeKind::SpvInst { - spv_inst: wk.OpTypeFunction.into(), - type_and_const_inputs: iter::once(self.ret_type) - .chain(self.params.iter().map(|param| param.ty)) - .map(TypeOrConst::Type) - .collect(), - }, - }) +/// ID allocation callback, kept as a closure (instead of having its state +/// be part of `Lifter`) to avoid misuse. +trait AllocIds: FnMut(usize) -> Range { + fn one(&mut self) -> spv::Id { + self(1).start } } -struct NeedsIdsCollector<'a> { +impl Range> AllocIds for F {} + +struct Lifter<'a, AI: AllocIds> { cx: &'a Context, module: &'a Module, - ext_inst_imports: BTreeSet<&'a str>, - debug_strings: BTreeSet<&'a str>, + alloc_ids: AI, + + ids: ModuleIds<'a>, - globals: FxIndexSet, global_vars_seen: FxIndexSet, - funcs: FxIndexSet, +} + +#[derive(Default)] +struct ModuleIds<'a> { + ext_inst_imports: BTreeMap<&'a str, spv::Id>, + debug_strings: BTreeMap<&'a str, spv::Id>, + + // FIXME(eddyb) use `EntityOrientedDenseMap` here. + globals: FxIndexMap, + // FIXME(eddyb) use `EntityOrientedDenseMap` here. 
+ funcs: FxIndexMap>, + + // FIXME(eddyb) should this be somehow snuck into `globals`? + reaggregated_global_var_initializers: FxHashMap, } #[derive(Copy, Clone, PartialEq, Eq, Hash)] @@ -111,13 +129,26 @@ enum Global { Const(Const), } -impl Visitor<'_> for NeedsIdsCollector<'_> { +// FIXME(eddyb) this is inconsistently named with `FuncBodyLifting`. +struct FuncIds<'a> { + spv_func_ret_type: Type, + // FIXME(eddyb) should we even be interning an `OpTypeFunction` in `Context`? + // (it's easier this way, but it could also be tracked in `ModuleIds`) + spv_func_type: Type, + + func_id: spv::Id, + param_ids: Range, + + body: Option>, +} + +impl Visitor<'_> for Lifter<'_, AI> { fn visit_attr_set_use(&mut self, attrs: AttrSet) { self.visit_attr_set_def(&self.cx[attrs]); } fn visit_type_use(&mut self, ty: Type) { let global = Global::Type(ty); - if self.globals.contains(&global) { + if self.ids.globals.contains_key(&global) { return; } let ty_def = &self.cx[ty]; @@ -158,11 +189,11 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { } self.visit_type_def(ty_def); - self.globals.insert(global); + self.ids.globals.insert(global, self.alloc_ids.one()); } fn visit_const_use(&mut self, ct: Const) { let global = Global::Const(ct); - if self.globals.contains(&global) { + if self.ids.globals.contains_key(&global) { return; } let ct_def = &self.cx[ct]; @@ -189,7 +220,7 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { | ConstKind::PtrToFunc(_) | ConstKind::SpvInst { .. 
} => { self.visit_const_def(ct_def); - self.globals.insert(global); + self.ids.globals.insert(global, self.alloc_ids.one()); } // HACK(eddyb) because this is an `OpString` and needs to go earlier @@ -207,35 +238,105 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { } ); - self.debug_strings.insert(&self.cx[s]); + self.ids.debug_strings.entry(&self.cx[s]).or_insert_with(|| self.alloc_ids.one()); } } } fn visit_global_var_use(&mut self, gv: GlobalVar) { - if self.global_vars_seen.insert(gv) { - self.visit_global_var_decl(&self.module.global_vars[gv]); + if !self.global_vars_seen.insert(gv) { + return; + } + let gv_decl = &self.module.global_vars[gv]; + self.visit_global_var_decl(gv_decl); + + match &gv_decl.def { + DeclDef::Imported(_) => {} + DeclDef::Present(gv_def_body) => match &gv_def_body.initializer { + None | Some(GlobalVarInit::Direct(_)) => {} + + // FIXME(eddyb) this should be a proper `Result`-based error instead, + // and/or `spv::lift` should mutate the module for legalization. + Some(GlobalVarInit::Data(_)) => { + unreachable!( + "`GlobalVarInit::Composite` should be legalized away before lifting" + ); + } + + // HACK(eddyb) recursively reconstruct an initializer as a tree + // of (otherwise illegal) `Const`s, with SPIR-V aggregate types. + // FIXME(eddyb) this *technically* pollutes the `Context`, but + // is easier than having two ways of tracking SPIR-V constants. + Some(GlobalVarInit::SpvAggregate { ty, leaves }) => { + let init = self.reaggregate_const(*ty, leaves); + self.ids.reaggregated_global_var_initializers.insert(gv, init); + } + }, } } fn visit_func_use(&mut self, func: Func) { - if self.funcs.contains(&func) { + if self.ids.funcs.contains_key(&func) { return; } - // NOTE(eddyb) inserting first results in a different function ordering - // in the resulting module, but the order doesn't matter, and we need - // to avoid infinite recursion for recursive functions. 
- self.funcs.insert(func); - let func_decl = &self.module.funcs[func]; - // FIXME(eddyb) should this be cached in `self.funcs`? - self.visit_type_use(func_decl.spv_func_type(self.cx)); + + // Synthesize an `OpTypeFunction` type (that SPIR-T itself doesn't carry). + let wk = &spec::Spec::get().well_known; + let spv_func_ret_type = match &func_decl.ret_types[..] { + &[ty] => ty, + // Reaggregate multiple return types into an `OpTypeStruct`. + ret_types => { + let opcode = if ret_types.is_empty() { wk.OpTypeVoid } else { wk.OpTypeStruct }; + self.cx.intern(spv::Inst::from(opcode).into_canonical_type_with( + self.cx, + ret_types.iter().copied().map(TypeOrConst::Type).collect(), + )) + } + }; + let spv_func_type = self.cx.intern( + spv::Inst::from(wk.OpTypeFunction).into_canonical_type_with( + self.cx, + iter::once(spv_func_ret_type) + .chain(func_decl.params.iter().map(|param| param.ty)) + .map(TypeOrConst::Type) + .collect(), + ), + ); + self.visit_type_use(spv_func_type); + + // NOTE(eddyb) inserting first produces a different function ordering + // overall in the final module, but the order doesn't matter, and we + // need to avoid infinite recursion for recursive functions. + self.ids.funcs.insert( + func, + FuncIds { + spv_func_ret_type, + spv_func_type, + func_id: self.alloc_ids.one(), + param_ids: (self.alloc_ids)(func_decl.params.len()), + body: None, + }, + ); + self.visit_func_decl(func_decl); + + // Handle the body last, to minimize recursion hazards (see comment above), + // and to allow `FuncBodyLifting` to look up its dependencies in `self.ids`. 
+ match &func_decl.def { + DeclDef::Imported(_) => {} + DeclDef::Present(func_def_body) => { + let func_body_lifting = FuncBodyLifting::from_func_def_body(self, func_def_body); + self.ids.funcs.get_mut(&func).unwrap().body = Some(func_body_lifting); + } + } } fn visit_spv_module_debug_info(&mut self, debug_info: &spv::ModuleDebugInfo) { for sources in debug_info.source_languages.values() { // The file operand of `OpSource` has to point to an `OpString`. - self.debug_strings.extend(sources.file_contents.keys().copied().map(|s| &self.cx[s])); + for &s in sources.file_contents.keys() { + self.ids.debug_strings.entry(&self.cx[s]).or_insert_with(|| self.alloc_ids.one()); + } } } fn visit_attr(&mut self, attr: &Attr) { @@ -246,7 +347,10 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { | Attr::SpvAnnotation { .. } | Attr::SpvBitflagsOperand(_) => {} Attr::DbgSrcLoc(OrdAssertEq(DbgSrcLoc { file_path, .. })) => { - self.debug_strings.insert(&self.cx[file_path]); + self.ids + .debug_strings + .entry(&self.cx[file_path]) + .or_insert_with(|| self.alloc_ids.one()); } } attr.inner_visit_with(self); @@ -262,7 +366,7 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { | DataInstKind::Mem(MemOp::Load { offset: None } | MemOp::Store { offset: None }) | DataInstKind::FuncCall(_) | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) => {} + | DataInstKind::SpvInst(..) => {} // FIXME(eddyb) this should be a proper `Result`-based error instead, // and/or `spv::lift` should mutate the module for legalization. @@ -277,36 +381,63 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { } DataInstKind::SpvExtInst { ext_set, .. 
} => { - self.ext_inst_imports.insert(&self.cx[ext_set]); + self.ids + .ext_inst_imports + .entry(&self.cx[ext_set]) + .or_insert_with(|| self.alloc_ids.one()); } } func_at_node.inner_visit_with(self); } } -struct AllocatedIds<'a> { - ext_inst_imports: BTreeMap<&'a str, spv::Id>, - debug_strings: BTreeMap<&'a str, spv::Id>, +impl Lifter<'_, AI> { + // FIXME(eddyb) maybe use this for `DataInstDef` inputs as well, when `Const`s, + // not just `GlobalVarInit::SpvAggregate`? + fn reaggregate_const(&mut self, ty: Type, leaves: &[Const]) -> Const { + let ty_def = &self.cx[ty]; + assert_eq!(leaves.len(), ty_def.disaggregated_leaf_count()); - // FIXME(eddyb) use `EntityOrientedDenseMap` here. - globals: FxIndexMap, - // FIXME(eddyb) use `EntityOrientedDenseMap` here. - funcs: FxIndexMap>, + if let spv::ValueLowering::Direct = ty_def.spv_value_lowering() { + let &[ct] = leaves.try_into().unwrap(); + return ct; + } + + // HACK(eddyb) this is a bit inefficient but increases code reuse, in + // a case that'd otherwise require e.g. an `Iterator` w/ `nth` overload. + let mut used_leaves = 0..0; + let components = (0..) + .map_while(|i| ty.aggregate_component_type_and_leaf_range(self.cx, i)) + .map(|(component_type, component_leaf_range)| { + assert_eq!(used_leaves.end, component_leaf_range.start); + used_leaves.end = component_leaf_range.end; + self.reaggregate_const(component_type, &leaves[component_leaf_range]) + }) + .collect(); + assert_eq!(used_leaves, 0..leaves.len()); + + let wk = &spec::Spec::get().well_known; + let ct = self.cx.intern(ConstDef { + attrs: AttrSet::default(), + ty, + kind: ConstKind::SpvInst { + spv_inst_and_const_inputs: Rc::new((wk.OpConstantComposite.into(), components)), + }, + }); + // HACK(eddyb) visit constants as they're created, to ensure they're recorded. + self.visit_const_use(ct); + ct + } } -// FIXME(eddyb) should this use ID ranges instead of `SmallVec<[spv::Id; 4]>`? 
-struct FuncLifting<'a> { +// FIXME(eddyb) this is inconsistently named with `FuncIds`. +struct FuncBodyLifting<'a> { // HACK(eddyb) temporary workaround before it's clear how to map everything // to use the new `Var` abstraction effectively. - vars: Option<&'a crate::EntityDefs>, + vars: &'a crate::EntityDefs, - func_id: spv::Id, - param_ids: SmallVec<[spv::Id; 4]>, - - // FIXME(eddyb) use `EntityOrientedDenseMap` here. - region_inputs_source: FxHashMap, - // FIXME(eddyb) use `EntityOrientedDenseMap` here. - data_inst_output_ids: FxHashMap, + region_inputs_source: EntityOrientedDenseMap, + data_insts: EntityOrientedDenseMap, label_ids: FxHashMap, blocks: FxIndexMap>, @@ -322,6 +453,40 @@ enum RegionInputsSource { LoopHeaderPhis(Node), } +struct DataInstLifting { + result_id: Option, + + /// If the SPIR-V result type is "aggregate" (`OpTypeStruct`/`OpTypeArray`), + /// this describes how to extract its leaves, which is necessary as on the + /// SPIR-T side, [`Value`]s can only refer to individual leaves. + disaggregate_result: Option, + + /// `reaggregate_inputs[i]` describes how to recreate the "aggregate" value + /// demanded by [`spv::InstLowering`]'s `disaggregated_inputs[i]`. + reaggregate_inputs: SmallVec<[ReaggregateFromLeaves; 1]>, +} + +/// All the information necessary to decompose a SPIR-V "aggregate" value into +/// its leaves, with one `OpCompositeExtract` per leaf. +// +// FIXME(eddyb) it might be more efficient to only extract actually used leaves, +// or chain partial extracts following nesting structure - but this is simpler. +struct DisaggregateToLeaves { + op_composite_extract_result_ids: Range, +} + +/// All the information necessary to recreate a SPIR-V "aggregate" value, with +/// one `OpCompositeInsert` per leaf (starting with an `OpUndef` of that type). 
+// +// FIXME(eddyb) it might be more efficient to use other strategies, such as +// `OpCompositeConstruct`, special-casing constants, reusing whole results +// of other `DataInstDef`s with an aggregate result, etc. - but this is simpler +// for now, and it reuses the "one instruction per leaf" used for extractions. +struct ReaggregateFromLeaves { + op_undef: Const, + op_composite_insert_result_ids: Range, +} + /// Any of the possible points in structured or unstructured SPIR-T control-flow, /// that may require a separate SPIR-V basic block. #[derive(Copy, Clone, PartialEq, Eq, Hash)] @@ -371,6 +536,11 @@ struct Terminator<'a> { kind: TerminatorKind<'a>, + /// If this is a [`cfg::ControlInstKind::Return`] with `inputs.len() > 1`, + /// this ID is for the `OpCompositeConstruct` needed to produce the single + /// `OpTypeStruct` (`spv_func_ret_type`) value required by `OpReturnValue`. + reaggregated_return_value_id: Option, + // FIXME(eddyb) use `Cow` or something, but ideally the "owned" case always // has at most one input, so allocating a whole `Vec` for that seems unwise. 
inputs: SmallVec<[Value; 2]>, @@ -419,41 +589,6 @@ enum Merge { }, } -impl<'a> NeedsIdsCollector<'a> { - fn alloc_ids( - self, - mut alloc_id: impl FnMut() -> Result, - ) -> Result, E> { - let Self { - cx, - module, - ext_inst_imports, - debug_strings, - globals, - global_vars_seen: _, - funcs, - } = self; - - Ok(AllocatedIds { - ext_inst_imports: ext_inst_imports - .into_iter() - .map(|name| Ok((name, alloc_id()?))) - .collect::>()?, - debug_strings: debug_strings - .into_iter() - .map(|s| Ok((s, alloc_id()?))) - .collect::>()?, - globals: globals.into_iter().map(|g| Ok((g, alloc_id()?))).collect::>()?, - funcs: funcs - .into_iter() - .map(|func| { - Ok((func, FuncLifting::from_func_decl(cx, &module.funcs[func], &mut alloc_id)?)) - }) - .collect::>()?, - }) - } -} - /// Helper type for deep traversal of the CFG (as a graph of [`CfgPoint`]s), which /// tracks the necessary context for navigating a [`Region`]/[`Node`]. #[derive(Copy, Clone)] @@ -528,7 +663,7 @@ impl<'p> FuncAt<'_, CfgCursor<'p>> { | DataInstKind::Mem(_) | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) + | DataInstKind::SpvInst(..) | DataInstKind::SpvExtInst { .. } => { Some(CfgCursor { point: CfgPoint::NodeExit(node), parent: cursor.parent }) } @@ -562,81 +697,60 @@ impl<'p> FuncAt<'_, CfgCursor<'p>> { impl FuncAt<'_, Region> { /// Traverse every [`CfgPoint`] (deeply) contained in this [`Region`], /// in reverse post-order (RPO), with `f` receiving each [`CfgPoint`] - /// in turn (wrapped in [`CfgCursor`], for further traversal flexibility), - /// and being able to stop iteration by returning `Err`. + /// in turn (wrapped in [`CfgCursor`], for further traversal flexibility). /// /// RPO iteration over a CFG provides certain guarantees, most importantly /// that dominators are visited before the entire subgraph they dominate. 
- fn rev_post_order_try_for_each( - self, - mut f: impl FnMut(CfgCursor<'_>) -> Result<(), E>, - ) -> Result<(), E> { - self.rev_post_order_try_for_each_inner(&mut f, None) + fn rev_post_order_for_each(self, mut f: impl FnMut(CfgCursor<'_>)) { + self.rev_post_order_for_each_inner(&mut f, None); } - fn rev_post_order_try_for_each_inner( + fn rev_post_order_for_each_inner( self, - f: &mut impl FnMut(CfgCursor<'_>) -> Result<(), E>, + f: &mut impl FnMut(CfgCursor<'_>), parent: Option<&CfgCursor<'_, ControlParent>>, - ) -> Result<(), E> { + ) { let region = self.position; - f(CfgCursor { point: CfgPoint::RegionEntry(region), parent })?; + f(CfgCursor { point: CfgPoint::RegionEntry(region), parent }); for func_at_node in self.at_children() { - func_at_node.rev_post_order_try_for_each_inner( + func_at_node.rev_post_order_for_each_inner( f, &CfgCursor { point: ControlParent::Region(region), parent }, - )?; + ); } - f(CfgCursor { point: CfgPoint::RegionExit(region), parent }) + f(CfgCursor { point: CfgPoint::RegionExit(region), parent }); } } impl FuncAt<'_, Node> { - fn rev_post_order_try_for_each_inner( + fn rev_post_order_for_each_inner( self, - f: &mut impl FnMut(CfgCursor<'_>) -> Result<(), E>, + f: &mut impl FnMut(CfgCursor<'_>), parent: &CfgCursor<'_, ControlParent>, - ) -> Result<(), E> { + ) { let node = self.position; let parent = Some(parent); - f(CfgCursor { point: CfgPoint::NodeEntry(node), parent })?; + f(CfgCursor { point: CfgPoint::NodeEntry(node), parent }); for ®ion in &self.def().child_regions { - self.at(region).rev_post_order_try_for_each_inner( + self.at(region).rev_post_order_for_each_inner( f, Some(&CfgCursor { point: ControlParent::Node(node), parent }), - )?; + ); } - f(CfgCursor { point: CfgPoint::NodeExit(node), parent }) + f(CfgCursor { point: CfgPoint::NodeExit(node), parent }); } } -impl<'a> FuncLifting<'a> { - fn from_func_decl( - cx: &Context, - func_decl: &'a FuncDecl, - mut alloc_id: impl FnMut() -> Result, - ) -> Result { - let func_id = 
alloc_id()?; - let param_ids = func_decl.params.iter().map(|_| alloc_id()).collect::>()?; - - let func_def_body = match &func_decl.def { - DeclDef::Imported(_) => { - return Ok(Self { - vars: None, - - func_id, - param_ids, - region_inputs_source: Default::default(), - data_inst_output_ids: Default::default(), - label_ids: Default::default(), - blocks: Default::default(), - }); - } - DeclDef::Present(def) => def, - }; +impl<'a> FuncBodyLifting<'a> { + fn from_func_def_body( + lifter: &mut Lifter<'_, impl AllocIds>, + func_def_body: &'a FuncDefBody, + ) -> Self { + let cx = lifter.cx; - let mut region_inputs_source = FxHashMap::default(); + let mut region_inputs_source = EntityOrientedDenseMap::new(); region_inputs_source.insert(func_def_body.body, RegionInputsSource::FuncParams); + let mut data_insts = EntityOrientedDenseMap::new(); // Create a SPIR-V block for every CFG point needing one. let mut blocks = FxIndexMap::default(); @@ -645,7 +759,7 @@ impl<'a> FuncLifting<'a> { let phis = match point { CfgPoint::RegionEntry(region) => { - if region_inputs_source.contains_key(®ion) { + if region_inputs_source.get(region).is_some() { // Region inputs handled by the parent of the region. SmallVec::new() } else { @@ -656,16 +770,16 @@ impl<'a> FuncLifting<'a> { .iter() .map(|&input_var| { let &VarDecl { attrs, ty, .. } = func_def_body.at(input_var).decl(); - Ok(Phi { + Phi { attrs, ty, - result_id: alloc_id()?, + result_id: lifter.alloc_ids.one(), cases: FxIndexMap::default(), default_value: None, - }) + } }) - .collect::>()? + .collect() } } CfgPoint::RegionExit(_) | CfgPoint::UnstructuredEdge { .. } => SmallVec::new(), @@ -692,16 +806,16 @@ impl<'a> FuncLifting<'a> { .map(|(&input_var, &initial_value)| { let &VarDecl { attrs, ty, .. } = func_def_body.at(input_var).decl(); - Ok(Phi { + Phi { attrs, ty, - result_id: alloc_id()?, + result_id: lifter.alloc_ids.one(), cases: FxIndexMap::default(), default_value: Some(initial_value), - }) + } }) - .collect::>()? 
+ .collect() } _ => SmallVec::new(), } @@ -715,16 +829,16 @@ impl<'a> FuncLifting<'a> { .map(|&output_var| { let &VarDecl { attrs, ty, .. } = func_def_body.at(output_var).decl(); - Ok(Phi { + Phi { attrs, ty, - result_id: alloc_id()?, + result_id: lifter.alloc_ids.one(), cases: FxIndexMap::default(), default_value: None, - }) + } }) - .collect::>()? + .collect() } else { SmallVec::new() } @@ -732,21 +846,29 @@ impl<'a> FuncLifting<'a> { }; let insts = match point { - CfgPoint::NodeEntry(node) => match func_def_body.at(node).def().kind { - NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation(_) => { - SmallVec::new() - } + CfgPoint::NodeEntry(node) => { + let func_at_node = func_def_body.at(node); + match func_at_node.def().kind { + NodeKind::Select(_) + | NodeKind::Loop { .. } + | NodeKind::ExitInvocation(_) => SmallVec::new(), - DataInstKind::Scalar(_) - | DataInstKind::Vector(_) - | DataInstKind::FuncCall(_) - | DataInstKind::Mem(_) - | DataInstKind::QPtr(_) - | DataInstKind::SpvInst(_) - | DataInstKind::SpvExtInst { .. } => [node].into_iter().collect(), + DataInstKind::Scalar(_) + | DataInstKind::Vector(_) + | DataInstKind::FuncCall(_) + | DataInstKind::Mem(_) + | DataInstKind::QPtr(_) + | DataInstKind::SpvInst(..) + | DataInstKind::SpvExtInst { .. } => { + data_insts + .insert(node, DataInstLifting::from_inst(lifter, func_at_node)); - DataInstKind::ThunkBind(_) => unreachable!(), - }, + [node].into_iter().collect() + } + + DataInstKind::ThunkBind(_) => unreachable!(), + } + } _ => SmallVec::new(), }; @@ -798,6 +920,7 @@ impl<'a> FuncLifting<'a> { Terminator { attrs: thunk_node_def.map(|def| def.attrs).unwrap_or_default(), kind, + reaggregated_return_value_id: None, inputs, // FIXME(eddyb) try limiting this to repeated target `Region`s // which *also* pass different value inputs. @@ -813,10 +936,16 @@ impl<'a> FuncLifting<'a> { } else { // Structured return out of the function body. 
assert!(region == func_def_body.body); + let inputs = func_def_body.at_body().def().outputs.clone(); Terminator { attrs: AttrSet::default(), kind: TerminatorKind::Return, - inputs: func_def_body.at_body().def().outputs.clone(), + reaggregated_return_value_id: if inputs.len() > 1 { + Some(lifter.alloc_ids.one()) + } else { + None + }, + inputs, targets: [].into_iter().collect(), target_phi_values: FxIndexMap::default(), merge: None, @@ -840,6 +969,7 @@ impl<'a> FuncLifting<'a> { Ok(target) => Terminator { attrs, kind: TerminatorKind::Branch, + reaggregated_return_value_id: None, inputs: [].into_iter().collect(), targets: [CfgPoint::RegionEntry(target)].into_iter().collect(), target_phi_values: [(CfgPoint::RegionEntry(target), target_inputs)] @@ -848,9 +978,16 @@ impl<'a> FuncLifting<'a> { merge: None, }, Err(terminator_kind) => { + let reaggregated_return_value_id = match terminator_kind { + TerminatorKind::Return if target_inputs.len() > 1 => { + Some(lifter.alloc_ids.one()) + } + _ => None, + }; Terminator { attrs, kind: terminator_kind, + reaggregated_return_value_id, // FIXME(eddyb) borrow these whenever possible. 
inputs: target_inputs.iter().copied().collect(), targets: [].into_iter().collect(), @@ -868,6 +1005,7 @@ impl<'a> FuncLifting<'a> { NodeKind::Select(kind) => Terminator { attrs: AttrSet::default(), kind: TerminatorKind::SelectBranch(kind), + reaggregated_return_value_id: None, inputs: [node_def.inputs[0]].into_iter().collect(), targets: node_def .child_regions @@ -883,6 +1021,7 @@ impl<'a> FuncLifting<'a> { Terminator { attrs: AttrSet::default(), kind: TerminatorKind::Branch, + reaggregated_return_value_id: None, inputs: [].into_iter().collect(), targets: [CfgPoint::RegionEntry(body)].into_iter().collect(), target_phi_values: FxIndexMap::default(), @@ -903,6 +1042,7 @@ impl<'a> FuncLifting<'a> { NodeKind::ExitInvocation(kind) => Terminator { attrs: AttrSet::default(), kind: TerminatorKind::ExitInvocation(kind), + reaggregated_return_value_id: None, inputs: node_def.inputs.clone(), targets: [].into_iter().collect(), target_phi_values: FxIndexMap::default(), @@ -915,7 +1055,7 @@ impl<'a> FuncLifting<'a> { | DataInstKind::Mem(_) | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) + | DataInstKind::SpvInst(..) | DataInstKind::SpvExtInst { .. } => unreachable!(), } } @@ -935,6 +1075,7 @@ impl<'a> FuncLifting<'a> { NodeKind::Select { .. 
} => Terminator { attrs: AttrSet::default(), kind: TerminatorKind::Branch, + reaggregated_return_value_id: None, inputs: [].into_iter().collect(), targets: [parent_exit].into_iter().collect(), target_phi_values: region_outputs @@ -961,6 +1102,7 @@ impl<'a> FuncLifting<'a> { Terminator { attrs: AttrSet::default(), kind: TerminatorKind::Branch, + reaggregated_return_value_id: None, inputs: [].into_iter().collect(), targets: [backedge].into_iter().collect(), target_phi_values, @@ -978,6 +1120,7 @@ impl<'a> FuncLifting<'a> { Terminator { attrs: AttrSet::default(), kind: TerminatorKind::SelectBranch(&SelectionKind::BoolCond), + reaggregated_return_value_id: None, inputs: [repeat_condition].into_iter().collect(), targets: [backedge, parent_exit].into_iter().collect(), target_phi_values, @@ -993,7 +1136,7 @@ impl<'a> FuncLifting<'a> { | DataInstKind::Mem(_) | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) + | DataInstKind::SpvInst(..) | DataInstKind::SpvExtInst { .. 
} => unreachable!(), } } @@ -1010,6 +1153,7 @@ impl<'a> FuncLifting<'a> { (_, Some(succ_cursor)) => Terminator { attrs: AttrSet::default(), kind: TerminatorKind::Branch, + reaggregated_return_value_id: None, inputs: [].into_iter().collect(), targets: [succ_cursor.point].into_iter().collect(), target_phi_values: FxIndexMap::default(), @@ -1023,14 +1167,14 @@ impl<'a> FuncLifting<'a> { }; blocks.insert(point, BlockLifting { phis, insts, terminator }); - - Ok(()) }; match &func_def_body.unstructured_cfg { - None => func_def_body.at_body().rev_post_order_try_for_each(visit_cfg_point)?, + None => { + func_def_body.at_body().rev_post_order_for_each(visit_cfg_point); + } Some(cfg) => { for region in cfg.rev_post_order(func_def_body) { - func_def_body.at(region).rev_post_order_try_for_each(&mut visit_cfg_point)?; + func_def_body.at(region).rev_post_order_for_each(&mut visit_cfg_point); // FIXME(eddyb) try limiting this to repeated target `Region`s // which *also* pass different value inputs. @@ -1040,7 +1184,7 @@ impl<'a> FuncLifting<'a> { visit_cfg_point(CfgCursor { point: CfgPoint::UnstructuredEdge(edge), parent: None, - })?; + }); } } } @@ -1115,13 +1259,22 @@ impl<'a> FuncLifting<'a> { phis, insts, terminator: - Terminator { attrs, kind, inputs, targets, target_phi_values, merge }, + Terminator { + attrs, + kind, + reaggregated_return_value_id, + inputs, + targets, + target_phi_values, + merge, + }, } = &blocks[&target]; (phis.is_empty() && insts.is_empty() && *attrs == AttrSet::default() && matches!(kind, TerminatorKind::Branch) + && reaggregated_return_value_id.is_none() && inputs.is_empty() && targets.len() == 1 && target_phi_values.is_empty() @@ -1140,11 +1293,19 @@ impl<'a> FuncLifting<'a> { let block = &blocks[block_idx]; let is_trivial_branch = { - let Terminator { attrs, kind, inputs, targets, target_phi_values, merge } = - &block.terminator; + let Terminator { + attrs, + kind, + reaggregated_return_value_id, + inputs, + targets, + target_phi_values, + merge, + 
} = &block.terminator; (*attrs == AttrSet::default() && matches!(kind, TerminatorKind::Branch) + && reaggregated_return_value_id.is_none() && inputs.is_empty() && targets.len() == 1 && target_phi_values.is_empty() @@ -1171,6 +1332,7 @@ impl<'a> FuncLifting<'a> { Terminator { attrs: Default::default(), kind: TerminatorKind::Unreachable, + reaggregated_return_value_id: None, inputs: Default::default(), targets: Default::default(), target_phi_values: Default::default(), @@ -1225,38 +1387,126 @@ impl<'a> FuncLifting<'a> { } } - let all_insts_with_output = blocks - .values() - .flat_map(|block| block.insts.iter().copied()) - .filter(|&inst| !func_def_body.at(inst).def().outputs.is_empty()); + Self { + vars: &func_def_body.vars, - Ok(Self { - vars: Some(&func_def_body.vars), - - func_id, - param_ids, region_inputs_source, - data_inst_output_ids: all_insts_with_output - .map(|inst| Ok((inst, alloc_id()?))) - .collect::>()?, - label_ids: blocks - .keys() - .map(|&point| Ok((point, alloc_id()?))) - .collect::>()?, + data_insts, + + label_ids: blocks.keys().map(|&point| (point, lifter.alloc_ids.one())).collect(), blocks, - }) + } + } +} + +impl DataInstLifting { + fn from_inst( + lifter: &mut Lifter<'_, impl AllocIds>, + func_at_inst: FuncAt<'_, DataInst>, + ) -> Self { + let wk = &spec::Spec::get().well_known; + let cx = lifter.cx; + + let inst_def = func_at_inst.def(); + let output_types = inst_def.outputs.iter().map(|&o| func_at_inst.at(o).decl().ty); + + let mut new_spv_inst_lowering = spv::InstLowering::default(); + let spv_inst_lowering = match &inst_def.kind { + NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation(_) => { + unreachable!() + } + + DataInstKind::Scalar(_) + | DataInstKind::Vector(_) + | DataInstKind::Mem(MemOp::Load { offset: None } | MemOp::Store { offset: None }) => { + // FIXME(eddyb) deduplicate creating this `OpTypeStruct`. 
+ if output_types.len() > 1 { + let tuple_ty = + cx.intern(spv::Inst::from(wk.OpTypeStruct).into_canonical_type_with( + cx, + output_types.clone().map(TypeOrConst::Type).collect(), + )); + lifter.visit_type_use(tuple_ty); + new_spv_inst_lowering.disaggregated_output = Some(tuple_ty); + } + &new_spv_inst_lowering + } + DataInstKind::FuncCall(callee) => { + if output_types.len() > 1 { + new_spv_inst_lowering.disaggregated_output = + Some(lifter.ids.funcs[callee].spv_func_ret_type); + } + &new_spv_inst_lowering + } + DataInstKind::SpvInst(_, lowering) | DataInstKind::SpvExtInst { lowering, .. } => { + lowering + } + + DataInstKind::Mem(_) | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_) => { + // Disallowed while visiting. + unreachable!() + } + }; + + let reaggregate_inputs = spv_inst_lowering + .disaggregated_inputs + .iter() + .map(|&(_, ty)| { + let op_undef = + cx.intern(ConstDef { attrs: AttrSet::default(), ty, kind: ConstKind::Undef }); + lifter.visit_const_use(op_undef); + let op_composite_insert_result_ids = + (lifter.alloc_ids)(cx[ty].disaggregated_leaf_count()); + ReaggregateFromLeaves { op_undef, op_composite_insert_result_ids } + }) + .collect(); + + // `OpFunctionCall` always has a result (but may be `OpTypeVoid`-typed). 
+ let has_result = matches!(inst_def.kind, DataInstKind::FuncCall(_)) + || spv_inst_lowering.disaggregated_output.is_some() + || output_types.len() > 0; + let result_id = if has_result { Some(lifter.alloc_ids.one()) } else { None }; + + let disaggregate_result = + spv_inst_lowering.disaggregated_output.map(|ty| DisaggregateToLeaves { + op_composite_extract_result_ids: (lifter.alloc_ids)( + cx[ty].disaggregated_leaf_count(), + ), + }); + + DataInstLifting { result_id, disaggregate_result, reaggregate_inputs } + } + + fn id_for_output(&self, output_idx: u32) -> spv::Id { + let output_idx = usize::try_from(output_idx).unwrap(); + if let Some(disaggregate_result) = &self.disaggregate_result { + let result_id = disaggregate_result + .op_composite_extract_result_ids + .start + .checked_add(output_idx.try_into().unwrap()) + .unwrap(); + assert!(disaggregate_result.op_composite_extract_result_ids.contains(&result_id)); + result_id + } else { + assert_eq!(output_idx, 0); + self.result_id.unwrap() + } } } -/// "Maybe-decorated "lazy" SPIR-V instruction, allowing separately emitting -/// decorations from attributes, and the instruction itself, without eagerly -/// allocating all the instructions. +/// Maybe-decorated "lazy" SPIR-V instruction, allowing separately emitting +/// *both* decorations (from certain [`Attr`]s), *and* the instruction itself, +/// without eagerly allocating all the instructions. +/// +/// Note that SPIR-T disaggregating SPIR-V `OpTypeStruct`/`OpTypeArray`s values +/// may require additional [`spv::Inst`]s for each `LazyInst`, either for +/// reaggregating inputs, or taking apart aggregate outputs. 
#[derive(Copy, Clone)] enum LazyInst<'a, 'b> { Global(Global), OpFunction { - func_id: spv::Id, func_decl: &'a FuncDecl, + func_ids: &'b FuncIds<'a>, }, OpFunctionParameter { param_id: spv::Id, @@ -1266,27 +1516,37 @@ enum LazyInst<'a, 'b> { label_id: spv::Id, }, OpPhi { - parent_func: &'b FuncLifting<'a>, + parent_func_ids: &'b FuncIds<'a>, phi: &'b Phi, }, DataInst { - parent_func: &'b FuncLifting<'a>, - result_id: Option, + parent_func_ids: &'b FuncIds<'a>, data_inst_def: &'a DataInstDef, + lifting: &'b DataInstLifting, }, + // FIXME(eddyb) should merge instructions be generated by `Terminator`? Merge(Merge), Terminator { - parent_func: &'b FuncLifting<'a>, + parent_func_ids: &'b FuncIds<'a>, terminator: &'b Terminator<'a>, }, OpFunctionEnd, } +/// [`Attr::DbgSrcLoc`], extracted from [`AttrSet`], and used for emitting +/// `OpLine`/`OpNoLine` SPIR-V instructions. +#[derive(Copy, Clone, PartialEq, Eq)] +struct SpvDebugLine { + file_path_id: spv::Id, + line: u32, + col: u32, +} + impl LazyInst<'_, '_> { fn result_id_attrs_and_import( self, module: &Module, - ids: &AllocatedIds<'_>, + ids: &ModuleIds<'_>, ) -> (Option, AttrSet, Option) { let cx = module.cx_ref(); @@ -1320,72 +1580,97 @@ impl LazyInst<'_, '_> { }; (Some(ids.globals[&global]), attrs, import) } - Self::OpFunction { func_id, func_decl } => { + Self::OpFunction { func_decl, func_ids } => { let import = match func_decl.def { DeclDef::Imported(import) => Some(import), DeclDef::Present(_) => None, }; - (Some(func_id), func_decl.attrs, import) + (Some(func_ids.func_id), func_decl.attrs, import) } Self::OpFunctionParameter { param_id, param } => (Some(param_id), param.attrs, None), Self::OpLabel { label_id } => (Some(label_id), AttrSet::default(), None), - Self::OpPhi { parent_func: _, phi } => (Some(phi.result_id), phi.attrs, None), - Self::DataInst { parent_func: _, result_id, data_inst_def } => { - (result_id, data_inst_def.attrs, None) + Self::OpPhi { parent_func_ids: _, phi } => (Some(phi.result_id), 
phi.attrs, None), + Self::DataInst { parent_func_ids: _, data_inst_def, lifting } => { + (lifting.result_id, data_inst_def.attrs, None) } Self::Merge(_) => (None, AttrSet::default(), None), - Self::Terminator { parent_func: _, terminator } => (None, terminator.attrs, None), + Self::Terminator { parent_func_ids: _, terminator } => (None, terminator.attrs, None), Self::OpFunctionEnd => (None, AttrSet::default(), None), } } - fn to_inst_and_attrs( + /// Expand this `LazyInst` to one or more (see disaggregation/reaggregation + /// note in [`LazyInst`]'s doc comment for when it can be more than one) + /// [`spv::Inst`]s (with their respective [`SpvDebugLine`]s, if applicable), + /// with `each_spv_inst_with_debug_line` being called for each one. + fn for_each_spv_inst_with_debug_line( self, module: &Module, - ids: &AllocatedIds<'_>, - ) -> (spv::InstWithIds, AttrSet) { + ids: &ModuleIds<'_>, + mut each_spv_inst_with_debug_line: impl FnMut(spv::InstWithIds, Option), + ) { let wk = &spec::Spec::get().well_known; let cx = module.cx_ref(); - let value_to_id = |parent_func: &FuncLifting<'_>, v| match v { + let value_to_id = |parent_func_ids: &FuncIds<'_>, v| match v { Value::Const(ct) => match cx[ct].kind { ConstKind::SpvStringLiteralForExtInst(s) => ids.debug_strings[&cx[s]], _ => ids.globals[&Global::Const(ct)], }, - Value::Var(v) => match parent_func.vars.unwrap()[v].kind() { - VarKind::RegionInput { region, input_idx } => { - let input_idx = usize::try_from(input_idx).unwrap(); - match parent_func.region_inputs_source.get(&region) { - Some(RegionInputsSource::FuncParams) => parent_func.param_ids[input_idx], - Some(&RegionInputsSource::LoopHeaderPhis(loop_node)) => { - parent_func.blocks[&CfgPoint::NodeEntry(loop_node)].phis[input_idx] - .result_id - } - None => { - parent_func.blocks[&CfgPoint::RegionEntry(region)].phis[input_idx] - .result_id + Value::Var(v) => { + let parent_func_body_lifting = parent_func_ids.body.as_ref().unwrap(); + match
parent_func_body_lifting.vars[v].kind() { + VarKind::RegionInput { region, input_idx } => { + let input_idx = usize::try_from(input_idx).unwrap(); + match parent_func_body_lifting.region_inputs_source.get(region) { + Some(RegionInputsSource::FuncParams) => { + let param_id = parent_func_ids + .param_ids + .start + .checked_add(input_idx.try_into().unwrap()) + .unwrap(); + assert!(parent_func_ids.param_ids.contains(&param_id)); + param_id + } + Some(&RegionInputsSource::LoopHeaderPhis(loop_node)) => { + parent_func_body_lifting.blocks[&CfgPoint::NodeEntry(loop_node)] + .phis[input_idx] + .result_id + } + None => { + parent_func_body_lifting.blocks[&CfgPoint::RegionEntry(region)].phis + [input_idx] + .result_id + } } } - } - VarKind::NodeOutput { node, output_idx } => { - if let Some(&data_inst_output_id) = parent_func.data_inst_output_ids.get(&node) - { - // HACK(eddyb) multi-output instructions don't exist pre-disaggregate. - assert_eq!(output_idx, 0); - data_inst_output_id - } else { - parent_func.blocks[&CfgPoint::NodeExit(node)].phis - [usize::try_from(output_idx).unwrap()] - .result_id + VarKind::NodeOutput { node, output_idx } => { + if let Some(inst_lifting) = parent_func_body_lifting.data_insts.get(node) { + inst_lifting.id_for_output(output_idx) + } else { + parent_func_body_lifting.blocks[&CfgPoint::NodeExit(node)].phis + [usize::try_from(output_idx).unwrap()] + .result_id + } } } - }, + } }; let (result_id, attrs, _) = self.result_id_attrs_and_import(module, ids); - let inst = match self { - Self::Global(global) => match global { + + let spv_debug_line = attrs.dbg_src_loc(cx).map(|dbg_src_loc| SpvDebugLine { + file_path_id: ids.debug_strings[&cx[dbg_src_loc.file_path]], + line: dbg_src_loc.start_line_col.0, + col: dbg_src_loc.start_line_col.1, + }); + + // HACK(eddyb) there is no need to allow `spv_debug_line` to vary per-inst.
+ let mut each_inst = |inst| each_spv_inst_with_debug_line(inst, spv_debug_line); + + match self { + Self::Global(global) => each_inst(match global { Global::Type(ty) => { let ty_def = &cx[ty]; match spv::Inst::from_canonical_type(cx, &ty_def.kind) @@ -1397,7 +1682,7 @@ impl LazyInst<'_, '_> { } Ok((spv_inst, type_and_const_inputs)) - | Err(TypeKind::SpvInst { spv_inst, type_and_const_inputs }) => { + | Err(TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. }) => { spv::InstWithIds { without_ids: spv_inst.clone(), result_type_id: None, @@ -1475,9 +1760,19 @@ impl LazyInst<'_, '_> { spv::Imm::Short(wk.StorageClass, sc) } }; - let initializer = match gv_decl.def { + let initializer = match &gv_decl.def { DeclDef::Imported(_) => None, DeclDef::Present(GlobalVarDefBody { initializer }) => initializer + .as_ref() + .map(|initializer| match initializer { + // Disallowed while visiting. + GlobalVarInit::Data(_) => unreachable!(), + + &GlobalVarInit::Direct(ct) => ct, + GlobalVarInit::SpvAggregate { .. } => { + ids.reaggregated_global_var_initializers[&global_var] + } + }) .map(|initializer| ids.globals[&Global::Const(initializer)]), }; spv::InstWithIds { @@ -1502,8 +1797,8 @@ impl LazyInst<'_, '_> { Err(ConstKind::SpvStringLiteralForExtInst(_)) => unreachable!(), } } - }, - Self::OpFunction { func_id: _, func_decl } => { + }), + Self::OpFunction { func_decl: _, func_ids } => { // FIXME(eddyb) make this less of a search and more of a // lookup by splitting attrs into key and value parts. 
let func_ctrl = cx[attrs] @@ -1519,30 +1814,29 @@ impl LazyInst<'_, '_> { }) .unwrap_or(0); - spv::InstWithIds { + each_inst(spv::InstWithIds { without_ids: spv::Inst { opcode: wk.OpFunction, imms: iter::once(spv::Imm::Short(wk.FunctionControl, func_ctrl)).collect(), }, - result_type_id: Some(ids.globals[&Global::Type(func_decl.ret_type)]), + result_type_id: Some(ids.globals[&Global::Type(func_ids.spv_func_ret_type)]), result_id, - ids: iter::once(ids.globals[&Global::Type(func_decl.spv_func_type(cx))]) - .collect(), - } + ids: iter::once(ids.globals[&Global::Type(func_ids.spv_func_type)]).collect(), + }); } - Self::OpFunctionParameter { param_id: _, param } => spv::InstWithIds { + Self::OpFunctionParameter { param_id: _, param } => each_inst(spv::InstWithIds { without_ids: wk.OpFunctionParameter.into(), result_type_id: Some(ids.globals[&Global::Type(param.ty)]), result_id, ids: [].into_iter().collect(), - }, - Self::OpLabel { label_id: _ } => spv::InstWithIds { + }), + Self::OpLabel { label_id: _ } => each_inst(spv::InstWithIds { without_ids: wk.OpLabel.into(), result_type_id: None, result_id, ids: [].into_iter().collect(), - }, - Self::OpPhi { parent_func, phi } => spv::InstWithIds { + }), + Self::OpPhi { parent_func_ids, phi } => each_inst(spv::InstWithIds { without_ids: wk.OpPhi.into(), result_type_id: Some(ids.globals[&Global::Type(phi.ty)]), result_id: Some(phi.result_id), @@ -1550,61 +1844,164 @@ impl LazyInst<'_, '_> { .cases .iter() .flat_map(|(&source_point, &v)| { - [value_to_id(parent_func, v), parent_func.label_ids[&source_point]] + [ + value_to_id(parent_func_ids, v), + parent_func_ids.body.as_ref().unwrap().label_ids[&source_point], + ] }) .collect(), - }, - Self::DataInst { parent_func, result_id: _, data_inst_def } => { + }), + Self::DataInst { parent_func_ids, data_inst_def, lifting } => { + let parent_func_body_lifting = parent_func_ids.body.as_ref().unwrap(); + let kind = &data_inst_def.kind; - let (inst, extra_initial_id_operand) = - match 
spv::Inst::from_canonical_node_kind(kind).ok_or(kind) { - Ok(spv_inst) => (spv_inst, None), + let output_types = + data_inst_def.outputs.iter().map(|&o| parent_func_body_lifting.vars[o].ty); - Err( - NodeKind::Select(_) - | NodeKind::Loop { .. } - | NodeKind::ExitInvocation(_), - ) => unreachable!(), + let mut id_operands = SmallVec::new(); - Err(DataInstKind::Scalar(_) | DataInstKind::Vector(_)) => { - unreachable!("should've been handled as canonical") + let mut new_spv_inst_lowering = spv::InstLowering::default(); + let mut override_result_type = None; + let (inst, spv_inst_lowering) = match spv::Inst::from_canonical_node_kind(kind) + .ok_or(kind) + { + Ok(spv_inst) => { + // FIXME(eddyb) deduplicate creating this `OpTypeStruct`. + if output_types.len() > 1 { + new_spv_inst_lowering.disaggregated_output = Some(cx.intern( + spv::Inst::from(wk.OpTypeStruct).into_canonical_type_with( + cx, + output_types.clone().map(TypeOrConst::Type).collect(), + ), + )); } + (spv_inst, &new_spv_inst_lowering) + } - Err( - DataInstKind::Mem(_) - | DataInstKind::QPtr(_) - | DataInstKind::ThunkBind(_), - ) => { - // Disallowed while visiting. - unreachable!() - } + Err( + NodeKind::Select(_) | NodeKind::Loop { .. } | NodeKind::ExitInvocation(_), + ) => unreachable!(), - Err(&DataInstKind::FuncCall(callee)) => { - (wk.OpFunctionCall.into(), Some(ids.funcs[&callee].func_id)) + Err(DataInstKind::Scalar(_) | DataInstKind::Vector(_)) => { + unreachable!("should've been handled as canonical") + } + + Err( + DataInstKind::Mem(_) | DataInstKind::QPtr(_) | DataInstKind::ThunkBind(_), + ) => { + // Disallowed while visiting. + unreachable!() + } + + // `OpFunctionCall` always has a result (but may be `OpTypeVoid`-typed). 
+ Err(DataInstKind::FuncCall(callee)) => { + let callee_ids = &ids.funcs[callee]; + override_result_type = Some(callee_ids.spv_func_ret_type); + if output_types.len() > 1 { + new_spv_inst_lowering.disaggregated_output = override_result_type; } - Err(DataInstKind::SpvInst(inst)) => (inst.clone(), None), - Err(&DataInstKind::SpvExtInst { ext_set, inst }) => ( + id_operands.push(callee_ids.func_id); + (wk.OpFunctionCall.into(), &new_spv_inst_lowering) + } + Err(DataInstKind::SpvInst(inst, lowering)) => (inst.clone(), lowering), + Err(DataInstKind::SpvExtInst { ext_set, inst, lowering }) => { + id_operands.push(ids.ext_inst_imports[&cx[*ext_set]]); + ( spv::Inst { opcode: wk.OpExtInst, - imms: iter::once(spv::Imm::Short(wk.LiteralExtInstInteger, inst)) + imms: [spv::Imm::Short(wk.LiteralExtInstInteger, *inst)] + .into_iter() .collect(), }, - Some(ids.ext_inst_imports[&cx[ext_set]]), - ), + lowering, + ) + } + }; + + let int_imm = |i| spv::Imm::Short(wk.LiteralInteger, i); + + // Emit any `OpCompositeInsert`s needed by the inputs, first, + // while gathering the `id_operands` for the instruction itself. 
+ let mut reaggregate_inputs = lifting.reaggregate_inputs.iter(); + for id_operand in spv_inst_lowering.reaggreate_inputs(&data_inst_def.inputs) { + let value_to_id = |v| value_to_id(parent_func_ids, v); + let id_operand = match id_operand { + spv::ReaggregatedIdOperand::Direct(v) => value_to_id(v), + spv::ReaggregatedIdOperand::Aggregate { ty, leaves } => { + let result_type_id = Some(ids.globals[&Global::Type(ty)]); + + let ReaggregateFromLeaves { op_undef, op_composite_insert_result_ids } = + reaggregate_inputs.next().unwrap(); + let mut aggregate_id = ids.globals[&Global::Const(*op_undef)]; + let leaf_paths = ty + .disaggregated_leaf_types(cx) + .map_with_parent_component_path(|_, leaf_path| { + leaf_path.iter().map(|&(_, i)| i).map(int_imm).collect() + }); + for ((leaf_path_imms, op_composite_insert_result_id), &leaf_value) in + leaf_paths + .zip_eq(op_composite_insert_result_ids.iter()) + .zip_eq(leaves) + { + each_inst(spv::InstWithIds { + without_ids: spv::Inst { + opcode: wk.OpCompositeInsert, + imms: leaf_path_imms, + }, + result_type_id, + result_id: Some(op_composite_insert_result_id), + ids: [value_to_id(leaf_value), aggregate_id] + .into_iter() + .collect(), + }); + aggregate_id = op_composite_insert_result_id; + } + aggregate_id + } }; - spv::InstWithIds { + id_operands.push(id_operand); + } + assert!(reaggregate_inputs.next().is_none()); + + let result_type = override_result_type + .or(spv_inst_lowering.disaggregated_output) + .or_else(|| output_types.at_most_one().ok().unwrap()); + each_inst(spv::InstWithIds { without_ids: inst, - // HACK(eddyb) multi-output instructions don't exist pre-disaggregate. 
- result_type_id: (data_inst_def.outputs.iter().at_most_one().ok().unwrap()) - .map(|&o| ids.globals[&Global::Type(parent_func.vars.unwrap()[o].ty)]), + result_type_id: result_type.map(|ty| ids.globals[&Global::Type(ty)]), result_id, - ids: extra_initial_id_operand - .into_iter() - .chain(data_inst_def.inputs.iter().map(|&v| value_to_id(parent_func, v))) - .collect(), + ids: id_operands, + }); + + // Emit any `OpCompositeExtract`s needed for the result, last. + if let Some(DisaggregateToLeaves { op_composite_extract_result_ids }) = + &lifting.disaggregate_result + { + let aggregate_id = result_id.unwrap(); + let leaf_types_and_paths = spv_inst_lowering + .disaggregated_output + .unwrap() + .disaggregated_leaf_types(cx) + .map_with_parent_component_path(|leaf_type, leaf_path| { + (leaf_type, leaf_path.iter().map(|&(_, i)| i).map(int_imm).collect()) + }); + for ((leaf_type, leaf_path_imms), op_composite_extract_result_id) in + leaf_types_and_paths.zip_eq(op_composite_extract_result_ids.iter()) + { + each_inst(spv::InstWithIds { + without_ids: spv::Inst { + opcode: wk.OpCompositeExtract, + imms: leaf_path_imms, + }, + result_type_id: Some(ids.globals[&Global::Type(leaf_type)]), + result_id: Some(op_composite_extract_result_id), + ids: [aggregate_id].into_iter().collect(), + }); + } } } - Self::Merge(Merge::Selection(merge_label_id)) => spv::InstWithIds { + // FIXME(eddyb) should merge instructions be generated by `Terminator`? 
+ Self::Merge(Merge::Selection(merge_label_id)) => each_inst(spv::InstWithIds { without_ids: spv::Inst { opcode: wk.OpSelectionMerge, imms: [spv::Imm::Short(wk.SelectionControl, 0)].into_iter().collect(), @@ -1612,11 +2009,11 @@ impl LazyInst<'_, '_> { result_type_id: None, result_id: None, ids: [merge_label_id].into_iter().collect(), - }, + }), Self::Merge(Merge::Loop { loop_merge: merge_label_id, loop_continue: continue_label_id, - }) => spv::InstWithIds { + }) => each_inst(spv::InstWithIds { without_ids: spv::Inst { opcode: wk.OpLoopMerge, imms: [spv::Imm::Short(wk.LoopControl, 0)].into_iter().collect(), @@ -1624,15 +2021,38 @@ impl LazyInst<'_, '_> { result_type_id: None, result_id: None, ids: [merge_label_id, continue_label_id].into_iter().collect(), - }, - Self::Terminator { parent_func, terminator } => { - let mut ids: SmallVec<[_; 4]> = terminator + }), + Self::Terminator { parent_func_ids, terminator } => { + let parent_func_body_lifting = parent_func_ids.body.as_ref().unwrap(); + let mut id_operands = terminator .inputs .iter() - .map(|&v| value_to_id(parent_func, v)) - .chain(terminator.targets.iter().map(|&target| parent_func.label_ids[&target])) + .map(|&v| value_to_id(parent_func_ids, v)) + .chain( + terminator + .targets + .iter() + .map(|&target| parent_func_body_lifting.label_ids[&target]), + ) .collect(); + if let Some(reaggregated_value_id) = terminator.reaggregated_return_value_id { + assert!( + matches!(terminator.kind, TerminatorKind::Return) + && terminator.inputs.len() > 1 + ); + + each_inst(spv::InstWithIds { + without_ids: wk.OpCompositeConstruct.into(), + result_type_id: Some( + ids.globals[&Global::Type(parent_func_ids.spv_func_ret_type)], + ), + result_id: Some(reaggregated_value_id), + ids: id_operands, + }); + id_operands = [reaggregated_value_id].into_iter().collect(); + } + // FIXME(eddyb) move some of this to `spv::canonical`. 
let inst = match terminator.kind { TerminatorKind::Unreachable => wk.OpUnreachable.into(), @@ -1640,6 +2060,8 @@ impl LazyInst<'_, '_> { if terminator.inputs.is_empty() { wk.OpReturn.into() } else { + // Multiple return values get reaggregated above. + assert_eq!(id_operands.len(), 1); wk.OpReturnValue.into() } } @@ -1654,8 +2076,8 @@ impl LazyInst<'_, '_> { } TerminatorKind::SelectBranch(SelectionKind::Switch { case_consts }) => { // HACK(eddyb) move the default case from last back to first. - let default_target = ids.pop().unwrap(); - ids.insert(1, default_target); + let default_target = id_operands.pop().unwrap(); + id_operands.insert(1, default_target); spv::Inst { opcode: wk.OpSwitch, @@ -1666,16 +2088,20 @@ impl LazyInst<'_, '_> { } } }; - spv::InstWithIds { without_ids: inst, result_type_id: None, result_id: None, ids } + each_inst(spv::InstWithIds { + without_ids: inst, + result_type_id: None, + result_id: None, + ids: id_operands, + }); } - Self::OpFunctionEnd => spv::InstWithIds { + Self::OpFunctionEnd => each_inst(spv::InstWithIds { without_ids: wk.OpFunctionEnd.into(), result_type_id: None, result_id: None, ids: [].into_iter().collect(), - }, - }; - (inst, attrs) + }), + } } } @@ -1702,18 +2128,6 @@ impl Module { } }; - // Collect uses scattered throughout the module, that require def IDs. - let mut needs_ids_collector = NeedsIdsCollector { - cx: &cx, - module: self, - ext_inst_imports: BTreeSet::new(), - debug_strings: BTreeSet::new(), - globals: FxIndexSet::default(), - global_vars_seen: FxIndexSet::default(), - funcs: FxIndexSet::default(), - }; - needs_ids_collector.visit_module(self); - // Because `GlobalVar`s are given IDs by the `Const`s that point to them // (i.e. `ConstKind::PtrToGlobalVar`), any `GlobalVar`s in other positions // require extra care to ensure the ID-giving `Const` is visited. 
@@ -1726,76 +2140,106 @@ impl Module { }); Global::Const(ptr_to_global_var) }; - for &gv in &needs_ids_collector.global_vars_seen { - needs_ids_collector.globals.insert(global_var_to_id_giving_global(gv)); - } - // IDs can be allocated once we have the full sets needing them, whether - // sorted by contents, or ordered by the first occurence in the module. - let mut id_bound = NonZeroU32::MIN; - let ids = needs_ids_collector.alloc_ids(|| { - let id = id_bound; + // Collect uses scattered throughout the module, allocating IDs for them. + let (ids, id_bound) = { + let mut id_bound = NonZeroUsize::MIN; + let mut lifter = Lifter { + cx: &cx, + module: self, + alloc_ids: |count| { + let start = id_bound; + let end = + start.checked_add(count).expect("overflowing `usize` should be impossible"); + id_bound = end; + + // NOTE(eddyb) `MAX` is just a placeholder - the check for overflows + // is done below, after all IDs that may be allocated, have been + // (this is in order to not need this closure to return a `Result`). + let from_usize = + |id| spv::Id::try_from(id).unwrap_or(spv::Id::new(u32::MAX).unwrap()); + from_usize(start)..from_usize(end) + }, + ids: ModuleIds::default(), + global_vars_seen: FxIndexSet::default(), + }; + lifter.visit_module(self); + + // See comment on `global_var_to_id_giving_global` for why this is here. + for &gv in &lifter.global_vars_seen { + lifter + .ids + .globals + .entry(global_var_to_id_giving_global(gv)) + .or_insert_with(|| lifter.alloc_ids.one()); + } - match id_bound.checked_add(1) { - Some(new_bound) => { - id_bound = new_bound; - Ok(id) - } - None => Err(io::Error::new( + let ids = lifter.ids; + + let id_bound = spv::Id::try_from(id_bound).ok().ok_or_else(|| { + io::Error::new( io::ErrorKind::InvalidData, "ID bound of SPIR-V module doesn't fit in 32 bits", - )), - } - })?; + ) + })?; + + (ids, id_bound) + }; // HACK(eddyb) allow `move` closures below to reference `cx` or `ids` // without causing unwanted moves out of them. 
let (cx, ids) = (&*cx, &ids); let global_and_func_insts = ids.globals.keys().copied().map(LazyInst::Global).chain( - ids.funcs.iter().flat_map(|(&func, func_lifting)| { + ids.funcs.iter().flat_map(|(&func, func_ids)| { let func_decl = &self.funcs[func]; - let func_def_body = match &func_decl.def { - DeclDef::Imported(_) => None, - DeclDef::Present(def) => Some(def), + let body_with_lifting = match (&func_decl.def, &func_ids.body) { + (DeclDef::Imported(_), None) => None, + (DeclDef::Present(def), Some(func_body_lifting)) => { + Some((def, func_body_lifting)) + } + _ => unreachable!(), }; - iter::once(LazyInst::OpFunction { func_id: func_lifting.func_id, func_decl }) - .chain(func_lifting.param_ids.iter().zip(&func_decl.params).map( - |(&param_id, param)| LazyInst::OpFunctionParameter { param_id, param }, - )) - .chain(func_lifting.blocks.iter().flat_map(move |(point, block)| { + let param_insts = func_ids + .param_ids + .iter() + .zip_eq(&func_decl.params) + .map(|(param_id, param)| LazyInst::OpFunctionParameter { param_id, param }); + let body_insts = body_with_lifting.map(|(func_def_body, func_body_lifting)| { + func_body_lifting.blocks.iter().flat_map(move |(point, block)| { let BlockLifting { phis, insts, terminator } = block; - iter::once(LazyInst::OpLabel { label_id: func_lifting.label_ids[point] }) - .chain( - phis.iter() - .map(|phi| LazyInst::OpPhi { parent_func: func_lifting, phi }), - ) - .chain(insts.iter().copied().map(move |inst| { - let data_inst_def = func_def_body.unwrap().at(inst).def(); - LazyInst::DataInst { - parent_func: func_lifting, - // HACK(eddyb) multi-output instructions don't exist pre-disaggregate. 
- result_id: (data_inst_def.outputs.iter().at_most_one().ok()) - .unwrap() - .map(|_| func_lifting.data_inst_output_ids[&inst]), - data_inst_def, + iter::once(LazyInst::OpLabel { + label_id: func_body_lifting.label_ids[point], + }) + .chain( + phis.iter() + .map(|phi| LazyInst::OpPhi { parent_func_ids: func_ids, phi }), + ) + .chain(insts.iter().copied().map(move |inst| LazyInst::DataInst { + parent_func_ids: func_ids, + data_inst_def: func_def_body.at(inst).def(), + lifting: &func_body_lifting.data_insts[inst], + })) + .chain(terminator.merge.map(|merge| { + LazyInst::Merge(match merge { + Merge::Selection(merge) => { + Merge::Selection(func_body_lifting.label_ids[&merge]) } - })) - .chain(terminator.merge.map(|merge| { - LazyInst::Merge(match merge { - Merge::Selection(merge) => { - Merge::Selection(func_lifting.label_ids[&merge]) - } - Merge::Loop { loop_merge, loop_continue } => Merge::Loop { - loop_merge: func_lifting.label_ids[&loop_merge], - loop_continue: func_lifting.label_ids[&loop_continue], - }, - }) - })) - .chain([LazyInst::Terminator { parent_func: func_lifting, terminator }]) - })) + Merge::Loop { loop_merge, loop_continue } => Merge::Loop { + loop_merge: func_body_lifting.label_ids[&loop_merge], + loop_continue: func_body_lifting.label_ids[&loop_continue], + }, + }) + })) + .chain([LazyInst::Terminator { parent_func_ids: func_ids, terminator }]) + }) + }); + + iter::once(LazyInst::OpFunction { func_decl, func_ids }) + .chain(param_insts) + .chain(body_insts.into_iter().flatten()) .chain([LazyInst::OpFunctionEnd]) }), ); @@ -2046,52 +2490,56 @@ impl Module { let mut current_debug_line = None; let mut current_block_id = None; // HACK(eddyb) for `current_debug_line` resets. 
for lazy_inst in global_and_func_insts { - let (inst, attrs) = lazy_inst.to_inst_and_attrs(self, ids); + let mut result: Result<(), _> = Ok(()); + lazy_inst.for_each_spv_inst_with_debug_line(self, ids, |inst, new_debug_line| { + if result.is_err() { + return; + } - // Reset line debuginfo when crossing/leaving blocks. - let new_block_id = if inst.opcode == wk.OpLabel { - Some(inst.result_id.unwrap()) - } else if inst.opcode == wk.OpFunctionEnd { - None - } else { - current_block_id - }; - if current_block_id != new_block_id { - current_debug_line = None; - } - current_block_id = new_block_id; - - // Determine whether to emit `OpLine`/`OpNoLine` before `inst`, - // in order to end up with the expected line debuginfo. - // FIXME(eddyb) make this less of a search and more of a - // lookup by splitting attrs into key and value parts. - let new_debug_line = attrs.dbg_src_loc(cx).map(|dbg_src_loc| { - (ids.debug_strings[&cx[dbg_src_loc.file_path]], dbg_src_loc.start_line_col) - }); - if current_debug_line != new_debug_line { - let (opcode, imms, ids) = match new_debug_line { - Some((file_path_id, (line, col))) => ( - wk.OpLine, - [ - spv::Imm::Short(wk.LiteralInteger, line), - spv::Imm::Short(wk.LiteralInteger, col), - ] - .into_iter() - .collect(), - iter::once(file_path_id).collect(), - ), - None => (wk.OpNoLine, [].into_iter().collect(), [].into_iter().collect()), + // Reset line debuginfo when crossing/leaving blocks. 
+ let new_block_id = if inst.opcode == wk.OpLabel { + Some(inst.result_id.unwrap()) + } else if inst.opcode == wk.OpFunctionEnd { + None + } else { + current_block_id }; - emitter.push_inst(&spv::InstWithIds { - without_ids: spv::Inst { opcode, imms }, - result_type_id: None, - result_id: None, - ids, - })?; - } - current_debug_line = new_debug_line; + if current_block_id != new_block_id { + current_debug_line = None; + } + current_block_id = new_block_id; + + // Determine whether to emit `OpLine`/`OpNoLine` before `inst`, + // in order to end up with the expected line debuginfo. + if current_debug_line != new_debug_line { + let (opcode, imms, ids) = match new_debug_line { + Some(SpvDebugLine { file_path_id, line, col }) => ( + wk.OpLine, + [ + spv::Imm::Short(wk.LiteralInteger, line), + spv::Imm::Short(wk.LiteralInteger, col), + ] + .into_iter() + .collect(), + iter::once(file_path_id).collect(), + ), + None => (wk.OpNoLine, [].into_iter().collect(), [].into_iter().collect()), + }; + result = emitter.push_inst(&spv::InstWithIds { + without_ids: spv::Inst { opcode, imms }, + result_type_id: None, + result_id: None, + ids, + }); + if result.is_err() { + return; + } + } + current_debug_line = new_debug_line; - emitter.push_inst(&inst)?; + result = emitter.push_inst(&inst); + }); + result?; } Ok(emitter) diff --git a/src/spv/lower.rs b/src/spv/lower.rs index 6ef88921..76a44658 100644 --- a/src/spv/lower.rs +++ b/src/spv/lower.rs @@ -3,19 +3,20 @@ use crate::cf::{self, SelectionKind}; use crate::spv::{self, spec}; // FIXME(eddyb) import more to avoid `crate::` everywhere. 
-use crate::func_at::FuncAtMut; +use crate::func_at::{FuncAt, FuncAtMut}; use crate::{ AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, DataInstDef, DataInstKind, DbgSrcLoc, DeclDef, Diag, EntityDefs, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, - FuncParam, FxIndexMap, GlobalVarDecl, GlobalVarDefBody, Import, InternedStr, Module, NodeDef, - NodeKind, Region, RegionDef, Type, TypeDef, TypeKind, TypeOrConst, Value, VarDecl, print, - scalar, + FuncParam, FxIndexMap, GlobalVarDecl, GlobalVarDefBody, GlobalVarInit, Import, InternedStr, + Module, NodeDef, NodeKind, Region, RegionDef, Type, TypeDef, TypeKind, TypeOrConst, Value, Var, + VarDecl, print, scalar, }; use itertools::{Either, Itertools as _}; use rustc_hash::{FxHashMap, FxHashSet}; use smallvec::SmallVec; use std::collections::{BTreeMap, BTreeSet}; use std::num::NonZeroU32; +use std::ops::Range; use std::path::Path; use std::rc::Rc; use std::{io, mem}; @@ -25,6 +26,15 @@ enum IdDef { Type(Type), Const(Const), + /// Like `Const`, but for SPIR-V "aggregate" (`OpTypeStruct`/`OpTypeArray`) + /// constants (e.g. `OpConstantComposite`s of those types, but also more + /// general constants like `OpUndef`/`OpConstantNull` etc.). + AggregateConst { + whole_type: Type, + + leaves: SmallVec<[Const; 4]>, + }, + Func(Func), // HACK(eddyb) despite `FuncBody` deferring ID resolution to allow forward @@ -41,8 +51,10 @@ impl IdDef { match *self { // FIXME(eddyb) print these with some kind of "maximum depth", // instead of just describing the kind of definition. + // FIXME(eddyb) replace these with the `Diag` embedding system. IdDef::Type(_) => "a type".into(), IdDef::Const(_) => "a constant".into(), + IdDef::AggregateConst { .. 
} => "an aggregate constant".into(), IdDef::Func(_) | IdDef::FuncForwardRef(_) => "a function".into(), @@ -54,6 +66,34 @@ impl IdDef { } } +impl Type { + // HACK(eddyb) `indices` is a `&mut` because it specifically only consumes + // the indices it needs, so when this function returns `Some`, all remaining + // indices will be left over for the caller to process itself. + fn aggregate_component_path_type_and_leaf_range( + self, + cx: &Context, + indices: &mut impl Iterator, + ) -> Option<(Type, Range)> { + let (mut leaf_type, mut leaf_range) = + self.aggregate_component_type_and_leaf_range(cx, indices.next()?)?; + + while let spv::ValueLowering::Disaggregate(_) = cx[leaf_type].spv_value_lowering() { + let (sub_leaf_type, sub_leaf_range) = match indices.next() { + Some(i) => leaf_type.aggregate_component_type_and_leaf_range(cx, i)?, + None => break, + }; + + assert!(sub_leaf_range.end <= leaf_range.len()); + leaf_range.end = leaf_range.start + sub_leaf_range.end; + leaf_range.start += sub_leaf_range.start; + leaf_type = sub_leaf_type; + } + + Some((leaf_type, leaf_range)) + } +} + /// Deferred export, needed because the IDs are initially forward refs. enum Export { Linkage { @@ -88,7 +128,7 @@ struct IntraFuncInst { ids: SmallVec<[spv::Id; 4]>, } -// FIXME(eddyb) stop abusing `io::Error` for error reporting. +// FIXME(eddyb) stop abusing `io::Error` for error reporting and switch to `Diag`. 
fn invalid(reason: &str) -> io::Error { io::Error::new(io::ErrorKind::InvalidData, format!("malformed SPIR-V ({reason})")) } @@ -110,9 +150,10 @@ fn invalid_factory_for_spv_inst( // FIXME(eddyb) provide more information about any normalization that happened: // * stats about deduplication that occured through interning // * sets of unused global vars and functions (and types+consts only they use) -// FIXME(eddyb) consider introducing a "deferred error" system, where `spv::lower` -// (and more directproducers) can keep around errors in the SPIR-T IR, and still -// have the opportunity of silencing them e.g. by removing dead code. +// FIXME(eddyb) use `Diag` instead of `io::Error`, maybe with a return type like +// `Result` where `IncompletelyLoweredModule` +// contains a `Module`, maps of all the SPIR-V IDs (to the SPIR-T definitions), +// global `Diag`s (where they can't be attached to specific `AttrSet`s), etc. impl Module { pub fn lower_from_spv_file(cx: Rc, path: impl AsRef) -> io::Result { Self::lower_from_spv_module_parser(cx, spv::read::ModuleParser::read_from_spv_file(path)?) @@ -145,22 +186,18 @@ impl Module { ); attrs }, - // FIXME(eddyb) this gets simpler w/ disaggregation. - ret_type: cx.intern(TypeKind::SpvInst { - spv_inst: wk.OpTypeVoid.into(), - type_and_const_inputs: [].into_iter().collect(), - }), + ret_types: [].into_iter().collect(), params: [].into_iter().collect(), def: DeclDef::Imported(Import::LinkName(cx.intern(""))), }; // HACK(eddyb) no `PartialEq` on `FuncDecl`. 
let assert_is_dummy_decl_for_func_forward_ref = |decl: &FuncDecl| { let [expected, found] = [&dummy_decl_for_func_forward_ref, decl].map( - |FuncDecl { attrs, ret_type, params, def }| { + |FuncDecl { attrs, ret_types, params, def }| { let DeclDef::Imported(import) = def else { unreachable!(); }; - (attrs, ret_type, params, import) + (attrs, ret_types, params, import) }, ); assert!(expected == found); @@ -605,6 +642,7 @@ impl Module { kind: TypeKind::SpvInst { spv_inst: spv::Inst { opcode, imms: [sc].into_iter().collect() }, type_and_const_inputs: [].into_iter().collect(), + value_lowering: Default::default(), }, }); id_defs.insert(id, IdDef::Type(ty)); @@ -631,9 +669,7 @@ impl Module { let ty = cx.intern(TypeDef { attrs: mem::take(&mut attrs), - kind: inst.as_canonical_type(&cx, &type_and_const_inputs).unwrap_or( - TypeKind::SpvInst { spv_inst: inst.without_ids, type_and_const_inputs }, - ), + kind: inst.without_ids.into_canonical_type_with(&cx, type_and_const_inputs), }); id_defs.insert(id, IdDef::Type(ty)); @@ -674,33 +710,180 @@ impl Module { || inst.always_lower_as_const() { let id = inst.result_id.unwrap(); + let ty = result_type.unwrap(); - let const_inputs: SmallVec<_> = inst - .ids - .iter() - .map(|&id| match id_defs.get(&id) { - Some(&IdDef::Const(ct)) => Ok(ct), - Some(id_def) => Err(id_def.descr(&cx)), - None => Err(format!("a forward reference to %{id}")), - }) - .map(|result| { - result.map_err(|descr| { - invalid(&format!("unsupported use of {descr} in a constant")) - }) - }) - .collect::>()?; + // HACK(eddyb) while creating constants of unsized array types + // is *technically* illegal in SPIR-V, array semantics always + // are length-independent, so we can pretend this is an array + // of the right length (as long as we track the error on it). 
+ let maybe_fixup_unsized_array_type = |ty: Type| { + if ![wk.OpConstantComposite, wk.OpSpecConstantComposite].contains(&opcode) { + return None; + }; + let actual_component_count = u32::try_from(inst.ids.len()).ok()?; + + let ty_def = &cx[ty]; + let elem_type_of_unsized_array = match &ty_def.kind { + TypeKind::SpvInst { spv_inst: ty_inst, type_and_const_inputs, .. } => { + match type_and_const_inputs[..] { + [TypeOrConst::Type(elem_type), TypeOrConst::Const(len)] + if ty_inst.opcode == wk.OpTypeArray + && len.as_scalar(&cx).is_none() => + { + elem_type + } + [TypeOrConst::Type(elem_type)] + if ty_inst.opcode == wk.OpTypeRuntimeArray => + { + elem_type + } + _ => return None, + } + } + _ => return None, + }; + let mut attrs = ty_def.attrs; + attrs.push_diag( + &cx, + Diag::err([ + "illegal constant: values of type `".into(), + ty.into(), + "` should only be accessed through pointers".into(), + ]), + ); + Some( + cx.intern(TypeDef { + attrs, + kind: spv::Inst::from(wk.OpTypeArray).into_canonical_type_with( + &cx, + [ + TypeOrConst::Type(elem_type_of_unsized_array), + TypeOrConst::Const( + cx.intern(scalar::Const::from_u32(actual_component_count)), + ), + ] + .into_iter() + .collect(), + ), + }), + ) + }; + let ty = maybe_fixup_unsized_array_type(ty).unwrap_or(ty); - let ct = cx.intern(ConstDef { - attrs: mem::take(&mut attrs), - ty, - kind: inst.as_canonical_const(&cx, ty, &const_inputs).unwrap_or_else(|| { - ConstKind::SpvInst { - spv_inst_and_const_inputs: Rc::new((inst.without_ids, const_inputs)), + let mut all_leaves = SmallVec::new(); + match cx[ty].spv_value_lowering() { + spv::ValueLowering::Direct => { + all_leaves.reserve(inst.ids.len()); + } + spv::ValueLowering::Disaggregate(_) => { + // HACK(eddyb) this expands `OpUndef`/`OpConstantNull`. + // FIXME(eddyb) this could potentially create a very + // inefficient large array, even when the intent can + // be expressed much more compactly in theory. 
+ if inst.lower_const_by_distributing_to_aggregate_leaves() { + assert_eq!(inst.ids.len(), 0); + all_leaves.extend(ty.disaggregated_leaf_types(&cx).map(|leaf_type| { + cx.intern(ConstDef { + attrs: Default::default(), + ty: leaf_type, + kind: inst + .as_canonical_const(&cx, leaf_type, &[]) + .unwrap_or_else(|| ConstKind::SpvInst { + spv_inst_and_const_inputs: Rc::new(( + inst.without_ids.clone(), + [].into_iter().collect(), + )), + }), + }) + })); + } else if [wk.OpConstantComposite, wk.OpSpecConstantComposite] + .contains(&opcode) + { + all_leaves.reserve(cx[ty].disaggregated_leaf_count()); + } else { + attrs.push_diag( + &cx, + Diag::bug(["unsupported aggregate-producing constant".into()]), + ); } - }), - }); - id_defs.insert(id, IdDef::Const(ct)); + } + } + + let invalid = |descr| invalid(&format!("unsupported use of {descr} in a constant")); + for &id in &inst.ids { + match id_defs.get(&id) { + Some(&IdDef::Const(ct)) => { + all_leaves.push(ct); + } + Some(IdDef::AggregateConst { whole_type, leaves }) => { + all_leaves.extend(leaves.iter().copied()); + + match cx[ty].spv_value_lowering() { + // FIXME(eddyb) this also covers invalid consts + // of e.g. unsized aggregate types, as well. 
+ spv::ValueLowering::Direct => { + attrs.push_diag( + &cx, + Diag::err([ + "unexpected aggregate constant of type `".into(), + (*whole_type).into(), + "`".into(), + ]), + ); + } + spv::ValueLowering::Disaggregate(_) => {} + } + } + Some(id_def) => return Err(invalid(&id_def.descr(&cx))), + None => return Err(invalid(&format!("a forward reference to %{id}"))), + } + } + + let lowering = &cx[ty].spv_value_lowering(); + let lowering = match lowering { + spv::ValueLowering::Disaggregate(_) + if cx[ty].disaggregated_leaf_count() != all_leaves.len() => + { + attrs.push_diag( + &cx, + Diag::err([format!( + "aggregate leaf count mismatch (expected {}, found {})", + cx[ty].disaggregated_leaf_count(), + all_leaves.len() + ) + .into()]), + ); + // HACK(eddyb) pretend the type isn't an aggregate, so + // that it doesn't end up using `IdDef::AggregateConst`, + // which requires having the exact number of leaves. + &spv::ValueLowering::Direct + } + _ => lowering, + }; + + let attrs = mem::take(&mut attrs); + id_defs.insert( + id, + match lowering { + spv::ValueLowering::Direct => IdDef::Const(cx.intern(ConstDef { + attrs, + ty, + kind: inst.as_canonical_const(&cx, ty, &all_leaves).unwrap_or_else( + || ConstKind::SpvInst { + spv_inst_and_const_inputs: Rc::new(( + inst.without_ids, + all_leaves, + )), + }, + ), + })), + spv::ValueLowering::Disaggregate(_) => { + // FIXME(eddyb) this may lose semantic `attrs`. 
+ IdDef::AggregateConst { whole_type: ty, leaves: all_leaves } + } + }, + ); if inst_category != spec::InstructionCategory::Const { // `OpUndef` can appear either among constants, or in a @@ -732,7 +915,13 @@ impl Module { let initializer = initializer .map(|id| match id_defs.get(&id) { - Some(&IdDef::Const(ct)) => Ok(ct), + Some(&IdDef::Const(ct)) => Ok(GlobalVarInit::Direct(ct)), + Some(IdDef::AggregateConst { whole_type, leaves }) => { + Ok(GlobalVarInit::SpvAggregate { + ty: *whole_type, + leaves: leaves.clone(), + }) + } Some(id_def) => Err(id_def.descr(&cx)), None => Err(format!("a forward reference to %{id}")), }) @@ -780,8 +969,6 @@ impl Module { } let func_id = inst.result_id.unwrap(); - // FIXME(eddyb) hide this from SPIR-T, it's the function return - // type, *not* the function type, which is in `func_type`. let func_ret_type = result_type.unwrap(); let func_type_id = match (&inst.imms[..], &inst.ids[..]) { @@ -794,7 +981,7 @@ impl Module { let (func_type_ret_type, func_type_param_types) = match id_defs.get(&func_type_id) { Some(&IdDef::Type(ty)) => match &cx[ty].kind { - TypeKind::SpvInst { spv_inst, type_and_const_inputs } + TypeKind::SpvInst { spv_inst, type_and_const_inputs, .. } if spv_inst.opcode == wk.OpTypeFunction => { let mut types = @@ -847,14 +1034,29 @@ impl Module { }) } }; - let decl = FuncDecl { - attrs: mem::take(&mut attrs), - ret_type: func_ret_type, - params: func_type_param_types - .map(|ty| FuncParam { attrs: AttrSet::default(), ty }) - .collect(), - def, + + // Always flatten aggregates in param and return types. + let ret_types = match &cx[func_ret_type].kind { + // HACK(eddyb) `OpTypeVoid` special-cased here as if it were + // an aggregate with `0` leaves. + TypeKind::SpvInst { spv_inst: func_ret_type_spv_inst, .. 
} + if func_ret_type_spv_inst.opcode == wk.OpTypeVoid => + { + [].into_iter().collect() + } + + _ => func_ret_type.disaggregated_leaf_types(&cx).collect(), }; + let mut params = SmallVec::with_capacity(func_type_param_types.len()); + for param_type in func_type_param_types { + params.extend( + param_type + .disaggregated_leaf_types(&cx) + .map(|ty| FuncParam { attrs: AttrSet::default(), ty }), + ); + } + + let decl = FuncDecl { attrs: mem::take(&mut attrs), ret_types, params, def }; let func = { use std::collections::hash_map::Entry; @@ -960,6 +1162,9 @@ impl Module { struct PhiKey { source_block_id: spv::Id, target_block_id: spv::Id, + // FIXME(eddyb) remove this, key phis only by the edge, and keep + // a per-edge list of phi input `spv::Id`s (with validation for + // missing entries/duplicates). target_phi_idx: u32, } @@ -1121,13 +1326,37 @@ impl Module { None }; - #[derive(Copy, Clone)] - enum LocalIdDef { - Value(Type, Value), + // HACK(eddyb) this is generic to allow `IdDef::AggregateConst`s + // to be converted to `LocalIdDef::Value`s, inside `lookup_id`. + enum LocalIdDef>> { + Value { whole_type: Type, leaves: VL }, BlockLabel(Region), } - let mut local_id_defs = FxIndexMap::default(); + #[derive(Copy, Clone)] + struct VarRange { + start: Var, + count: NonZeroU32, + } + + impl VarRange { + // FIXME(eddyb) make this return `&[Var]` instead, maybe even + // have `FuncAt` implement `Deref`. + fn iter(self, func: FuncAt<'_, ()>) -> impl ExactSizeIterator { + let start_decl = func.at(self.start).decl(); + let all_vars_in_def_parent = start_decl.def_parent.either( + |region| &func.at(region).def().inputs, + |node| &func.at(node).def().outputs, + ); + all_vars_in_def_parent[start_decl.def_idx as usize..] + [..self.count.get() as usize] + .iter() + .copied() + .map(Value::Var) + } + } + + let mut local_id_defs = FxIndexMap::::default(); // Labels can be forward-referenced, so always have them present. 
local_id_defs.extend( @@ -1169,7 +1398,7 @@ impl Module { if opcode == wk.OpLabel { current_block = match local_id_defs[&result_id.unwrap()] { LocalIdDef::BlockLabel(region) => region, - LocalIdDef::Value(..) => unreachable!(), + LocalIdDef::Value { .. } => unreachable!(), }; continue; } @@ -1265,94 +1494,12 @@ impl Module { let invalid = invalid_factory_for_spv_inst(&raw_inst.without_ids, result_id, ids); - // FIXME(eddyb) find a more compact name and/or make this a method. - // FIXME(eddyb) this returns `LocalIdDef` even for global values. - let lookup_global_or_local_id_for_data_or_control_inst_input = |id| match id_defs - .get(&id) - { - Some(&IdDef::Const(ct)) => Ok(LocalIdDef::Value(cx[ct].ty, Value::Const(ct))), - Some(id_def @ IdDef::Type(_)) => Err(invalid(&format!( - "unsupported use of {} as an operand for \ - an instruction in a function", - id_def.descr(&cx), - ))), - Some(id_def @ IdDef::Func(_)) => Err(invalid(&format!( - "unsupported use of {} outside `OpFunctionCall`", - id_def.descr(&cx), - ))), - Some(id_def @ IdDef::SpvDebugString(s)) => { - if opcode == wk.OpExtInst { - // HACK(eddyb) intern `OpString`s as `Const`s on - // the fly, as it's a less likely usage than the - // `OpLine` one. - let ty = cx.intern(TypeKind::SpvStringLiteralForExtInst); - let ct = cx.intern(ConstDef { - attrs: AttrSet::default(), - ty, - kind: ConstKind::SpvStringLiteralForExtInst(*s), - }); - Ok(LocalIdDef::Value(ty, Value::Const(ct))) - } else { - Err(invalid(&format!( - "unsupported use of {} outside `OpSource`, \ - `OpLine`, or `OpExtInst`", - id_def.descr(&cx), - ))) - } - } - Some(id_def @ IdDef::SpvExtInstImport(_)) => Err(invalid(&format!( - "unsupported use of {} outside `OpExtInst`", - id_def.descr(&cx), - ))), - // FIXME(eddyb) scan the rest of the function for any - // instructions returning this ID, to report an invalid - // forward reference (use before def). 
- None | Some(IdDef::FuncForwardRef(_)) => local_id_defs - .get(&id) - .copied() - .ok_or_else(|| invalid(&format!("undefined ID %{id}",))), - }; - - if opcode == wk.OpFunctionParameter { - if current_block.is_some() { - return Err(invalid( - "out of order: `OpFunctionParameter`s should come \ - before the function's blocks", - )); - } - - assert!(imms.is_empty() && ids.is_empty()); - - let ty = result_type.unwrap(); - params.push(FuncParam { attrs, ty }); - - if let Some(func_def_body) = &mut func_def_body { - let body_inputs = &mut func_def_body.regions[func_def_body.body].inputs; - let input_var = func_def_body.vars.define( - &cx, - VarDecl { - attrs, - ty: result_type.unwrap(), - - def_parent: Either::Left(func_def_body.body), - def_idx: body_inputs.len().try_into().unwrap(), - }, - ); - body_inputs.push(input_var); - - local_id_defs.insert( - result_id.unwrap(), - LocalIdDef::Value(ty, Value::Var(input_var)), - ); - } - - continue; - } - let func_def_body = func_def_body.as_deref_mut().unwrap(); - let is_last_in_block = lookahead_raw_inst(1) .is_none_or(|next_raw_inst| next_raw_inst.without_ids.opcode == wk.OpLabel); + // HACK(eddyb) this is handled early because it's the only case + // where a `result_id` isn't a value, and `OpFunctionParameter` + // wants to be able to use common value result helpers. if opcode == wk.OpLabel { if is_last_in_block { return Err(invalid("block lacks terminator instruction")); @@ -1362,7 +1509,7 @@ impl Module { // to be able to have an entry in `local_id_defs`. let region = match local_id_defs[&result_id.unwrap()] { LocalIdDef::BlockLabel(region) => region, - LocalIdDef::Value(..) => unreachable!(), + LocalIdDef::Value { .. } => unreachable!(), }; let details = &block_details[&region]; assert_eq!(details.label_id, result_id.unwrap()); @@ -1381,10 +1528,79 @@ impl Module { }); continue; } + + // Helper shared by `OpFunctionParameter` and `OpPhi`. 
+ let attrs_for_result_leaf = |leaf_type: Type| { + if result_type == Some(leaf_type) { + attrs + } else { + // FIXME(eddyb) this may lose semantic `attrs`. + AttrSet::default() + } + }; + + if opcode == wk.OpFunctionParameter { + let result_type = result_type.unwrap(); + + if current_block.is_some() { + return Err(invalid( + "out of order: `OpFunctionParameter`s should come \ + before the function's blocks", + )); + } + + assert!(imms.is_empty() && ids.is_empty()); + + let param_start = params.len(); + params.extend( + result_type + .disaggregated_leaf_types(&cx) + .map(|ty| FuncParam { attrs: attrs_for_result_leaf(ty), ty }), + ); + let param_end = params.len(); + + if let Some(func_def_body) = &mut func_def_body { + let body_inputs = &mut func_def_body.regions[func_def_body.body].inputs; + let start = u32::try_from(body_inputs.len()).unwrap(); + body_inputs.extend(params[param_start..param_end].iter().zip(start..).map( + |(&FuncParam { attrs, ty }, def_idx)| { + func_def_body.vars.define( + &cx, + VarDecl { + attrs, + ty, + + def_parent: Either::Left(func_def_body.body), + def_idx, + }, + ) + }, + )); + let end = u32::try_from(body_inputs.len()).unwrap(); + + local_id_defs.insert( + result_id.unwrap(), + LocalIdDef::Value { + whole_type: result_type, + leaves: NonZeroU32::new(end - start).map_or( + Either::Right(SmallVec::new()), + |count| { + Either::Left(VarRange { + start: body_inputs[start as usize], + count, + }) + }, + ), + }, + ); + } + continue; + } + let func_def_body = func_def_body.as_deref_mut().unwrap(); + let current_block = current_block.as_mut().ok_or_else(|| { invalid("out of order: not expected before the function's blocks") })?; - let current_block_region_def = &mut func_def_body.regions[current_block.region]; // HACK(eddyb) the `Region` inputs for inter-block uses // have to be inserted just after all the `OpPhi`s' region inputs, @@ -1395,40 +1611,146 @@ impl Module { && current_block.shadowed_local_id_defs.is_empty() && 
!current_block.details.cfgssa_inter_block_uses.is_empty() { + let current_block_region_def = &mut func_def_body.regions[current_block.region]; assert!(current_block_region_def.children.is_empty()); current_block.shadowed_local_id_defs.extend( current_block.details.cfgssa_inter_block_uses.iter().map( |(&used_id, &ty)| { - let input_var = func_def_body.vars.define( - &cx, - VarDecl { - attrs: AttrSet::default(), - ty, - - def_parent: Either::Left(current_block.region), - def_idx: current_block_region_def - .inputs - .len() - .try_into() - .unwrap(), + let inputs = &mut current_block_region_def.inputs; + let start = u32::try_from(inputs.len()).unwrap(); + inputs.extend(ty.disaggregated_leaf_types(&cx).zip(start..).map( + |(ty, def_idx)| { + func_def_body.vars.define( + &cx, + VarDecl { + attrs: AttrSet::default(), + ty, + + def_parent: Either::Left(current_block.region), + def_idx, + }, + ) }, - ); - current_block_region_def.inputs.push(input_var); + )); + let end = u32::try_from(inputs.len()).unwrap(); - (used_id, LocalIdDef::Value(ty, Value::Var(input_var))) + ( + used_id, + LocalIdDef::Value { + whole_type: ty, + leaves: NonZeroU32::new(end - start).map_or( + Either::Right(SmallVec::new()), + |count| { + Either::Left(VarRange { + start: inputs[start as usize], + count, + }) + }, + ), + }, + ) }, ), ); } - // HACK(eddyb) shadowing the closure with the same name, could - // it be defined here to make use of `current_block`? - let lookup_global_or_local_id_for_data_or_control_inst_input = - |id| match current_block.shadowed_local_id_defs.get(&id) { - Some(&shadowed) => Ok(shadowed), - None => lookup_global_or_local_id_for_data_or_control_inst_input(id), - }; + // HACK(eddyb) not relying on iterators, to allow `FuncAt` usage. 
+ #[derive(Copy, Clone)] + enum Leaves<'a> { + VarRange(VarRange), + Values(&'a [Value]), + + Const(Const), + Consts(&'a [Const]), + } + + impl Leaves<'_> { + fn iter(self, func: FuncAt<'_, ()>) -> impl ExactSizeIterator { + match self { + Leaves::VarRange(leaves) => { + Either::Left(Either::Left(leaves.iter(func))) + } + Leaves::Values(leaves) => { + Either::Left(Either::Right(leaves.iter().copied())) + } + + Leaves::Const(ct) => { + Either::Right(Either::Left([Value::Const(ct)].into_iter())) + } + Leaves::Consts(leaves) => Either::Right(Either::Right( + leaves.iter().copied().map(Value::Const), + )), + } + } + } + + // FIXME(eddyb) this returns `LocalIdDef` even for global values. + let lookup_id = |id| match id_defs.get(&id) { + None | Some(IdDef::FuncForwardRef(_)) => { + let local_id_def = (current_block.shadowed_local_id_defs.get(&id)) + .or_else(|| local_id_defs.get(&id)) + .ok_or_else(|| { + // FIXME(eddyb) scan the rest of the function for any + // instructions returning this ID, to report an invalid + // forward reference (use before def). + invalid(&format!("undefined ID %{id}")) + })?; + // HACK(eddyb) change the type of `leaves` within + // `LocalIdDef::Value` to support consts + // (see `IdDef::AggregateConst` case just below). 
+ Ok(match local_id_def { + LocalIdDef::Value { whole_type, leaves } => LocalIdDef::Value { + whole_type: *whole_type, + leaves: leaves.as_ref().either( + |&leaves| Leaves::VarRange(leaves), + |leaves| Leaves::Values(leaves), + ), + }, + &LocalIdDef::BlockLabel(label) => LocalIdDef::BlockLabel(label), + }) + } + Some(&IdDef::Const(ct)) => { + Ok(LocalIdDef::Value { whole_type: cx[ct].ty, leaves: Leaves::Const(ct) }) + } + Some(IdDef::AggregateConst { whole_type, leaves }) => Ok(LocalIdDef::Value { + whole_type: *whole_type, + leaves: Leaves::Consts(leaves), + }), + Some(id_def @ IdDef::Type(_)) => Err(invalid(&format!( + "unsupported use of {} as an operand for \ + an instruction in a function", + id_def.descr(&cx), + ))), + Some(id_def @ IdDef::Func(_)) => Err(invalid(&format!( + "unsupported use of {} outside `OpFunctionCall`", + id_def.descr(&cx), + ))), + Some(id_def @ IdDef::SpvDebugString(s)) => { + if opcode == wk.OpExtInst { + // HACK(eddyb) intern `OpString`s as `Const`s on + // the fly, as it's a less likely usage than the + // `OpLine` one. 
+ let ty = cx.intern(TypeKind::SpvStringLiteralForExtInst); + let ct = cx.intern(ConstDef { + attrs: AttrSet::default(), + ty, + kind: ConstKind::SpvStringLiteralForExtInst(*s), + }); + Ok(LocalIdDef::Value { whole_type: ty, leaves: Leaves::Const(ct) }) + } else { + Err(invalid(&format!( + "unsupported use of {} outside `OpSource`, \ + `OpLine`, or `OpExtInst`", + id_def.descr(&cx), + ))) + } + } + Some(id_def @ IdDef::SpvExtInstImport(_)) => Err(invalid(&format!( + "unsupported use of {} outside `OpExtInst`", + id_def.descr(&cx), + ))), + }; if is_last_in_block { if opcode.def().category != spec::InstructionCategory::ControlFlow @@ -1441,23 +1763,6 @@ impl Module { } let mut target_inputs = FxIndexMap::default(); - let descr_phi_case = |phi_key: &PhiKey| { - format!( - "`OpPhi` (#{} in %{})'s case for source block %{}", - phi_key.target_phi_idx, - phi_key.target_block_id, - phi_key.source_block_id, - ) - }; - let phi_value_id_to_value = |phi_key: &PhiKey, id| { - match lookup_global_or_local_id_for_data_or_control_inst_input(id)? { - LocalIdDef::Value(_, v) => Ok(v), - LocalIdDef::BlockLabel { .. 
} => Err(invalid(&format!( - "unsupported use of block label as the value for {}", - descr_phi_case(phi_key) - ))), - } - }; let mut record_cfg_edge = |target_block| -> io::Result<()> { use indexmap::map::Entry; @@ -1475,38 +1780,64 @@ impl Module { Entry::Vacant(entry) => entry, }; - let inputs = (0..target_block_details.phi_count).map(|target_phi_idx| { + let mut target_inputs = SmallVec::new(); + for target_phi_idx in 0..target_block_details.phi_count { let phi_key = PhiKey { source_block_id: current_block.details.label_id, target_block_id: target_block_details.label_id, target_phi_idx: target_phi_idx.try_into().unwrap(), }; + let descr_phi_case = || { + format!( + "`OpPhi` (#{} in %{})'s case for source block %{}", + phi_key.target_phi_idx, + phi_key.target_block_id, + phi_key.source_block_id, + ) + }; + let phi_value_ids = phi_to_values.swap_remove(&phi_key).unwrap_or_default(); - match phi_value_ids[..] { - [] => Err(invalid(&format!( - "{} is missing", - descr_phi_case(&phi_key) - ))), - [id] => phi_value_id_to_value(&phi_key, id), - [..] => Err(invalid(&format!( - "{} is duplicated", - descr_phi_case(&phi_key) - ))), + let phi_value_id = match phi_value_ids[..] { + [] => { + return Err(invalid(&format!( + "{} is missing", + descr_phi_case() + ))); + } + [id] => id, + [..] => { + return Err(invalid(&format!( + "{} is duplicated", + descr_phi_case() + ))); + } + }; + + match lookup_id(phi_value_id)? { + LocalIdDef::Value { leaves, .. } => { + target_inputs.extend(leaves.iter(func_def_body.at(()))); + } + LocalIdDef::BlockLabel(_) => { + return Err(invalid(&format!( + "unsupported use of block label as the value for {}", + descr_phi_case() + ))); + } } - }); - let inputs = inputs.chain( - target_block_details.cfgssa_inter_block_uses.keys().map(|&used_id| { - match lookup_global_or_local_id_for_data_or_control_inst_input( - used_id, - )? 
{ - LocalIdDef::Value(_, v) => Ok(v), - LocalIdDef::BlockLabel(_) => unreachable!(), + } + + for &used_id in target_block_details.cfgssa_inter_block_uses.keys() { + match lookup_id(used_id)? { + LocalIdDef::Value { leaves, .. } => { + target_inputs.extend(leaves.iter(func_def_body.at(()))); } - }), - ); - target_inputs_entry.insert(inputs.collect::>()?); + LocalIdDef::BlockLabel(_) => unreachable!(), + } + } + + target_inputs_entry.insert(target_inputs); Ok(()) }; @@ -1517,16 +1848,33 @@ impl Module { let mut input_types = SmallVec::<[_; 2]>::new(); let mut targets = SmallVec::<[_; 4]>::new(); for &id in ids { - match lookup_global_or_local_id_for_data_or_control_inst_input(id)? { - LocalIdDef::Value(ty, v) => { + match lookup_id(id)? { + LocalIdDef::Value { whole_type, leaves, .. } => { if !targets.is_empty() { return Err(invalid( "out of order: value operand \ after target label ID", )); } - inputs.push(v); - input_types.push(ty); + + match cx[whole_type].spv_value_lowering() { + spv::ValueLowering::Direct => {} + + // Returns are "lossily" disaggregated, just like + // function's signatures and calls to them. 
+ spv::ValueLowering::Disaggregate(_) + if opcode == wk.OpReturnValue => {} + + spv::ValueLowering::Disaggregate(_) => { + return Err(invalid( + "unsupported aggregate value operand, \ + in non-return terminator instruction", + )); + } + } + + inputs.extend(leaves.iter(func_def_body.at(()))); + input_types.push(whole_type); } LocalIdDef::BlockLabel(target) => { record_cfg_edge(target)?; @@ -1701,7 +2049,7 @@ impl Module { } else if [wk.OpReturn, wk.OpReturnValue].contains(&opcode) && !treat_return_as_exit_invocation { - assert!(targets_with_inputs.count() == 0 && inputs.len() <= 1); + assert!(targets_with_inputs.count() == 0); build_thunk( func_def_body.at_mut(current_block.region), (cf::unstructured::ControlTarget::Return, mem::take(&mut inputs)), @@ -1749,7 +2097,14 @@ impl Module { func_def_body.regions[current_block.region].outputs = [target_thunk].into_iter().collect(); - } else if opcode == wk.OpPhi { + continue; + } + + if opcode == wk.OpPhi { + let current_block_region_def = &mut func_def_body.regions[current_block.region]; + + let result_type = result_type.unwrap(); + if !current_block_region_def.children.is_empty() { return Err(invalid( "out of order: `OpPhi`s should come before \ @@ -1757,23 +2112,40 @@ impl Module { )); } - let ty = result_type.unwrap(); - - let input_var = func_def_body.vars.define( - &cx, - VarDecl { - attrs, - ty, + let inputs = &mut current_block_region_def.inputs; + let start = u32::try_from(inputs.len()).unwrap(); + inputs.extend(result_type.disaggregated_leaf_types(&cx).zip(start..).map( + |(ty, def_idx)| { + func_def_body.vars.define( + &cx, + VarDecl { + attrs: attrs_for_result_leaf(ty), + ty, - def_parent: Either::Left(current_block.region), - def_idx: current_block_region_def.inputs.len().try_into().unwrap(), + def_parent: Either::Left(current_block.region), + def_idx, + }, + ) + }, + )); + let end = u32::try_from(inputs.len()).unwrap(); + + local_id_defs.insert( + result_id.unwrap(), + LocalIdDef::Value { + whole_type: 
result_type, + leaves: NonZeroU32::new(end - start).map_or( + Either::Right(SmallVec::new()), + |count| { + Either::Left(VarRange { start: inputs[start as usize], count }) + }, + ), }, ); - current_block_region_def.inputs.push(input_var); + continue; + } - local_id_defs - .insert(result_id.unwrap(), LocalIdDef::Value(ty, Value::Var(input_var))); - } else if [wk.OpSelectionMerge, wk.OpLoopMerge].contains(&opcode) { + if [wk.OpSelectionMerge, wk.OpLoopMerge].contains(&opcode) { let is_second_to_last_in_block = lookahead_raw_inst(2) .is_none_or(|next_raw_inst| next_raw_inst.without_ids.opcode == wk.OpLabel); @@ -1788,12 +2160,10 @@ impl Module { // impact on the shape of a loop, for restructurization. if opcode == wk.OpLoopMerge { assert_eq!(ids.len(), 2); - let loop_merge_target = - match lookup_global_or_local_id_for_data_or_control_inst_input(ids[0])? - { - LocalIdDef::Value(..) => return Err(invalid("expected label ID")), - LocalIdDef::BlockLabel(target) => target, - }; + let loop_merge_target = match lookup_id(ids[0])? { + LocalIdDef::Value { .. } => return Err(invalid("expected label ID")), + LocalIdDef::BlockLabel(target) => target, + }; func_def_body .unstructured_cfg @@ -1807,138 +2177,426 @@ impl Module { // especially wrt the `SelectionControl` and `LoopControl` // operands, but it's not obvious how they should map to // some "structured regions" replacement for the CFG. - } else { - let mut ids = &ids[..]; - let kind = if opcode == wk.OpFunctionCall { - assert!(imms.is_empty()); - let callee_id = ids[0]; - let maybe_callee = id_defs - .get(&callee_id) - .map(|id_def| match *id_def { - IdDef::Func(func) => Ok(func), - _ => Err(id_def.descr(&cx)), - }) - .transpose() - .map_err(|descr| { - invalid(&format!( - "unsupported use of {descr} as the `OpFunctionCall` callee" - )) - })?; + continue; + } + + // All control-flow instructions have been handled above. + // Only `Node`s get generated below here. 
+ + let append_node = |func: FuncAtMut<'_, ()>, node_def: NodeDef| { + let node = func.nodes.define(&cx, node_def.into()); + func.regions[current_block.region].children.insert_last(node, func.nodes); + node + }; - match maybe_callee { - Some(callee) => { - ids = &ids[1..]; - DataInstKind::FuncCall(callee) + let lookup_value_id = |id| match lookup_id(id)? { + LocalIdDef::Value { whole_type, leaves } => Ok((whole_type, leaves)), + LocalIdDef::BlockLabel(_) => Err(invalid( + "unsupported use of block label as a value, \ + in non-terminator instruction", + )), + }; + + // Special-case instructions which deal with aggregates as + // "containers" for their leaves, and so have an effect which + // can be interpreted eagerly on the disaggregated form. + // FIXME(eddyb) this may lose semantic `attrs` + let eagerly_lowered_result = if opcode == wk.OpCompositeConstruct { + let result_type = result_type.unwrap(); + + match cx[result_type].spv_value_lowering() { + spv::ValueLowering::Direct => None, + spv::ValueLowering::Disaggregate(_) => { + let mut all_leaves = + SmallVec::with_capacity(cx[result_type].disaggregated_leaf_count()); + for &id in ids { + let (_, leaves) = lookup_value_id(id)?; + all_leaves.extend(leaves.iter(func_def_body.at(()))); + } + if all_leaves.len() == cx[result_type].disaggregated_leaf_count() { + Some(LocalIdDef::Value { + whole_type: result_type, + leaves: Either::Right(all_leaves), + }) + } else { + None } + } + } + } else if [wk.OpCompositeExtract, wk.OpCompositeInsert].contains(&opcode) { + let result_type = result_type.unwrap(); + + let (&composite_id, ids_without_last) = ids.split_last().unwrap(); + let (composite_type, leaves) = lookup_value_id(composite_id)?; + + // HACK(eddyb) `replace_component` and `rebuild_composite` + // are always both `None` or both `Some`, but splitting the + // two aspects of `OpCompositeInsert` makes it easier later. + let (component_type, replace_component, rebuild_composite); + match ids_without_last[..] 
{ + [] => { + component_type = result_type; + replace_component = None; + rebuild_composite = None; + } + [replacement_component_id] => { + let (replacement_component_type, replacement_component_leaves) = + lookup_value_id(replacement_component_id)?; - // HACK(eddyb) this should be an error, but it shows - // up in Rust-GPU output (likely a zombie?). - None => DataInstKind::SpvInst(raw_inst.without_ids.clone()), + component_type = replacement_component_type; + replace_component = Some(replacement_component_leaves); + rebuild_composite = Some(result_type); } - } else if opcode == wk.OpExtInst { - let ext_set_id = ids[0]; - ids = &ids[1..]; - - let inst = match imms[..] { - [spv::Imm::Short(kind, inst)] => { - assert_eq!(kind, wk.LiteralExtInstInteger); - inst - } - _ => unreachable!(), + _ => unreachable!(), + } + + // HACK(eddyb) this is a `try {...}`-like use of a closure. + (|| { + if let Some(expected_type) = rebuild_composite + && composite_type != expected_type + { + return None; + } + + let mut imms = imms.iter(); + let (leaf_type, leaf_range) = match cx[composite_type].spv_value_lowering() + { + spv::ValueLowering::Direct => return None, + spv::ValueLowering::Disaggregate(_) => composite_type + .aggregate_component_path_type_and_leaf_range( + &cx, + &mut imms.by_ref().map(|&imm| match imm { + spv::Imm::Short(_, i) => i, + _ => unreachable!(), + }), + )?, }; + let non_aggregate_indexing_imms = imms.as_slice(); - let ext_set = match id_defs.get(&ext_set_id) { - Some(&IdDef::SpvExtInstImport(name)) => Ok(name), - Some(id_def) => Err(id_def.descr(&cx)), - None => Err(format!("unknown ID %{ext_set_id}")), + if non_aggregate_indexing_imms.is_empty() && leaf_type != component_type { + return None; } + + let component_leaves = leaves + .iter(func_def_body.at(())) + .skip(leaf_range.start) + .take(leaf_range.len()); + + // If there's any leftover indices they must be indexing + // into a vector/matrix, which requires separate handling. 
+ let component_leaves = if !non_aggregate_indexing_imms.is_empty() { + let non_aggregate_composite = + component_leaves.exactly_one().ok().unwrap(); + + let leaf_spv_inst = spv::Inst { + opcode, + imms: non_aggregate_indexing_imms.iter().copied().collect(), + }; + let leaf_output_type = match rebuild_composite { + Some(_) => leaf_type, + None => component_type, + }; + let leaf_inputs: SmallVec<[Value; 2]> = replace_component + .map(|replacement_leaves| { + replacement_leaves + .iter(func_def_body.at(())) + .exactly_one() + .ok() + .unwrap() + }) + .into_iter() + .chain([non_aggregate_composite]) + .collect(); + let leaf_kind = leaf_spv_inst + .as_canonical_node_kind( + &cx, + [leaf_output_type].into_iter(), + leaf_inputs.iter().map(|&v| func_def_body.at(v).type_of(&cx)), + ) + .unwrap_or(DataInstKind::SpvInst( + leaf_spv_inst, + spv::InstLowering::default(), + )); + let leaf_inst = append_node( + func_def_body.at_mut(()), + DataInstDef { + attrs, + kind: leaf_kind, + inputs: leaf_inputs, + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + }, + ); + + let leaf_outputs = &mut func_def_body.nodes[leaf_inst].outputs; + let leaf_output_var = func_def_body.vars.define( + &cx, + VarDecl { + // FIXME(eddyb) this may lose semantic `attrs`. 
+ attrs: AttrSet::default(), + ty: leaf_output_type, + + def_parent: Either::Right(leaf_inst), + def_idx: leaf_outputs.len().try_into().unwrap(), + }, + ); + leaf_outputs.push(leaf_output_var); + + Either::Left([Value::Var(leaf_output_var)].into_iter()) + } else { + Either::Right(replace_component.map_or( + Either::Left(component_leaves), + |replacement_leaves| { + Either::Right(replacement_leaves.iter(func_def_body.at(()))) + }, + )) + }; + + assert_eq!( + component_leaves.len(), + cx[component_type].disaggregated_leaf_count() + ); + + let leaves = match rebuild_composite { + Some(_) => leaves + .iter(func_def_body.at(())) + .take(leaf_range.start) + .chain(component_leaves) + .chain(leaves.iter(func_def_body.at(())).skip(leaf_range.end)) + .collect(), + None => component_leaves.collect(), + }; + + Some(LocalIdDef::Value { + whole_type: result_type, + // FIXME(eddyb) avoid allocating somehow, like + // try "recompressing" into a `VarRange`, or + // preserving that form throughout above? 
+ leaves: Either::Right(leaves), + }) + })() + } else { + None + }; + if let Some(def) = eagerly_lowered_result { + local_id_defs.insert(result_id.unwrap(), def); + continue; + } + + let mut ids = &ids[..]; + let mut kind = if opcode == wk.OpFunctionCall { + assert!(imms.is_empty()); + let callee_id = ids[0]; + let maybe_callee = id_defs + .get(&callee_id) + .map(|id_def| match *id_def { + IdDef::Func(func) => Ok(func), + _ => Err(id_def.descr(&cx)), + }) + .transpose() .map_err(|descr| { invalid(&format!( - "unsupported use of {descr} as the `OpExtInst` \ - extended instruction set ID" + "unsupported use of {descr} as the `OpFunctionCall` callee" )) })?; - DataInstKind::SpvExtInst { ext_set, inst } - } else { - DataInstKind::SpvInst(raw_inst.without_ids.clone()) - }; + match maybe_callee { + Some(callee) => { + ids = &ids[1..]; + DataInstKind::FuncCall(callee) + } - let data_inst_def = DataInstDef { - attrs, - kind, - inputs: ids - .iter() - .map(|&id| { - match lookup_global_or_local_id_for_data_or_control_inst_input(id)? - { - LocalIdDef::Value(_, v) => Ok(v), - LocalIdDef::BlockLabel { .. } => Err(invalid( - "unsupported use of block label as a value, \ - in non-terminator instruction", - )), - } - }) - .collect::>()?, - child_regions: [].into_iter().collect(), - outputs: [].into_iter().collect(), + // HACK(eddyb) this should be an error, but it shows + // up in Rust-GPU output (likely a zombie?). + None => DataInstKind::SpvInst( + raw_inst.without_ids.clone(), + spv::InstLowering::default(), + ), + } + } else if opcode == wk.OpExtInst { + let ext_set_id = ids[0]; + ids = &ids[1..]; + + let inst = match imms[..] 
{ + [spv::Imm::Short(kind, inst)] => { + assert_eq!(kind, wk.LiteralExtInstInteger); + inst + } + _ => unreachable!(), }; - let inst = func_def_body.nodes.define(&cx, data_inst_def.into()); - if let Some(result_id) = result_id { - let ty = result_type.ok_or_else(|| { - invalid( - "expected value-producing instruction, \ - with a result type", - ) - })?; - - let outputs = &mut func_def_body.nodes[inst].outputs; - let output_var = func_def_body.vars.define( - &cx, - VarDecl { - // FIXME(eddyb) split attrs between output and inst. - attrs: AttrSet::default(), - ty, + let ext_set = match id_defs.get(&ext_set_id) { + Some(&IdDef::SpvExtInstImport(name)) => Ok(name), + Some(id_def) => Err(id_def.descr(&cx)), + None => Err(format!("unknown ID %{ext_set_id}")), + } + .map_err(|descr| { + invalid(&format!( + "unsupported use of {descr} as the `OpExtInst` \ + extended instruction set ID" + )) + })?; - def_parent: Either::Right(inst), - def_idx: outputs.len().try_into().unwrap(), - }, - ); - outputs.push(output_var); + DataInstKind::SpvExtInst { + ext_set, + inst, + lowering: spv::InstLowering::default(), + } + } else { + DataInstKind::SpvInst( + raw_inst.without_ids.clone(), + spv::InstLowering::default(), + ) + }; - local_id_defs.insert( - result_id, - LocalIdDef::Value(result_type.unwrap(), Value::Var(output_var)), - ); + // HACK(eddyb) only factored out due to `kind`'s mutable borrow. + let call_ret_type = match &kind { + DataInstKind::FuncCall(_) => true, + DataInstKind::SpvInst(spv_inst, _) => { + spv_inst.opcode == wk.OpFunctionPointerCallINTEL } + _ => false, + } + .then(|| result_type.unwrap()); + + let mut spv_inst_lowering = match &mut kind { + DataInstKind::SpvInst(_, lowering) + | DataInstKind::SpvExtInst { lowering, .. } => Some(lowering), + + // NOTE(eddyb) function signatures and calls keep their + // disaggregation even when lifting back to SPIR-V, so + // no `spv::InstLowering` is tracked for them. 
+ DataInstKind::FuncCall(_) => None, + + NodeKind::Select(_) + | NodeKind::Loop { .. } + | NodeKind::ExitInvocation(_) + | DataInstKind::Scalar(_) + | DataInstKind::Vector(_) + | DataInstKind::Mem(_) + | DataInstKind::QPtr(_) + | DataInstKind::ThunkBind(_) => { + unreachable!() + } + }; - current_block_region_def.children.insert_last(inst, &mut func_def_body.nodes); + let (output_count_u32, output_leaf_types) = result_id + .and_then(|_| { + let result_type = result_type.unwrap(); - // HACK(eddyb) doing this after defining the maybe-uncanonical - // node, just to keep the iterators simpler. - let node_def = &mut func_def_body.nodes[inst]; - if let DataInstKind::SpvInst(spv_inst) = &node_def.kind - && let Some(canonical_kind) = spv_inst.as_canonical_node_kind( - &cx, - node_def - .outputs - .iter() - .map(|&output_var| func_def_body.vars[output_var].ty), - node_def.inputs.iter().map(|&v| { - // HACK(eddyb) `func_def_body.at(v).type_of(cx)` - // equivalent, without running into borrow issues. - match v { - Value::Const(ct) => cx[ct].ty, - Value::Var(var) => func_def_body.vars[var].ty, + // HACK(eddyb) `OpTypeVoid` special-cased for calls + // as if it were an aggregate with `0` leaves. + let ret_void = call_ret_type.is_some_and(|ty| match &cx[ty].kind { + TypeKind::SpvInst { spv_inst: ret_type_spv_inst, .. } => { + ret_type_spv_inst.opcode == wk.OpTypeVoid + } + _ => false, + }); + + if let Some(spv_inst_lowering) = &mut spv_inst_lowering { + spv_inst_lowering.disaggregated_output = + match cx[result_type].spv_value_lowering() { + // HACK(eddyb) `spv_inst_lowering` can only + // coexist with `call_ret_type` for indirect + // calls (`OpFunctionPointerCallINTEL`). 
+ spv::ValueLowering::Direct => ret_void, + spv::ValueLowering::Disaggregate(_) => true, } - }), + .then_some(result_type); + } + + (!ret_void).then_some(result_type) + }) + .map_or((0, None), |result_type| { + ( + cx[result_type].disaggregated_leaf_count_u32(), + Some(result_type.disaggregated_leaf_types(&cx)), ) + }); + let output_leaf_types = output_leaf_types.into_iter().flatten(); + + let mut inputs = SmallVec::with_capacity(ids.len()); + for &id in ids { + let (whole_input_type, leaves) = lookup_value_id(id)?; + + let start = u32::try_from(inputs.len()).unwrap(); + inputs.extend(leaves.iter(func_def_body.at(()))); + let end = u32::try_from(inputs.len()).unwrap(); + + if let spv::ValueLowering::Disaggregate(_) = + cx[whole_input_type].spv_value_lowering() + && let Some(lowering) = &mut spv_inst_lowering { - // FIXME(eddyb) sanity-check the number/types of inputs. - node_def.kind = canonical_kind; + lowering.disaggregated_inputs.push((start..end, whole_input_type)); } } + + let node = append_node( + func_def_body.at_mut(()), + NodeDef { + attrs, + kind, + inputs, + child_regions: [].into_iter().collect(), + outputs: [].into_iter().collect(), + }, + ); + + if let Some(result_id) = result_id { + let outputs = &mut func_def_body.nodes[node].outputs; + assert_eq!(outputs.len(), 0); + outputs.extend(output_leaf_types.zip_eq(0..output_count_u32).map( + |(ty, def_idx)| { + func_def_body.vars.define( + &cx, + VarDecl { + // FIXME(eddyb) split attrs between output and inst. + attrs: AttrSet::default(), + ty, + + def_parent: Either::Right(node), + def_idx, + }, + ) + }, + )); + + local_id_defs.insert( + result_id, + LocalIdDef::Value { + whole_type: result_type.unwrap(), + leaves: NonZeroU32::new(output_count_u32) + .map_or(Either::Right(SmallVec::new()), |count| { + Either::Left(VarRange { start: outputs[0], count }) + }), + }, + ); + } + + // HACK(eddyb) doing this after defining the maybe-uncanonical + // node, just to keep the iterators simpler. 
+ let node_def = &mut func_def_body.nodes[node]; + if let DataInstKind::SpvInst(spv_inst, lowering) = &node_def.kind + && lowering.disaggregated_inputs.is_empty() + && let Some(canonical_kind) = spv_inst.as_canonical_node_kind( + &cx, + node_def + .outputs + .iter() + .map(|&output_var| func_def_body.vars[output_var].ty), + node_def.inputs.iter().map(|&v| { + // HACK(eddyb) `func_def_body.at(v).type_of(cx)` + // equivalent, without running into borrow issues. + match v { + Value::Const(ct) => cx[ct].ty, + Value::Var(var) => func_def_body.vars[var].ty, + } + }), + ) + { + // FIXME(eddyb) sanity-check the number/types of inputs. + node_def.kind = canonical_kind; + } } // FIXME(eddyb) all functions should have the appropriate number of diff --git a/src/spv/mod.rs b/src/spv/mod.rs index 3a945287..a0b4d818 100644 --- a/src/spv/mod.rs +++ b/src/spv/mod.rs @@ -10,11 +10,12 @@ pub mod read; pub mod spec; pub mod write; -use crate::{FxIndexMap, InternedStr}; +use crate::{Context, FxIndexMap, InternedStr, Type, TypeDef, TypeKind, TypeOrConst}; use smallvec::SmallVec; use std::collections::{BTreeMap, BTreeSet}; use std::iter; use std::num::NonZeroU32; +use std::ops::Range; use std::string::FromUtf8Error; /// Semantic properties of a SPIR-V module (not tied to any IDs). @@ -51,6 +52,360 @@ pub struct DebugSources { pub file_contents: FxIndexMap, } +/// Most SPIR-V types can be used as SPIR-V (SSA) value types, but some require +/// non-trivial lowering into SPIR-T [`Value`](crate::Value)s (e.g. expanding +/// one SPIR-V value into any number of *valid* SPIR-T values). +// +// FIXME(eddyb) aggregates without known leaf counts using `Direct` is worse than +// treating them as an error (and e.g. generating `Diag`s), but it's also simpler. 
+#[derive(Clone, Default, PartialEq, Eq, Hash)] +pub enum ValueLowering { + /// SPIR-V values of this type map to SPIR-T [`Value`](crate::Value)s with the same type + /// (see [`Value`](crate::Value) documentation for more details, and valid types). + #[default] + Direct, + + /// SPIR-V values of this type can't be kept intact in SPIR-T, but instead + /// require decomposition into their "leaves", i.e. valid SPIR-T [`Value`](crate::Value)s. + Disaggregate(AggregateShape), +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum AggregateShape { + Struct { per_field_leaf_range_end: SmallVec<[u32; 4]> }, + Array { fixed_len: u32, total_leaf_count: u32 }, +} + +impl AggregateShape { + // FIXME(eddyb) force this to be used via some kind of forced canonicalization. + pub fn compute( + cx: &Context, + spv_inst: &Inst, + type_and_const_inputs: &[TypeOrConst], + ) -> Option { + let wk = &spec::Spec::get().well_known; + + if spv_inst.opcode == wk.OpTypeStruct { + let mut leaf_count = 0u32; + let mut per_field_leaf_range_end = SmallVec::new(); + for &ty_or_ct in type_and_const_inputs { + let field_type = match ty_or_ct { + TypeOrConst::Type(ty) => ty, + TypeOrConst::Const(_) => return None, + }; + let field_leaf_count = cx[field_type].disaggregated_leaf_count_u32(); + + leaf_count = leaf_count.checked_add(field_leaf_count)?; + per_field_leaf_range_end.push(leaf_count); + } + Some(Self::Struct { per_field_leaf_range_end }) + } else if spv_inst.opcode == wk.OpTypeArray { + let (elem_type, len) = match type_and_const_inputs[..] { + [TypeOrConst::Type(elem_type), TypeOrConst::Const(len)] => (elem_type, len), + _ => return None, + }; + + // NOTE(eddyb) this can legally be `None` when the length of + // the array is given by a specialization constant. 
+ let fixed_len = len.as_scalar(cx).and_then(|len| len.int_as_u32()); + let fixed_len = fixed_len?; + + let elem_leaf_count = cx[elem_type].disaggregated_leaf_count_u32(); + + Some(Self::Array { + fixed_len, + total_leaf_count: elem_leaf_count.checked_mul(fixed_len)?, + }) + } else { + None + } + } +} + +// FIXME(eddyb) not the best place to put these utilities, but they're used in +// both `spv::lower` and `spv::lift` (and they use private methods defined here). +// FIXME(eddyb) consider moving some of this to `spv::canonical`. +impl TypeDef { + fn spv_value_lowering(&self) -> &ValueLowering { + match &self.kind { + TypeKind::Scalar(_) + | TypeKind::Vector(_) + | TypeKind::QPtr + | TypeKind::Thunk + | TypeKind::SpvStringLiteralForExtInst => &ValueLowering::Direct, + TypeKind::SpvInst { value_lowering, .. } => value_lowering, + } + } + + // HACK(eddyb) `pub fn` so that Rust-GPU SPIR-T passes can also rely on this. + pub fn disaggregated_leaf_count(&self) -> usize { + self.disaggregated_leaf_count_u32() as usize + } + + fn disaggregated_leaf_count_u32(&self) -> u32 { + match self.spv_value_lowering() { + ValueLowering::Direct => 1, + ValueLowering::Disaggregate(AggregateShape::Struct { per_field_leaf_range_end }) => { + per_field_leaf_range_end.last().copied().unwrap_or(0) + } + &ValueLowering::Disaggregate(AggregateShape::Array { total_leaf_count, .. }) => { + total_leaf_count + } + } + } +} + +/// Tree-like (preorder) traversal tool for [`ValueLowering::Disaggregate`] types. +struct AggregateCursor<'a> { + cx: &'a Context, + // FIXME(eddyb) should this cache any references into `&Context`? + current: Type, + parent_component_path: SmallVec<[(Type, u32); 8]>, +} + +impl AggregateCursor<'_> { + // HACK(eddyb) this returns `true` iff a new node was found. + fn try_advance(&mut self) -> bool { + // FIXME(eddyb) this isn't the best organization possible. 
+ let cx = self.cx; + let get_component = move |ty: Type, idx: u32| -> Option { + let ty_def = &cx[ty]; + let type_input_idx = match ty_def.spv_value_lowering() { + ValueLowering::Direct => return None, + ValueLowering::Disaggregate(AggregateShape::Struct { .. }) => idx, + &ValueLowering::Disaggregate(AggregateShape::Array { fixed_len, .. }) => { + if idx >= fixed_len { + return None; + } + 0 + } + }; + let type_and_const_inputs = match &ty_def.kind { + TypeKind::Scalar(_) + | TypeKind::Vector(_) + | TypeKind::QPtr + | TypeKind::Thunk + | TypeKind::SpvStringLiteralForExtInst => &[][..], + TypeKind::SpvInst { type_and_const_inputs, .. } => &type_and_const_inputs[..], + }; + let expect_type = |ty_or_ct| match ty_or_ct { + TypeOrConst::Type(ty) => ty, + TypeOrConst::Const(_) => unreachable!(), + }; + Some(expect_type(*type_and_const_inputs.get(usize::try_from(type_input_idx).ok()?)?)) + }; + + // Try descending first, into the first child. + if let Some(first_child_type) = get_component(self.current, 0) { + self.parent_component_path.push((self.current, 0)); + self.current = first_child_type; + return true; + } + + // Try ascending until there is a next sibling to descend into, but only + // modifying `self` iff any such node is found, otherwise calling this + // method without checking its success, could result in infinite cycles. + for depth in (0..self.parent_component_path.len()).rev() { + let (ancestor_type, ancestor_child_idx) = &mut self.parent_component_path[depth]; + if let Some(sibling_idx) = ancestor_child_idx.checked_add(1) + && let Some(sibling_type) = get_component(*ancestor_type, sibling_idx) + { + *ancestor_child_idx = sibling_idx; + self.current = sibling_type; + self.parent_component_path.truncate(depth + 1); + return true; + } + } + + false + } + + // FIXME(eddyb) can't find a great name for this - crucially, if this is a + // leaf, it's a noop, it doesn't find the next leaf! + // HACK(eddyb) this returns `true` iff a leaf node was found. 
+ fn try_ensure_at_leaf(&mut self) -> bool { + loop { + if let ValueLowering::Direct = self.cx[self.current].spv_value_lowering() { + return true; + } + if !self.try_advance() { + return false; + } + } + } +} + +/// Recursively flattening iterator for [`ValueLowering::Disaggregate`] types. +pub struct DisaggregatedLeafTypes<'a>(Option>); + +impl Iterator for DisaggregatedLeafTypes<'_> { + type Item = Type; + + fn size_hint(&self) -> (usize, Option) { + // HACK(eddyb) only compute a size hint for the fresh iterator. + if let Self(Some(cursor)) = self + && cursor.parent_component_path.is_empty() + { + let leaf_count = cursor.cx[cursor.current].disaggregated_leaf_count(); + return (leaf_count, Some(leaf_count)); + } + (0, None) + } + + fn next(&mut self) -> Option { + let cursor = self.0.as_mut()?; + let next = cursor.try_ensure_at_leaf().then_some(cursor.current); + + // Record advancement failure, ensuring future calls to `next` return `None`. + if !(next.is_some() && cursor.try_advance()) { + *self = Self(None); + } + + next + } +} + +impl<'a> DisaggregatedLeafTypes<'a> { + // HACK(eddyb) like `map`, but accessing the `parent_component_path` of + // the inner `AggregateCursor`, when it is positioned at each leaf. + fn map_with_parent_component_path( + self, + mut f: impl FnMut(Type, &[(Type, u32)]) -> T, + ) -> impl Iterator { + let Self(mut cursor_slot) = self; + iter::from_fn(move || { + let cursor = cursor_slot.as_mut()?; + let next = cursor + .try_ensure_at_leaf() + .then(|| f(cursor.current, &cursor.parent_component_path)); + + // Record advancement failure, ensuring future calls to `next` return `None`. + if !(next.is_some() && cursor.try_advance()) { + cursor_slot = None; + } + + next + }) + } +} + +// FIXME(eddyb) not the best place to put these utilities, but they're used in +// both `spv::lower` and `spv::lift` (and they use private methods defined here). +// FIXME(eddyb) consider moving some of this to `spv::canonical`. 
+impl Type { + // HACK(eddyb) `pub fn` so that Rust-GPU SPIR-T passes can also rely on this. + pub fn disaggregated_leaf_types(self, cx: &Context) -> DisaggregatedLeafTypes<'_> { + DisaggregatedLeafTypes(Some(AggregateCursor { + cx, + current: self, + parent_component_path: SmallVec::new(), + })) + } + + fn aggregate_component_type_and_leaf_range( + self, + cx: &Context, + idx: u32, + ) -> Option<(Type, Range)> { + let (type_and_const_inputs, aggregate_shape) = match &cx[self].kind { + TypeKind::SpvInst { + spv_inst: _, + type_and_const_inputs, + value_lowering: ValueLowering::Disaggregate(aggregate_shape), + } => (type_and_const_inputs, aggregate_shape), + _ => return None, + }; + let expect_type = |ty_or_ct| match ty_or_ct { + TypeOrConst::Type(ty) => ty, + TypeOrConst::Const(_) => unreachable!(), + }; + + let idx_usize = idx as usize; + let component_type = match aggregate_shape { + AggregateShape::Struct { .. } => expect_type(*type_and_const_inputs.get(idx_usize)?), + &AggregateShape::Array { fixed_len, .. } => { + if idx >= fixed_len { + return None; + } + expect_type(type_and_const_inputs[0]) + } + }; + let component_leaf_count = cx[component_type].disaggregated_leaf_count(); + + let component_leaf_range = match aggregate_shape { + AggregateShape::Struct { per_field_leaf_range_end } => { + let end = per_field_leaf_range_end[idx_usize] as usize; + let start = end.checked_sub(component_leaf_count)?; + start..end + } + AggregateShape::Array { .. } => { + let start = component_leaf_count.checked_mul(idx_usize)?; + let end = start.checked_add(component_leaf_count)?; + start..end + } + }; + Some((component_type, component_leaf_range)) + } +} + +/// Aspects of how a [`spv::Inst`](Inst) was produced by [`spv::lower`](lower), +/// which were otherwise lost in the SPIR-T form, but are still required for +/// [`spv::lift`](lift) to reproduce the original SPIR-V instruction. 
+/// +/// Primarily used within [`DataInstKind`](crate::DataInstKind) due to SPIR-V +/// instructions that take or produce "aggregates" (`OpTypeStruct`/`OpTypeArray`) +/// and which may require the exact original types (i.e. may not be valid when +/// using a fresh `OpTypeStruct` of the flattened non-"aggregate" components). +#[derive(Clone, Default, PartialEq, Eq, Hash)] +pub struct InstLowering { + // FIXME(eddyb) should this be named "result" instead of "output", somewhat + // standardizing the idea that 1 SPIR-V "result" maps to N SPIR-T "outputs"? + pub disaggregated_output: Option, + + // FIXME(eddyb) only store the starts, and get the leaf counts from the `Type`. + pub disaggregated_inputs: SmallVec<[(Range, Type); 1]>, +} + +/// Helper type for [`InstLowering::reaggreate_inputs`], which corresponds to +/// one or more inputs of a SPIR-V instruction (after being lowered to SPIR-T), +/// according to the [`InstLowering`] (and its `disaggregated_inputs` field). +#[derive(Copy, Clone)] +pub enum ReaggregatedIdOperand<'a, T> { + Direct(T), + Aggregate { ty: Type, leaves: &'a [T] }, +} + +impl InstLowering { + pub fn reaggreate_inputs<'a, T: Copy>( + &'a self, + inputs: &'a [T], + ) -> impl Iterator> + Clone { + // HACK(eddyb) the `None` at the end handles remaining direct inputs. + let mut prev_end = 0; + self.disaggregated_inputs.iter().map(Some).chain([None]).flat_map( + move |maybe_disaggregated| { + // FIXME(eddyb) the range manipulation is all over the place here. 
+ let direct_range = prev_end + ..maybe_disaggregated.map_or(inputs.len(), |(range, _)| range.start as usize); + assert!(direct_range.start <= direct_range.end); + prev_end = direct_range.end; + + let direct_inputs = + inputs[direct_range].iter().copied().map(ReaggregatedIdOperand::Direct); + + let aggregate_input = maybe_disaggregated.map(|(range, ty)| { + let leaves_range = range.start as usize..range.end as usize; + prev_end = leaves_range.end; + + ReaggregatedIdOperand::Aggregate { ty: *ty, leaves: &inputs[leaves_range] } + }); + + direct_inputs.chain(aggregate_input) + }, + ) + } +} + /// A SPIR-V instruction, in its minimal form (opcode and immediate operands). #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Inst { diff --git a/src/spv/spec.rs b/src/spv/spec.rs index ef7a5d7b..ad71a066 100644 --- a/src/spv/spec.rs +++ b/src/spv/spec.rs @@ -131,8 +131,12 @@ def_well_known! { // FIXME(eddyb) hide these from code, lowering should handle most cases. OpConstantComposite, + OpSpecConstantComposite, OpConstantFunctionPointerINTEL, + // FIXME(eddyb) this is only exposed here for `qptr`. + OpSpecConstantOp, + OpVariable, OpFunction, @@ -152,9 +156,11 @@ def_well_known! 
{ OpSwitch, OpFunctionCall, + OpFunctionPointerCallINTEL, OpLoad, OpStore, + OpCopyMemory, OpArrayLength, OpAccessChain, OpInBoundsAccessChain, diff --git a/src/transform.rs b/src/transform.rs index 59004b44..17e9e915 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -7,9 +7,9 @@ use crate::qptr::{QPtrAttr, QPtrOp}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, DataInstKind, DbgSrcLoc, DeclDef, EntityListIter, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam, - GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module, ModuleDebugInfo, ModuleDialect, - Node, NodeDef, NodeKind, OrdAssertEq, Region, RegionDef, Type, TypeDef, TypeKind, TypeOrConst, - Value, Var, VarDecl, spv, + GlobalVar, GlobalVarDecl, GlobalVarDefBody, GlobalVarInit, Import, Module, ModuleDebugInfo, + ModuleDialect, Node, NodeDef, NodeKind, OrdAssertEq, Region, RegionDef, Type, TypeDef, + TypeKind, TypeOrConst, Value, Var, VarDecl, spv, }; use std::cmp::Ordering; use std::rc::Rc; @@ -444,7 +444,7 @@ impl InnerTransform for TypeDef { | TypeKind::Thunk | TypeKind::SpvStringLiteralForExtInst => Transformed::Unchanged, - TypeKind::SpvInst { spv_inst, type_and_const_inputs } => Transformed::map_iter( + TypeKind::SpvInst { spv_inst, type_and_const_inputs, value_lowering } => Transformed::map_iter( type_and_const_inputs.iter(), |ty_or_ct| match *ty_or_ct { TypeOrConst::Type(ty) => transform!({ @@ -458,6 +458,7 @@ impl InnerTransform for TypeDef { ).map(|new_iter| TypeKind::SpvInst { spv_inst: spv_inst.clone(), type_and_const_inputs: new_iter.collect(), + value_lowering: value_lowering.clone(), }), }, } => Self { @@ -544,17 +545,38 @@ impl InnerInPlaceTransform for GlobalVarDefBody { let Self { initializer } = self; if let Some(initializer) = initializer { - transformer.transform_const_use(*initializer).apply_to(initializer); + initializer.inner_in_place_transform_with(transformer); + } + } +} + +impl InnerInPlaceTransform for GlobalVarInit { + fn 
inner_in_place_transform_with(&mut self, transformer: &mut impl Transformer) { + match self { + GlobalVarInit::Direct(ct) => transformer.transform_const_use(*ct).apply_to(ct), + GlobalVarInit::SpvAggregate { ty, leaves } => { + transformer.transform_type_use(*ty).apply_to(ty); + for ct in leaves { + transformer.transform_const_use(*ct).apply_to(ct); + } + } + GlobalVarInit::Data(data) => { + for ct in data.used_symbolic_values_mut() { + transformer.transform_const_use(*ct).apply_to(ct); + } + } } } } impl InnerInPlaceTransform for FuncDecl { fn inner_in_place_transform_with(&mut self, transformer: &mut impl Transformer) { - let Self { attrs, ret_type, params, def } = self; + let Self { attrs, ret_types, params, def } = self; transformer.transform_attr_set_use(*attrs).apply_to(attrs); - transformer.transform_type_use(*ret_type).apply_to(ret_type); + for ty in ret_types { + transformer.transform_type_use(*ty).apply_to(ty); + } for param in params { param.inner_transform_with(transformer).apply_to(param); } @@ -648,9 +670,12 @@ impl InnerInPlaceTransform for FuncAtMut<'_, Node> { | QPtrOp::Offset(_) | QPtrOp::DynOffset { .. }, ) - | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) - | DataInstKind::SpvExtInst { .. 
} => {} + | DataInstKind::ThunkBind(_) => {} + + DataInstKind::SpvInst(_, lowering) + | DataInstKind::SpvExtInst { ext_set: _, inst: _, lowering } => { + lowering.inner_in_place_transform_with(transformer); + } } for v in &mut self.reborrow().def().inputs { @@ -687,6 +712,19 @@ impl InnerInPlaceTransform for VarDecl { } } +impl InnerInPlaceTransform for spv::InstLowering { + fn inner_in_place_transform_with(&mut self, transformer: &mut impl Transformer) { + let Self { disaggregated_output, disaggregated_inputs } = self; + + if let Some(disaggregated_output) = disaggregated_output { + transformer.transform_type_use(*disaggregated_output).apply_to(disaggregated_output); + } + for (_range, ty) in disaggregated_inputs { + transformer.transform_type_use(*ty).apply_to(ty); + } + } +} + impl InnerTransform for Value { fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { match self { diff --git a/src/visit.rs b/src/visit.rs index 2a6dc1a3..0c0d2501 100644 --- a/src/visit.rs +++ b/src/visit.rs @@ -7,9 +7,9 @@ use crate::qptr::{QPtrAttr, QPtrOp}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, DataInstKind, DbgSrcLoc, DeclDef, DiagMsgPart, EntityListIter, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, - FuncParam, GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module, ModuleDebugInfo, - ModuleDialect, Node, NodeDef, NodeKind, OrdAssertEq, Region, RegionDef, Type, TypeDef, - TypeKind, TypeOrConst, Value, Var, VarDecl, spv, + FuncParam, GlobalVar, GlobalVarDecl, GlobalVarDefBody, GlobalVarInit, Import, Module, + ModuleDebugInfo, ModuleDialect, Node, NodeDef, NodeKind, OrdAssertEq, Region, RegionDef, Type, + TypeDef, TypeKind, TypeOrConst, Value, Var, VarDecl, spv, }; // FIXME(eddyb) `Sized` bound shouldn't be needed but removing it requires @@ -327,7 +327,7 @@ impl InnerVisit for TypeDef { | TypeKind::Thunk | TypeKind::SpvStringLiteralForExtInst => {} - TypeKind::SpvInst { spv_inst: _, type_and_const_inputs 
} => { + TypeKind::SpvInst { spv_inst: _, type_and_const_inputs, value_lowering: _ } => { for &ty_or_ct in type_and_const_inputs { match ty_or_ct { TypeOrConst::Type(ty) => visitor.visit_type_use(ty), @@ -400,18 +400,39 @@ impl InnerVisit for GlobalVarDefBody { fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { let Self { initializer } = self; - if let Some(initializer) = *initializer { - visitor.visit_const_use(initializer); + if let Some(initializer) = initializer { + initializer.inner_visit_with(visitor); + } + } +} + +impl InnerVisit for GlobalVarInit { + fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { + match self { + &GlobalVarInit::Direct(ct) => visitor.visit_const_use(ct), + GlobalVarInit::SpvAggregate { ty, leaves } => { + visitor.visit_type_use(*ty); + for &ct in leaves { + visitor.visit_const_use(ct); + } + } + GlobalVarInit::Data(data) => { + for &ct in data.used_symbolic_values() { + visitor.visit_const_use(ct); + } + } } } } impl InnerVisit for FuncDecl { fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { - let Self { attrs, ret_type, params, def } = self; + let Self { attrs, ret_types, params, def } = self; visitor.visit_attr_set_use(*attrs); - visitor.visit_type_use(*ret_type); + for &ty in ret_types { + visitor.visit_type_use(ty); + } for param in params { param.inner_visit_with(visitor); } @@ -494,9 +515,11 @@ impl<'a> FuncAt<'a, Node> { | QPtrOp::Offset(_) | QPtrOp::DynOffset { .. }, ) - | DataInstKind::ThunkBind(_) - | DataInstKind::SpvInst(_) - | DataInstKind::SpvExtInst { .. 
} => {} + | DataInstKind::ThunkBind(_) => {} + DataInstKind::SpvInst(_, lowering) + | DataInstKind::SpvExtInst { ext_set: _, inst: _, lowering } => { + lowering.inner_visit_with(visitor); + } } for v in inputs { visitor.visit_value_use(v); @@ -525,6 +548,19 @@ impl InnerVisit for VarDecl { } } +impl InnerVisit for spv::InstLowering { + fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { + let Self { disaggregated_output, disaggregated_inputs } = self; + + if let Some(ty) = *disaggregated_output { + visitor.visit_type_use(ty); + } + for &(_, ty) in disaggregated_inputs { + visitor.visit_type_use(ty); + } + } +} + impl InnerVisit for Value { fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { match self { diff --git a/vk-layer/src/shaders_layer.rs b/vk-layer/src/shaders_layer.rs index 3b716115..85f9f5c7 100644 --- a/vk-layer/src/shaders_layer.rs +++ b/vk-layer/src/shaders_layer.rs @@ -182,7 +182,7 @@ impl ShadersDeviceHooks { let layout_config = &spirt::mem::LayoutConfig { abstract_bool_size_align: (1, 1), logical_ptr_size_align: (8, 8), - ..spirt::mem::LayoutConfig::VULKAN_SCALAR_LAYOUT + ..spirt::mem::LayoutConfig::VULKAN_SCALAR_LAYOUT_LE }; self.timed_pass("qptr::lower_from_spv_ptrs", || {