From e4da12fc667dfd52e83e3b178be80f1ab8a17dfc Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Sun, 15 Mar 2026 19:39:52 -0400 Subject: [PATCH 01/14] rebase hint multi observe --- .../native/circuit/cuda/src/poseidon2.cu | 37 ++++- .../native/circuit/src/extension/mod.rs | 11 +- .../native/circuit/src/poseidon2/air.rs | 126 +++++++++++---- .../native/circuit/src/poseidon2/chip.rs | 147 +++++++++++++----- .../native/circuit/src/poseidon2/columns.rs | 24 +-- .../native/circuit/src/poseidon2/execution.rs | 39 +++-- .../native/compiler/src/asm/compiler.rs | 6 +- .../native/compiler/src/asm/instruction.rs | 11 +- .../native/compiler/src/conversion/mod.rs | 6 +- .../native/compiler/src/ir/instructions.rs | 13 +- extensions/native/compiler/src/ir/poseidon.rs | 30 +++- .../native/recursion/src/challenger/duplex.rs | 2 +- 12 files changed, 326 insertions(+), 126 deletions(-) diff --git a/extensions/native/circuit/cuda/src/poseidon2.cu b/extensions/native/circuit/cuda/src/poseidon2.cu index fdbe0d3ce5..b39038a079 100644 --- a/extensions/native/circuit/cuda/src/poseidon2.cu +++ b/extensions/native/circuit/cuda/src/poseidon2.cu @@ -355,31 +355,52 @@ template struct Poseidon2Wrapper { if (specific[COL_INDEX(MultiObserveCols, is_first)] == Fp::one()) { uint32_t very_start_timestamp = row[COL_INDEX(Cols, very_first_timestamp)].asUInt32(); - for (uint32_t i = 0; i < 4; ++i) { + // 3 register reads at timestamps +0, +1, +2 + for (uint32_t i = 0; i < 3; ++i) { mem_fill_base( mem_helper, very_start_timestamp + i, specific.slice_from(COL_INDEX(MultiObserveCols, read_data[i].base)) ); } + // 1 context array read at timestamp +3 + mem_fill_base( + mem_helper, + very_start_timestamp + 3, + specific.slice_from(COL_INDEX(MultiObserveCols, read_ctx.base)) + ); + // 1 hint_id register read at timestamp +4 (reuse spare read_data[3] on head row) + mem_fill_base( + mem_helper, + very_start_timestamp + 4, + specific.slice_from(COL_INDEX(MultiObserveCols, read_data[3].base)) + ); } else { uint32_t start_timestamp = row[COL_INDEX(Cols, start_timestamp)].asUInt32(); uint32_t chunk_start = specific[COL_INDEX(MultiObserveCols, start_idx)].asUInt32(); uint32_t chunk_end = specific[COL_INDEX(MultiObserveCols, end_idx)].asUInt32(); + // is_hint = ctx[2] + uint32_t is_hint = + specific[COL_INDEX(MultiObserveCols, ctx[2])].asUInt32(); + uint32_t ts_per_element = 2 - is_hint; for (uint32_t j = chunk_start; j < chunk_end; ++j) { + if (!is_hint) { + // Non-hint mode: fill read_data aux + mem_fill_base( + mem_helper, + start_timestamp, + specific.slice_from(COL_INDEX(MultiObserveCols, read_data[j].base)) + ); + } + // Write timestamp: start_timestamp + (1 - is_hint) for non-hint, start_timestamp for hint mem_fill_base( mem_helper, - start_timestamp, - specific.slice_from(COL_INDEX(MultiObserveCols, read_data[j].base)) - ); - mem_fill_base( - mem_helper, - start_timestamp + 1, + start_timestamp + (1 - is_hint), specific.slice_from(COL_INDEX(MultiObserveCols, write_data[j].base)) ); - start_timestamp += 2; + start_timestamp += ts_per_element; } if (chunk_end >= CHUNK) { mem_fill_base( diff --git a/extensions/native/circuit/src/extension/mod.rs b/extensions/native/circuit/src/extension/mod.rs index 924d4927e8..0aab146a13 100644 --- a/extensions/native/circuit/src/extension/mod.rs +++ b/extensions/native/circuit/src/extension/mod.rs @@ -366,11 +366,6 @@ where inventory.add_executor_chip(fri_reduced_opening); inventory.next_air::, 1>>()?; - let poseidon2 = NativePoseidon2Chip::<_, 1>::new( - NativePoseidon2Filler::new(Poseidon2Config::default()), - mem_helper.clone(), - ); - inventory.add_executor_chip(poseidon2); let hint_bus = inventory.airs().system().hint_bridge.hint_bus(); let hint_space_provider = Arc::new(HintSpaceProviderChip::new( @@ -379,6 +374,12 @@ where timestamp_max_bits, )); + let poseidon2 = NativePoseidon2Chip::<_, 1>::new( + NativePoseidon2Filler::new(Poseidon2Config::default(), hint_space_provider.clone()), + mem_helper.clone(), + ); + inventory.add_executor_chip(poseidon2); + inventory.next_air::()?; inventory.add_periphery_chip(hint_space_provider.clone()); diff --git a/extensions/native/circuit/src/poseidon2/air.rs b/extensions/native/circuit/src/poseidon2/air.rs index 9e9cdf5ce8..b7f9f458bb 100644 --- a/extensions/native/circuit/src/poseidon2/air.rs +++ b/extensions/native/circuit/src/poseidon2/air.rs @@ -713,10 +713,16 @@ impl Air let &MultiObserveCols { pc, final_timestamp_increment, + state_ptr_register, + ctx_register, + input_ptr_register, + hint_id_register, state_ptr, + ctx_ptr, input_ptr, - init_pos, - len, + hint_id, + ctx, + read_ctx, is_first, is_last, curr_len, @@ -731,35 +737,38 @@ impl Air should_permute, write_sponge_state, write_final_idx, - input_register_1, - input_register_2, - input_register_3, - output_register, } = multi_observe_specific; + // Alias context values + let init_pos = ctx[0]; + let len = ctx[1]; + let is_hint = ctx[2]; + builder.when(multi_observe_row).assert_bool(is_first); builder.when(multi_observe_row).assert_bool(is_last); builder.when(multi_observe_row).assert_bool(should_permute); + builder.when(multi_observe_row).assert_bool(is_hint); self.execution_bridge .execute_and_increment_pc( AB::F::from_canonical_usize(MULTI_OBSERVE.global_opcode().as_usize()), [ - output_register.into(), - input_register_1.into(), - input_register_2.into(), + state_ptr_register.into(), + ctx_register.into(), + input_ptr_register.into(), self.address_space.into(), self.address_space.into(), - input_register_3.into(), + hint_id_register.into(), ], ExecutionState::new(pc, very_first_timestamp), final_timestamp_increment, ) .eval(builder, multi_observe_row * is_first); + // Head row: 3 register reads + 1 context array read + 1 hint_id register read self.memory_bridge .read( - MemoryAddress::new(self.address_space, output_register), + MemoryAddress::new(self.address_space, state_ptr_register), [state_ptr], very_first_timestamp, &read_data[0], @@ -768,8 +777,8 @@ impl Air self.memory_bridge .read( - MemoryAddress::new(self.address_space, input_register_1), - [init_pos], + MemoryAddress::new(self.address_space, ctx_register), + [ctx_ptr], very_first_timestamp + AB::F::ONE, &read_data[1], ) @@ -777,41 +786,73 @@ impl Air self.memory_bridge .read( - MemoryAddress::new(self.address_space, input_register_2), + MemoryAddress::new(self.address_space, input_ptr_register), [input_ptr], very_first_timestamp + AB::F::TWO, &read_data[2], ) .eval(builder, multi_observe_row * is_first); + // Read context array: [init_pos, len, is_hint, reserved] from ctx_ptr self.memory_bridge .read( - MemoryAddress::new(self.address_space, input_register_3), - [len], + MemoryAddress::new(self.address_space, ctx_ptr), + ctx, very_first_timestamp + AB::F::from_canonical_usize(3), + &read_ctx, + ) + .eval(builder, multi_observe_row * is_first); + + // Read hint_id from register (reuse spare read_data[3] on head row) + self.memory_bridge + .read( + MemoryAddress::new(self.address_space, hint_id_register), + [hint_id], + very_first_timestamp + AB::F::from_canonical_usize(4), &read_data[3], ) .eval(builder, multi_observe_row * is_first); + // ts_per_element = 2 - is_hint (non-hint: read+write=2, hint: write-only=1) + let is_hint_expr: AB::Expr = is_hint.into(); + let ts_per_element: AB::Expr = AB::Expr::TWO - is_hint_expr.clone(); for i in 0..CHUNK { - let i_var = AB::F::from_canonical_usize(i); + let i_var: AB::Expr = AB::F::from_canonical_usize(i).into(); + let start_idx_expr: AB::Expr = start_idx.into(); + let element_start_ts: AB::Expr = + start_timestamp.into() + (i_var.clone() - start_idx_expr.clone()) * ts_per_element.clone(); + + // Non-hint mode: read from memory self.memory_bridge .read( MemoryAddress::new( self.address_space, - input_ptr + curr_len + i_var - start_idx, + input_ptr + curr_len + i_var.clone() - start_idx_expr.clone(), ), [data[i]], - start_timestamp + i_var * AB::F::TWO - start_idx * AB::F::TWO, + element_start_ts.clone(), &read_data[i], ) - .eval(builder, multi_observe_row * aux_read_enabled[i]); + .eval( + builder, + multi_observe_row * aux_read_enabled[i] * (AB::Expr::ONE - is_hint_expr.clone()), + ); + // Hint mode: lookup from hint space + self.hint_bridge.lookup( + builder, + hint_id, + curr_len + i_var.clone() - start_idx_expr.clone(), + data[i], + multi_observe_row * aux_read_enabled[i] * is_hint_expr.clone(), + ); + + // Write to sponge state (always, for both modes) self.memory_bridge .write( MemoryAddress::new(self.address_space, state_ptr + i_var), [data[i]], - start_timestamp + i_var * AB::F::TWO - start_idx * AB::F::TWO + AB::F::ONE, + element_start_ts + (AB::Expr::ONE - is_hint_expr.clone()), &write_data[i], ) .eval(builder, multi_observe_row * aux_read_enabled[i]); @@ -885,7 +926,7 @@ impl Air .write( MemoryAddress::new(self.address_space, state_ptr), full_sponge_output, - start_timestamp + (end_idx - start_idx) * AB::F::TWO, + start_timestamp + (end_idx - start_idx) * (AB::Expr::TWO - is_hint_expr.clone()), &write_sponge_state, ) .eval(builder, multi_observe_row * should_permute); @@ -909,11 +950,12 @@ impl Air // final_idx = aux_read_enabled[CHUNK-1] * 0 + (1 - aux_read_enabled[CHUNK-1]) * end_idx let final_idx = aux_read_enabled[CHUNK - 1] * AB::Expr::ZERO + (AB::Expr::ONE - aux_read_enabled[CHUNK - 1]) * end_idx; + // Write final_idx back to ctx[0] (ctx_ptr address) self.memory_bridge .write( - MemoryAddress::new(self.address_space, input_register_1), + MemoryAddress::new(self.address_space, ctx_ptr), [final_idx], - start_timestamp + (end_idx - start_idx) * AB::F::TWO + should_permute, + start_timestamp + (end_idx - start_idx) * (AB::Expr::TWO - is_hint_expr) + should_permute, &write_final_idx, ) .eval(builder, multi_observe_row * is_last); @@ -962,41 +1004,59 @@ impl Air builder .when(next.multi_observe_row) .when(not(next_multi_observe_specific.is_first)) - .assert_eq(init_pos, next_multi_observe_specific.init_pos); + .assert_eq(init_pos, next_multi_observe_specific.ctx[0]); builder .when(next.multi_observe_row) .when(not(next_multi_observe_specific.is_first)) - .assert_eq(len, next_multi_observe_specific.len); + .assert_eq(len, next_multi_observe_specific.ctx[1]); + + builder + .when(next.multi_observe_row) + .when(not(next_multi_observe_specific.is_first)) + .assert_eq( + state_ptr_register, + next_multi_observe_specific.state_ptr_register, + ); builder .when(next.multi_observe_row) .when(not(next_multi_observe_specific.is_first)) .assert_eq( - input_register_1, - next_multi_observe_specific.input_register_1, + ctx_register, + next_multi_observe_specific.ctx_register, ); builder .when(next.multi_observe_row) .when(not(next_multi_observe_specific.is_first)) .assert_eq( - input_register_2, - next_multi_observe_specific.input_register_2, + input_ptr_register, + next_multi_observe_specific.input_ptr_register, ); + builder + .when(next.multi_observe_row) + .when(not(next_multi_observe_specific.is_first)) + .assert_eq(ctx_ptr, next_multi_observe_specific.ctx_ptr); + + builder + .when(next.multi_observe_row) + .when(not(next_multi_observe_specific.is_first)) + .assert_eq(hint_id, next_multi_observe_specific.hint_id); + builder .when(next.multi_observe_row) .when(not(next_multi_observe_specific.is_first)) .assert_eq( - input_register_3, - next_multi_observe_specific.input_register_3, + hint_id_register, + next_multi_observe_specific.hint_id_register, ); builder .when(next.multi_observe_row) .when(not(next_multi_observe_specific.is_first)) - .assert_eq(output_register, next_multi_observe_specific.output_register); + .assert_eq(is_hint, next_multi_observe_specific.ctx[2]); // Timestamp constraints builder diff --git a/extensions/native/circuit/src/poseidon2/chip.rs b/extensions/native/circuit/src/poseidon2/chip.rs index 770efc7307..7dee62f60d 100644 --- a/extensions/native/circuit/src/poseidon2/chip.rs +++ b/extensions/native/circuit/src/poseidon2/chip.rs @@ -1,4 +1,5 @@ use std::borrow::{Borrow, BorrowMut}; +use std::sync::Arc; use openvm_circuit::{ arch::*, @@ -22,6 +23,7 @@ use openvm_stark_backend::{ }; use crate::{ + hint_space_provider::HintSpaceProviderChip, mem_fill_helper, poseidon2::{ columns::{ @@ -45,6 +47,7 @@ pub struct NativePoseidon2Filler { // pre-computed Poseidon2 sub cols for dummy rows. empty_poseidon2_sub_cols: Vec, pub(super) subchip: Poseidon2SubChip, + pub hint_space_provider: Arc>, } impl NativePoseidon2Executor { @@ -71,12 +74,16 @@ pub(crate) fn compress( } impl NativePoseidon2Filler { - pub fn new(poseidon2_config: Poseidon2Config) -> Self { + pub fn new( + poseidon2_config: Poseidon2Config, + hint_space_provider: Arc>, + ) -> Self { let subchip = Poseidon2SubChip::new(poseidon2_config.constants); let empty_poseidon2_sub_cols = subchip.generate_trace(vec![[F::ZERO; CHUNK * 2]]).values; Self { empty_poseidon2_sub_cols, subchip, + hint_space_provider, } } } @@ -649,11 +656,11 @@ where } else if instruction.opcode == MULTI_OBSERVE.global_opcode() { let &Instruction { a: state_ptr_register, - b: init_pos_register, + b: ctx_register, c: input_ptr_register, d: register_address_space, e: data_address_space, - f: len_register, + f: hint_id_register, .. } = instruction; @@ -663,31 +670,50 @@ where ); assert_eq!(data_address_space, F::from_canonical_u32(AS::Native as u32)); - let [init_pos]: [F; 1] = - memory_read_native(state.memory.data(), init_pos_register.as_canonical_u32()); - let [input_len]: [F; 1] = - memory_read_native(state.memory.data(), len_register.as_canonical_u32()); + // Read ctx_ptr from register, then read context array from memory + let [ctx_ptr]: [F; 1] = + memory_read_native(state.memory.data(), ctx_register.as_canonical_u32()); + let ctx: [F; 4] = + memory_read_native(state.memory.data(), ctx_ptr.as_canonical_u32()); + let init_pos = ctx[0]; + let input_len = ctx[1]; + let is_hint = ctx[2].as_canonical_u32() != 0; + + // Read hint_id from register + let [hint_id]: [F; 1] = + memory_read_native(state.memory.data(), hint_id_register.as_canonical_u32()); + + // Get hint_space data if in hint mode + let hint_data: Vec = if is_hint { + state.streams.hint_space[hint_id.as_canonical_u32() as usize].clone() + } else { + vec![] + }; let mut len = input_len.as_canonical_u32() as usize; let mut pos = init_pos.as_canonical_u32() as usize; let mut chunks: Vec<(usize, usize)> = vec![]; - const NUM_HEAD_ACCESSES: usize = 4; + // 3 register reads + 1 context array read + 1 hint_id register read = 5 head accesses + const NUM_HEAD_ACCESSES: usize = 5; let mut final_timestamp_inc = NUM_HEAD_ACCESSES; + // In hint mode: 1 timestamp per element (write only) + // In non-hint mode: 2 timestamps per element (read + write) + let ts_per_element = if is_hint { 1 } else { 2 }; while len > 0 { if len >= (CHUNK - pos) { chunks.push((pos, CHUNK)); len -= CHUNK - pos; - final_timestamp_inc += 2 * (CHUNK - pos) + 1; + final_timestamp_inc += ts_per_element * (CHUNK - pos) + 1; pos = 0; } else { chunks.push((pos, pos + len)); - final_timestamp_inc += 2 * len; + final_timestamp_inc += ts_per_element * len; len = 0; pos += len; } } - final_timestamp_inc += 1; // write back to init_pos_register + final_timestamp_inc += 1; // write back to ctx[0] let allocated_rows = arena .alloc(MultiRowLayout::new(NativePoseidon2Metadata { @@ -698,14 +724,15 @@ where let head_multi_observe_cols: &mut MultiObserveCols = head_cols.specific[..MultiObserveCols::::width()].borrow_mut(); + // 3 register reads: state_ptr, ctx_ptr, input_ptr let [state_ptr]: [F; 1] = tracing_read_native_helper( state.memory, state_ptr_register.as_canonical_u32(), head_multi_observe_cols.read_data[0].as_mut(), ); - let [init_pos]: [F; 1] = tracing_read_native_helper( + let [ctx_ptr]: [F; 1] = tracing_read_native_helper( state.memory, - init_pos_register.as_canonical_u32(), + ctx_register.as_canonical_u32(), head_multi_observe_cols.read_data[1].as_mut(), ); let [input_ptr]: [F; 1] = tracing_read_native_helper( @@ -713,9 +740,16 @@ where input_ptr_register.as_canonical_u32(), head_multi_observe_cols.read_data[2].as_mut(), ); - let [input_len]: [F; 1] = tracing_read_native_helper( + // 1 context array read: [init_pos, len, is_hint, reserved] + let ctx: [F; 4] = tracing_read_native_helper( state.memory, - len_register.as_canonical_u32(), + ctx_ptr.as_canonical_u32(), + head_multi_observe_cols.read_ctx.as_mut(), + ); + // 1 hint_id register read (reuse spare read_data[3] on head row) + let [hint_id]: [F; 1] = tracing_read_native_helper( + state.memory, + hint_id_register.as_canonical_u32(), head_multi_observe_cols.read_data[3].as_mut(), ); @@ -727,14 +761,15 @@ where for (i, cols) in allocated_rows.iter_mut().enumerate() { let multi_observe_cols: &mut MultiObserveCols = cols.specific[..MultiObserveCols::::width()].borrow_mut(); - multi_observe_cols.input_register_1 = init_pos_register; - multi_observe_cols.input_register_2 = input_ptr_register; - multi_observe_cols.input_register_3 = len_register; - multi_observe_cols.output_register = state_ptr_register; - multi_observe_cols.init_pos = init_pos; - multi_observe_cols.input_ptr = input_ptr; + multi_observe_cols.state_ptr_register = state_ptr_register; + multi_observe_cols.ctx_register = ctx_register; + multi_observe_cols.input_ptr_register = input_ptr_register; + multi_observe_cols.hint_id_register = hint_id_register; multi_observe_cols.state_ptr = state_ptr; - multi_observe_cols.len = input_len; + multi_observe_cols.ctx_ptr = ctx_ptr; + multi_observe_cols.input_ptr = input_ptr; + multi_observe_cols.hint_id = hint_id; + multi_observe_cols.ctx = ctx; cols.multi_observe_row = F::ONE; cols.very_first_timestamp = init_timestamp; @@ -779,21 +814,28 @@ where multi_observe_cols.aux_before_end[j] = F::ONE; } for j in chunk_start..chunk_end { - let n_f: [F; 1] = tracing_read_native_helper( - state.memory, - input_ptr_u32 + input_idx as u32, - multi_observe_cols.read_data[j].as_mut(), - ); + let n_f: F = if is_hint { + // In hint mode: read from hint_space + hint_data[input_idx] + } else { + // In non-hint mode: read from memory via tracing read + let [v]: [F; 1] = tracing_read_native_helper( + state.memory, + input_ptr_u32 + input_idx as u32, + multi_observe_cols.read_data[j].as_mut(), + ); + v + }; multi_observe_cols.aux_read_enabled[j] = F::ONE; tracing_write_native_inplace( state.memory, state_ptr_u32 + j as u32, - n_f, + [n_f], &mut multi_observe_cols.write_data[j], ); - multi_observe_cols.data[j] = n_f[0]; + multi_observe_cols.data[j] = n_f; input_idx += 1; - cur_timestamp += 2; + cur_timestamp += ts_per_element as u32; } let permutation_input: [F; 16] = @@ -817,7 +859,7 @@ where let final_idx = F::from_canonical_usize(chunk_end % CHUNK); tracing_write_native_inplace( state.memory, - init_pos_register.as_canonical_u32(), + ctx_ptr.as_canonical_u32(), [final_idx], &mut multi_observe_cols.write_final_idx, ); @@ -1161,7 +1203,7 @@ impl NativePoseidon2Filler::width()].borrow_mut(); let start_timestamp_u32 = head_cols.very_first_timestamp.as_canonical_u32(); - // state_ptr, init_pos, input_ptr, len + // 3 register reads: state_ptr, ctx_ptr, input_ptr mem_fill_helper( mem_helper, start_timestamp_u32, @@ -1177,12 +1219,23 @@ impl NativePoseidon2Filler = chunk_slice @@ -1194,6 +1247,8 @@ impl NativePoseidon2Filler = chunk_slice[row_idx * width..(row_idx + 1) * width].borrow_mut(); @@ -1205,18 +1260,32 @@ impl NativePoseidon2Filler= CHUNK as u32 { diff --git a/extensions/native/circuit/src/poseidon2/columns.rs b/extensions/native/circuit/src/poseidon2/columns.rs index abb8db54a2..c8490fc285 100644 --- a/extensions/native/circuit/src/poseidon2/columns.rs +++ b/extensions/native/circuit/src/poseidon2/columns.rs @@ -211,16 +211,22 @@ pub struct MultiObserveCols { pub pc: T, pub final_timestamp_increment: T, - // Initial reads from registers - // They are same across same instance of multi_observe + // Register addresses + pub state_ptr_register: T, + pub ctx_register: T, + pub input_ptr_register: T, + pub hint_id_register: T, + + // Values read from registers pub state_ptr: T, + pub ctx_ptr: T, pub input_ptr: T, - pub init_pos: T, - pub len: T, - pub input_register_1: T, - pub input_register_2: T, - pub input_register_3: T, - pub output_register: T, + pub hint_id: T, + + // Context array values read from ctx_ptr + // ctx[0] = init_pos, ctx[1] = len, ctx[2] = is_hint, ctx[3] = reserved + pub ctx: [T; 4], + pub read_ctx: MemoryReadAuxCols, pub is_first: T, pub is_last: T, @@ -240,6 +246,6 @@ pub struct MultiObserveCols { pub should_permute: T, pub write_sponge_state: MemoryWriteAuxCols, - // Final write back and registers + // Final write back to ctx[0] pub write_final_idx: MemoryWriteAuxCols, } diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs index a0c1fc72a2..a558205729 100644 --- a/extensions/native/circuit/src/poseidon2/execution.rs +++ b/extensions/native/circuit/src/poseidon2/execution.rs @@ -35,9 +35,9 @@ struct Pos2PreCompute<'a, F: Field, const SBOX_REGISTERS: usize> { #[repr(C)] struct MultiObservePreCompute<'a, F: Field, const SBOX_REGISTERS: usize> { subchip: &'a Poseidon2SubChip, - pub init_pos_register: u32, + pub ctx_register: u32, pub input_ptr_register: u32, - pub len_register: u32, + pub hint_id_register: u32, pub state_ptr_register: u32, } @@ -137,9 +137,9 @@ impl<'a, F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Executor = if is_hint { + exec_state.streams.hint_space[hint_id_u32 as usize].clone() + } else { + vec![] + }; for (chunk_start, chunk_end) in observation_chunks { for j in chunk_start..chunk_end { - let [n_f]: [F; 1] = exec_state.vm_read(NATIVE_AS, input_ptr_u32 + input_idx); + let n_f = if is_hint { + hint_data[input_idx as usize] + } else { + let [v]: [F; 1] = exec_state.vm_read(NATIVE_AS, input_ptr_u32 + input_idx); + v + }; exec_state.vm_write(NATIVE_AS, sponge_ptr_u32 + (j as u32), &[n_f]); input_idx += 1; } @@ -634,9 +652,10 @@ unsafe fn execute_multi_observe_e12_impl< height += 1; } if let Some(final_idx) = final_idx { + // Write final_idx back to ctx[0] (overwriting init_pos in context array) exec_state.vm_write::( NATIVE_AS, - pre_compute.init_pos_register, + ctx_ptr.as_canonical_u32(), &[F::from_canonical_usize(final_idx)], ); } diff --git a/extensions/native/compiler/src/asm/compiler.rs b/extensions/native/compiler/src/asm/compiler.rs index 689bb1ebd0..14d84b4f1f 100644 --- a/extensions/native/compiler/src/asm/compiler.rs +++ b/extensions/native/compiler/src/asm/compiler.rs @@ -489,13 +489,13 @@ impl + TwoAdicField> AsmCo DslIr::HintBitsF(var, len) => { self.push(AsmInstruction::HintBits(var.fp(), len), debug_info); } - DslIr::Poseidon2MultiObserve(dst, init_pos, arr_ptr, len) => { + DslIr::Poseidon2MultiObserve(dst, ctx_ptr, arr_ptr, hint_id) => { self.push( AsmInstruction::Poseidon2MultiObserve( dst.fp(), - init_pos.fp(), + ctx_ptr.fp(), arr_ptr.fp(), - len.get_var().fp(), + hint_id.fp(), ), debug_info, ); diff --git a/extensions/native/compiler/src/asm/instruction.rs b/extensions/native/compiler/src/asm/instruction.rs index b715d97cf1..16b2be4b49 100644 --- a/extensions/native/compiler/src/asm/instruction.rs +++ b/extensions/native/compiler/src/asm/instruction.rs @@ -110,9 +110,10 @@ pub enum AsmInstruction { /// Halt. Halt, - /// Absorbs multiple base elements into a duplex transcript with Poseidon2 permutation - /// (sponge_state, init_pos, arr_ptr, len) - /// Returns the final index position of hash sponge + /// Absorbs multiple base elements into a duplex transcript with Poseidon2 permutation. + /// (sponge_state, ctx_ptr, arr_ptr, hint_id) + /// Context array at ctx_ptr: [init_pos, len, is_hint, reserved] + /// When is_hint=1, data is read from hint space using hint_id. Poseidon2MultiObserve(i32, i32, i32, i32), /// Perform a Poseidon2 permutation on state starting at address `lhs` @@ -350,11 +351,11 @@ impl> AsmInstruction { AsmInstruction::Trap => write!(f, "trap"), AsmInstruction::Halt => write!(f, "halt"), AsmInstruction::HintBits(src, len) => write!(f, "hint_bits ({})fp, {}", src, len), - AsmInstruction::Poseidon2MultiObserve(dst, init_pos, arr, len) => { + AsmInstruction::Poseidon2MultiObserve(dst, ctx, arr, hint_id) => { write!( f, "poseidon2_multi_observe ({})fp, ({})fp ({})fp ({})fp", - dst, init_pos, arr, len + dst, ctx, arr, hint_id ) } AsmInstruction::Poseidon2Permute(dst, lhs) => { diff --git a/extensions/native/compiler/src/conversion/mod.rs b/extensions/native/compiler/src/conversion/mod.rs index 0ff358ec70..61fd726d3f 100644 --- a/extensions/native/compiler/src/conversion/mod.rs +++ b/extensions/native/compiler/src/conversion/mod.rs @@ -441,15 +441,15 @@ fn convert_instruction>( AS::Native, AS::Native, )], - AsmInstruction::Poseidon2MultiObserve(dst, init, arr, len) => vec![ + AsmInstruction::Poseidon2MultiObserve(dst, ctx, arr, hint_id) => vec![ Instruction { opcode: options.opcode_with_offset(Poseidon2Opcode::MULTI_OBSERVE), a: i32_f(dst), - b: i32_f(init), + b: i32_f(ctx), c: i32_f(arr), d: AS::Native.to_field(), e: AS::Native.to_field(), - f: i32_f(len), + f: i32_f(hint_id), g: F::ZERO, } ], diff --git a/extensions/native/compiler/src/ir/instructions.rs b/extensions/native/compiler/src/ir/instructions.rs index a4932d2826..8658a8a06e 100644 --- a/extensions/native/compiler/src/ir/instructions.rs +++ b/extensions/native/compiler/src/ir/instructions.rs @@ -208,13 +208,14 @@ pub enum DslIr { /// Permutes an array of Bn254 elements using Poseidon2 (output = p2_permute(array)). Should /// only be used when target is a circuit. CircuitPoseidon2Permute([Var; 3]), - /// Absorbs an array of baby bear elements into a duplex transcript with Poseidon2 permutations - /// (output = p2_multi_observe(array, els)). + /// Absorbs an array of baby bear elements into a duplex transcript with Poseidon2 permutations. + /// Context values (init_pos, len, is_hint) are passed via a context array instead of separate registers. + /// When is_hint=1, data is read from hint space using hint_id instead of from input array pointer. Poseidon2MultiObserve( - Ptr, // sponge_state - Var, // initial input_ptr position - Ptr, // input array (base elements) - Usize, // len of els + Ptr, // sponge_state + Ptr, // ctx_ptr (context array: [init_pos, len, is_hint, reserved]) + Ptr, // input array (base elements; used when is_hint=0) + Var, // hint_id (hint space id; used when is_hint=1) ), // Miscellaneous instructions. diff --git a/extensions/native/compiler/src/ir/poseidon.rs b/extensions/native/compiler/src/ir/poseidon.rs index 6d32f89409..deb3b47f14 100644 --- a/extensions/native/compiler/src/ir/poseidon.rs +++ b/extensions/native/compiler/src/ir/poseidon.rs @@ -19,6 +19,7 @@ impl Builder { sponge_state: &Array>, input_ptr: Ptr, arr: &Array>, + hint_id: Option>, ) -> Usize { let buffer_size: Var = Var::uninit(self); self.assign(&buffer_size, C::N::from_canonical_usize(HASH_RATE)); @@ -35,15 +36,36 @@ impl Builder { let init_pos: Var = Var::uninit(self); self.assign(&init_pos, input_ptr.address - sponge_ptr.address); + let is_hint = hint_id.is_some(); + let hint_id_var: Var = if let Some(id) = hint_id { + id + } else { + let v: Var = Var::uninit(self); + self.assign(&v, C::N::ZERO); + v + }; + + // Allocate context array: [init_pos, len, is_hint, reserved] + let ctx = self.dyn_array::>(4usize); + self.set(&ctx, 0, init_pos); + self.set(&ctx, 1, len.get_var()); + self.set( + &ctx, + 2, + if is_hint { C::N::ONE } else { C::N::ZERO }, + ); + self.set(&ctx, 3, C::N::ZERO); + self.operations.push(DslIr::Poseidon2MultiObserve( *sponge_ptr, - init_pos, + ctx.ptr(), *ptr, - len.clone(), + hint_id_var, )); - // automatically updated by Poseidon2MultiObserve operation - Usize::Var(init_pos) + // Read back the updated init_pos from ctx[0] + let final_pos: Var = self.get(&ctx, 0); + Usize::Var(final_pos) } }, } diff --git a/extensions/native/recursion/src/challenger/duplex.rs b/extensions/native/recursion/src/challenger/duplex.rs index 440b14ec59..b45639dc31 100644 --- a/extensions/native/recursion/src/challenger/duplex.rs +++ b/extensions/native/recursion/src/challenger/duplex.rs @@ -81,7 +81,7 @@ impl DuplexChallengerVariable { // This is equivalent to calling `observe` multiple times, but more efficient. pub fn observe_slice_opt(&self, builder: &mut Builder, arr: &Array>) { builder.if_ne(arr.len(), Usize::from(0)).then(|builder| { - let next_pos = builder.poseidon2_multi_observe(&self.sponge_state, self.input_ptr, arr); + let next_pos = builder.poseidon2_multi_observe(&self.sponge_state, self.input_ptr, arr, None); builder.assign(&self.input_ptr, self.io_empty_ptr + next_pos.clone()); builder.if_ne(next_pos, Usize::from(0)).then_or_else( From 38a9ba482ddda7166e90cf671b577a60db91b02a Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Sun, 15 Mar 2026 22:15:42 -0400 Subject: [PATCH 02/14] adjust degree --- .../native/circuit/src/poseidon2/air.rs | 67 +++++++++++-------- .../native/circuit/src/poseidon2/chip.rs | 4 ++ .../native/circuit/src/poseidon2/columns.rs | 6 ++ 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/extensions/native/circuit/src/poseidon2/air.rs b/extensions/native/circuit/src/poseidon2/air.rs index b7f9f458bb..11b9e0fede 100644 --- a/extensions/native/circuit/src/poseidon2/air.rs +++ b/extensions/native/circuit/src/poseidon2/air.rs @@ -94,6 +94,7 @@ impl Air inside_row, simple, multi_observe_row, + not_hint_multi_observe, end_inside_row, end_top_level, start_top_level, @@ -723,6 +724,7 @@ impl Air hint_id, ctx, read_ctx, + chunk_ts_count, is_first, is_last, curr_len, @@ -748,6 +750,16 @@ impl Air builder.when(multi_observe_row).assert_bool(is_last); builder.when(multi_observe_row).assert_bool(should_permute); builder.when(multi_observe_row).assert_bool(is_hint); + builder.assert_eq( + not_hint_multi_observe, + multi_observe_row * (AB::Expr::ONE - is_hint), + ); + let hint_multi_observe: AB::Expr = multi_observe_row - not_hint_multi_observe; + // chunk_ts_count = (end_idx - start_idx) * (2 - is_hint) + builder.when(multi_observe_row).assert_eq( + chunk_ts_count, + (end_idx - start_idx) * AB::F::TWO - (end_idx - start_idx) * is_hint, + ); self.execution_bridge .execute_and_increment_pc( @@ -813,49 +825,48 @@ impl Air ) .eval(builder, multi_observe_row * is_first); - // ts_per_element = 2 - is_hint (non-hint: read+write=2, hint: write-only=1) - let is_hint_expr: AB::Expr = is_hint.into(); - let ts_per_element: AB::Expr = AB::Expr::TWO - is_hint_expr.clone(); + // Per-element constraints for chunk rows. for i in 0..CHUNK { - let i_var: AB::Expr = AB::F::from_canonical_usize(i).into(); - let start_idx_expr: AB::Expr = start_idx.into(); - let element_start_ts: AB::Expr = - start_timestamp.into() + (i_var.clone() - start_idx_expr.clone()) * ts_per_element.clone(); + let i_var = AB::F::from_canonical_usize(i); + + // Hint mode: lookup from hint space. + self.hint_bridge.lookup( + builder, + hint_id, + curr_len + i_var - start_idx, + data[i], + hint_multi_observe.clone() * aux_read_enabled[i], + ); - // Non-hint mode: read from memory + // Non-hint mode: read from memory. self.memory_bridge .read( MemoryAddress::new( self.address_space, - input_ptr + curr_len + i_var.clone() - start_idx_expr.clone(), + input_ptr + curr_len + i_var - start_idx, ), [data[i]], - element_start_ts.clone(), + start_timestamp + i_var * AB::F::TWO - start_idx * AB::F::TWO, &read_data[i], ) - .eval( - builder, - multi_observe_row * aux_read_enabled[i] * (AB::Expr::ONE - is_hint_expr.clone()), - ); - - // Hint mode: lookup from hint space - self.hint_bridge.lookup( - builder, - hint_id, - curr_len + i_var.clone() - start_idx_expr.clone(), - data[i], - multi_observe_row * aux_read_enabled[i] * is_hint_expr.clone(), - ); + .eval(builder, not_hint_multi_observe * aux_read_enabled[i]); + self.memory_bridge + .write( + MemoryAddress::new(self.address_space, state_ptr + i_var), + [data[i]], + start_timestamp + i_var * AB::F::TWO - start_idx * AB::F::TWO + AB::F::ONE, + &write_data[i], + ) + .eval(builder, not_hint_multi_observe * aux_read_enabled[i]); - // Write to sponge state (always, for both modes) self.memory_bridge .write( MemoryAddress::new(self.address_space, state_ptr + i_var), [data[i]], - element_start_ts + (AB::Expr::ONE - is_hint_expr.clone()), + start_timestamp + i_var - start_idx, &write_data[i], ) - .eval(builder, multi_observe_row * aux_read_enabled[i]); + .eval(builder, hint_multi_observe.clone() * aux_read_enabled[i]); } for i in 0..(CHUNK - 1) { @@ -926,7 +937,7 @@ impl Air .write( MemoryAddress::new(self.address_space, state_ptr), full_sponge_output, - start_timestamp + (end_idx - start_idx) * (AB::Expr::TWO - is_hint_expr.clone()), + start_timestamp + chunk_ts_count, &write_sponge_state, ) .eval(builder, multi_observe_row * should_permute); @@ -955,7 +966,7 @@ impl Air .write( MemoryAddress::new(self.address_space, ctx_ptr), [final_idx], - start_timestamp + (end_idx - start_idx) * (AB::Expr::TWO - is_hint_expr) + should_permute, + start_timestamp + chunk_ts_count + should_permute, &write_final_idx, ) .eval(builder, multi_observe_row * is_last); diff --git a/extensions/native/circuit/src/poseidon2/chip.rs b/extensions/native/circuit/src/poseidon2/chip.rs index 7dee62f60d..061c6767db 100644 --- a/extensions/native/circuit/src/poseidon2/chip.rs +++ b/extensions/native/circuit/src/poseidon2/chip.rs @@ -771,7 +771,10 @@ where multi_observe_cols.hint_id = hint_id; multi_observe_cols.ctx = ctx; + // chunk_ts_count will be filled per-chunk row below + cols.multi_observe_row = F::ONE; + cols.not_hint_multi_observe = if is_hint { F::ZERO } else { F::ONE }; cols.very_first_timestamp = init_timestamp; if i == 0 { @@ -802,6 +805,7 @@ where multi_observe_cols.start_idx = F::from_canonical_usize(chunk_start); multi_observe_cols.end_idx = F::from_canonical_usize(chunk_end); + multi_observe_cols.chunk_ts_count = F::from_canonical_usize((chunk_end - chunk_start) * ts_per_element); multi_observe_cols.is_first = F::ZERO; multi_observe_cols.is_last = if i == num_chunks - 1 { F::ONE } else { F::ZERO }; diff --git a/extensions/native/circuit/src/poseidon2/columns.rs b/extensions/native/circuit/src/poseidon2/columns.rs index c8490fc285..67557a5f73 100644 --- a/extensions/native/circuit/src/poseidon2/columns.rs +++ b/extensions/native/circuit/src/poseidon2/columns.rs @@ -31,6 +31,10 @@ pub struct NativePoseidon2Cols { /// Indicates that this row is a multi_observe row. pub multi_observe_row: T, + /// Materialized column: multi_observe_row * (1 - is_hint). + /// Lives in main cols (not overlaid specific) so it is 0 on non-multi_observe rows. + pub not_hint_multi_observe: T, + /// Indicates the last row in an inside-row block. pub end_inside_row: T, /// Indicates the last row in a top-level block. @@ -228,6 +232,8 @@ pub struct MultiObserveCols { pub ctx: [T; 4], pub read_ctx: MemoryReadAuxCols, + pub chunk_ts_count: T, + pub is_first: T, pub is_last: T, pub curr_len: T, From 1f5c698d18ffadcf8d6652991ccdb801d2789511 Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Mon, 16 Mar 2026 06:03:11 -0400 Subject: [PATCH 03/14] fix --- .../native/circuit/src/extension/cuda.rs | 10 +- .../native/circuit/src/extension/mod.rs | 30 ++--- .../native/circuit/src/poseidon2/chip.rs | 12 +- .../native/circuit/src/poseidon2/cuda.rs | 112 +++++++++++++++++- .../native/circuit/src/poseidon2/execution.rs | 6 + extensions/native/compiler/src/ir/poseidon.rs | 14 ++- .../native/recursion/src/challenger/duplex.rs | 2 +- 7 files changed, 160 insertions(+), 26 deletions(-) diff --git a/extensions/native/circuit/src/extension/cuda.rs b/extensions/native/circuit/src/extension/cuda.rs index 50a9cba86d..0646eda347 100644 --- a/extensions/native/circuit/src/extension/cuda.rs +++ b/extensions/native/circuit/src/extension/cuda.rs @@ -76,8 +76,6 @@ impl VmProverExtension inventory.add_executor_chip(fri_reduced_opening); inventory.next_air::>()?; - let poseidon2 = NativePoseidon2ChipGpu::<1>::new(range_checker.clone(), timestamp_max_bits); - inventory.add_executor_chip(poseidon2); let hint_air: &HintSpaceProviderAir = inventory.next_air::()?; let cpu_chip = Arc::new(HintSpaceProviderChip::new( @@ -85,6 +83,14 @@ impl VmProverExtension range_checker.clone(), timestamp_max_bits, )); + + let poseidon2 = NativePoseidon2ChipGpu::<1>::new_with_hint_space_provider( + range_checker.clone(), + timestamp_max_bits, + cpu_chip.clone(), + ); + inventory.add_executor_chip(poseidon2); + let provider_gpu = HintSpaceProviderChipGpu::new(cpu_chip.clone()); inventory.add_periphery_chip(provider_gpu); diff --git a/extensions/native/circuit/src/extension/mod.rs b/extensions/native/circuit/src/extension/mod.rs index 0aab146a13..23d28c10d3 100644 --- a/extensions/native/circuit/src/extension/mod.rs +++ b/extensions/native/circuit/src/extension/mod.rs @@ -271,15 +271,6 @@ where ); inventory.add_air(fri_reduced_opening); - let verify_batch = NativePoseidon2Air::<_, 1>::new( - exec_bridge, - memory_bridge, - hint_bridge, - VerifyBatchBus::new(inventory.new_bus_idx()), - Poseidon2Config::default(), - ); - inventory.add_air(verify_batch); - let hint_space_provider = HintSpaceProviderAir { hint_bus: hint_bridge.hint_bus(), lt_air: IsLtSubAir::new( @@ -289,6 +280,15 @@ where }; inventory.add_air(hint_space_provider); + let verify_batch = NativePoseidon2Air::<_, 1>::new( + exec_bridge, + memory_bridge, + hint_bridge, + VerifyBatchBus::new(inventory.new_bus_idx()), + Poseidon2Config::default(), + ); + inventory.add_air(verify_batch); + let tower_evaluate = NativeSumcheckAir::new(exec_bridge, memory_bridge, hint_bridge); inventory.add_air(tower_evaluate); @@ -365,8 +365,6 @@ where FriReducedOpeningChip::new(FriReducedOpeningFiller::new(), mem_helper.clone()); inventory.add_executor_chip(fri_reduced_opening); - inventory.next_air::, 1>>()?; - let hint_bus = inventory.airs().system().hint_bridge.hint_bus(); let hint_space_provider = Arc::new(HintSpaceProviderChip::new( hint_bus, @@ -374,17 +372,19 @@ where timestamp_max_bits, )); + inventory.next_air::()?; + inventory.add_periphery_chip(hint_space_provider.clone()); + + inventory.next_air::, 1>>()?; + let poseidon2 = NativePoseidon2Chip::<_, 1>::new( NativePoseidon2Filler::new(Poseidon2Config::default(), hint_space_provider.clone()), mem_helper.clone(), ); inventory.add_executor_chip(poseidon2); - inventory.next_air::()?; - inventory.add_periphery_chip(hint_space_provider.clone()); - let tower_verify = NativeSumcheckChip::new( - NativeSumcheckFiller::new(hint_space_provider), + NativeSumcheckFiller::new(hint_space_provider.clone()), mem_helper.clone(), ); inventory.add_executor_chip(tower_verify); diff --git a/extensions/native/circuit/src/poseidon2/chip.rs b/extensions/native/circuit/src/poseidon2/chip.rs index 061c6767db..9e9a6b07ba 100644 --- a/extensions/native/circuit/src/poseidon2/chip.rs +++ b/extensions/native/circuit/src/poseidon2/chip.rs @@ -679,6 +679,10 @@ where let input_len = ctx[1]; let is_hint = ctx[2].as_canonical_u32() != 0; + + // _debug + println!("=> is_hint: {:?}", is_hint); + // Read hint_id from register let [hint_id]: [F; 1] = memory_read_native(state.memory.data(), hint_id_register.as_canonical_u32()); @@ -830,6 +834,12 @@ where ); v }; + + // _debug + if is_hint { + println!("multi_observe hint mode: reading nf = {}", n_f); + } + multi_observe_cols.aux_read_enabled[j] = F::ONE; tracing_write_native_inplace( state.memory, @@ -1265,7 +1275,7 @@ impl NativePoseidon2Filler { pub range_checker: Arc, pub timestamp_max_bits: usize, + pub hint_space_provider: Option>, +} + +impl NativePoseidon2ChipGpu { + pub fn new(range_checker: Arc, timestamp_max_bits: usize) -> Self { + Self { + range_checker, + timestamp_max_bits, + hint_space_provider: None, + } + } + + pub fn new_with_hint_space_provider( + range_checker: Arc, + timestamp_max_bits: usize, + hint_space_provider: SharedHintSpaceProviderChip, + ) -> Self { + Self { + range_checker, + timestamp_max_bits, + hint_space_provider: Some(hint_space_provider), + } + } + + /// Scans multi-observe execution records to populate the hint provider with + /// (hint_id, offset, value) triples for hint-mode rows. + fn populate_hint_provider(&self, records: &[u8]) { + let Some(hint_space_provider) = &self.hint_space_provider else { + return; + }; + + let width = NativePoseidon2Cols::::width(); + let record_size = width * size_of::(); + if records.len() % record_size != 0 { + return; + } + let height = records.len() / record_size; + + let row_slice = unsafe { + let ptr = records.as_ptr() as *const F; + from_raw_parts(ptr, height * width) + }; + + let mut row_idx = 0; + while row_idx < height { + let start = row_idx * width; + let cols: &NativePoseidon2Cols = + row_slice[start..(start + width)].borrow(); + + if cols.multi_observe_row.is_one() { + let num_rows = cols.inner.export.as_canonical_u32() as usize; + if num_rows > 1 { + let head_multi_observe_cols: &MultiObserveCols = + cols.specific[..MultiObserveCols::::width()].borrow(); + let is_hint = head_multi_observe_cols.ctx[2] != F::ZERO; + if is_hint { + let hint_id = head_multi_observe_cols.hint_id; + for local_row in 1..num_rows { + let chunk_cols: &NativePoseidon2Cols = + row_slice[(row_idx + local_row) * width + ..(row_idx + local_row + 1) * width] + .borrow(); + let multi_observe_cols: &MultiObserveCols = chunk_cols.specific + [..MultiObserveCols::::width()] + .borrow(); + + let chunk_start = multi_observe_cols.start_idx.as_canonical_u32(); + let chunk_end = multi_observe_cols.end_idx.as_canonical_u32(); + let curr_len = multi_observe_cols.curr_len.as_canonical_u32(); + + for j in chunk_start..chunk_end { + let input_idx = curr_len + (j - chunk_start); + let val = multi_observe_cols.data[j as usize]; + hint_space_provider.request( + hint_id, + F::from_canonical_u32(input_idx), + val, + ); + } + } + } + } + row_idx += num_rows.max(1); + continue; + } + + if cols.simple.is_one() { + row_idx += 1; + } else { + let num_non_inside_row = cols.inner.export.as_canonical_u32() as usize; + let non_inside_start = start + (num_non_inside_row - 1) * width; + let last_non_inside_cols: &NativePoseidon2Cols = + row_slice[non_inside_start..(non_inside_start + width)].borrow(); + let total_num_row = last_non_inside_cols.inner.export.as_canonical_u32() as usize; + row_idx += total_num_row; + } + } + } } impl Chip @@ -28,6 +127,9 @@ impl Chip return get_empty_air_proving_ctx::(); } + // Populate hint space provider from multi-observe records before GPU upload. + self.populate_hint_provider(records); + // For Poseidon2, the records are already the trace rows // Use the columns width directly let width = NativePoseidon2Cols::::width(); diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs index a558205729..4cec7ab2b4 100644 --- a/extensions/native/circuit/src/poseidon2/execution.rs +++ b/extensions/native/circuit/src/poseidon2/execution.rs @@ -640,6 +640,12 @@ unsafe fn execute_multi_observe_e12_impl< let [v]: [F; 1] = exec_state.vm_read(NATIVE_AS, input_ptr_u32 + input_idx); v }; + + // _debug + if is_hint { + println!("=> n_f: {n_f}"); + } + exec_state.vm_write(NATIVE_AS, sponge_ptr_u32 + (j as u32), &[n_f]); input_idx += 1; } diff --git a/extensions/native/compiler/src/ir/poseidon.rs b/extensions/native/compiler/src/ir/poseidon.rs index deb3b47f14..aadadf46ea 100644 --- a/extensions/native/compiler/src/ir/poseidon.rs +++ b/extensions/native/compiler/src/ir/poseidon.rs @@ -19,6 +19,7 @@ impl Builder { sponge_state: &Array>, input_ptr: Ptr, arr: &Array>, + input_len: Usize, hint_id: Option>, ) -> Usize { let buffer_size: Var = Var::uninit(self); @@ -32,7 +33,7 @@ impl Builder { Array::Fixed(_) => { panic!("Base elements input must be dynamic"); } - Array::Dyn(ptr, len) => { + Array::Dyn(ptr, _) => { let init_pos: Var = Var::uninit(self); self.assign(&init_pos, input_ptr.address - sponge_ptr.address); @@ -48,7 +49,7 @@ impl Builder { // Allocate context array: [init_pos, len, is_hint, reserved] let ctx = self.dyn_array::>(4usize); self.set(&ctx, 0, init_pos); - self.set(&ctx, 1, len.get_var()); + self.set(&ctx, 1, input_len.get_var()); self.set( &ctx, 2, @@ -56,6 +57,15 @@ impl Builder { ); self.set(&ctx, 3, C::N::ZERO); + + // _debug + let ctx1 = self.get(&ctx, 1); + let ctx2 = self.get(&ctx, 2); + self.print_debug(777); + self.print_v(ctx1); + self.print_v(ctx2); + + self.operations.push(DslIr::Poseidon2MultiObserve( *sponge_ptr, ctx.ptr(), diff --git a/extensions/native/recursion/src/challenger/duplex.rs b/extensions/native/recursion/src/challenger/duplex.rs index b45639dc31..10b9bc62e9 100644 --- a/extensions/native/recursion/src/challenger/duplex.rs +++ b/extensions/native/recursion/src/challenger/duplex.rs @@ -81,7 +81,7 @@ impl DuplexChallengerVariable { // This is equivalent to calling `observe` multiple times, but more efficient. pub fn observe_slice_opt(&self, builder: &mut Builder, arr: &Array>) { builder.if_ne(arr.len(), Usize::from(0)).then(|builder| { - let next_pos = builder.poseidon2_multi_observe(&self.sponge_state, self.input_ptr, arr, None); + let next_pos = builder.poseidon2_multi_observe(&self.sponge_state, self.input_ptr, arr, arr.len(), None); builder.assign(&self.input_ptr, self.io_empty_ptr + next_pos.clone()); builder.if_ne(next_pos, Usize::from(0)).then_or_else( From 3a006f3ac63ac81876874d712d1fa175a73171e6 Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Mon, 16 Mar 2026 22:22:48 -0400 Subject: [PATCH 04/14] debug --- .../native/circuit/src/extension/mod.rs | 1 + .../native/circuit/src/poseidon2/chip.rs | 43 ++++++++++++++----- .../native/circuit/src/poseidon2/execution.rs | 5 --- extensions/native/compiler/src/ir/poseidon.rs | 9 ---- 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/extensions/native/circuit/src/extension/mod.rs b/extensions/native/circuit/src/extension/mod.rs index 23d28c10d3..4a930ddafe 100644 --- a/extensions/native/circuit/src/extension/mod.rs +++ b/extensions/native/circuit/src/extension/mod.rs @@ -383,6 +383,7 @@ where ); inventory.add_executor_chip(poseidon2); + inventory.next_air::()?; let tower_verify = NativeSumcheckChip::new( NativeSumcheckFiller::new(hint_space_provider.clone()), mem_helper.clone(), diff --git a/extensions/native/circuit/src/poseidon2/chip.rs b/extensions/native/circuit/src/poseidon2/chip.rs index 9e9a6b07ba..c0a559bb65 100644 --- a/extensions/native/circuit/src/poseidon2/chip.rs +++ b/extensions/native/circuit/src/poseidon2/chip.rs @@ -679,10 +679,6 @@ where let input_len = ctx[1]; let is_hint = ctx[2].as_canonical_u32() != 0; - - // _debug - println!("=> is_hint: {:?}", is_hint); - // Read hint_id from register let [hint_id]: [F; 1] = memory_read_native(state.memory.data(), hint_id_register.as_canonical_u32()); @@ -717,7 +713,9 @@ where pos += len; } } - final_timestamp_inc += 1; // write back to ctx[0] + // Final ctx[0] writeback always happens (including zero-length input + // where the head row is both the first and last row). + final_timestamp_inc += 1; let allocated_rows = arena .alloc(MultiRowLayout::new(NativePoseidon2Metadata { @@ -788,9 +786,26 @@ where multi_observe_cols.final_timestamp_increment = F::from_canonical_usize(final_timestamp_inc); multi_observe_cols.is_first = F::ONE; - multi_observe_cols.is_last = F::ZERO; + multi_observe_cols.is_last = if chunks.is_empty() { F::ONE } else { F::ZERO }; multi_observe_cols.curr_len = F::ZERO; multi_observe_cols.should_permute = F::ZERO; + if chunks.is_empty() { + // Zero-length input: head row is both first and last. + // Set start_timestamp to right after the 5 head reads, + // and write back init_pos (unchanged) to ctx_ptr[0]. + cols.start_timestamp = F::from_canonical_u32( + init_timestamp_u32 + NUM_HEAD_ACCESSES as u32, + ); + multi_observe_cols.start_idx = init_pos; + multi_observe_cols.end_idx = init_pos; + // state.memory.timestamp == init_ts + NUM_HEAD_ACCESSES here. + tracing_write_native_inplace( + state.memory, + ctx_ptr.as_canonical_u32(), + [init_pos], + &mut multi_observe_cols.write_final_idx, + ); + } } } @@ -835,11 +850,6 @@ where v }; - // _debug - if is_hint { - println!("multi_observe hint mode: reading nf = {}", n_f); - } - multi_observe_cols.aux_read_enabled[j] = F::ONE; tracing_write_native_inplace( state.memory, @@ -1318,6 +1328,17 @@ impl NativePoseidon2Filler = + chunk_slice[..width].borrow_mut(); + let head_mo: &mut MultiObserveCols = + head_c.specific[..MultiObserveCols::::width()].borrow_mut(); + let head_ts = head_c.start_timestamp.as_canonical_u32(); + mem_fill_helper(mem_helper, head_ts, head_mo.write_final_idx.as_mut()); + } } #[inline(always)] diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs index 4cec7ab2b4..d41d911812 100644 --- a/extensions/native/circuit/src/poseidon2/execution.rs +++ b/extensions/native/circuit/src/poseidon2/execution.rs @@ -641,11 +641,6 @@ unsafe fn execute_multi_observe_e12_impl< v }; - // _debug - if is_hint { - println!("=> n_f: {n_f}"); - } - exec_state.vm_write(NATIVE_AS, sponge_ptr_u32 + (j as u32), &[n_f]); input_idx += 1; } diff --git a/extensions/native/compiler/src/ir/poseidon.rs b/extensions/native/compiler/src/ir/poseidon.rs index aadadf46ea..6310917c0d 100644 --- a/extensions/native/compiler/src/ir/poseidon.rs +++ b/extensions/native/compiler/src/ir/poseidon.rs @@ -57,15 +57,6 @@ impl Builder { ); self.set(&ctx, 3, C::N::ZERO); - - // _debug - let ctx1 = self.get(&ctx, 1); - let ctx2 = self.get(&ctx, 2); - self.print_debug(777); - self.print_v(ctx1); - self.print_v(ctx2); - - self.operations.push(DslIr::Poseidon2MultiObserve( *sponge_ptr, ctx.ptr(), From 065d59811285057fc8206336503b64febad9c99f Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Tue, 17 Mar 2026 16:13:03 -0400 Subject: [PATCH 05/14] adjust --- extensions/native/circuit/cuda/src/poseidon2.cu | 6 ------ extensions/native/circuit/src/poseidon2/air.rs | 1 - extensions/native/circuit/src/poseidon2/chip.rs | 2 -- 3 files changed, 9 deletions(-) diff --git a/extensions/native/circuit/cuda/src/poseidon2.cu b/extensions/native/circuit/cuda/src/poseidon2.cu index b39038a079..749599d906 100644 --- a/extensions/native/circuit/cuda/src/poseidon2.cu +++ b/extensions/native/circuit/cuda/src/poseidon2.cu @@ -355,7 +355,6 @@ template struct Poseidon2Wrapper { if (specific[COL_INDEX(MultiObserveCols, is_first)] == Fp::one()) { uint32_t very_start_timestamp = row[COL_INDEX(Cols, very_first_timestamp)].asUInt32(); - // 3 register reads at timestamps +0, +1, +2 for (uint32_t i = 0; i < 3; ++i) { mem_fill_base( mem_helper, @@ -363,13 +362,11 @@ template struct Poseidon2Wrapper { specific.slice_from(COL_INDEX(MultiObserveCols, read_data[i].base)) ); } - // 1 context array read at timestamp +3 mem_fill_base( mem_helper, very_start_timestamp + 3, specific.slice_from(COL_INDEX(MultiObserveCols, read_ctx.base)) ); - // 1 hint_id register read at timestamp +4 (reuse spare read_data[3] on head row) mem_fill_base( mem_helper, very_start_timestamp + 4, @@ -381,20 +378,17 @@ template struct Poseidon2Wrapper { specific[COL_INDEX(MultiObserveCols, start_idx)].asUInt32(); uint32_t chunk_end = specific[COL_INDEX(MultiObserveCols, end_idx)].asUInt32(); - // is_hint = ctx[2] uint32_t is_hint = specific[COL_INDEX(MultiObserveCols, ctx[2])].asUInt32(); uint32_t ts_per_element = 2 - is_hint; for (uint32_t j = chunk_start; j < chunk_end; ++j) { if (!is_hint) { - // Non-hint mode: fill read_data aux mem_fill_base( mem_helper, start_timestamp, specific.slice_from(COL_INDEX(MultiObserveCols, read_data[j].base)) ); } - // Write timestamp: start_timestamp + (1 - is_hint) for non-hint, start_timestamp for hint mem_fill_base( mem_helper, start_timestamp + (1 - is_hint), diff --git a/extensions/native/circuit/src/poseidon2/air.rs b/extensions/native/circuit/src/poseidon2/air.rs index 11b9e0fede..8b3b02ebc2 100644 --- a/extensions/native/circuit/src/poseidon2/air.rs +++ b/extensions/native/circuit/src/poseidon2/air.rs @@ -777,7 +777,6 @@ impl Air ) .eval(builder, multi_observe_row * is_first); - // Head row: 3 register reads + 1 context array read + 1 hint_id register read self.memory_bridge .read( MemoryAddress::new(self.address_space, state_ptr_register), diff --git a/extensions/native/circuit/src/poseidon2/chip.rs b/extensions/native/circuit/src/poseidon2/chip.rs index c0a559bb65..e136c2fbb0 100644 --- a/extensions/native/circuit/src/poseidon2/chip.rs +++ b/extensions/native/circuit/src/poseidon2/chip.rs @@ -1330,8 +1330,6 @@ impl NativePoseidon2Filler = chunk_slice[..width].borrow_mut(); let head_mo: &mut MultiObserveCols = From 4f33e48406a6baf8b082dccb19fa2bbc74171205 Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Tue, 17 Mar 2026 22:36:50 -0400 Subject: [PATCH 06/14] adjust cuda --- .../circuit/cuda/include/native/poseidon2.cuh | 15 +++++++++------ extensions/native/circuit/cuda/src/poseidon2.cu | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/extensions/native/circuit/cuda/include/native/poseidon2.cuh b/extensions/native/circuit/cuda/include/native/poseidon2.cuh index 206c0e16c0..b794aacaef 100644 --- a/extensions/native/circuit/cuda/include/native/poseidon2.cuh +++ b/extensions/native/circuit/cuda/include/native/poseidon2.cuh @@ -65,14 +65,17 @@ template struct SimplePoseidonSpecificCols { template struct MultiObserveCols { T pc; T final_timestamp_increment; + T state_ptr_register; + T ctx_register; + T input_ptr_register; + T hint_id_register; T state_ptr; + T ctx_ptr; T input_ptr; - T init_pos; - T len; - T input_register_1; - T input_register_2; - T input_register_3; - T output_register; + T hint_id; + T ctx[4]; + MemoryReadAuxCols read_ctx; + T chunk_ts_count; T is_first; T is_last; T curr_len; diff --git a/extensions/native/circuit/cuda/src/poseidon2.cu b/extensions/native/circuit/cuda/src/poseidon2.cu index 749599d906..772a708f89 100644 --- a/extensions/native/circuit/cuda/src/poseidon2.cu +++ b/extensions/native/circuit/cuda/src/poseidon2.cu @@ -24,6 +24,7 @@ template struct NativePoseidon2Cols { T inside_row; T simple; T multi_observe_row; + T not_hint_multi_observe; T end_inside_row; T end_top_level; From 4a1272231ca0db1a81e89ac42141cce388c5b2f7 Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Tue, 17 Mar 2026 22:59:54 -0400 Subject: [PATCH 07/14] fix cuda --- extensions/native/circuit/src/extension/cuda.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extensions/native/circuit/src/extension/cuda.rs b/extensions/native/circuit/src/extension/cuda.rs index 0646eda347..d3eb6da4fc 100644 --- a/extensions/native/circuit/src/extension/cuda.rs +++ b/extensions/native/circuit/src/extension/cuda.rs @@ -78,9 +78,13 @@ impl VmProverExtension inventory.next_air::>()?; let hint_air: &HintSpaceProviderAir = inventory.next_air::()?; + let cpu_range_checker = range_checker + .cpu_chip + .clone() + .expect("VariableRangeCheckerChipGPU is expected to be hybrid with cpu_chip"); let cpu_chip = Arc::new(HintSpaceProviderChip::new( hint_air.hint_bus, - range_checker.clone(), + cpu_range_checker, timestamp_max_bits, )); From 7e6d1be52421d33102980a4fb5e6a58ee3ee799f Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Tue, 17 Mar 2026 23:30:01 -0400 Subject: [PATCH 08/14] fix cuda --- extensions/native/circuit/src/extension/cuda.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/native/circuit/src/extension/cuda.rs b/extensions/native/circuit/src/extension/cuda.rs index d3eb6da4fc..9777b53dc4 100644 --- a/extensions/native/circuit/src/extension/cuda.rs +++ b/extensions/native/circuit/src/extension/cuda.rs @@ -75,8 +75,6 @@ impl VmProverExtension FriReducedOpeningChipGpu::new(range_checker.clone(), timestamp_max_bits); inventory.add_executor_chip(fri_reduced_opening); - inventory.next_air::>()?; - let hint_air: &HintSpaceProviderAir = inventory.next_air::()?; let cpu_range_checker = range_checker .cpu_chip @@ -88,6 +86,8 @@ impl VmProverExtension timestamp_max_bits, )); + inventory.next_air::>()?; + let poseidon2 = NativePoseidon2ChipGpu::<1>::new_with_hint_space_provider( range_checker.clone(), timestamp_max_bits, From fbe927e773967769af5e0bedafdc9565255c326c Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Wed, 18 Mar 2026 00:39:10 -0400 Subject: [PATCH 09/14] fix cuda --- extensions/native/circuit/src/extension/cuda.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/native/circuit/src/extension/cuda.rs b/extensions/native/circuit/src/extension/cuda.rs index 9777b53dc4..0d476413ac 100644 --- a/extensions/native/circuit/src/extension/cuda.rs +++ b/extensions/native/circuit/src/extension/cuda.rs @@ -86,6 +86,9 @@ impl VmProverExtension timestamp_max_bits, )); + let provider_gpu = HintSpaceProviderChipGpu::new(cpu_chip.clone()); + inventory.add_periphery_chip(provider_gpu); + inventory.next_air::>()?; let poseidon2 = NativePoseidon2ChipGpu::<1>::new_with_hint_space_provider( @@ -95,9 +98,6 @@ impl VmProverExtension ); inventory.add_executor_chip(poseidon2); - let provider_gpu = HintSpaceProviderChipGpu::new(cpu_chip.clone()); - inventory.add_periphery_chip(provider_gpu); - inventory.next_air::()?; let sumcheck = NativeSumcheckChipGpu::new(range_checker.clone(), timestamp_max_bits, cpu_chip); From b859546809cea51e2e47d3e7740dc29106bdb0c1 Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Wed, 18 Mar 2026 18:18:30 -0400 Subject: [PATCH 10/14] add debug utilities --- crates/circuits/mod-builder/src/cuda/chip.rs | 37 ++++++++++- crates/circuits/mod-builder/src/utils.rs | 65 +++++++++++++++++++ .../circuit/src/fp2_chip/cuda/addsub.rs | 15 +++++ .../circuit/src/fp2_chip/cuda/muldiv.rs | 15 +++++ .../circuit/src/modular_chip/cuda/addsub.rs | 15 +++++ .../circuit/src/modular_chip/cuda/muldiv.rs | 15 +++++ .../src/weierstrass_chip/add_ne/cuda.rs | 15 +++++ .../src/weierstrass_chip/double/cuda.rs | 15 +++++ 8 files changed, 189 insertions(+), 3 deletions(-) diff --git a/crates/circuits/mod-builder/src/cuda/chip.rs b/crates/circuits/mod-builder/src/cuda/chip.rs index ec53660e72..fd9c0b6488 100644 --- a/crates/circuits/mod-builder/src/cuda/chip.rs +++ b/crates/circuits/mod-builder/src/cuda/chip.rs @@ -23,7 +23,7 @@ use crate::{ expr_op::ExprOp, }, cuda_abi::field_expression::tracegen, - utils::biguint_to_limbs_vec, + utils::{biguint_to_limbs_vec, OPENVM_GPU_DEBUG_ID}, ExprMeta, ExprNode, FieldExprMeta, FieldExpressionChipGPU, FieldExpressionCoreAir, SymbolicExpr, }; @@ -471,7 +471,7 @@ impl FieldExpressionChipGPU { unsafe { cudaDeviceSetLimit(cudaLimit::cudaLimitStackSize, 48 * 1024); - tracegen( + if let Err(err) = tracegen( &self.records, mat.buffer(), &self.meta, @@ -487,8 +487,39 @@ impl FieldExpressionChipGPU { workspace.as_ptr(), workspace_per_thread, ) - .unwrap(); + { + panic!( + "field_expression cuda tracegen failed [{}]: err={:?}, num_records={}, record_stride={}, padded_height={}, total_trace_width={}, workspace_per_thread={}, pointer_max_bits={}, timestamp_max_bits={}, num_inputs={}, num_vars={}, num_flags={}, num_local_opcodes={}, num_output_indices={}, max_q_count={}, max_ast_depth={}", + OPENVM_GPU_DEBUG_ID, + err, + self.num_records, + self.record_stride, + padded_height, + self.total_trace_width, + workspace_per_thread, + self.pointer_max_bits, + self.timestamp_max_bits, + meta_host.num_inputs, + meta_host.expr_meta.num_vars, + meta_host.num_u32_flags, + meta_host.num_local_opcodes, + meta_host.num_output_indices, + meta_host.max_q_count, + meta_host.max_ast_depth, + ); + } } + + println!( + "[openvm-gpu-debug][{}] field_expression tracegen ok: num_records={} padded_height={} total_trace_width={} workspace_per_thread={} pointer_max_bits={} timestamp_max_bits={}", + OPENVM_GPU_DEBUG_ID, + self.num_records, + padded_height, + self.total_trace_width, + workspace_per_thread, + self.pointer_max_bits, + self.timestamp_max_bits, + ); mat } } diff --git a/crates/circuits/mod-builder/src/utils.rs b/crates/circuits/mod-builder/src/utils.rs index 2f2561ba87..d840f5e702 100644 --- a/crates/circuits/mod-builder/src/utils.rs +++ b/crates/circuits/mod-builder/src/utils.rs @@ -1,5 +1,9 @@ use num_bigint::BigUint; +use crate::FieldExpressionCoreAir; + +pub const OPENVM_GPU_DEBUG_ID: &str = "OVM-GPU-DBG-20260318"; + // Use this when num_limbs is not a constant. // little endian. // Warning: This function only returns the last NUM_LIMBS bytes of @@ -12,3 +16,64 @@ pub fn biguint_to_limbs_vec(x: &BigUint, num_limbs: usize) -> Vec { .take(num_limbs) .collect() } + +fn fnv1a64(bytes: &[u8]) -> u64 { + let mut hash: u64 = 0xcbf29ce484222325; + for &b in bytes { + hash ^= b as u64; + hash = hash.wrapping_mul(0x100000001b3); + } + hash +} + +pub fn debug_log_field_expr_gpu_input( + chip_label: &str, + record_size: usize, + num_records: usize, + adapter_width: usize, + adapter_blocks: usize, + pointer_max_bits: u32, + timestamp_max_bits: u32, + local_opcode_idx: &[usize], + opcode_flag_idx: &[usize], + air: &FieldExpressionCoreAir, + records: &[u8], +) { + let byte_len = records.len(); + let hash = fnv1a64(records); + let sample_len = byte_len.min(64); + + println!( + "[openvm-gpu-debug][{}] chip={} num_records={} record_size={} bytes={} hash=0x{:016x}", + OPENVM_GPU_DEBUG_ID, chip_label, num_records, record_size, byte_len, hash + ); + println!( + "[openvm-gpu-debug][{}] chip={} adapter_width={} adapter_blocks={} pointer_max_bits={} timestamp_max_bits={}", + OPENVM_GPU_DEBUG_ID, chip_label, adapter_width, adapter_blocks, pointer_max_bits, timestamp_max_bits + ); + println!( + "[openvm-gpu-debug][{}] chip={} local_opcode_idx={:?} opcode_flag_idx={:?}", + OPENVM_GPU_DEBUG_ID, chip_label, local_opcode_idx, opcode_flag_idx + ); + println!( + "[openvm-gpu-debug][{}] chip={} expr: num_inputs={} num_vars={} num_flags={} outputs={} computes={} constraints={} prime_limbs={} limb_bits={} canonical_num_limbs={}", + OPENVM_GPU_DEBUG_ID, + chip_label, + air.num_inputs(), + air.num_vars(), + air.num_flags(), + air.output_indices().len(), + air.expr.builder.computes.len(), + air.expr.builder.constraints.len(), + air.expr.builder.prime_limbs.len(), + air.expr.canonical_limb_bits(), + air.expr.canonical_num_limbs(), + ); + println!( + "[openvm-gpu-debug][{}] chip={} records_head({})={:?}", + OPENVM_GPU_DEBUG_ID, + chip_label, + sample_len, + &records[..sample_len] + ); +} diff --git a/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs b/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs index 6d5d6793d1..241074423f 100644 --- a/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs +++ b/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs @@ -11,6 +11,7 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, + utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -66,6 +67,20 @@ impl Chip::width(); + debug_log_field_expr_gpu_input( + "fp2_addsub", + record_size, + num_records, + adapter_width, + BLOCKS, + self.pointer_max_bits, + self.timestamp_max_bits, + &air.local_opcode_idx, + &air.opcode_flag_idx, + &air, + &records, + ); + let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs b/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs index bff1b62664..a8f4e693eb 100644 --- a/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs +++ b/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs @@ -11,6 +11,7 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, + utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -66,6 +67,20 @@ impl Chip::width(); + debug_log_field_expr_gpu_input( + "fp2_muldiv", + record_size, + num_records, + adapter_width, + BLOCKS, + self.pointer_max_bits, + self.timestamp_max_bits, + &air.local_opcode_idx, + &air.opcode_flag_idx, + &air, + &records, + ); + let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs b/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs index b369b5a27e..998d30371f 100644 --- a/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs +++ b/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs @@ -11,6 +11,7 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, + utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -66,6 +67,20 @@ impl Chip::width(); + debug_log_field_expr_gpu_input( + "modular_addsub", + record_size, + num_records, + adapter_width, + BLOCKS, + self.pointer_max_bits, + self.timestamp_max_bits, + &air.local_opcode_idx, + &air.opcode_flag_idx, + &air, + &records, + ); + let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs b/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs index 132a4b6f46..5793cf0eec 100644 --- a/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs +++ b/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs @@ -11,6 +11,7 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, + utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -66,6 +67,20 @@ impl Chip::width(); + debug_log_field_expr_gpu_input( + "modular_muldiv", + record_size, + num_records, + adapter_width, + BLOCKS, + self.pointer_max_bits, + self.timestamp_max_bits, + &air.local_opcode_idx, + &air.opcode_flag_idx, + &air, + &records, + ); + let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs index 8d1d9bc08d..100eea0180 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs @@ -11,6 +11,7 @@ use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, + utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -64,6 +65,20 @@ impl Chip::width(); + debug_log_field_expr_gpu_input( + "weierstrass_add_ne", + record_size, + num_records, + adapter_width, + BLOCKS, + self.pointer_max_bits, + self.timestamp_max_bits, + &air.local_opcode_idx, + &air.opcode_flag_idx, + &air, + &records, + ); + let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs index 04e45d068e..02c01e4630 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs @@ -12,6 +12,7 @@ use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, + utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -66,6 +67,20 @@ impl Chip::width(); + debug_log_field_expr_gpu_input( + "weierstrass_double", + record_size, + num_records, + adapter_width, + BLOCKS, + self.pointer_max_bits, + self.timestamp_max_bits, + &air.local_opcode_idx, + &air.opcode_flag_idx, + &air, + &records, + ); + let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( From 8918e55b87bb499e4fdd562dace6b8d6033369cb Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Wed, 18 Mar 2026 19:54:33 -0400 Subject: [PATCH 11/14] add debug flags --- crates/vm/src/arch/vm.rs | 10 +++++ .../native/circuit/cuda/src/poseidon2.cu | 11 +++++ .../native/circuit/src/branch_eq/cuda.rs | 35 +++++++++++++-- extensions/native/circuit/src/castf/cuda.rs | 35 +++++++++++++-- .../circuit/src/field_arithmetic/cuda.rs | 35 +++++++++++++-- .../circuit/src/field_extension/cuda.rs | 35 +++++++++++++-- extensions/native/circuit/src/fri/cuda.rs | 39 ++++++++++++++-- .../native/circuit/src/jal_rangecheck/cuda.rs | 39 ++++++++++++++-- .../native/circuit/src/loadstore/cuda.rs | 37 ++++++++++++++-- .../native/circuit/src/poseidon2/cuda.rs | 38 ++++++++++++++-- .../native/circuit/src/sumcheck/cuda.rs | 35 +++++++++++++-- extensions/native/circuit/src/utils.rs | 44 +++++++++++++++++++ 12 files changed, 364 insertions(+), 29 deletions(-) diff --git a/crates/vm/src/arch/vm.rs b/crates/vm/src/arch/vm.rs index cb58a0b77a..dcc2a216a2 100644 --- a/crates/vm/src/arch/vm.rs +++ b/crates/vm/src/arch/vm.rs @@ -608,6 +608,11 @@ where .iter() .map(|(air_idx, ctx)| (*air_idx, ctx.main_trace_height())) .collect_vec(); + println!( + "[openvm-gpu-debug][OVM-NATIVE-GPU-DBG-20260318][vm.generate_proving_ctx] num_airs={} trace_heights={:?}", + ctx.per_air.len(), + idx_trace_heights + ); // 1. check max trace height isn't exceeded let max_trace_height = if TypeId::of::>() == TypeId::of::() { let min_log_blowup = log2_ceil_usize(self.config().as_ref().max_constraint_degree - 1); @@ -695,7 +700,12 @@ where let final_memory = (system_records.exit_code == Some(ExitCode::Success as u32)).then_some(to_state.memory); let ctx = self.generate_proving_ctx(system_records, record_arenas)?; + let prove_start = std::time::Instant::now(); let proof = self.engine.prove(&self.pk, ctx); + println!( + "[openvm-gpu-debug][OVM-NATIVE-GPU-DBG-20260318][vm.prove] engine.prove done in {:?}", + prove_start.elapsed() + ); Ok((proof, final_memory)) } diff --git a/extensions/native/circuit/cuda/src/poseidon2.cu b/extensions/native/circuit/cuda/src/poseidon2.cu index 772a708f89..32c0d36ec9 100644 --- a/extensions/native/circuit/cuda/src/poseidon2.cu +++ b/extensions/native/circuit/cuda/src/poseidon2.cu @@ -373,6 +373,17 @@ template struct Poseidon2Wrapper { very_start_timestamp + 4, specific.slice_from(COL_INDEX(MultiObserveCols, read_data[3].base)) ); + + // Zero-length MULTI_OBSERVE case: head row is both first and last. + // The final ctx[0] writeback lives at row.start_timestamp. + if (specific[COL_INDEX(MultiObserveCols, is_last)] == Fp::one()) { + uint32_t start_timestamp = row[COL_INDEX(Cols, start_timestamp)].asUInt32(); + mem_fill_base( + mem_helper, + start_timestamp, + specific.slice_from(COL_INDEX(MultiObserveCols, write_final_idx.base)) + ); + } } else { uint32_t start_timestamp = row[COL_INDEX(Cols, start_timestamp)].asUInt32(); uint32_t chunk_start = diff --git a/extensions/native/circuit/src/branch_eq/cuda.rs b/extensions/native/circuit/src/branch_eq/cuda.rs index b5a8dcb0a8..43b52d8add 100644 --- a/extensions/native/circuit/src/branch_eq/cuda.rs +++ b/extensions/native/circuit/src/branch_eq/cuda.rs @@ -14,6 +14,7 @@ use super::NativeBranchEqualCoreRecord; use crate::{ adapters::{BranchNativeAdapterCols, BranchNativeAdapterRecord}, cuda_abi::native_branch_eq_cuda, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -38,20 +39,48 @@ impl Chip for NativeBranchEqChipGpu { BranchNativeAdapterCols::::width() + BranchEqualCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_branch_eq", + records, + RECORD_SIZE, + height, + padded_height, + trace_width, + ); + let d_records = records.to_device().unwrap(); unsafe { - native_branch_eq_cuda::tracegen( + if let Err(err) = native_branch_eq_cuda::tracegen( trace.buffer(), padded_height, trace_width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_branch_eq cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + trace_width, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_branch_eq] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + trace_width, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/castf/cuda.rs b/extensions/native/circuit/src/castf/cuda.rs index fd80e5cedc..19d2dd2a79 100644 --- a/extensions/native/circuit/src/castf/cuda.rs +++ b/extensions/native/circuit/src/castf/cuda.rs @@ -13,6 +13,7 @@ use super::{CastFCoreCols, CastFCoreRecord}; use crate::{ adapters::{ConvertAdapterCols, ConvertAdapterRecord}, cuda_abi::castf_cuda, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -35,20 +36,48 @@ impl Chip for CastFChipGpu { let trace_width = ConvertAdapterCols::::width() + CastFCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_castf", + records, + RECORD_SIZE, + height, + padded_height, + trace_width, + ); + let d_records = records.to_device().unwrap(); unsafe { - castf_cuda::tracegen( + if let Err(err) = castf_cuda::tracegen( trace.buffer(), padded_height, trace_width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_castf cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + trace_width, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_castf] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + trace_width, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/field_arithmetic/cuda.rs b/extensions/native/circuit/src/field_arithmetic/cuda.rs index a6f67d231f..d2d1e44b49 100644 --- a/extensions/native/circuit/src/field_arithmetic/cuda.rs +++ b/extensions/native/circuit/src/field_arithmetic/cuda.rs @@ -13,6 +13,7 @@ use super::{FieldArithmeticCoreCols, FieldArithmeticRecord}; use crate::{ adapters::{AluNativeAdapterCols, AluNativeAdapterRecord}, cuda_abi::field_arithmetic_cuda, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -37,10 +38,19 @@ impl Chip for FieldArithmeticChipGpu { AluNativeAdapterCols::::width() + FieldArithmeticCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_field_arithmetic", + records, + RECORD_SIZE, + height, + padded_height, + trace_width, + ); + let d_records = records.to_device().unwrap(); unsafe { - field_arithmetic_cuda::tracegen( + if let Err(err) = field_arithmetic_cuda::tracegen( trace.buffer(), padded_height, trace_width, @@ -48,10 +58,29 @@ impl Chip for FieldArithmeticChipGpu { self.range_checker.count.as_ptr() as *const u32, self.range_checker.count.len(), self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_field_arithmetic cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + trace_width, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_field_arithmetic] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + trace_width, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/field_extension/cuda.rs b/extensions/native/circuit/src/field_extension/cuda.rs index af88de22b5..f8312a61a7 100644 --- a/extensions/native/circuit/src/field_extension/cuda.rs +++ b/extensions/native/circuit/src/field_extension/cuda.rs @@ -13,6 +13,7 @@ use super::{FieldExtensionCoreCols, FieldExtensionRecord, EXT_DEG}; use crate::{ adapters::{NativeVectorizedAdapterCols, NativeVectorizedAdapterRecord}, cuda_abi::field_extension_cuda, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -39,20 +40,48 @@ impl Chip for FieldExtensionChipGpu { + FieldExtensionCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_field_extension", + records, + RECORD_SIZE, + height, + padded_height, + trace_width, + ); + let d_records = records.to_device().unwrap(); unsafe { - field_extension_cuda::tracegen( + if let Err(err) = field_extension_cuda::tracegen( trace.buffer(), padded_height, trace_width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_field_extension cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + trace_width, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_field_extension] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + trace_width, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/fri/cuda.rs b/extensions/native/circuit/src/fri/cuda.rs index 06f3e91180..bfa0003ca8 100644 --- a/extensions/native/circuit/src/fri/cuda.rs +++ b/extensions/native/circuit/src/fri/cuda.rs @@ -13,7 +13,10 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; use super::{FriReducedOpeningRecordMut, OVERALL_WIDTH}; -use crate::cuda_abi::fri_cuda; +use crate::{ + cuda_abi::fri_cuda, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, +}; #[derive(new)] pub struct FriReducedOpeningChipGpu { @@ -56,8 +59,17 @@ impl Chip for FriReducedOpeningChipGpu { let trace_width = OVERALL_WIDTH; let trace = DeviceMatrix::::with_capacity(trace_height, trace_width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_fri_reduced_opening", + records, + 0, + record_info.len(), + trace_height, + trace_width, + ); + unsafe { - fri_cuda::tracegen( + if let Err(err) = fri_cuda::tracegen( trace.buffer(), trace_height, &d_records, @@ -65,10 +77,29 @@ impl Chip for FriReducedOpeningChipGpu { &d_record_info, &self.range_checker.count, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_fri_reduced_opening cuda tracegen failed [{}]: err={:?}, rows={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + record_info.len(), + trace_height, + trace_width, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_fri_reduced_opening] tracegen ok: rows={} padded_height={} trace_width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + record_info.len(), + trace_height, + trace_width, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/jal_rangecheck/cuda.rs b/extensions/native/circuit/src/jal_rangecheck/cuda.rs index a273a1b443..31ff5ce117 100644 --- a/extensions/native/circuit/src/jal_rangecheck/cuda.rs +++ b/extensions/native/circuit/src/jal_rangecheck/cuda.rs @@ -10,7 +10,10 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; use super::{JalRangeCheckCols, JalRangeCheckRecord}; -use crate::cuda_abi::native_jal_rangecheck_cuda; +use crate::{ + cuda_abi::native_jal_rangecheck_cuda, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, +}; #[derive(new)] pub struct JalRangeCheckGpu { @@ -33,20 +36,48 @@ impl Chip for JalRangeCheckGpu { let padded_height = next_power_of_two_or_zero(height); let trace = DeviceMatrix::::with_capacity(padded_height, width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_jal_rangecheck", + records, + RECORD_SIZE, + height, + padded_height, + width, + ); + let d_records = records.to_device().unwrap(); unsafe { - native_jal_rangecheck_cuda::tracegen( + if let Err(err) = native_jal_rangecheck_cuda::tracegen( trace.buffer(), padded_height, width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_jal_rangecheck cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + width, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_jal_rangecheck] tracegen ok: height={} padded_height={} width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + width, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/loadstore/cuda.rs b/extensions/native/circuit/src/loadstore/cuda.rs index 8f7e3b83ef..a73eff9020 100644 --- a/extensions/native/circuit/src/loadstore/cuda.rs +++ b/extensions/native/circuit/src/loadstore/cuda.rs @@ -13,6 +13,7 @@ use super::{NativeLoadStoreCoreCols, NativeLoadStoreCoreRecord}; use crate::{ adapters::{NativeLoadStoreAdapterCols, NativeLoadStoreAdapterRecord}, cuda_abi::native_loadstore_cuda, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -46,10 +47,19 @@ impl Chip + NativeLoadStoreCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_loadstore", + records, + record_size, + height, + padded_height, + trace_width, + ); + let d_records = records.to_device().unwrap(); unsafe { - native_loadstore_cuda::tracegen( + if let Err(err) = native_loadstore_cuda::tracegen( trace.buffer(), padded_height, trace_width, @@ -57,10 +67,31 @@ impl Chip &self.range_checker.count, NUM_CELLS as u32, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_loadstore cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, num_cells={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + trace_width, + NUM_CELLS, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_loadstore] tracegen ok: height={} padded_height={} trace_width={} num_cells={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + trace_width, + NUM_CELLS, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/poseidon2/cuda.rs b/extensions/native/circuit/src/poseidon2/cuda.rs index 4bdf337a75..aa3ad73a50 100644 --- a/extensions/native/circuit/src/poseidon2/cuda.rs +++ b/extensions/native/circuit/src/poseidon2/cuda.rs @@ -13,6 +13,7 @@ use super::columns::{MultiObserveCols, NativePoseidon2Cols}; use crate::{ cuda_abi::poseidon2_cuda, hint_space_provider::SharedHintSpaceProviderChip, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; pub struct NativePoseidon2ChipGpu { @@ -140,6 +141,15 @@ impl Chip let height = records.len() / record_size; let padded_height = next_power_of_two_or_zero(height); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_poseidon2", + records, + record_size, + height, + padded_height, + width, + ); + let d_chunk_start = { let mut row_idx = 0; let row_slice = unsafe { @@ -175,7 +185,7 @@ impl Chip let d_records = records.to_device().unwrap(); unsafe { - poseidon2_cuda::tracegen( + if let Err(err) = poseidon2_cuda::tracegen( trace.buffer(), padded_height, width, @@ -186,10 +196,32 @@ impl Chip &self.range_checker.count, SBOX_REGISTERS as u32, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_poseidon2 cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, width={}, chunk_count={}, sbox_registers={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + width, + d_chunk_start.len(), + SBOX_REGISTERS, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_poseidon2] tracegen ok: height={} padded_height={} width={} chunk_count={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + width, + d_chunk_start.len(), + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/sumcheck/cuda.rs b/extensions/native/circuit/src/sumcheck/cuda.rs index 2dcecd5756..0a6cfdeac3 100644 --- a/extensions/native/circuit/src/sumcheck/cuda.rs +++ b/extensions/native/circuit/src/sumcheck/cuda.rs @@ -13,6 +13,7 @@ use super::columns::{LogupSpecificCols, NativeSumcheckCols, ProdSpecificCols}; use crate::{ cuda_abi::sumcheck_cuda, hint_space_provider::SharedHintSpaceProviderChip, + utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; use p3_field::FieldAlgebra; @@ -92,6 +93,15 @@ impl Chip for NativeSumcheckChipGpu { let padded_height = next_power_of_two_or_zero(height); let trace = DeviceMatrix::::with_capacity(padded_height, width); + let records_hash = debug_log_native_gpu_tracegen_input( + "native_sumcheck", + records, + record_size, + height, + padded_height, + width, + ); + let record_slice = unsafe { let ptr = records.as_ptr(); from_raw_parts(ptr as *const F, records.len() / size_of::()) @@ -99,7 +109,7 @@ impl Chip for NativeSumcheckChipGpu { let d_records = record_slice.to_device().unwrap(); unsafe { - sumcheck_cuda::tracegen( + if let Err(err) = sumcheck_cuda::tracegen( trace.buffer(), padded_height, width, @@ -107,10 +117,29 @@ impl Chip for NativeSumcheckChipGpu { height, &self.range_checker.count, self.timestamp_max_bits as u32, - ) - .unwrap(); + ) { + panic!( + "native_sumcheck cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}, hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + err, + height, + padded_height, + width, + self.timestamp_max_bits, + records_hash, + ); + } } + println!( + "[openvm-gpu-debug][{}][native_sumcheck] tracegen ok: height={} padded_height={} width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + height, + padded_height, + width, + records_hash, + ); + AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/utils.rs b/extensions/native/circuit/src/utils.rs index 3d05656f16..c2be13f80b 100644 --- a/extensions/native/circuit/src/utils.rs +++ b/extensions/native/circuit/src/utils.rs @@ -5,11 +5,55 @@ use openvm_circuit::system::{ use p3_field::PrimeField32; pub(crate) const CASTF_MAX_BITS: usize = 30; +#[cfg(feature = "cuda")] +pub(crate) const OPENVM_NATIVE_GPU_DEBUG_ID: &str = "OVM-NATIVE-GPU-DBG-20260318"; pub(crate) const fn const_max(a: usize, b: usize) -> usize { [a, b][(a < b) as usize] } +#[cfg(feature = "cuda")] +fn fnv1a64(bytes: &[u8]) -> u64 { + let mut hash: u64 = 0xcbf29ce484222325; + for &b in bytes { + hash ^= b as u64; + hash = hash.wrapping_mul(0x100000001b3); + } + hash +} + +#[cfg(feature = "cuda")] +pub(crate) fn debug_log_native_gpu_tracegen_input( + chip_label: &str, + records: &[u8], + record_size: usize, + height: usize, + padded_height: usize, + trace_width: usize, +) -> u64 { + let hash = fnv1a64(records); + let head_len = records.len().min(64); + println!( + "[openvm-gpu-debug][{}][{}] records_bytes={} record_size={} height={} padded_height={} trace_width={} hash=0x{:016x}", + OPENVM_NATIVE_GPU_DEBUG_ID, + chip_label, + records.len(), + record_size, + height, + padded_height, + trace_width, + hash + ); + println!( + "[openvm-gpu-debug][{}][{}] records_head({})={:?}", + OPENVM_NATIVE_GPU_DEBUG_ID, + chip_label, + head_len, + &records[..head_len] + ); + hash +} + /// Fill `MemoryBaseAuxCols`, assuming that the `prev_timestamp` is already set in `base_aux`. pub(crate) fn mem_fill_helper( mem_helper: &MemoryAuxColsFactory, From 853ef7d04fb26123e1afb435c8e7461a127870dd Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Wed, 18 Mar 2026 20:21:26 -0400 Subject: [PATCH 12/14] remove debug flags --- crates/circuits/mod-builder/src/cuda/chip.rs | 16 +---- crates/circuits/mod-builder/src/utils.rs | 64 ------------------- crates/vm/src/arch/vm.rs | 10 --- .../circuit/src/fp2_chip/cuda/addsub.rs | 15 ----- .../circuit/src/fp2_chip/cuda/muldiv.rs | 15 ----- .../circuit/src/modular_chip/cuda/addsub.rs | 15 ----- .../circuit/src/modular_chip/cuda/muldiv.rs | 15 ----- .../src/weierstrass_chip/add_ne/cuda.rs | 15 ----- .../src/weierstrass_chip/double/cuda.rs | 15 ----- .../native/circuit/src/branch_eq/cuda.rs | 29 +-------- extensions/native/circuit/src/castf/cuda.rs | 29 +-------- .../circuit/src/field_arithmetic/cuda.rs | 29 +-------- .../circuit/src/field_extension/cuda.rs | 29 +-------- extensions/native/circuit/src/fri/cuda.rs | 29 +-------- .../native/circuit/src/jal_rangecheck/cuda.rs | 29 +-------- .../native/circuit/src/loadstore/cuda.rs | 31 +-------- .../native/circuit/src/poseidon2/cuda.rs | 24 +------ .../native/circuit/src/sumcheck/cuda.rs | 29 +-------- extensions/native/circuit/src/utils.rs | 44 ------------- 19 files changed, 19 insertions(+), 463 deletions(-) diff --git a/crates/circuits/mod-builder/src/cuda/chip.rs b/crates/circuits/mod-builder/src/cuda/chip.rs index fd9c0b6488..5d2f6e1f5a 100644 --- a/crates/circuits/mod-builder/src/cuda/chip.rs +++ b/crates/circuits/mod-builder/src/cuda/chip.rs @@ -23,7 +23,7 @@ use crate::{ expr_op::ExprOp, }, cuda_abi::field_expression::tracegen, - utils::{biguint_to_limbs_vec, OPENVM_GPU_DEBUG_ID}, + utils::biguint_to_limbs_vec, ExprMeta, ExprNode, FieldExprMeta, FieldExpressionChipGPU, FieldExpressionCoreAir, SymbolicExpr, }; @@ -489,8 +489,7 @@ impl FieldExpressionChipGPU { ) { panic!( - "field_expression cuda tracegen failed [{}]: err={:?}, num_records={}, record_stride={}, padded_height={}, total_trace_width={}, workspace_per_thread={}, pointer_max_bits={}, timestamp_max_bits={}, num_inputs={}, num_vars={}, num_flags={}, num_local_opcodes={}, num_output_indices={}, max_q_count={}, max_ast_depth={}", - OPENVM_GPU_DEBUG_ID, + "field_expression cuda tracegen failed: err={:?}, num_records={}, record_stride={}, padded_height={}, total_trace_width={}, workspace_per_thread={}, pointer_max_bits={}, timestamp_max_bits={}, num_inputs={}, num_vars={}, num_flags={}, num_local_opcodes={}, num_output_indices={}, max_q_count={}, max_ast_depth={}", err, self.num_records, self.record_stride, @@ -509,17 +508,6 @@ impl FieldExpressionChipGPU { ); } } - - println!( - "[openvm-gpu-debug][{}] field_expression tracegen ok: num_records={} padded_height={} total_trace_width={} workspace_per_thread={} pointer_max_bits={} timestamp_max_bits={}", - OPENVM_GPU_DEBUG_ID, - self.num_records, - padded_height, - self.total_trace_width, - workspace_per_thread, - self.pointer_max_bits, - self.timestamp_max_bits, - ); mat } } diff --git a/crates/circuits/mod-builder/src/utils.rs b/crates/circuits/mod-builder/src/utils.rs index d840f5e702..07c43ef2e8 100644 --- a/crates/circuits/mod-builder/src/utils.rs +++ b/crates/circuits/mod-builder/src/utils.rs @@ -1,9 +1,5 @@ use num_bigint::BigUint; -use crate::FieldExpressionCoreAir; - -pub const OPENVM_GPU_DEBUG_ID: &str = "OVM-GPU-DBG-20260318"; - // Use this when num_limbs is not a constant. // little endian. // Warning: This function only returns the last NUM_LIMBS bytes of @@ -17,63 +13,3 @@ pub fn biguint_to_limbs_vec(x: &BigUint, num_limbs: usize) -> Vec { .collect() } -fn fnv1a64(bytes: &[u8]) -> u64 { - let mut hash: u64 = 0xcbf29ce484222325; - for &b in bytes { - hash ^= b as u64; - hash = hash.wrapping_mul(0x100000001b3); - } - hash -} - -pub fn debug_log_field_expr_gpu_input( - chip_label: &str, - record_size: usize, - num_records: usize, - adapter_width: usize, - adapter_blocks: usize, - pointer_max_bits: u32, - timestamp_max_bits: u32, - local_opcode_idx: &[usize], - opcode_flag_idx: &[usize], - air: &FieldExpressionCoreAir, - records: &[u8], -) { - let byte_len = records.len(); - let hash = fnv1a64(records); - let sample_len = byte_len.min(64); - - println!( - "[openvm-gpu-debug][{}] chip={} num_records={} record_size={} bytes={} hash=0x{:016x}", - OPENVM_GPU_DEBUG_ID, chip_label, num_records, record_size, byte_len, hash - ); - println!( - "[openvm-gpu-debug][{}] chip={} adapter_width={} adapter_blocks={} pointer_max_bits={} timestamp_max_bits={}", - OPENVM_GPU_DEBUG_ID, chip_label, adapter_width, adapter_blocks, pointer_max_bits, timestamp_max_bits - ); - println!( - "[openvm-gpu-debug][{}] chip={} local_opcode_idx={:?} opcode_flag_idx={:?}", - OPENVM_GPU_DEBUG_ID, chip_label, local_opcode_idx, opcode_flag_idx - ); - println!( - "[openvm-gpu-debug][{}] chip={} expr: num_inputs={} num_vars={} num_flags={} outputs={} computes={} constraints={} prime_limbs={} limb_bits={} canonical_num_limbs={}", - OPENVM_GPU_DEBUG_ID, - chip_label, - air.num_inputs(), - air.num_vars(), - air.num_flags(), - air.output_indices().len(), - air.expr.builder.computes.len(), - air.expr.builder.constraints.len(), - air.expr.builder.prime_limbs.len(), - air.expr.canonical_limb_bits(), - air.expr.canonical_num_limbs(), - ); - println!( - "[openvm-gpu-debug][{}] chip={} records_head({})={:?}", - OPENVM_GPU_DEBUG_ID, - chip_label, - sample_len, - &records[..sample_len] - ); -} diff --git a/crates/vm/src/arch/vm.rs b/crates/vm/src/arch/vm.rs index dcc2a216a2..cb58a0b77a 100644 --- a/crates/vm/src/arch/vm.rs +++ b/crates/vm/src/arch/vm.rs @@ -608,11 +608,6 @@ where .iter() .map(|(air_idx, ctx)| (*air_idx, ctx.main_trace_height())) .collect_vec(); - println!( - "[openvm-gpu-debug][OVM-NATIVE-GPU-DBG-20260318][vm.generate_proving_ctx] num_airs={} trace_heights={:?}", - ctx.per_air.len(), - idx_trace_heights - ); // 1. check max trace height isn't exceeded let max_trace_height = if TypeId::of::>() == TypeId::of::() { let min_log_blowup = log2_ceil_usize(self.config().as_ref().max_constraint_degree - 1); @@ -700,12 +695,7 @@ where let final_memory = (system_records.exit_code == Some(ExitCode::Success as u32)).then_some(to_state.memory); let ctx = self.generate_proving_ctx(system_records, record_arenas)?; - let prove_start = std::time::Instant::now(); let proof = self.engine.prove(&self.pk, ctx); - println!( - "[openvm-gpu-debug][OVM-NATIVE-GPU-DBG-20260318][vm.prove] engine.prove done in {:?}", - prove_start.elapsed() - ); Ok((proof, final_memory)) } diff --git a/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs b/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs index 241074423f..6d5d6793d1 100644 --- a/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs +++ b/extensions/algebra/circuit/src/fp2_chip/cuda/addsub.rs @@ -11,7 +11,6 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, - utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -67,20 +66,6 @@ impl Chip::width(); - debug_log_field_expr_gpu_input( - "fp2_addsub", - record_size, - num_records, - adapter_width, - BLOCKS, - self.pointer_max_bits, - self.timestamp_max_bits, - &air.local_opcode_idx, - &air.opcode_flag_idx, - &air, - &records, - ); - let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs b/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs index a8f4e693eb..bff1b62664 100644 --- a/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs +++ b/extensions/algebra/circuit/src/fp2_chip/cuda/muldiv.rs @@ -11,7 +11,6 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, - utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -67,20 +66,6 @@ impl Chip::width(); - debug_log_field_expr_gpu_input( - "fp2_muldiv", - record_size, - num_records, - adapter_width, - BLOCKS, - self.pointer_max_bits, - self.timestamp_max_bits, - &air.local_opcode_idx, - &air.opcode_flag_idx, - &air, - &records, - ); - let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs b/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs index 998d30371f..b369b5a27e 100644 --- a/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs +++ b/extensions/algebra/circuit/src/modular_chip/cuda/addsub.rs @@ -11,7 +11,6 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, - utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -67,20 +66,6 @@ impl Chip::width(); - debug_log_field_expr_gpu_input( - "modular_addsub", - record_size, - num_records, - adapter_width, - BLOCKS, - self.pointer_max_bits, - self.timestamp_max_bits, - &air.local_opcode_idx, - &air.opcode_flag_idx, - &air, - &records, - ); - let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs b/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs index 5793cf0eec..132a4b6f46 100644 --- a/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs +++ b/extensions/algebra/circuit/src/modular_chip/cuda/muldiv.rs @@ -11,7 +11,6 @@ use openvm_cuda_common::copy::MemCopyH2D; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, - utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -67,20 +66,6 @@ impl Chip::width(); - debug_log_field_expr_gpu_input( - "modular_muldiv", - record_size, - num_records, - adapter_width, - BLOCKS, - self.pointer_max_bits, - self.timestamp_max_bits, - &air.local_opcode_idx, - &air.opcode_flag_idx, - &air, - &records, - ); - let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs index 100eea0180..8d1d9bc08d 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/cuda.rs @@ -11,7 +11,6 @@ use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, - utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -65,20 +64,6 @@ impl Chip::width(); - debug_log_field_expr_gpu_input( - "weierstrass_add_ne", - record_size, - num_records, - adapter_width, - BLOCKS, - self.pointer_max_bits, - self.timestamp_max_bits, - &air.local_opcode_idx, - &air.opcode_flag_idx, - &air, - &records, - ); - let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs index 02c01e4630..04e45d068e 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/double/cuda.rs @@ -12,7 +12,6 @@ use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::riscv::RV32_CELL_BITS; use openvm_mod_circuit_builder::{ ExprBuilderConfig, FieldExpressionChipGPU, FieldExpressionCoreAir, FieldExpressionMetadata, - utils::debug_log_field_expr_gpu_input, }; use openvm_rv32_adapters::{Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor}; use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; @@ -67,20 +66,6 @@ impl Chip::width(); - debug_log_field_expr_gpu_input( - "weierstrass_double", - record_size, - num_records, - adapter_width, - BLOCKS, - self.pointer_max_bits, - self.timestamp_max_bits, - &air.local_opcode_idx, - &air.opcode_flag_idx, - &air, - &records, - ); - let d_records = records.to_device().unwrap(); let field_expr_chip = FieldExpressionChipGPU::new( diff --git a/extensions/native/circuit/src/branch_eq/cuda.rs b/extensions/native/circuit/src/branch_eq/cuda.rs index 43b52d8add..685413eb0a 100644 --- a/extensions/native/circuit/src/branch_eq/cuda.rs +++ b/extensions/native/circuit/src/branch_eq/cuda.rs @@ -14,7 +14,6 @@ use super::NativeBranchEqualCoreRecord; use crate::{ adapters::{BranchNativeAdapterCols, BranchNativeAdapterRecord}, cuda_abi::native_branch_eq_cuda, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -39,15 +38,6 @@ impl Chip for NativeBranchEqChipGpu { BranchNativeAdapterCols::::width() + BranchEqualCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_branch_eq", - records, - RECORD_SIZE, - height, - padded_height, - trace_width, - ); - let d_records = records.to_device().unwrap(); unsafe { @@ -60,27 +50,12 @@ impl Chip for NativeBranchEqChipGpu { self.timestamp_max_bits as u32, ) { panic!( - "native_branch_eq cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - height, - padded_height, - trace_width, - self.timestamp_max_bits, - records_hash, + "native_branch_eq cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", + err, height, padded_height, trace_width, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_branch_eq] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - trace_width, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/castf/cuda.rs b/extensions/native/circuit/src/castf/cuda.rs index 19d2dd2a79..90654cc7f8 100644 --- a/extensions/native/circuit/src/castf/cuda.rs +++ b/extensions/native/circuit/src/castf/cuda.rs @@ -13,7 +13,6 @@ use super::{CastFCoreCols, CastFCoreRecord}; use crate::{ adapters::{ConvertAdapterCols, ConvertAdapterRecord}, cuda_abi::castf_cuda, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -36,15 +35,6 @@ impl Chip for CastFChipGpu { let trace_width = ConvertAdapterCols::::width() + CastFCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_castf", - records, - RECORD_SIZE, - height, - padded_height, - trace_width, - ); - let d_records = records.to_device().unwrap(); unsafe { @@ -57,27 +47,12 @@ impl Chip for CastFChipGpu { self.timestamp_max_bits as u32, ) { panic!( - "native_castf cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - height, - padded_height, - trace_width, - self.timestamp_max_bits, - records_hash, + "native_castf cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", + err, height, padded_height, trace_width, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_castf] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - trace_width, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/field_arithmetic/cuda.rs b/extensions/native/circuit/src/field_arithmetic/cuda.rs index d2d1e44b49..70a0faf72a 100644 --- a/extensions/native/circuit/src/field_arithmetic/cuda.rs +++ b/extensions/native/circuit/src/field_arithmetic/cuda.rs @@ -13,7 +13,6 @@ use super::{FieldArithmeticCoreCols, FieldArithmeticRecord}; use crate::{ adapters::{AluNativeAdapterCols, AluNativeAdapterRecord}, cuda_abi::field_arithmetic_cuda, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -38,15 +37,6 @@ impl Chip for FieldArithmeticChipGpu { AluNativeAdapterCols::::width() + FieldArithmeticCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_field_arithmetic", - records, - RECORD_SIZE, - height, - padded_height, - trace_width, - ); - let d_records = records.to_device().unwrap(); unsafe { @@ -60,27 +50,12 @@ impl Chip for FieldArithmeticChipGpu { self.timestamp_max_bits as u32, ) { panic!( - "native_field_arithmetic cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - height, - padded_height, - trace_width, - self.timestamp_max_bits, - records_hash, + "native_field_arithmetic cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", + err, height, padded_height, trace_width, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_field_arithmetic] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - trace_width, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/field_extension/cuda.rs b/extensions/native/circuit/src/field_extension/cuda.rs index f8312a61a7..5e32aa3a05 100644 --- a/extensions/native/circuit/src/field_extension/cuda.rs +++ b/extensions/native/circuit/src/field_extension/cuda.rs @@ -13,7 +13,6 @@ use super::{FieldExtensionCoreCols, FieldExtensionRecord, EXT_DEG}; use crate::{ adapters::{NativeVectorizedAdapterCols, NativeVectorizedAdapterRecord}, cuda_abi::field_extension_cuda, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -40,15 +39,6 @@ impl Chip for FieldExtensionChipGpu { + FieldExtensionCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_field_extension", - records, - RECORD_SIZE, - height, - padded_height, - trace_width, - ); - let d_records = records.to_device().unwrap(); unsafe { @@ -61,27 +51,12 @@ impl Chip for FieldExtensionChipGpu { self.timestamp_max_bits as u32, ) { panic!( - "native_field_extension cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - height, - padded_height, - trace_width, - self.timestamp_max_bits, - records_hash, + "native_field_extension cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", + err, height, padded_height, trace_width, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_field_extension] tracegen ok: height={} padded_height={} trace_width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - trace_width, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/fri/cuda.rs b/extensions/native/circuit/src/fri/cuda.rs index bfa0003ca8..cd8eb5e746 100644 --- a/extensions/native/circuit/src/fri/cuda.rs +++ b/extensions/native/circuit/src/fri/cuda.rs @@ -15,7 +15,6 @@ use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; use super::{FriReducedOpeningRecordMut, OVERALL_WIDTH}; use crate::{ cuda_abi::fri_cuda, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -59,15 +58,6 @@ impl Chip for FriReducedOpeningChipGpu { let trace_width = OVERALL_WIDTH; let trace = DeviceMatrix::::with_capacity(trace_height, trace_width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_fri_reduced_opening", - records, - 0, - record_info.len(), - trace_height, - trace_width, - ); - unsafe { if let Err(err) = fri_cuda::tracegen( trace.buffer(), @@ -79,27 +69,12 @@ impl Chip for FriReducedOpeningChipGpu { self.timestamp_max_bits as u32, ) { panic!( - "native_fri_reduced_opening cuda tracegen failed [{}]: err={:?}, rows={}, padded_height={}, trace_width={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - record_info.len(), - trace_height, - trace_width, - self.timestamp_max_bits, - records_hash, + "native_fri_reduced_opening cuda tracegen failed: err={:?}, rows={}, padded_height={}, trace_width={}, timestamp_max_bits={}", + err, record_info.len(), trace_height, trace_width, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_fri_reduced_opening] tracegen ok: rows={} padded_height={} trace_width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - record_info.len(), - trace_height, - trace_width, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/jal_rangecheck/cuda.rs b/extensions/native/circuit/src/jal_rangecheck/cuda.rs index 31ff5ce117..0075dda59b 100644 --- a/extensions/native/circuit/src/jal_rangecheck/cuda.rs +++ b/extensions/native/circuit/src/jal_rangecheck/cuda.rs @@ -12,7 +12,6 @@ use openvm_stark_backend::{prover::types::AirProvingContext, Chip}; use super::{JalRangeCheckCols, JalRangeCheckRecord}; use crate::{ cuda_abi::native_jal_rangecheck_cuda, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -36,15 +35,6 @@ impl Chip for JalRangeCheckGpu { let padded_height = next_power_of_two_or_zero(height); let trace = DeviceMatrix::::with_capacity(padded_height, width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_jal_rangecheck", - records, - RECORD_SIZE, - height, - padded_height, - width, - ); - let d_records = records.to_device().unwrap(); unsafe { @@ -57,27 +47,12 @@ impl Chip for JalRangeCheckGpu { self.timestamp_max_bits as u32, ) { panic!( - "native_jal_rangecheck cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - height, - padded_height, - width, - self.timestamp_max_bits, - records_hash, + "native_jal_rangecheck cuda tracegen failed: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}", + err, height, padded_height, width, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_jal_rangecheck] tracegen ok: height={} padded_height={} width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - width, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/loadstore/cuda.rs b/extensions/native/circuit/src/loadstore/cuda.rs index a73eff9020..7e9ab75c4e 100644 --- a/extensions/native/circuit/src/loadstore/cuda.rs +++ b/extensions/native/circuit/src/loadstore/cuda.rs @@ -13,7 +13,6 @@ use super::{NativeLoadStoreCoreCols, NativeLoadStoreCoreRecord}; use crate::{ adapters::{NativeLoadStoreAdapterCols, NativeLoadStoreAdapterRecord}, cuda_abi::native_loadstore_cuda, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; #[derive(new)] @@ -47,15 +46,6 @@ impl Chip + NativeLoadStoreCoreCols::::width(); let trace = DeviceMatrix::::with_capacity(padded_height, trace_width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_loadstore", - records, - record_size, - height, - padded_height, - trace_width, - ); - let d_records = records.to_device().unwrap(); unsafe { @@ -69,29 +59,12 @@ impl Chip self.timestamp_max_bits as u32, ) { panic!( - "native_loadstore cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, trace_width={}, num_cells={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - height, - padded_height, - trace_width, - NUM_CELLS, - self.timestamp_max_bits, - records_hash, + "native_loadstore cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, num_cells={}, timestamp_max_bits={}", + err, height, padded_height, trace_width, NUM_CELLS, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_loadstore] tracegen ok: height={} padded_height={} trace_width={} num_cells={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - trace_width, - NUM_CELLS, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/poseidon2/cuda.rs b/extensions/native/circuit/src/poseidon2/cuda.rs index aa3ad73a50..139a09e85a 100644 --- a/extensions/native/circuit/src/poseidon2/cuda.rs +++ b/extensions/native/circuit/src/poseidon2/cuda.rs @@ -13,7 +13,6 @@ use super::columns::{MultiObserveCols, NativePoseidon2Cols}; use crate::{ cuda_abi::poseidon2_cuda, hint_space_provider::SharedHintSpaceProviderChip, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; pub struct NativePoseidon2ChipGpu { @@ -141,15 +140,6 @@ impl Chip let height = records.len() / record_size; let padded_height = next_power_of_two_or_zero(height); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_poseidon2", - records, - record_size, - height, - padded_height, - width, - ); - let d_chunk_start = { let mut row_idx = 0; let row_slice = unsafe { @@ -198,8 +188,7 @@ impl Chip self.timestamp_max_bits as u32, ) { panic!( - "native_poseidon2 cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, width={}, chunk_count={}, sbox_registers={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, + "native_poseidon2 cuda tracegen failed: err={:?}, height={}, padded_height={}, width={}, chunk_count={}, sbox_registers={}, timestamp_max_bits={}", err, height, padded_height, @@ -207,21 +196,10 @@ impl Chip d_chunk_start.len(), SBOX_REGISTERS, self.timestamp_max_bits, - records_hash, ); } } - println!( - "[openvm-gpu-debug][{}][native_poseidon2] tracegen ok: height={} padded_height={} width={} chunk_count={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - width, - d_chunk_start.len(), - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/sumcheck/cuda.rs b/extensions/native/circuit/src/sumcheck/cuda.rs index 0a6cfdeac3..ec51c98f5e 100644 --- a/extensions/native/circuit/src/sumcheck/cuda.rs +++ b/extensions/native/circuit/src/sumcheck/cuda.rs @@ -13,7 +13,6 @@ use super::columns::{LogupSpecificCols, NativeSumcheckCols, ProdSpecificCols}; use crate::{ cuda_abi::sumcheck_cuda, hint_space_provider::SharedHintSpaceProviderChip, - utils::{OPENVM_NATIVE_GPU_DEBUG_ID, debug_log_native_gpu_tracegen_input}, }; use p3_field::FieldAlgebra; @@ -93,15 +92,6 @@ impl Chip for NativeSumcheckChipGpu { let padded_height = next_power_of_two_or_zero(height); let trace = DeviceMatrix::::with_capacity(padded_height, width); - let records_hash = debug_log_native_gpu_tracegen_input( - "native_sumcheck", - records, - record_size, - height, - padded_height, - width, - ); - let record_slice = unsafe { let ptr = records.as_ptr(); from_raw_parts(ptr as *const F, records.len() / size_of::()) @@ -119,27 +109,12 @@ impl Chip for NativeSumcheckChipGpu { self.timestamp_max_bits as u32, ) { panic!( - "native_sumcheck cuda tracegen failed [{}]: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}, hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - err, - height, - padded_height, - width, - self.timestamp_max_bits, - records_hash, + "native_sumcheck cuda tracegen failed: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}", + err, height, padded_height, width, self.timestamp_max_bits, ); } } - println!( - "[openvm-gpu-debug][{}][native_sumcheck] tracegen ok: height={} padded_height={} width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - height, - padded_height, - width, - records_hash, - ); - AirProvingContext::simple_no_pis(trace) } } diff --git a/extensions/native/circuit/src/utils.rs b/extensions/native/circuit/src/utils.rs index c2be13f80b..3d05656f16 100644 --- a/extensions/native/circuit/src/utils.rs +++ b/extensions/native/circuit/src/utils.rs @@ -5,55 +5,11 @@ use openvm_circuit::system::{ use p3_field::PrimeField32; pub(crate) const CASTF_MAX_BITS: usize = 30; -#[cfg(feature = "cuda")] -pub(crate) const OPENVM_NATIVE_GPU_DEBUG_ID: &str = "OVM-NATIVE-GPU-DBG-20260318"; pub(crate) const fn const_max(a: usize, b: usize) -> usize { [a, b][(a < b) as usize] } -#[cfg(feature = "cuda")] -fn fnv1a64(bytes: &[u8]) -> u64 { - let mut hash: u64 = 0xcbf29ce484222325; - for &b in bytes { - hash ^= b as u64; - hash = hash.wrapping_mul(0x100000001b3); - } - hash -} - -#[cfg(feature = "cuda")] -pub(crate) fn debug_log_native_gpu_tracegen_input( - chip_label: &str, - records: &[u8], - record_size: usize, - height: usize, - padded_height: usize, - trace_width: usize, -) -> u64 { - let hash = fnv1a64(records); - let head_len = records.len().min(64); - println!( - "[openvm-gpu-debug][{}][{}] records_bytes={} record_size={} height={} padded_height={} trace_width={} hash=0x{:016x}", - OPENVM_NATIVE_GPU_DEBUG_ID, - chip_label, - records.len(), - record_size, - height, - padded_height, - trace_width, - hash - ); - println!( - "[openvm-gpu-debug][{}][{}] records_head({})={:?}", - OPENVM_NATIVE_GPU_DEBUG_ID, - chip_label, - head_len, - &records[..head_len] - ); - hash -} - /// Fill `MemoryBaseAuxCols`, assuming that the `prev_timestamp` is already set in `base_aux`. pub(crate) fn mem_fill_helper( mem_helper: &MemoryAuxColsFactory, From 210b60c214334f55aba8213dafb246ed91367927 Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Wed, 18 Mar 2026 20:42:04 -0400 Subject: [PATCH 13/14] remove debug flag --- crates/circuits/mod-builder/src/cuda/chip.rs | 23 ++----------------- .../native/circuit/src/branch_eq/cuda.rs | 10 +++----- extensions/native/circuit/src/castf/cuda.rs | 10 +++----- .../circuit/src/field_arithmetic/cuda.rs | 10 +++----- .../circuit/src/field_extension/cuda.rs | 10 +++----- extensions/native/circuit/src/fri/cuda.rs | 10 +++----- .../native/circuit/src/jal_rangecheck/cuda.rs | 10 +++----- .../native/circuit/src/loadstore/cuda.rs | 10 +++----- .../native/circuit/src/poseidon2/cuda.rs | 16 +++---------- .../native/circuit/src/sumcheck/cuda.rs | 10 +++----- 10 files changed, 29 insertions(+), 90 deletions(-) diff --git a/crates/circuits/mod-builder/src/cuda/chip.rs b/crates/circuits/mod-builder/src/cuda/chip.rs index 5d2f6e1f5a..ec53660e72 100644 --- a/crates/circuits/mod-builder/src/cuda/chip.rs +++ b/crates/circuits/mod-builder/src/cuda/chip.rs @@ -471,7 +471,7 @@ impl FieldExpressionChipGPU { unsafe { cudaDeviceSetLimit(cudaLimit::cudaLimitStackSize, 48 * 1024); - if let Err(err) = tracegen( + tracegen( &self.records, mat.buffer(), &self.meta, @@ -487,26 +487,7 @@ impl FieldExpressionChipGPU { workspace.as_ptr(), workspace_per_thread, ) - { - panic!( - "field_expression cuda tracegen failed: err={:?}, num_records={}, record_stride={}, padded_height={}, total_trace_width={}, workspace_per_thread={}, pointer_max_bits={}, timestamp_max_bits={}, num_inputs={}, num_vars={}, num_flags={}, num_local_opcodes={}, num_output_indices={}, max_q_count={}, max_ast_depth={}", - err, - self.num_records, - self.record_stride, - padded_height, - self.total_trace_width, - workspace_per_thread, - self.pointer_max_bits, - self.timestamp_max_bits, - meta_host.num_inputs, - meta_host.expr_meta.num_vars, - meta_host.num_u32_flags, - meta_host.num_local_opcodes, - meta_host.num_output_indices, - meta_host.max_q_count, - meta_host.max_ast_depth, - ); - } + .unwrap(); } mat } diff --git a/extensions/native/circuit/src/branch_eq/cuda.rs b/extensions/native/circuit/src/branch_eq/cuda.rs index 685413eb0a..b5a8dcb0a8 100644 --- a/extensions/native/circuit/src/branch_eq/cuda.rs +++ b/extensions/native/circuit/src/branch_eq/cuda.rs @@ -41,19 +41,15 @@ impl Chip for NativeBranchEqChipGpu { let d_records = records.to_device().unwrap(); unsafe { - if let Err(err) = native_branch_eq_cuda::tracegen( + native_branch_eq_cuda::tracegen( trace.buffer(), padded_height, trace_width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) { - panic!( - "native_branch_eq cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", - err, height, padded_height, trace_width, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/castf/cuda.rs b/extensions/native/circuit/src/castf/cuda.rs index 90654cc7f8..fd80e5cedc 100644 --- a/extensions/native/circuit/src/castf/cuda.rs +++ b/extensions/native/circuit/src/castf/cuda.rs @@ -38,19 +38,15 @@ impl Chip for CastFChipGpu { let d_records = records.to_device().unwrap(); unsafe { - if let Err(err) = castf_cuda::tracegen( + castf_cuda::tracegen( trace.buffer(), padded_height, trace_width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) { - panic!( - "native_castf cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", - err, height, padded_height, trace_width, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/field_arithmetic/cuda.rs b/extensions/native/circuit/src/field_arithmetic/cuda.rs index 70a0faf72a..a6f67d231f 100644 --- a/extensions/native/circuit/src/field_arithmetic/cuda.rs +++ b/extensions/native/circuit/src/field_arithmetic/cuda.rs @@ -40,7 +40,7 @@ impl Chip for FieldArithmeticChipGpu { let d_records = records.to_device().unwrap(); unsafe { - if let Err(err) = field_arithmetic_cuda::tracegen( + field_arithmetic_cuda::tracegen( trace.buffer(), padded_height, trace_width, @@ -48,12 +48,8 @@ impl Chip for FieldArithmeticChipGpu { self.range_checker.count.as_ptr() as *const u32, self.range_checker.count.len(), self.timestamp_max_bits as u32, - ) { - panic!( - "native_field_arithmetic cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", - err, height, padded_height, trace_width, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/field_extension/cuda.rs b/extensions/native/circuit/src/field_extension/cuda.rs index 5e32aa3a05..af88de22b5 100644 --- a/extensions/native/circuit/src/field_extension/cuda.rs +++ b/extensions/native/circuit/src/field_extension/cuda.rs @@ -42,19 +42,15 @@ impl Chip for FieldExtensionChipGpu { let d_records = records.to_device().unwrap(); unsafe { - if let Err(err) = field_extension_cuda::tracegen( + field_extension_cuda::tracegen( trace.buffer(), padded_height, trace_width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) { - panic!( - "native_field_extension cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, timestamp_max_bits={}", - err, height, padded_height, trace_width, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/fri/cuda.rs b/extensions/native/circuit/src/fri/cuda.rs index cd8eb5e746..4f4ef59e60 100644 --- a/extensions/native/circuit/src/fri/cuda.rs +++ b/extensions/native/circuit/src/fri/cuda.rs @@ -59,7 +59,7 @@ impl Chip for FriReducedOpeningChipGpu { let trace = DeviceMatrix::::with_capacity(trace_height, trace_width); unsafe { - if let Err(err) = fri_cuda::tracegen( + fri_cuda::tracegen( trace.buffer(), trace_height, &d_records, @@ -67,12 +67,8 @@ impl Chip for FriReducedOpeningChipGpu { &d_record_info, &self.range_checker.count, self.timestamp_max_bits as u32, - ) { - panic!( - "native_fri_reduced_opening cuda tracegen failed: err={:?}, rows={}, padded_height={}, trace_width={}, timestamp_max_bits={}", - err, record_info.len(), trace_height, trace_width, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/jal_rangecheck/cuda.rs b/extensions/native/circuit/src/jal_rangecheck/cuda.rs index 0075dda59b..7ba9fa198b 100644 --- a/extensions/native/circuit/src/jal_rangecheck/cuda.rs +++ b/extensions/native/circuit/src/jal_rangecheck/cuda.rs @@ -38,19 +38,15 @@ impl Chip for JalRangeCheckGpu { let d_records = records.to_device().unwrap(); unsafe { - if let Err(err) = native_jal_rangecheck_cuda::tracegen( + native_jal_rangecheck_cuda::tracegen( trace.buffer(), padded_height, width, &d_records, &self.range_checker.count, self.timestamp_max_bits as u32, - ) { - panic!( - "native_jal_rangecheck cuda tracegen failed: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}", - err, height, padded_height, width, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/loadstore/cuda.rs b/extensions/native/circuit/src/loadstore/cuda.rs index 7e9ab75c4e..8f7e3b83ef 100644 --- a/extensions/native/circuit/src/loadstore/cuda.rs +++ b/extensions/native/circuit/src/loadstore/cuda.rs @@ -49,7 +49,7 @@ impl Chip let d_records = records.to_device().unwrap(); unsafe { - if let Err(err) = native_loadstore_cuda::tracegen( + native_loadstore_cuda::tracegen( trace.buffer(), padded_height, trace_width, @@ -57,12 +57,8 @@ impl Chip &self.range_checker.count, NUM_CELLS as u32, self.timestamp_max_bits as u32, - ) { - panic!( - "native_loadstore cuda tracegen failed: err={:?}, height={}, padded_height={}, trace_width={}, num_cells={}, timestamp_max_bits={}", - err, height, padded_height, trace_width, NUM_CELLS, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/poseidon2/cuda.rs b/extensions/native/circuit/src/poseidon2/cuda.rs index 139a09e85a..4bdf337a75 100644 --- a/extensions/native/circuit/src/poseidon2/cuda.rs +++ b/extensions/native/circuit/src/poseidon2/cuda.rs @@ -175,7 +175,7 @@ impl Chip let d_records = records.to_device().unwrap(); unsafe { - if let Err(err) = poseidon2_cuda::tracegen( + poseidon2_cuda::tracegen( trace.buffer(), padded_height, width, @@ -186,18 +186,8 @@ impl Chip &self.range_checker.count, SBOX_REGISTERS as u32, self.timestamp_max_bits as u32, - ) { - panic!( - "native_poseidon2 cuda tracegen failed: err={:?}, height={}, padded_height={}, width={}, chunk_count={}, sbox_registers={}, timestamp_max_bits={}", - err, - height, - padded_height, - width, - d_chunk_start.len(), - SBOX_REGISTERS, - self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) diff --git a/extensions/native/circuit/src/sumcheck/cuda.rs b/extensions/native/circuit/src/sumcheck/cuda.rs index ec51c98f5e..2dcecd5756 100644 --- a/extensions/native/circuit/src/sumcheck/cuda.rs +++ b/extensions/native/circuit/src/sumcheck/cuda.rs @@ -99,7 +99,7 @@ impl Chip for NativeSumcheckChipGpu { let d_records = record_slice.to_device().unwrap(); unsafe { - if let Err(err) = sumcheck_cuda::tracegen( + sumcheck_cuda::tracegen( trace.buffer(), padded_height, width, @@ -107,12 +107,8 @@ impl Chip for NativeSumcheckChipGpu { height, &self.range_checker.count, self.timestamp_max_bits as u32, - ) { - panic!( - "native_sumcheck cuda tracegen failed: err={:?}, height={}, padded_height={}, width={}, timestamp_max_bits={}", - err, height, padded_height, width, self.timestamp_max_bits, - ); - } + ) + .unwrap(); } AirProvingContext::simple_no_pis(trace) From 835800054c8b62201736215bad00d388495cd902 Mon Sep 17 00:00:00 2001 From: Ray Gao Date: Thu, 19 Mar 2026 20:08:29 -0400 Subject: [PATCH 14/14] remove debug flag --- crates/circuits/mod-builder/src/utils.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/circuits/mod-builder/src/utils.rs b/crates/circuits/mod-builder/src/utils.rs index 07c43ef2e8..f8dcd948b2 100644 --- a/crates/circuits/mod-builder/src/utils.rs +++ b/crates/circuits/mod-builder/src/utils.rs @@ -11,5 +11,4 @@ pub fn biguint_to_limbs_vec(x: &BigUint, num_limbs: usize) -> Vec { .chain(std::iter::repeat(0u8)) .take(num_limbs) .collect() -} - +} \ No newline at end of file