-
Notifications
You must be signed in to change notification settings - Fork 14
Deliver synchronous fault signals to the faulting thread #86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1317,37 +1317,27 @@ static void build_sigcontext_reserved(uint8_t *reserved, | |
| memset(reserved + off, 0, 8); | ||
| } | ||
|
|
||
| int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) | ||
| /* Build and install the rt_sigframe for `signum` on the current thread, with | ||
| * sig_lock held on entry and released on every return path. Shared by | ||
| * signal_deliver() (signal selected from the process-wide pending set) and | ||
| * signal_deliver_fault() (synchronous fault forced onto the faulting thread). | ||
| * rt_info supplies si_code/si_pid/sigval when no thread-local pending_fault is | ||
| * set; the pending_fault is consumed (one-shot) when valid. Returns 1 if a | ||
| * handler frame was installed, 0 if the signal was ignored, and -1 (with | ||
| * *exit_code set) when the default disposition terminates the guest. | ||
| */ | ||
| static int deliver_signal_locked(hv_vcpu_t vcpu, | ||
| guest_t *g, | ||
| int signum, | ||
| signal_rt_info_t rt_info, | ||
| int *exit_code) | ||
| { | ||
| pthread_mutex_lock(&sig_lock); | ||
| uint64_t *blocked = thread_blocked_ptr(); | ||
| uint64_t *saved_ptr = thread_saved_blocked_ptr(); | ||
| bool *valid_ptr = thread_saved_valid_ptr(); | ||
| uint64_t deliverable = sig_state.pending & ~*blocked; | ||
| if (deliverable == 0) { | ||
| pthread_mutex_unlock(&sig_lock); | ||
| return 0; | ||
| } | ||
|
|
||
| /* Find lowest pending unblocked signal */ | ||
| int signum = bit_ctz64(deliverable) + 1; | ||
| signal_rt_info_t rt_info = signal_default_info(signum); | ||
|
|
||
| /* Dequeue: for RT signals, decrement count and only clear the | ||
| * pending bit when the queue is empty. Standard signals are | ||
| * always cleared (single instance, bitmask semantics). | ||
| */ | ||
| if (signum >= LINUX_SIGRTMIN) { | ||
| signal_rt_dequeue_locked(signum, &rt_info); | ||
| } else { | ||
| rt_info = signal_standard_peek_locked(signum); | ||
| sig_state.std_info_valid[signum - 1] = false; | ||
| sig_state.pending &= ~sig_bit(signum); | ||
| } | ||
|
|
||
| /* signum is bit_ctz64(deliverable) + 1, bounded 1..64 by the 64-bit | ||
| * pending mask. The static analyzer cannot see the bound, so gate the | ||
| * array access defensively. | ||
| /* signum is 1..64 from the caller; the static analyzer cannot see the | ||
| * bound, so gate the array access defensively. | ||
| */ | ||
| int idx = signum - 1; | ||
| if (!RANGE_CHECK(idx, 0, LINUX_NSIG)) { | ||
|
|
@@ -1386,14 +1376,35 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) | |
|
|
||
| /* Deliver to user handler: build rt_sigframe on guest stack */ | ||
|
|
||
| /* 1. Save current vCPU state */ | ||
| /* 1. Save current vCPU state. | ||
| * | ||
| * ELR_EL1/SPSR_EL1 hold the interrupted EL0 return state only while the | ||
| * guest is unwinding a syscall (it is at EL1 in the shim, about to ERET). | ||
| * When the vCPU was preempted while executing EL0 code -- a tight compute | ||
| * loop interrupted by SIGALRM, or the cross-process guest-signal transport | ||
| * (SIGUSR2) firing mid-execution -- the live interrupted state is in | ||
| * HV_REG_PC / HV_REG_CPSR and ELR_EL1 is stale from the previous syscall. | ||
| * Redirecting via ELR_EL1 alone is then a no-op because the resume uses | ||
| * HV_REG_PC, so the handler never runs and the X0..X2 writes below clobber | ||
| * the interrupted registers instead. Detect the EL0-preemption case from | ||
| * the live PSTATE (M[3:0]==0 => EL0t) and use PC for both save and | ||
| * redirect. | ||
| */ | ||
| uint64_t saved_regs[31]; | ||
| uint64_t saved_sp, saved_pc, saved_pstate; | ||
| uint64_t cur_cpsr = 0; | ||
| hv_vcpu_get_reg(vcpu, HV_REG_CPSR, &cur_cpsr); | ||
| bool el0_preempt = (cur_cpsr & 0xfULL) == 0; | ||
|
|
||
| vcpu_snapshot_gprs(vcpu, saved_regs); | ||
| saved_sp = vcpu_get_sysreg(vcpu, HV_SYS_REG_SP_EL0); | ||
| saved_pc = vcpu_get_sysreg(vcpu, HV_SYS_REG_ELR_EL1); | ||
| saved_pstate = vcpu_get_sysreg(vcpu, HV_SYS_REG_SPSR_EL1); | ||
| if (el0_preempt) { | ||
| hv_vcpu_get_reg(vcpu, HV_REG_PC, &saved_pc); | ||
| saved_pstate = cur_cpsr; | ||
| } else { | ||
| saved_pc = vcpu_get_sysreg(vcpu, HV_SYS_REG_ELR_EL1); | ||
| saved_pstate = vcpu_get_sysreg(vcpu, HV_SYS_REG_SPSR_EL1); | ||
| } | ||
|
|
||
| /* 1b. rseq abort: if the thread is in a restartable sequence critical | ||
| * section, abort it. Linux does this on every signal delivery. | ||
|
|
@@ -1549,6 +1560,16 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) | |
| /* SPSR_EL1: EL0t (user mode) */ | ||
| hv_vcpu_set_sys_reg(vcpu, HV_SYS_REG_SPSR_EL1, 0); | ||
|
|
||
| /* EL0-preemption delivery: the resume runs from HV_REG_PC, not via an | ||
| * ERET that consumes ELR_EL1, so redirect the live PC/PSTATE directly. | ||
| * The ELR_EL1/SPSR_EL1 writes above still cover the rt_sigreturn path, | ||
| * which unwinds back to EL0 through the shim ERET. | ||
| */ | ||
| if (el0_preempt) { | ||
| hv_vcpu_set_reg(vcpu, HV_REG_PC, act->sa_handler); | ||
| hv_vcpu_set_reg(vcpu, HV_REG_CPSR, 0); /* EL0t */ | ||
| } | ||
|
|
||
| /* X0 = signal number */ | ||
| hv_vcpu_set_reg(vcpu, HV_REG_X0, (uint64_t) signum); | ||
|
|
||
|
|
@@ -1590,13 +1611,63 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) | |
| * shim still has the interrupted syscall frame on its EL1 stack. Tell it | ||
| * to drop that frame so the handler PC/SP/LR/args installed above are not | ||
| * overwritten before ERET. Fault/BRK delivery paths ignore this marker. | ||
| * The EL0-preemption path resumes straight into the handler at EL0 with | ||
| * no shim frame to drop, so the marker is neither needed nor consulted. | ||
| */ | ||
| hv_vcpu_set_reg(vcpu, HV_REG_X8, 2); | ||
| if (!el0_preempt) | ||
| hv_vcpu_set_reg(vcpu, HV_REG_X8, 2); | ||
|
|
||
| pthread_mutex_unlock(&sig_lock); | ||
| return 1; | ||
| } | ||
|
|
||
| int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) | ||
| { | ||
| pthread_mutex_lock(&sig_lock); | ||
| uint64_t *blocked = thread_blocked_ptr(); | ||
| uint64_t deliverable = sig_state.pending & ~*blocked; | ||
| if (deliverable == 0) { | ||
| pthread_mutex_unlock(&sig_lock); | ||
| return 0; | ||
| } | ||
|
|
||
| /* Find lowest pending unblocked signal */ | ||
| int signum = bit_ctz64(deliverable) + 1; | ||
| signal_rt_info_t rt_info = signal_default_info(signum); | ||
|
|
||
| /* Dequeue: for RT signals, decrement count and only clear the | ||
| * pending bit when the queue is empty. Standard signals are | ||
| * always cleared (single instance, bitmask semantics). | ||
| */ | ||
| if (signum >= LINUX_SIGRTMIN) { | ||
| signal_rt_dequeue_locked(signum, &rt_info); | ||
| } else { | ||
| rt_info = signal_standard_peek_locked(signum); | ||
| sig_state.std_info_valid[signum - 1] = false; | ||
| sig_state.pending &= ~sig_bit(signum); | ||
| } | ||
|
|
||
| return deliver_signal_locked(vcpu, g, signum, rt_info, exit_code); | ||
| } | ||
|
|
||
| int signal_deliver_fault(hv_vcpu_t vcpu, guest_t *g, int signum, int *exit_code) | ||
| { | ||
| /* Synchronous faults (SIGSEGV/SIGBUS/SIGILL/SIGFPE/SIGTRAP) are specific to | ||
| * the thread that triggered them and must be delivered to that thread with | ||
| * the thread-local fault info set by signal_set_fault_info(). Routing them | ||
| * through the process-wide pending bitmask (signal_queue + signal_deliver) | ||
| * is racy: another vCPU thread can dequeue the bit and deliver it with no | ||
| * fault info (si_code becomes SI_USER, which makes a JVM treat a recoverable | ||
| * implicit null-check as a fatal external signal), and two threads faulting | ||
| * on the same signal collapse into one bit so one fault is lost. Deliver | ||
| * directly here, never touching sig_state.pending. The blocked mask is | ||
| * intentionally ignored: a synchronous fault cannot be postponed. | ||
| */ | ||
| pthread_mutex_lock(&sig_lock); | ||
| signal_rt_info_t rt_info = signal_default_info(signum); | ||
| return deliver_signal_locked(vcpu, g, signum, rt_info, exit_code); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The comment two lines up calls this "intentionally ignored: a synchronous fault cannot be postponed" -- that's only half the Linux contract.
JVM works either way because it never blocks SIGSEGV, but the contract drift is real. A five-line precheck closes both cases (the fault signal set to SIG_IGN, or blocked in the thread's mask) with one shape:
```c
int signal_deliver_fault(hv_vcpu_t vcpu, guest_t *g, int signum, int *exit_code)
{
    pthread_mutex_lock(&sig_lock);
    uint64_t *blocked = thread_blocked_ptr();
    linux_sigaction_t *act = &sig_state.actions[signum - 1];
    if (act->sa_handler == LINUX_SIG_IGN ||
        (*blocked & sig_bit(signum))) {
        /* Linux force_sig_info_to_task: forced synchronous faults reset
         * disposition to SIG_DFL, unblock the signum, then apply default. */
        act->sa_handler = LINUX_SIG_DFL;
        *blocked &= ~sig_bit(signum);
    }
    signal_rt_info_t rt_info = signal_default_info(signum);
    return deliver_signal_locked(vcpu, g, signum, rt_info, exit_code);
}
```
Update the "intentionally ignored" comment accordingly. |
||
| } | ||
|
|
||
| /* rt_sigreturn. */ | ||
|
|
||
| int signal_rt_sigreturn(hv_vcpu_t vcpu, guest_t *g) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P1: When UNKNOWN exits are treated as preemption, rseq abort can read a stale PC and fail to redirect to
abort_ip, leaving a preempted rseq critical section to resume incorrectly. Consider syncing ELR/SPSR from `HV_REG_PC`/`HV_REG_CPSR` (or using `HV_REG_PC` directly) on UNKNOWN before the `rseq_try_abort()` path.
Prompt for AI agents