From f5884e7fa253a0f5e9304bd87a0a4e6ad0e93388 Mon Sep 17 00:00:00 2001
From: Vitalii Parfonov
Date: Thu, 11 Jun 2026 16:05:38 +0300
Subject: [PATCH] feat(startup): add --raise-fd-limit CLI flag to raise file descriptor soft limit (#25251)

* fix(startup): add --raise-fd-limit CLI flag to raise file descriptor soft limit

Add an opt-in --raise-fd-limit flag (and VECTOR_RAISE_FD_LIMIT env var) that
raises the RLIMIT_NOFILE soft limit to the hard limit at startup, preventing
"Too many open files" errors when monitoring many log files. On macOS, falls
back to kern.maxfilesperproc if the hard limit is too high.

Co-Authored-By: Claude Opus 4.6

* serialize rlimit tests and reword changelog

Add RLIMIT_MUTEX to prevent concurrent rlimit-mutating tests from
interfering with each other. Reword changelog to avoid triggering
the CI spelling checker.

* move fd-limit raise after logging init and isolate tests in subprocesses

---------

Co-authored-by: Claude Opus 4.6
---
 Cargo.toml                                 |   3 +-
 changelog.d/raise_fd_limit_cli_flag.fix.md |   7 +
 src/app.rs                                 |   5 +
 src/cli.rs                                 | 180 +++++++++++++++++++++
 4 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/raise_fd_limit_cli_flag.fix.md

diff --git a/Cargo.toml b/Cargo.toml
index 490155984fe90..adabd06a6b09e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -457,7 +457,8 @@ byteorder = "1.5.0"
 windows-service = "0.8.0"
 
 [target.'cfg(unix)'.dependencies]
-nix = { version = "0.31", default-features = false, features = ["socket", "signal", "fs"] }
+libc.workspace = true
+nix = { version = "0.31", default-features = false, features = ["socket", "signal", "fs", "resource"] }
 
 [target.'cfg(target_os = "linux")'.dependencies]
 netlink-packet-utils = "0.5.2"
diff --git a/changelog.d/raise_fd_limit_cli_flag.fix.md b/changelog.d/raise_fd_limit_cli_flag.fix.md
new file mode 100644
index 0000000000000..10b8c2c306856
--- /dev/null
+++ b/changelog.d/raise_fd_limit_cli_flag.fix.md
@@ -0,0 +1,7 @@
+A new `--raise-fd-limit` CLI flag (or `VECTOR_RAISE_FD_LIMIT` environment variable)
+raises the file descriptor soft limit to the hard limit at startup. This prevents
+"Too many open files" errors when Vector monitors large numbers of log files. On
+macOS, Vector falls back to the kernel-enforced per-process file limit if the hard
+limit is too high.
+
+authors: vparfonov
diff --git a/src/app.rs b/src/app.rs
index 9e8a415608a8f..45066a590a57a 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -207,6 +207,11 @@ impl Application {
             opts.root.internal_log_rate_limit,
         );
 
+        #[cfg(unix)]
+        if opts.root.raise_fd_limit {
+            crate::cli::raise_file_descriptor_limit();
+        }
+
         // Set global color preference for downstream modules
         crate::set_global_color(color);
 
diff --git a/src/cli.rs b/src/cli.rs
index 2282d22f3689c..7778798212868 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -260,6 +260,15 @@ pub struct RootOpts {
     /// `--watch-config`.
     #[arg(long, env = "VECTOR_ALLOW_EMPTY_CONFIG", default_value = "false")]
     pub allow_empty_config: bool,
+
+    /// Raise the file descriptor soft limit (RLIMIT_NOFILE) to the hard limit at startup.
+    ///
+    /// Many systems default the soft limit to 1024 (Linux) or 256 (macOS), which is too low
+    /// when Vector monitors large numbers of log files. This flag raises the soft limit to
+    /// prevent "Too many open files" errors without requiring manual sysadmin intervention.
+    #[cfg(unix)]
+    #[arg(long, env = "VECTOR_RAISE_FD_LIMIT", default_value = "false")]
+    pub raise_fd_limit: bool,
 }
 
 impl RootOpts {
@@ -291,6 +300,89 @@ impl RootOpts {
     }
 }
 
+/// Raise the soft file descriptor limit (RLIMIT_NOFILE) as high as the OS allows.
+///
+/// Many systems default the soft limit to 1024 (Linux) or 256 (macOS), which is too low
+/// for Vector when it monitors large numbers of log files. Raising it prevents
+/// "Too many open files (os error 24)" errors without requiring manual sysadmin intervention.
+///
+/// On Linux, the soft limit is raised to the hard limit (typically 65536+).
+/// On macOS, the hard limit can be RLIM_INFINITY, so we first try the hard limit,
+/// then fall back to the kernel-enforced `kern.maxfilesperproc` (typically 10240).
+#[cfg(unix)]
+pub(crate) fn raise_file_descriptor_limit() {
+    use nix::sys::resource::{Resource, getrlimit, setrlimit};
+    use tracing::{info, warn};
+
+    let (soft, hard) = match getrlimit(Resource::RLIMIT_NOFILE) {
+        Ok(limits) => limits,
+        Err(err) => {
+            warn!(message = "Failed to get file descriptor limit.", %err);
+            return;
+        }
+    };
+
+    if soft >= hard {
+        return; // Already at maximum
+    }
+
+    // Try setting soft limit to hard limit (works on Linux, may fail on macOS)
+    if setrlimit(Resource::RLIMIT_NOFILE, hard, hard).is_ok() {
+        info!(
+            message = "Raised file descriptor limit.",
+            from = soft,
+            to = hard,
+        );
+        return;
+    }
+
+    // On macOS, the hard limit can be RLIM_INFINITY which setrlimit rejects.
+    // Fall back to the kernel-enforced kern.maxfilesperproc.
+    #[cfg(target_os = "macos")]
+    {
+        if let Some(maxfiles) = macos_maxfilesperproc()
+            && maxfiles > soft
+            && setrlimit(Resource::RLIMIT_NOFILE, maxfiles, hard).is_ok()
+        {
+            info!(
+                message = "Raised file descriptor limit.",
+                from = soft,
+                to = maxfiles,
+            );
+            return;
+        }
+    }
+
+    warn!(
+        message = "Failed to raise file descriptor limit.",
+        current = soft,
+        attempted = hard,
+    );
+}
+
+/// Query the macOS kernel limit on per-process open files.
+#[cfg(target_os = "macos")]
+fn macos_maxfilesperproc() -> Option<libc::rlim_t> {
+    let mut maxfiles: libc::c_int = 0;
+    let mut len = std::mem::size_of::<libc::c_int>() as libc::size_t;
+    // Safety: sysctlbyname with a valid null-terminated name and correctly sized output buffer.
+    // No safe wrapper exists for this macOS-specific call.
+    let ret = unsafe {
+        libc::sysctlbyname(
+            c"kern.maxfilesperproc".as_ptr(),
+            &mut maxfiles as *mut libc::c_int as *mut libc::c_void,
+            &mut len,
+            std::ptr::null_mut(),
+            0,
+        )
+    };
+    if ret == 0 && maxfiles > 0 {
+        Some(maxfiles as libc::rlim_t)
+    } else {
+        None
+    }
+}
+
 #[derive(Parser, Debug)]
 #[command(rename_all = "kebab-case")]
 pub enum SubCommand {
@@ -424,3 +516,91 @@ pub fn handle_config_errors(errors: Vec<String>) -> exitcode::ExitCode {
 
     exitcode::CONFIG
 }
+
+#[cfg(test)]
+mod tests {
+    #[cfg(unix)]
+    fn run_in_subprocess(test_name: &str) {
+        let exe = std::env::current_exe().unwrap();
+        let output = std::process::Command::new(exe)
+            .env("__VECTOR_SUBPROCESS_TEST", "1")
+            .args(["--exact", test_name, "--nocapture"])
+            .output()
+            .unwrap();
+        assert!(
+            output.status.success(),
+            "subprocess test failed:\nstdout: {}\nstderr: {}",
+            String::from_utf8_lossy(&output.stdout),
+            String::from_utf8_lossy(&output.stderr),
+        );
+    }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_raise_file_descriptor_limit() {
+        if std::env::var("__VECTOR_SUBPROCESS_TEST").is_err() {
+            run_in_subprocess("cli::tests::test_raise_file_descriptor_limit");
+            return;
+        }
+
+        use nix::sys::resource::{Resource, getrlimit, setrlimit};
+
+        let (original_soft, hard) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+        let lowered = std::cmp::min(original_soft, 256);
+        if lowered < hard {
+            setrlimit(Resource::RLIMIT_NOFILE, lowered, hard).unwrap();
+
+            let (soft_before, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+            assert_eq!(soft_before, lowered);
+
+            super::raise_file_descriptor_limit();
+
+            let (soft_after, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+            assert!(
+                soft_after > lowered,
+                "Expected soft limit to be raised above {lowered}, got {soft_after}"
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_raise_file_descriptor_limit_already_at_max() {
+        if std::env::var("__VECTOR_SUBPROCESS_TEST").is_err() {
+            run_in_subprocess("cli::tests::test_raise_file_descriptor_limit_already_at_max");
+            return;
+        }
+
+        use nix::sys::resource::{Resource, getrlimit, setrlimit};
+
+        let (_, hard) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+
+        if setrlimit(Resource::RLIMIT_NOFILE, hard, hard).is_err() {
+            #[cfg(target_os = "macos")]
+            if let Some(maxfiles) = super::macos_maxfilesperproc() {
+                let _ = setrlimit(Resource::RLIMIT_NOFILE, maxfiles, hard);
+            }
+        }
+
+        let (soft_before, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+
+        super::raise_file_descriptor_limit();
+
+        let (soft_after, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+        assert_eq!(soft_before, soft_after);
+    }
+
+    #[test]
+    #[cfg(target_os = "macos")]
+    fn test_macos_maxfilesperproc_returns_positive() {
+        let result = super::macos_maxfilesperproc();
+        assert!(
+            result.is_some(),
+            "macos_maxfilesperproc() should return Some on macOS"
+        );
+        assert!(
+            result.unwrap() > 0,
+            "kern.maxfilesperproc should be positive"
+        );
+    }
+}