diff --git a/Cargo.toml b/Cargo.toml
index 490155984fe90..adabd06a6b09e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -457,7 +457,8 @@ byteorder = "1.5.0"
 windows-service = "0.8.0"
 
 [target.'cfg(unix)'.dependencies]
-nix = { version = "0.31", default-features = false, features = ["socket", "signal", "fs"] }
+libc.workspace = true
+nix = { version = "0.31", default-features = false, features = ["socket", "signal", "fs", "resource"] }
 
 [target.'cfg(target_os = "linux")'.dependencies]
 netlink-packet-utils = "0.5.2"
diff --git a/changelog.d/raise_fd_limit_cli_flag.fix.md b/changelog.d/raise_fd_limit_cli_flag.fix.md
new file mode 100644
index 0000000000000..10b8c2c306856
--- /dev/null
+++ b/changelog.d/raise_fd_limit_cli_flag.fix.md
@@ -0,0 +1,7 @@
+A new `--raise-fd-limit` CLI flag (or `VECTOR_RAISE_FD_LIMIT` environment variable)
+raises the file descriptor soft limit to the hard limit at startup. This prevents
+"Too many open files" errors when Vector monitors large numbers of log files. On
+macOS, Vector falls back to the kernel-enforced per-process file limit if the hard
+limit is too high.
+
+authors: vparfonov
diff --git a/src/app.rs b/src/app.rs
index 9e8a415608a8f..45066a590a57a 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -207,6 +207,11 @@ impl Application {
             opts.root.internal_log_rate_limit,
         );
 
+        #[cfg(unix)]
+        if opts.root.raise_fd_limit {
+            crate::cli::raise_file_descriptor_limit();
+        }
+
         // Set global color preference for downstream modules
         crate::set_global_color(color);
 
diff --git a/src/cli.rs b/src/cli.rs
index 2282d22f3689c..7778798212868 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -260,6 +260,15 @@ pub struct RootOpts {
     /// `--watch-config`.
     #[arg(long, env = "VECTOR_ALLOW_EMPTY_CONFIG", default_value = "false")]
     pub allow_empty_config: bool,
+
+    /// Raise the file descriptor soft limit (RLIMIT_NOFILE) to the hard limit at startup.
+    ///
+    /// Many systems default the soft limit to 1024 (Linux) or 256 (macOS), which is too low
+    /// when Vector monitors large numbers of log files. This flag raises the soft limit to
+    /// prevent "Too many open files" errors without requiring manual sysadmin intervention.
+    #[cfg(unix)]
+    #[arg(long, env = "VECTOR_RAISE_FD_LIMIT", default_value = "false")]
+    pub raise_fd_limit: bool,
 }
 
 impl RootOpts {
@@ -291,6 +300,99 @@ impl RootOpts {
     }
 }
 
+/// Raise the soft file descriptor limit (RLIMIT_NOFILE) as high as the OS allows.
+///
+/// Many systems default the soft limit to 1024 (Linux) or 256 (macOS), which is too low
+/// for Vector when it monitors large numbers of log files. Raising it prevents
+/// "Too many open files (os error 24)" errors without requiring manual sysadmin intervention.
+///
+/// On Linux, the soft limit is raised to the hard limit (typically 65536+).
+/// On macOS, the hard limit can be RLIM_INFINITY, so we first try the hard limit,
+/// then fall back to the kernel-enforced `kern.maxfilesperproc` (typically 10240).
+///
+/// This is best-effort: failures are logged as warnings (including the OS error
+/// from the hard-limit attempt) and never abort startup.
+#[cfg(unix)]
+pub(crate) fn raise_file_descriptor_limit() {
+    use nix::sys::resource::{Resource, getrlimit, setrlimit};
+    use tracing::{info, warn};
+
+    let (soft, hard) = match getrlimit(Resource::RLIMIT_NOFILE) {
+        Ok(limits) => limits,
+        Err(err) => {
+            warn!(message = "Failed to get file descriptor limit.", %err);
+            return;
+        }
+    };
+
+    if soft >= hard {
+        return; // Already at maximum
+    }
+
+    // Try setting soft limit to hard limit (works on Linux, may fail on macOS).
+    // Keep the error so the final warning can say why the attempt was rejected.
+    let err = match setrlimit(Resource::RLIMIT_NOFILE, hard, hard) {
+        Ok(()) => {
+            info!(
+                message = "Raised file descriptor limit.",
+                from = soft,
+                to = hard,
+            );
+            return;
+        }
+        Err(err) => err,
+    };
+
+    // On macOS, the hard limit can be RLIM_INFINITY which setrlimit rejects.
+    // Fall back to the kernel-enforced kern.maxfilesperproc.
+    #[cfg(target_os = "macos")]
+    {
+        if let Some(maxfiles) = macos_maxfilesperproc()
+            && maxfiles > soft
+            && setrlimit(Resource::RLIMIT_NOFILE, maxfiles, hard).is_ok()
+        {
+            info!(
+                message = "Raised file descriptor limit.",
+                from = soft,
+                to = maxfiles,
+            );
+            return;
+        }
+    }
+
+    warn!(
+        message = "Failed to raise file descriptor limit.",
+        current = soft,
+        attempted = hard,
+        %err,
+    );
+}
+
+/// Query the macOS kernel limit on per-process open files.
+///
+/// Returns `None` if the `sysctlbyname` call fails or reports a non-positive value.
+#[cfg(target_os = "macos")]
+fn macos_maxfilesperproc() -> Option<libc::rlim_t> {
+    let mut maxfiles: libc::c_int = 0;
+    let mut len = std::mem::size_of::<libc::c_int>() as libc::size_t;
+    // Safety: sysctlbyname with a valid null-terminated name and correctly sized output buffer.
+    // No safe wrapper exists for this macOS-specific call.
+    let ret = unsafe {
+        libc::sysctlbyname(
+            c"kern.maxfilesperproc".as_ptr(),
+            &mut maxfiles as *mut libc::c_int as *mut libc::c_void,
+            &mut len,
+            std::ptr::null_mut(),
+            0,
+        )
+    };
+    if ret == 0 && maxfiles > 0 {
+        Some(maxfiles as libc::rlim_t)
+    } else {
+        None
+    }
+}
+
 #[derive(Parser, Debug)]
 #[command(rename_all = "kebab-case")]
 pub enum SubCommand {
@@ -424,3 +526,91 @@ pub fn handle_config_errors(errors: Vec) -> exitcode::ExitCode {
 
     exitcode::CONFIG
 }
+
+#[cfg(test)]
+mod tests {
+    #[cfg(unix)]
+    fn run_in_subprocess(test_name: &str) {
+        let exe = std::env::current_exe().unwrap();
+        let output = std::process::Command::new(exe)
+            .env("__VECTOR_SUBPROCESS_TEST", "1")
+            .args(["--exact", test_name, "--nocapture"])
+            .output()
+            .unwrap();
+        assert!(
+            output.status.success(),
+            "subprocess test failed:\nstdout: {}\nstderr: {}",
+            String::from_utf8_lossy(&output.stdout),
+            String::from_utf8_lossy(&output.stderr),
+        );
+    }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_raise_file_descriptor_limit() {
+        if std::env::var("__VECTOR_SUBPROCESS_TEST").is_err() {
+            run_in_subprocess("cli::tests::test_raise_file_descriptor_limit");
+            return;
+        }
+
+        use nix::sys::resource::{Resource, getrlimit, setrlimit};
+
+        let (original_soft, hard) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+        let lowered = std::cmp::min(original_soft, 256);
+        if lowered < hard {
+            setrlimit(Resource::RLIMIT_NOFILE, lowered, hard).unwrap();
+
+            let (soft_before, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+            assert_eq!(soft_before, lowered);
+
+            super::raise_file_descriptor_limit();
+
+            let (soft_after, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+            assert!(
+                soft_after > lowered,
+                "Expected soft limit to be raised above {lowered}, got {soft_after}"
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_raise_file_descriptor_limit_already_at_max() {
+        if std::env::var("__VECTOR_SUBPROCESS_TEST").is_err() {
+            run_in_subprocess("cli::tests::test_raise_file_descriptor_limit_already_at_max");
+            return;
+        }
+
+        use nix::sys::resource::{Resource, getrlimit, setrlimit};
+
+        let (_, hard) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+
+        if setrlimit(Resource::RLIMIT_NOFILE, hard, hard).is_err() {
+            #[cfg(target_os = "macos")]
+            if let Some(maxfiles) = super::macos_maxfilesperproc() {
+                let _ = setrlimit(Resource::RLIMIT_NOFILE, maxfiles, hard);
+            }
+        }
+
+        let (soft_before, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+
+        super::raise_file_descriptor_limit();
+
+        let (soft_after, _) = getrlimit(Resource::RLIMIT_NOFILE).unwrap();
+        assert_eq!(soft_before, soft_after);
+    }
+
+    #[test]
+    #[cfg(target_os = "macos")]
+    fn test_macos_maxfilesperproc_returns_positive() {
+        let result = super::macos_maxfilesperproc();
+        assert!(
+            result.is_some(),
+            "macos_maxfilesperproc() should return Some on macOS"
+        );
+        assert!(
+            result.unwrap() > 0,
+            "kern.maxfilesperproc should be positive"
+        );
+    }
+}