From 719ddafe26eda4f7c74c7771f2252a8127fa0edc Mon Sep 17 00:00:00 2001 From: PiX <69745008+pixincreate@users.noreply.github.com> Date: Mon, 11 May 2026 00:06:43 +0530 Subject: [PATCH 1/4] fix: add multi-tier config path resolution for production - Priority 1: KEYWATCH_CONFIG_PATH env var (override) - Priority 2: ~/.config/keywatch/detectors.toml - Priority 3: executable directory (development) - Priority 4: CWD (last resort) Fixes config not found when installed via cargo install. --- Cargo.lock | 167 +++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 1 + src/detector.rs | 22 ++++--- 3 files changed, 178 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a7fa38..d2f4d69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -58,9 +58,15 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys", + "windows-sys 0.61.2", ] +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + [[package]] name = "clap" version = "4.6.1" @@ -107,12 +113,44 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "glob" version = "0.3.3" @@ -158,6 +196,7 @@ name = "key-watch" version = "1.1.0" dependencies = [ "clap", + "dirs", "glob", "regex", "serde", @@ -165,6 +204,21 @@ dependencies = [ "toml", ] +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libredox" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" +dependencies = [ + "libc", +] + [[package]] name = "memchr" version = "2.8.0" @@ -177,6 +231,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "proc-macro2" version = "1.0.106" @@ -195,6 +255,17 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + 
"getrandom", + "libredox", + "thiserror", +] + [[package]] name = "regex" version = "1.12.3" @@ -293,6 +364,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "toml" version = "1.1.2+spec-1.1.0" @@ -344,12 +435,27 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -359,6 +465,63 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + 
"windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "winnow" version = "1.0.2" diff --git a/Cargo.toml b/Cargo.toml index 36d1c31..5e87879 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,3 +18,4 @@ toml = "1.1.2" serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.149" glob = "0.3.3" +dirs = "5.0" diff --git a/src/detector.rs b/src/detector.rs index a74edb2..911573c 100644 --- a/src/detector.rs +++ b/src/detector.rs @@ -41,16 +41,18 @@ struct 
DetectorConfig { } fn find_detectors_config() -> std::path::PathBuf { - if let Ok(exe_path) = std::env::current_exe() - && let Some(exe_dir) = exe_path.parent() - { - let config_path = exe_dir.join("detectors.toml"); - if config_path.exists() { - return config_path; - } - } - - std::path::PathBuf::from("detectors.toml") + std::env::var("KEYWATCH_CONFIG_PATH") + .map(std::path::PathBuf::from) + .ok() + .filter(|p| p.exists()) + .or_else(|| dirs::config_dir().map(|p| p.join("keywatch").join("detectors.toml"))) + .or_else(|| { + std::env::current_exe() + .ok() + .and_then(|p| p.parent().map(|d| d.join("detectors.toml"))) + }) + .filter(|p| p.exists()) + .unwrap_or_else(|| std::path::PathBuf::from("detectors.toml")) } /// initialize_detectors reads the detector definitions from detectors.toml and returns a vector of Detector. From 089350322eaf9bd678a2921a7595c5c6cc5d544d Mon Sep 17 00:00:00 2001 From: PiX <69745008+pixincreate@users.noreply.github.com> Date: Mon, 11 May 2026 00:09:29 +0530 Subject: [PATCH 2/4] fix: improve detector quality and config resolution - Remove duplicate Google/Firebase/YouTube API key detectors (same pattern) - Fix IPAddressDetector to reject a zero first octet and constrain it to 1-255 - Raise HighEntropyDetector threshold from 32+ to 48+ hex characters to avoid matching MD5 (32) and SHA-1 (40) digests - Remove LinkedInSecretDetector (too broad, no specific pattern) - Check the CWD for detectors.toml right after the env override (before the user config dir) for test compatibility Fixes false positives and removes redundant detectors. 
--- detectors.toml | 23 +++-------------------- src/detector.rs | 4 ++++ 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/detectors.toml b/detectors.toml index f433aad..604ea2d 100644 --- a/detectors.toml +++ b/detectors.toml @@ -9,6 +9,7 @@ name = "GoogleAPIKeyDetector" pattern = "\\bAIza[0-9A-Za-z\\-_]{35}\\b" finding_type = "Google API Key" severity = "HIGH" +description = "Covers Google, Firebase, and YouTube API keys (same pattern)" [[detectors]] name = "SlackTokenDetector" @@ -54,7 +55,7 @@ severity = "LOW" [[detectors]] name = "IPAddressDetector" -pattern = "\\b(?:\\d{1,3}\\.){3}\\d{1,3}\\b" +pattern = "\\b(?:[1-9]\\d?|1\\d{2}|2[0-4]\\d|25[0-5])\\.(?:\\d{1,3}\\.){2}\\d{1,3}\\b" finding_type = "IP Address" severity = "LOW" @@ -102,7 +103,7 @@ severity = "LOW" [[detectors]] name = "HighEntropyDetector" -pattern = "\\b[a-f0-9]{32,}\\b|\\b[A-Fa-f0-9]{32,}\\b" +pattern = "\\b[a-f0-9]{48,}\\b|\\b[A-Fa-f0-9]{48,}\\b" finding_type = "High Entropy String" severity = "MEDIUM" @@ -130,12 +131,6 @@ pattern = "\"[a-zA-Z0-9\\-_=]{35,}\"" finding_type = "Random String" severity = "LOW" -[[detectors]] -name = "FirebaseAPIKeyDetector" -pattern = "AIza[0-9A-Za-z\\-_]{35}" -finding_type = "Firebase API Key" -severity = "HIGH" - [[detectors]] name = "TwilioAPIKeyDetector" @@ -185,12 +180,6 @@ pattern = "sk-[0-9a-zA-Z]{48}" finding_type = "OpenAI API Key" severity = "HIGH" -[[detectors]] -name = "LinkedInSecretDetector" -pattern = "(?i)[0-9a-z]{16}" -finding_type = "LinkedIn Client Secret" -severity = "HIGH" - [[detectors]] name = "AzureStorageAccountKeyDetector" pattern = "DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[^;]+" @@ -239,12 +228,6 @@ pattern = "sq0csp-[0-9A-Za-z\\-_]{43}" finding_type = "Square OAuth Secret" severity = "HIGH" -[[detectors]] -name = "YouTubeAPIKeyDetector" -pattern = "AIza[0-9A-Za-z\\-_]{35}" -finding_type = "YouTube API Key" -severity = "HIGH" - [[detectors]] name = "GoogleOAuthTokenDetector" pattern = 
"ya29\\.[0-9A-Za-z\\-_]+" diff --git a/src/detector.rs b/src/detector.rs index 911573c..c6bf406 100644 --- a/src/detector.rs +++ b/src/detector.rs @@ -45,6 +45,10 @@ fn find_detectors_config() -> std::path::PathBuf { .map(std::path::PathBuf::from) .ok() .filter(|p| p.exists()) + .or_else(|| { + let p = std::path::PathBuf::from("detectors.toml"); + if p.exists() { Some(p) } else { None } + }) .or_else(|| dirs::config_dir().map(|p| p.join("keywatch").join("detectors.toml"))) .or_else(|| { std::env::current_exe() From abc3aa86156601405e362247b21ee71e5b5eb8cf Mon Sep 17 00:00:00 2001 From: PiX <69745008+pixincreate@users.noreply.github.com> Date: Mon, 11 May 2026 00:13:05 +0530 Subject: [PATCH 3/4] refactor: improve code quality (deduplication, enums, single-pass) - scanner.rs: use HashMap for deduplication instead of manual Vec loop - report.rs: add Severity and ScanStatus enums with serde(uppercase) - report.rs: single-pass severity counting instead of triple iteration - lib.rs: update to use Severity enum in tests and exit code logic Reduces code complexity and removes hardcoded string values --- src/lib.rs | 10 ++++----- src/report.rs | 59 +++++++++++++++++++++++++++++++++++--------------- src/scanner.rs | 20 ++++++----------- 3 files changed, 54 insertions(+), 35 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a532e5e..6ddd220 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,9 +17,9 @@ use std::path::{Path, PathBuf}; use std::process::Command as ProcessCommand; use std::time::Instant; +use crate::report::Severity; pub use hooks::{generate_pre_commit_hook, generate_pre_push_hook}; -const SEVERITY_HIGH: &str = "HIGH"; pub const EXIT_CODE_RUNTIME_ERROR: i32 = 2; pub fn run_cli() -> Result<(), String> { @@ -459,7 +459,7 @@ fn calculate_exit_code(findings: &[Finding], exit_mode: &ExitMode) -> i32 { ExitMode::Critical => { let has_high = findings .iter() - .any(|finding| finding.severity == SEVERITY_HIGH); + .any(|finding| finding.severity == Severity::High); if 
has_high { 1 } else { 0 } } ExitMode::Strict => 1, @@ -471,7 +471,7 @@ mod tests { use super::{ calculate_exit_code, ensure_global_hook_target_is_safe, ensure_local_hook_target_is_safe_to_create, managed_global_hooks_dir, - resolve_hook_uninstall_target, resolve_local_hooks_dir_from, + resolve_hook_uninstall_target, resolve_local_hooks_dir_from, Severity, }; use crate::cli::ExitMode; use crate::report::Finding; @@ -646,7 +646,7 @@ mod tests { file_path: "high.txt".to_string(), line_number: 1, finding_type: "High".to_string(), - severity: "HIGH".to_string(), + severity: Severity::High, matched_content: "secret".to_string(), plugin_name: "DetectorHigh".to_string(), }; @@ -654,7 +654,7 @@ mod tests { file_path: "low.txt".to_string(), line_number: 1, finding_type: "Low".to_string(), - severity: "LOW".to_string(), + severity: Severity::Low, matched_content: "token".to_string(), plugin_name: "DetectorLow".to_string(), }; diff --git a/src/report.rs b/src/report.rs index ace398f..2f61460 100644 --- a/src/report.rs +++ b/src/report.rs @@ -1,12 +1,37 @@ use serde::Serialize; +#[derive(Serialize, Clone, PartialEq)] +#[serde(rename_all = "UPPERCASE")] +pub enum Severity { + High, + Medium, + Low, +} + +impl Severity { + pub fn from_string(s: &str) -> Severity { + match s.to_uppercase().as_str() { + "HIGH" => Severity::High, + "MEDIUM" => Severity::Medium, + _ => Severity::Low, + } + } +} + +#[derive(Serialize)] +#[serde(rename_all = "UPPERCASE")] +pub enum ScanStatus { + Pass, + Fail, +} + /// Represents a single secret finding. #[derive(Serialize, Clone)] pub struct Finding { pub file_path: String, pub line_number: usize, pub finding_type: String, - pub severity: String, + pub severity: Severity, pub matched_content: String, pub plugin_name: String, } @@ -22,7 +47,7 @@ pub struct ScanMetadata { /// The overall report. 
#[derive(Serialize)] pub struct Report { - pub status: String, + pub status: ScanStatus, pub findings: Vec, pub files_scanned: usize, pub total_lines: usize, @@ -36,9 +61,13 @@ pub fn create_report( metadata: ScanMetadata, scan_time: String, ) -> Result { - let status = if findings.is_empty() { "PASS" } else { "FAIL" }; + let status = if findings.is_empty() { + ScanStatus::Pass + } else { + ScanStatus::Fail + }; let report = Report { - status: status.into(), + status, findings, files_scanned: metadata.files_scanned, total_lines: metadata.total_lines, @@ -50,17 +79,13 @@ pub fn create_report( } pub fn get_severity_counts(findings: &[Finding]) -> (usize, usize, usize) { - let high = findings - .iter() - .filter(|finding| finding.severity == "HIGH") - .count(); - let medium = findings - .iter() - .filter(|finding| finding.severity == "MEDIUM") - .count(); - let low = findings - .iter() - .filter(|finding| finding.severity == "LOW") - .count(); - (high, medium, low) + let mut counts = (0, 0, 0); + for finding in findings { + counts = match finding.severity { + Severity::High => (counts.0 + 1, counts.1, counts.2), + Severity::Medium => (counts.0, counts.1 + 1, counts.2), + Severity::Low => (counts.0, counts.1, counts.2 + 1), + }; + } + counts } diff --git a/src/scanner.rs b/src/scanner.rs index f8a5d20..c7f945c 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,6 +1,6 @@ use crate::cli::ScanArgs; use crate::detector::initialize_detectors; -use crate::report::{Finding, ScanMetadata}; +use crate::report::{Finding, ScanMetadata, Severity}; use glob::Pattern; use std::fs; use std::path::Path; @@ -27,18 +27,12 @@ pub fn run_scan(args: &ScanArgs) -> Result<(Vec, ScanMetadata), String> target_paths.sort_by(|a, b| a.0.cmp(&b.0)); - let mut unique_paths: Vec<(String, Vec>)> = Vec::new(); + let mut unique_paths: std::collections::HashMap>> = + std::collections::HashMap::new(); for (path, root) in target_paths { - if let Some(last) = unique_paths.last_mut() { - if last.0 == 
path { - if !last.1.contains(&root) { - last.1.push(root); - } - continue; - } - } - unique_paths.push((path, vec![root])); + unique_paths.entry(path).or_default().push(root); } + let unique_paths: Vec<_> = unique_paths.into_iter().collect(); let detectors = initialize_detectors().map_err(|err| err.to_string())?; let (multiline_detectors, line_detectors): (Vec<_>, Vec<_>) = detectors @@ -92,7 +86,7 @@ pub fn run_scan(args: &ScanArgs) -> Result<(Vec, ScanMetadata), String> line_number, matched_content: mat.as_str().to_string(), finding_type: detector.finding_type.clone(), - severity: detector.severity.clone(), + severity: Severity::from_string(&detector.severity), plugin_name: detector.name.clone(), }); } @@ -107,7 +101,7 @@ pub fn run_scan(args: &ScanArgs) -> Result<(Vec, ScanMetadata), String> line_number: line_idx + 1, matched_content: mat.as_str().to_string(), finding_type: detector.finding_type.clone(), - severity: detector.severity.clone(), + severity: Severity::from_string(&detector.severity), plugin_name: detector.name.clone(), }); } From 84e2e1e3e0c37f4b21350a6a51b9932d2204f520 Mon Sep 17 00:00:00 2001 From: PiX <69745008+pixincreate@users.noreply.github.com> Date: Mon, 11 May 2026 00:14:16 +0530 Subject: [PATCH 4/4] feat: add binary file detection to skip binary files - scanner.rs: skip any file whose bytes contain a null byte (inline check in run_scan; no separate helper function) - the whole file is inspected for null bytes, not just the first 8KB - binary files are skipped during scanning to avoid false positives - scanner.rs: dedupe roots per path with a BTreeMap so iteration order is sorted - detector.rs: check exists() on each config candidate so later fallbacks are reached - update tests to use Severity enum --- src/detector.rs | 8 ++++++-- src/lib.rs | 4 ++-- src/report.rs | 2 +- src/scanner.rs | 22 +++++++++++++++------- tests/report_tests.rs | 14 +++++++------- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/detector.rs b/src/detector.rs index c6bf406..577af78 100644 --- a/src/detector.rs +++ b/src/detector.rs @@ -49,13 +49,17 @@ fn find_detectors_config() -> std::path::PathBuf { let p = std::path::PathBuf::from("detectors.toml"); if p.exists() { Some(p) } else 
{ None } }) - .or_else(|| dirs::config_dir().map(|p| p.join("keywatch").join("detectors.toml"))) + .or_else(|| { + dirs::config_dir() + .map(|p| p.join("keywatch").join("detectors.toml")) + .filter(|p| p.exists()) + }) .or_else(|| { std::env::current_exe() .ok() .and_then(|p| p.parent().map(|d| d.join("detectors.toml"))) + .filter(|p| p.exists()) }) - .filter(|p| p.exists()) .unwrap_or_else(|| std::path::PathBuf::from("detectors.toml")) } diff --git a/src/lib.rs b/src/lib.rs index 6ddd220..ef82881 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -469,9 +469,9 @@ fn calculate_exit_code(findings: &[Finding], exit_mode: &ExitMode) -> i32 { #[cfg(test)] mod tests { use super::{ - calculate_exit_code, ensure_global_hook_target_is_safe, + Severity, calculate_exit_code, ensure_global_hook_target_is_safe, ensure_local_hook_target_is_safe_to_create, managed_global_hooks_dir, - resolve_hook_uninstall_target, resolve_local_hooks_dir_from, Severity, + resolve_hook_uninstall_target, resolve_local_hooks_dir_from, }; use crate::cli::ExitMode; use crate::report::Finding; diff --git a/src/report.rs b/src/report.rs index 2f61460..d8948c5 100644 --- a/src/report.rs +++ b/src/report.rs @@ -1,6 +1,6 @@ use serde::Serialize; -#[derive(Serialize, Clone, PartialEq)] +#[derive(Serialize, Clone, PartialEq, Copy)] #[serde(rename_all = "UPPERCASE")] pub enum Severity { High, diff --git a/src/scanner.rs b/src/scanner.rs index c7f945c..0bc1b00 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -27,10 +27,13 @@ pub fn run_scan(args: &ScanArgs) -> Result<(Vec, ScanMetadata), String> target_paths.sort_by(|a, b| a.0.cmp(&b.0)); - let mut unique_paths: std::collections::HashMap>> = - std::collections::HashMap::new(); + let mut unique_paths: std::collections::BTreeMap>> = + std::collections::BTreeMap::new(); for (path, root) in target_paths { - unique_paths.entry(path).or_default().push(root); + let roots = unique_paths.entry(path).or_default(); + if !roots.contains(&root) { + roots.push(root); + } } let 
unique_paths: Vec<_> = unique_paths.into_iter().collect(); @@ -69,10 +72,15 @@ pub fn run_scan(args: &ScanArgs) -> Result<(Vec, ScanMetadata), String> } let full_content = match fs::read(&path) { - Ok(bytes) => match String::from_utf8(bytes) { - Ok(content) => content, - Err(_) => continue, - }, + Ok(bytes) => { + if bytes.contains(&0) { + continue; + } + match String::from_utf8(bytes) { + Ok(content) => content, + Err(_) => continue, + } + } Err(_) => continue, }; diff --git a/tests/report_tests.rs b/tests/report_tests.rs index a9bece2..cf06f94 100644 --- a/tests/report_tests.rs +++ b/tests/report_tests.rs @@ -1,4 +1,4 @@ -use key_watch::report::{Finding, ScanMetadata, create_report, get_severity_counts}; +use key_watch::report::{Finding, ScanMetadata, Severity, create_report, get_severity_counts}; #[test] fn test_create_report() { @@ -31,7 +31,7 @@ fn test_report_with_findings() { file_path: "secret.txt".to_string(), line_number: 10, finding_type: "AWS Key".to_string(), - severity: "HIGH".to_string(), + severity: Severity::High, matched_content: "AKIATESTKEY".to_string(), plugin_name: "AWSKeyDetector".to_string(), }]; @@ -56,7 +56,7 @@ fn test_create_report_includes_excluded_files_and_plugin_metadata() { file_path: "secret.txt".to_string(), line_number: 7, finding_type: "API Token".to_string(), - severity: "MEDIUM".to_string(), + severity: Severity::Medium, matched_content: "tok_test_123".to_string(), plugin_name: "TokenDetector".to_string(), }]; @@ -84,7 +84,7 @@ fn test_get_severity_counts_groups_high_medium_low() { file_path: "a.txt".to_string(), line_number: 1, finding_type: "A".to_string(), - severity: "HIGH".to_string(), + severity: Severity::High, matched_content: "a".to_string(), plugin_name: "DetectorA".to_string(), }, @@ -92,7 +92,7 @@ fn test_get_severity_counts_groups_high_medium_low() { file_path: "b.txt".to_string(), line_number: 2, finding_type: "B".to_string(), - severity: "MEDIUM".to_string(), + severity: Severity::Medium, matched_content: 
"b".to_string(), plugin_name: "DetectorB".to_string(), }, @@ -100,7 +100,7 @@ fn test_get_severity_counts_groups_high_medium_low() { file_path: "c.txt".to_string(), line_number: 3, finding_type: "C".to_string(), - severity: "LOW".to_string(), + severity: Severity::Low, matched_content: "c".to_string(), plugin_name: "DetectorC".to_string(), }, @@ -108,7 +108,7 @@ fn test_get_severity_counts_groups_high_medium_low() { file_path: "d.txt".to_string(), line_number: 4, finding_type: "D".to_string(), - severity: "HIGH".to_string(), + severity: Severity::High, matched_content: "d".to_string(), plugin_name: "DetectorD".to_string(), },