From 23b953511d89514b3e9feb1032eb85d2df607112 Mon Sep 17 00:00:00 2001 From: Alex Holmberg Date: Sat, 7 Jun 2025 23:10:06 +0200 Subject: [PATCH 1/6] fix: re-applying release-plz for bump improvements --- .github/workflows/release-plz.yml | 79 +++---------------------------- release-plz.toml | 54 --------------------- 2 files changed, 6 insertions(+), 127 deletions(-) delete mode 100644 release-plz.toml diff --git a/.github/workflows/release-plz.yml b/.github/workflows/release-plz.yml index 4d19d2a0..7e0b996a 100644 --- a/.github/workflows/release-plz.yml +++ b/.github/workflows/release-plz.yml @@ -1,39 +1,18 @@ name: Release-plz on: - workflow_dispatch: # Manual releases only - inputs: - release_type: - description: 'Type of release (patch, minor, auto) - NO MAJOR BUMPS' - required: false - default: 'auto' - type: choice - options: - - auto - - patch - - minor - dry_run: - description: 'Dry run (no actual release)' - required: false - default: false - type: boolean + push: + branches: + - main jobs: release-plz-release: name: Release-plz release runs-on: ubuntu-latest - if: ${{ github.repository_owner == 'syncable-dev' && github.event.inputs.dry_run != 'true' }} - + if: ${{ github.repository_owner == 'syncable-dev' }} permissions: contents: write steps: - - name: Show manual release inputs - run: | - echo "πŸš€ Manual Release Configuration:" - echo "Release Type: ${{ github.event.inputs.release_type || 'auto' }}" - echo "Dry Run: ${{ github.event.inputs.dry_run || 'false' }}" - echo "⚠️ Version Constraint: Will stay in 0.x.x range (no 1.0.0 bumps)" - - name: Checkout repository uses: actions/checkout@v4 with: @@ -41,13 +20,6 @@ jobs: token: ${{ secrets.RELEASE_PLZ_TOKEN }} - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable - - name: Check current version and constraints - run: | - CURRENT_VERSION=$(grep '^version =' Cargo.toml | cut -d'"' -f2) - echo "πŸ“Š Current version: $CURRENT_VERSION" - echo "πŸ“‹ Release type: ${{ 
github.event.inputs.release_type || 'auto' }}" - echo "🎯 Version constraint: Max 0.99.99 (stays in 0.x.x range)" - echo "βœ… Safe from automatic 1.0.0 bumps" - name: Run release-plz uses: release-plz/action@v0.5 with: @@ -56,44 +28,10 @@ jobs: GITHUB_TOKEN: ${{ secrets.RELEASE_PLZ_TOKEN }} CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - release-plz-dry-run: - name: Release-plz dry run - runs-on: ubuntu-latest - if: ${{ github.repository_owner == 'syncable-dev' && github.event.inputs.dry_run == 'true' }} - permissions: - contents: read - steps: - - name: Show dry run information - run: | - echo "πŸ§ͺ DRY RUN MODE - No actual release will be performed" - echo "Release Type: ${{ github.event.inputs.release_type || 'auto' }}" - echo "This would analyze the repository and show what changes would be released." - - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - token: ${{ secrets.RELEASE_PLZ_TOKEN }} - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable - - name: Analyze release changes (dry run) - run: | - CURRENT_VERSION=$(grep '^version =' Cargo.toml | cut -d'"' -f2) - echo "πŸ“Š Analyzing repository for potential release..." - echo "Current version: $CURRENT_VERSION" - echo "Release type: ${{ github.event.inputs.release_type || 'auto' }}" - echo "🎯 Version constraint: Max 0.99.99 (will NOT bump to 1.0.0)" - echo "" - echo "Recent commits:" - git log --oneline -10 - echo "" - echo "βœ… Dry run complete - no actual release performed" - echo "πŸ›‘οΈ Protected from major version bumps!" 
- release-plz-pr: name: Release-plz PR runs-on: ubuntu-latest - if: ${{ github.repository_owner == 'syncable-dev' && github.event.inputs.dry_run != 'true' }} + if: ${{ github.repository_owner == 'syncable-dev' }} permissions: pull-requests: write contents: write @@ -101,11 +39,6 @@ jobs: group: release-plz-${{ github.ref }} cancel-in-progress: false steps: - - name: Show manual release inputs - run: | - echo "πŸ“ Creating Release PR with configuration:" - echo "Release Type: ${{ github.event.inputs.release_type || 'auto' }}" - - name: Checkout repository uses: actions/checkout@v4 with: @@ -119,4 +52,4 @@ jobs: command: release-pr env: GITHUB_TOKEN: ${{ secrets.RELEASE_PLZ_TOKEN }} - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} \ No newline at end of file diff --git a/release-plz.toml b/release-plz.toml deleted file mode 100644 index 40947dea..00000000 --- a/release-plz.toml +++ /dev/null @@ -1,54 +0,0 @@ -[workspace] -# Manual releases only - don't release on every commit -release_always = false - -# Allow dirty working directories (for CI) -allow_dirty = false - -# Enable git operations -git_release_enable = true -git_tag_enable = true - -# Enable publishing to crates.io -publish = true - -# Changelog updates -changelog_update = true - -# Semver check -semver_check = true - -# IMPORTANT: Features always increment minor version in 0.x releases -# This prevents features from bumping 0.x to 1.0 -features_always_increment_minor = true - -# Optional: Only release on certain commit types -# This filters which commits can trigger a release -# Uncomment to be more selective: -# release_commits = "^(fix|feat|perf|docs):" - -[[package]] -name = "syncable-cli" - -# This package should be released -release = true - -# Use semantic versioning checks -semver_check = true - -# Publish this package -publish = true - -# Override at package level to ensure features don't bump major 
-features_always_increment_minor = true - -# Version constraints - stay in 0.x.x range -# Note: version_max might not be supported in this version -# We'll handle version constraints manually - -[changelog] -# Changelog will be updated -# Using default configuration which follows Keep a Changelog format - -# Protect breaking changes from being ignored -protect_breaking_commits = false \ No newline at end of file From e7ceaf23b3f4dcb192890be2b24e03edafa930d2 Mon Sep 17 00:00:00 2001 From: Alex Holmberg Date: Sun, 8 Jun 2025 09:47:04 +0200 Subject: [PATCH 2/6] patch: updated cli-display-modes.md file for better visualization --- docs/cli-display-modes.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/cli-display-modes.md b/docs/cli-display-modes.md index 96a6f01a..a72530d7 100644 --- a/docs/cli-display-modes.md +++ b/docs/cli-display-modes.md @@ -20,34 +20,34 @@ sync-ctl analyze . πŸ“Š PROJECT ANALYSIS DASHBOARD ═══════════════════════════════════════════════════════════════════════════════════════════════════ -β”Œβ”€ Architecture Overview ────────────────────────────────────────────────────────────────────────┐ +β”Œβ”€ Architecture Overview ─────────────────────────────────────────────────────────────────────────┐ β”‚ Type: Monorepo (3 projects) β”‚ β”‚ Pattern: Fullstack β”‚ β”‚ Full-stack app with frontend/backend separation β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -β”Œβ”€ Technology Stack ─────────────────────────────────────────────────────────────────────────────┐ +β”Œβ”€ Technology Stack ──────────────────────────────────────────────────────────────────────────────┐ β”‚ Languages: TypeScript β”‚ β”‚ Frameworks: Encore, Tanstack Start β”‚ β”‚ 
Databases: Drizzle ORM β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”Œβ”€ Projects Matrix ──────────────────────────────────────────────────────────────────────────────┐ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ Project β”‚ Type β”‚ Languages β”‚ Main Tech β”‚ Ports β”‚ Docker β”‚ Deps β”‚ β”‚ -β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€ β”‚ -β”‚ β”‚ βš™οΈ backend β”‚ Backend β”‚ TypeScriptβ”‚ Encore β”‚ 4000 β”‚ βœ“ β”‚ 32 β”‚ β”‚ -β”‚ β”‚ πŸ—οΈ devops-agent β”‚ Infrastructureβ”‚ TypeScriptβ”‚ - β”‚ - β”‚ βœ— β”‚ 5 β”‚ β”‚ -β”‚ β”‚ 🌐 frontend β”‚ Frontend β”‚ TypeScriptβ”‚ Tanstack Start β”‚ 3000 β”‚ βœ“ β”‚ 123 β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€ Docker Infrastructure 
────────────────────────────────────────────────────────────────────────┐ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Project β”‚ Type β”‚ Languages β”‚ Main Tech β”‚ Ports β”‚ Docker β”‚ Deps β”‚ β”‚ +β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ β”‚ +β”‚ β”‚ backend β”‚ Backend β”‚ TypeScriptβ”‚ Encore β”‚ 4000 β”‚ βœ“ β”‚ 32 β”‚ β”‚ +β”‚ β”‚ devops-agent β”‚ Infrastructure β”‚ TypeScript β”‚ - β”‚ - β”‚ βœ— β”‚ 5 β”‚ β”‚ +β”‚ β”‚ frontend β”‚ Frontend β”‚ TypeScriptβ”‚ Tanstack Start β”‚ 3000 β”‚ βœ“ β”‚ 123 β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + +β”Œβ”€ Docker Infrastructure ─────────────────────────────────────────────────────────────────────────┐ β”‚ Dockerfiles: 2 β”‚ β”‚ Compose Files: 2 β”‚ β”‚ Total Services: 5 β”‚ β”‚ Orchestration Patterns: Microservices β”‚ -β”‚ ───────────────────────────────────────────────────────────────────────────────────────────── β”‚ +β”‚ 
────────────────────────────────────────────────────────────────────────────────────────────────│ β”‚ Service Connectivity: β”‚ β”‚ encore-postgres: 5431:5432 β”‚ β”‚ encore: 4000:8080 β†’ encore-postgres β”‚ @@ -55,8 +55,8 @@ sync-ctl analyze . β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”Œβ”€ Analysis Metrics ─────────────────────────────────────────────────────────────────────────────┐ -β”‚ ⏱️ Duration: 57ms πŸ“ Files: 294 🎯 Score: 87% πŸ”– Version: 0.3.0 β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +β”‚ Duration: 57ms Files: 294 Score: 87% Version: 0.3.0 β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ═══════════════════════════════════════════════════════════════════════════════════════════════════ ``` From 0f78da8ecd1ba3e5a689ca233a33097e7fb8455e Mon Sep 17 00:00:00 2001 From: Alex Holmberg Date: Sun, 8 Jun 2025 18:02:14 +0200 Subject: [PATCH 3/6] feat: huge improvements towards security and secret variable detection. With the new update we don't get false positive towards files name conventions such as .env.samples, .env.templates, env.examples etc. We are also skipping if files are ignored within .gitignore, since those files aren't being track. 
upcoming is to ensure git cache isn't storing .gitignored files, to ensure mistakes doesn't happen --- Cargo.lock | 11 + Cargo.toml | 1 + examples/enhanced_security.rs | 123 ++++ src/analyzer/frameworks/go.rs | 4 +- src/analyzer/frameworks/rust.rs | 12 +- src/analyzer/mod.rs | 10 + src/analyzer/security/config.rs | 318 +++++++++ src/analyzer/security/core.rs | 94 +++ src/analyzer/security/gitignore.rs | 531 ++++++++++++++ src/analyzer/security/javascript.rs | 1013 +++++++++++++++++++++++++++ src/analyzer/security/mod.rs | 77 ++ src/analyzer/security/patterns.rs | 377 ++++++++++ src/analyzer/security_analyzer.rs | 390 ++++++++++- src/main.rs | 317 ++++++--- 14 files changed, 3164 insertions(+), 114 deletions(-) create mode 100644 examples/enhanced_security.rs create mode 100644 src/analyzer/security/config.rs create mode 100644 src/analyzer/security/core.rs create mode 100644 src/analyzer/security/gitignore.rs create mode 100644 src/analyzer/security/javascript.rs create mode 100644 src/analyzer/security/mod.rs create mode 100644 src/analyzer/security/patterns.rs diff --git a/Cargo.lock b/Cargo.lock index a5edabf5..5e48a6c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3363,6 +3363,7 @@ dependencies = [ "serde_yaml", "tempfile", "tera", + "term_size", "termcolor", "textwrap", "thiserror 1.0.69", @@ -3474,6 +3475,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "term_size" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e4129646ca0ed8f45d09b929036bafad5377103edd06e50bf574b353d2b08d9" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "termcolor" version = "1.4.1" diff --git a/Cargo.toml b/Cargo.toml index 23e07397..98e67a90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ termcolor = "1" chrono = { version = "0.4", features = ["serde"] } colored = "2" prettytable = "0.10" +term_size = "0.3" # Vulnerability checking dependencies rustsec = "0.29" diff --git a/examples/enhanced_security.rs 
b/examples/enhanced_security.rs new file mode 100644 index 00000000..3402ac6d --- /dev/null +++ b/examples/enhanced_security.rs @@ -0,0 +1,123 @@ +//! Example: Enhanced Security Analysis +//! +//! This example demonstrates the enhanced security analysis capabilities +//! including the new modular JavaScript/TypeScript security analyzer. + +use std::path::Path; +use syncable_cli::analyzer::{analyze_project, SecurityAnalyzer}; + +fn main() -> Result<(), Box> { + env_logger::init(); + + // For this example, analyze the current directory or a provided path + let project_path = std::env::args() + .nth(1) + .map(|p| Path::new(&p).to_path_buf()) + .unwrap_or_else(|| std::env::current_dir().unwrap()); + + println!("πŸ” Analyzing project security for: {}", project_path.display()); + + // First, perform regular project analysis to detect languages + let analysis = analyze_project(&project_path)?; + + println!("\nπŸ“‹ Detected Languages:"); + for lang in &analysis.languages { + println!(" β€’ {} (confidence: {:.1}%)", lang.name, lang.confidence * 100.0); + } + + println!("\nπŸ”§ Detected Technologies:"); + for tech in &analysis.technologies { + println!(" β€’ {} v{} ({:?})", + tech.name, + tech.version.as_deref().unwrap_or("unknown"), + tech.category + ); + } + + // Check if this is a JavaScript/TypeScript project + let has_js = analysis.languages.iter() + .any(|lang| matches!(lang.name.as_str(), "JavaScript" | "TypeScript" | "JSX" | "TSX")); + + if has_js { + println!("\nβœ… JavaScript/TypeScript project detected! 
Using enhanced security analysis..."); + } else { + println!("\nπŸ“„ Using general security analysis..."); + } + + // Run enhanced security analysis + println!("\nπŸ›‘οΈ Starting enhanced security analysis..."); + + let mut security_analyzer = SecurityAnalyzer::new()?; + let security_report = security_analyzer.analyze_security_enhanced(&analysis)?; + + // Display results + println!("\nπŸ“Š Security Analysis Results:"); + println!(" Overall Score: {:.1}/100", security_report.overall_score); + println!(" Risk Level: {:?}", security_report.risk_level); + println!(" Total Findings: {}", security_report.total_findings); + + if security_report.total_findings > 0 { + println!("\n🚨 Security Findings:"); + + // Group findings by severity + for severity in [ + syncable_cli::analyzer::security::core::SecuritySeverity::Critical, + syncable_cli::analyzer::security::core::SecuritySeverity::High, + syncable_cli::analyzer::security::core::SecuritySeverity::Medium, + syncable_cli::analyzer::security::core::SecuritySeverity::Low, + ] { + let findings: Vec<_> = security_report.findings.iter() + .filter(|f| f.severity == severity) + .collect(); + + if !findings.is_empty() { + let severity_icon = match severity { + syncable_cli::analyzer::security::core::SecuritySeverity::Critical => "πŸ”΄", + syncable_cli::analyzer::security::core::SecuritySeverity::High => "🟠", + syncable_cli::analyzer::security::core::SecuritySeverity::Medium => "🟑", + syncable_cli::analyzer::security::core::SecuritySeverity::Low => "πŸ”΅", + _ => "βšͺ", + }; + + println!("\n{} {:?} Severity ({} findings):", severity_icon, severity, findings.len()); + + for finding in findings.iter().take(3) { // Show first 3 of each severity + println!(" πŸ“ {}", finding.title); + if let Some(ref file_path) = finding.file_path { + let relative_path = file_path.strip_prefix(&project_path) + .unwrap_or(file_path); + print!(" πŸ“„ {}", relative_path.display()); + if let Some(line) = finding.line_number { + print!(":{}", line); + } + 
println!(); + } + println!(" πŸ’‘ {}", finding.description); + + if !finding.remediation.is_empty() { + println!(" πŸ”§ Remediation: {}", finding.remediation[0]); + } + println!(); + } + + if findings.len() > 3 { + println!(" ... and {} more findings", findings.len() - 3); + } + } + } + + // Show recommendations + if !security_report.recommendations.is_empty() { + println!("\nπŸ’‘ Recommendations:"); + for (i, recommendation) in security_report.recommendations.iter().enumerate() { + println!(" {}. {}", i + 1, recommendation); + } + } + } else { + println!("βœ… No security issues detected!"); + } + + println!("\n✨ Enhanced security analysis complete!"); + + Ok(()) +} \ No newline at end of file diff --git a/src/analyzer/frameworks/go.rs b/src/analyzer/frameworks/go.rs index 44d1ade8..3faa51ab 100644 --- a/src/analyzer/frameworks/go.rs +++ b/src/analyzer/frameworks/go.rs @@ -232,12 +232,12 @@ fn get_go_technology_rules() -> Vec { // CLI FRAMEWORKS TechnologyRule { name: "Cobra".to_string(), - category: TechnologyCategory::Library(LibraryType::Utility), + category: TechnologyCategory::Library(LibraryType::CLI), confidence: 0.85, dependency_patterns: vec!["github.com/spf13/cobra".to_string(), "cobra".to_string()], requires: vec![], conflicts_with: vec![], - is_primary_indicator: false, + is_primary_indicator: true, alternative_names: vec!["spf13/cobra".to_string()], }, diff --git a/src/analyzer/frameworks/rust.rs b/src/analyzer/frameworks/rust.rs index e9c07f1d..1b2c7cff 100644 --- a/src/analyzer/frameworks/rust.rs +++ b/src/analyzer/frameworks/rust.rs @@ -414,32 +414,32 @@ fn get_rust_technology_rules() -> Vec { // CLI FRAMEWORKS TechnologyRule { name: "clap".to_string(), - category: TechnologyCategory::Library(LibraryType::Utility), + category: TechnologyCategory::Library(LibraryType::CLI), confidence: 0.85, dependency_patterns: vec!["clap".to_string()], requires: vec![], conflicts_with: vec![], - is_primary_indicator: false, + is_primary_indicator: true, 
alternative_names: vec![], }, TechnologyRule { name: "structopt".to_string(), - category: TechnologyCategory::Library(LibraryType::Utility), + category: TechnologyCategory::Library(LibraryType::CLI), confidence: 0.85, dependency_patterns: vec!["structopt".to_string()], requires: vec![], conflicts_with: vec![], - is_primary_indicator: false, + is_primary_indicator: true, alternative_names: vec![], }, TechnologyRule { name: "argh".to_string(), - category: TechnologyCategory::Library(LibraryType::Utility), + category: TechnologyCategory::Library(LibraryType::CLI), confidence: 0.85, dependency_patterns: vec!["argh".to_string()], requires: vec![], conflicts_with: vec![], - is_primary_indicator: false, + is_primary_indicator: true, alternative_names: vec![], }, diff --git a/src/analyzer/mod.rs b/src/analyzer/mod.rs index 5d19830f..4951c81a 100644 --- a/src/analyzer/mod.rs +++ b/src/analyzer/mod.rs @@ -19,6 +19,7 @@ pub mod language_detector; pub mod project_context; pub mod vulnerability_checker; pub mod security_analyzer; +pub mod security; pub mod tool_installer; pub mod monorepo_detector; pub mod docker_analyzer; @@ -36,6 +37,13 @@ pub use security_analyzer::{ SecurityCategory, ComplianceStatus, SecurityAnalysisConfig }; +// Re-export new modular security analysis types +pub use security::{ + ModularSecurityAnalyzer, JavaScriptSecurityAnalyzer, + SecretPatternManager +}; +pub use security::config::SecurityConfigPreset; + // Re-export monorepo analysis types pub use monorepo_detector::{ MonorepoDetectionConfig, analyze_monorepo, analyze_monorepo_with_config @@ -102,6 +110,8 @@ pub enum LibraryType { HttpClient, /// Authentication (Auth0, Firebase Auth) Authentication, + /// CLI frameworks (clap, structopt, argh) + CLI, /// Other specific types Other(String), } diff --git a/src/analyzer/security/config.rs b/src/analyzer/security/config.rs new file mode 100644 index 00000000..473c083e --- /dev/null +++ b/src/analyzer/security/config.rs @@ -0,0 +1,318 @@ +//! 
# Security Analysis Configuration +//! +//! Configuration options for customizing security analysis behavior. + +use serde::{Deserialize, Serialize}; + +/// Configuration for security analysis +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SecurityAnalysisConfig { + // General settings + pub include_low_severity: bool, + pub include_info_level: bool, + + // Analysis scope + pub check_secrets: bool, + pub check_code_patterns: bool, + pub check_infrastructure: bool, + pub check_compliance: bool, + + // Language-specific settings + pub javascript_enabled: bool, + pub python_enabled: bool, + pub rust_enabled: bool, + + // Framework-specific settings + pub frameworks_to_check: Vec, + + // File filtering + pub ignore_patterns: Vec, + pub include_patterns: Vec, + + // Git integration + pub skip_gitignored_files: bool, + pub downgrade_gitignored_severity: bool, + pub check_git_history: bool, + + // Environment variable handling + pub check_env_files: bool, + pub warn_on_public_env_vars: bool, + pub sensitive_env_keywords: Vec, + + // JavaScript/TypeScript specific + pub check_package_json: bool, + pub check_node_modules: bool, + pub framework_env_prefixes: Vec, + + // Output customization + pub max_findings_per_file: Option, + pub deduplicate_findings: bool, + pub group_by_severity: bool, + + // Performance settings + pub max_file_size_mb: Option, + pub parallel_analysis: bool, + pub analysis_timeout_seconds: Option, +} + +impl Default for SecurityAnalysisConfig { + fn default() -> Self { + Self { + // General settings + include_low_severity: false, + include_info_level: false, + + // Analysis scope + check_secrets: true, + check_code_patterns: true, + check_infrastructure: true, + check_compliance: false, // Disabled by default as it requires more setup + + // Language-specific settings + javascript_enabled: true, + python_enabled: true, + rust_enabled: true, + + // Framework-specific settings + frameworks_to_check: vec![ + "React".to_string(), + 
"Vue".to_string(), + "Angular".to_string(), + "Next.js".to_string(), + "Vite".to_string(), + "Express".to_string(), + "Django".to_string(), + "Spring Boot".to_string(), + ], + + // File filtering + ignore_patterns: vec![ + "node_modules".to_string(), + ".git".to_string(), + "target".to_string(), + "build".to_string(), + ".next".to_string(), + "coverage".to_string(), + "dist".to_string(), + "*.min.js".to_string(), + "*.bundle.js".to_string(), + "*.map".to_string(), + "*.lock".to_string(), + "*_sample.*".to_string(), + "*example*".to_string(), + "*test*".to_string(), + "*spec*".to_string(), + "*mock*".to_string(), + "*.d.ts".to_string(), // TypeScript declaration files + ], + include_patterns: vec![], // Empty means include all (subject to ignore patterns) + + // Git integration + skip_gitignored_files: true, + downgrade_gitignored_severity: false, + check_git_history: false, // Disabled by default for performance + + // Environment variable handling + check_env_files: true, + warn_on_public_env_vars: true, + sensitive_env_keywords: vec![ + "SECRET".to_string(), + "KEY".to_string(), + "TOKEN".to_string(), + "PASSWORD".to_string(), + "PASS".to_string(), + "AUTH".to_string(), + "API".to_string(), + "PRIVATE".to_string(), + "CREDENTIAL".to_string(), + "CERT".to_string(), + "SSL".to_string(), + "TLS".to_string(), + "OAUTH".to_string(), + "CLIENT_SECRET".to_string(), + "ACCESS_TOKEN".to_string(), + "REFRESH_TOKEN".to_string(), + "DATABASE_URL".to_string(), + "DB_PASS".to_string(), + "STRIPE_SECRET".to_string(), + "AWS_SECRET".to_string(), + "FIREBASE_PRIVATE".to_string(), + ], + + // JavaScript/TypeScript specific + check_package_json: true, + check_node_modules: false, // Usually don't want to scan dependencies + framework_env_prefixes: vec![ + "REACT_APP_".to_string(), + "NEXT_PUBLIC_".to_string(), + "VITE_".to_string(), + "VUE_APP_".to_string(), + "EXPO_PUBLIC_".to_string(), + "NUXT_PUBLIC_".to_string(), + "GATSBY_".to_string(), + "STORYBOOK_".to_string(), + ], + + // 
Output customization + max_findings_per_file: Some(50), // Prevent overwhelming output + deduplicate_findings: true, + group_by_severity: true, + + // Performance settings + max_file_size_mb: Some(10), // Skip very large files + parallel_analysis: true, + analysis_timeout_seconds: Some(300), // 5 minutes max + } + } +} + +impl SecurityAnalysisConfig { + /// Create a configuration optimized for JavaScript/TypeScript projects + pub fn for_javascript() -> Self { + let mut config = Self::default(); + config.javascript_enabled = true; + config.python_enabled = false; + config.rust_enabled = false; + config.check_package_json = true; + config.frameworks_to_check = vec![ + "React".to_string(), + "Vue".to_string(), + "Angular".to_string(), + "Next.js".to_string(), + "Vite".to_string(), + "Express".to_string(), + "Svelte".to_string(), + "Nuxt".to_string(), + ]; + config + } + + /// Create a configuration optimized for Python projects + pub fn for_python() -> Self { + let mut config = Self::default(); + config.javascript_enabled = false; + config.python_enabled = true; + config.rust_enabled = false; + config.check_package_json = false; + config.frameworks_to_check = vec![ + "Django".to_string(), + "Flask".to_string(), + "FastAPI".to_string(), + "Tornado".to_string(), + ]; + config + } + + /// Create a high-security configuration with strict settings + pub fn high_security() -> Self { + let mut config = Self::default(); + config.include_low_severity = true; + config.include_info_level = true; + config.skip_gitignored_files = false; // Check everything + config.check_git_history = true; + config.warn_on_public_env_vars = true; + config.max_findings_per_file = None; // No limit + config + } + + /// Create a fast configuration for CI/CD pipelines + pub fn fast_ci() -> Self { + let mut config = Self::default(); + config.include_low_severity = false; + config.include_info_level = false; + config.check_compliance = false; + config.check_git_history = false; + 
config.parallel_analysis = true; + config.max_findings_per_file = Some(20); // Limit output + config.analysis_timeout_seconds = Some(120); // 2 minutes max + config + } + + /// Check if a file should be analyzed based on patterns + pub fn should_analyze_file(&self, file_path: &std::path::Path) -> bool { + let file_path_str = file_path.to_string_lossy(); + let file_name = file_path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or(""); + + // Check ignore patterns first + for pattern in &self.ignore_patterns { + if self.matches_pattern(pattern, &file_path_str, file_name) { + return false; + } + } + + // If include patterns are specified, file must match at least one + if !self.include_patterns.is_empty() { + return self.include_patterns.iter().any(|pattern| { + self.matches_pattern(pattern, &file_path_str, file_name) + }); + } + + true + } + + /// Check if a pattern matches a file + fn matches_pattern(&self, pattern: &str, file_path: &str, file_name: &str) -> bool { + if pattern.contains('*') { + // Use glob matching for wildcard patterns + glob::Pattern::new(pattern) + .map(|p| p.matches(file_path) || p.matches(file_name)) + .unwrap_or(false) + } else { + // Simple string matching + file_path.contains(pattern) || file_name.contains(pattern) + } + } + + /// Check if an environment variable name appears sensitive + pub fn is_sensitive_env_var(&self, var_name: &str) -> bool { + let var_upper = var_name.to_uppercase(); + self.sensitive_env_keywords.iter() + .any(|keyword| var_upper.contains(keyword)) + } + + /// Check if an environment variable should be public (safe for client-side) + pub fn is_public_env_var(&self, var_name: &str) -> bool { + self.framework_env_prefixes.iter() + .any(|prefix| var_name.starts_with(prefix)) + } + + /// Get the maximum file size to analyze in bytes + pub fn max_file_size_bytes(&self) -> Option { + self.max_file_size_mb.map(|mb| mb * 1024 * 1024) + } +} + +/// Preset configurations for common use cases +#[derive(Debug, Clone, Copy)] 
+pub enum SecurityConfigPreset { + /// Default balanced configuration + Default, + /// Optimized for JavaScript/TypeScript projects + JavaScript, + /// Optimized for Python projects + Python, + /// High-security configuration with strict settings + HighSecurity, + /// Fast configuration for CI/CD pipelines + FastCI, +} + +impl SecurityConfigPreset { + pub fn to_config(self) -> SecurityAnalysisConfig { + match self { + Self::Default => SecurityAnalysisConfig::default(), + Self::JavaScript => SecurityAnalysisConfig::for_javascript(), + Self::Python => SecurityAnalysisConfig::for_python(), + Self::HighSecurity => SecurityAnalysisConfig::high_security(), + Self::FastCI => SecurityAnalysisConfig::fast_ci(), + } + } +} + +impl From for SecurityAnalysisConfig { + fn from(preset: SecurityConfigPreset) -> Self { + preset.to_config() + } +} \ No newline at end of file diff --git a/src/analyzer/security/core.rs b/src/analyzer/security/core.rs new file mode 100644 index 00000000..edba639f --- /dev/null +++ b/src/analyzer/security/core.rs @@ -0,0 +1,94 @@ +//! # Core Security Analysis Types +//! +//! Base types and functionality shared across all security analyzers. 
+ +use std::collections::HashMap; +use std::path::PathBuf; +use serde::{Deserialize, Serialize}; + +/// Security finding severity levels +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SecuritySeverity { + Critical, + High, + Medium, + Low, + Info, +} + +/// Categories of security findings +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum SecurityCategory { + /// Exposed secrets, API keys, passwords + SecretsExposure, + /// Insecure configuration settings + InsecureConfiguration, + /// Language/framework-specific security patterns + CodeSecurityPattern, + /// Infrastructure and deployment security + InfrastructureSecurity, + /// Authentication and authorization issues + AuthenticationSecurity, + /// Data protection and privacy concerns + DataProtection, + /// Network and communication security + NetworkSecurity, + /// Compliance and regulatory requirements + Compliance, +} + +/// A security finding with details and remediation +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SecurityFinding { + pub id: String, + pub title: String, + pub description: String, + pub severity: SecuritySeverity, + pub category: SecurityCategory, + pub file_path: Option, + pub line_number: Option, + pub column_number: Option, + pub evidence: Option, + pub remediation: Vec, + pub references: Vec, + pub cwe_id: Option, + pub compliance_frameworks: Vec, +} + +/// Comprehensive security analysis report +#[derive(Debug, Serialize, Deserialize)] +pub struct SecurityReport { + pub analyzed_at: chrono::DateTime, + pub overall_score: f32, // 0-100, higher is better + pub risk_level: SecuritySeverity, + pub total_findings: usize, + pub findings_by_severity: HashMap, + pub findings_by_category: HashMap, + pub findings: Vec, + pub recommendations: Vec, + pub compliance_status: HashMap, +} + +/// Compliance framework status +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ComplianceStatus { + pub 
framework: String, + pub coverage: f32, // 0-100% + pub missing_controls: Vec, + pub recommendations: Vec, +} + +/// Base security analyzer trait +pub trait SecurityAnalyzer { + type Config; + type Error: std::error::Error; + + /// Analyze a project for security issues + fn analyze_project(&self, project_root: &std::path::Path) -> Result; + + /// Get the analyzer's configuration + fn config(&self) -> &Self::Config; + + /// Get supported file extensions for this analyzer + fn supported_extensions(&self) -> Vec<&'static str>; +} \ No newline at end of file diff --git a/src/analyzer/security/gitignore.rs b/src/analyzer/security/gitignore.rs new file mode 100644 index 00000000..da70a500 --- /dev/null +++ b/src/analyzer/security/gitignore.rs @@ -0,0 +1,531 @@ +//! # GitIgnore-Aware Security Analysis +//! +//! Comprehensive gitignore parsing and pattern matching for security analysis. +//! This module ensures that secret detection is gitignore-aware and can properly +//! assess whether sensitive files are appropriately protected. 
+ +use std::collections::HashSet; +use std::path::{Path, PathBuf}; +use std::fs; +use log::{info, warn}; +use regex::Regex; + +/// GitIgnore pattern matcher for security analysis +pub struct GitIgnoreAnalyzer { + patterns: Vec, + project_root: PathBuf, + is_git_repo: bool, +} + +/// A parsed gitignore pattern with matching logic +#[derive(Debug, Clone)] +pub struct GitIgnorePattern { + pub original: String, + pub regex: Regex, + pub is_negation: bool, + pub is_directory_only: bool, + pub is_absolute: bool, // Starts with / + pub pattern_type: PatternType, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum PatternType { + /// Exact filename match (e.g., ".env") + Exact, + /// Wildcard pattern (e.g., "*.log") + Wildcard, + /// Directory pattern (e.g., "node_modules/") + Directory, + /// Path pattern (e.g., "config/*.env") + Path, +} + +/// Result of gitignore analysis for a file +#[derive(Debug, Clone)] +pub struct GitIgnoreStatus { + pub is_ignored: bool, + pub matched_pattern: Option, + pub is_tracked: bool, // Whether file is tracked by git + pub should_be_ignored: bool, // Whether file contains secrets and should be ignored + pub risk_level: GitIgnoreRisk, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum GitIgnoreRisk { + /// File is properly ignored and contains no secrets + Safe, + /// File contains secrets but is properly ignored + Protected, + /// File contains secrets and is NOT ignored (high risk) + Exposed, + /// File contains secrets, not ignored, and is tracked by git (critical risk) + Tracked, +} + +impl GitIgnoreAnalyzer { + pub fn new(project_root: &Path) -> Result { + let project_root = project_root.canonicalize()?; + let is_git_repo = project_root.join(".git").exists(); + + let patterns = if is_git_repo { + Self::parse_gitignore_files(&project_root)? 
+ } else { + Self::create_default_patterns() + }; + + info!("Initialized GitIgnore analyzer with {} patterns for {}", + patterns.len(), project_root.display()); + + Ok(Self { + patterns, + project_root, + is_git_repo, + }) + } + + /// Parse all relevant .gitignore files + fn parse_gitignore_files(project_root: &Path) -> Result, std::io::Error> { + let mut patterns = Vec::new(); + + // Global gitignore patterns for common secret files + patterns.extend(Self::create_default_patterns()); + + // Parse project .gitignore + let gitignore_path = project_root.join(".gitignore"); + if gitignore_path.exists() { + let content = fs::read_to_string(&gitignore_path)?; + patterns.extend(Self::parse_gitignore_content(&content, project_root)?); + info!("Parsed {} patterns from .gitignore", patterns.len()); + } + + // TODO: Parse global gitignore (~/.gitignore_global) + // TODO: Parse .git/info/exclude + + Ok(patterns) + } + + /// Create default patterns for common secret files + fn create_default_patterns() -> Vec { + let default_patterns = [ + ".env", + ".env.local", + ".env.*.local", + ".env.production", + ".env.development", + ".env.staging", + ".env.test", + "*.pem", + "*.key", + "*.p12", + "*.pfx", + "id_rsa", + "id_dsa", + "id_ecdsa", + "id_ed25519", + ".aws/credentials", + ".ssh/", + "secrets/", + "private/", + ]; + + default_patterns.iter() + .filter_map(|pattern| Self::parse_pattern(pattern, &PathBuf::from(".")).ok()) + .collect() + } + + /// Parse gitignore content into patterns + fn parse_gitignore_content(content: &str, _root: &Path) -> Result, std::io::Error> { + let mut patterns = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + let line = line.trim(); + + // Skip empty lines and comments + if line.is_empty() || line.starts_with('#') { + continue; + } + + match Self::parse_pattern(line, &PathBuf::from(".")) { + Ok(pattern) => patterns.push(pattern), + Err(e) => { + warn!("Failed to parse gitignore pattern on line {}: '{}' - {}", line_num + 1, 
line, e); + } + } + } + + Ok(patterns) + } + + /// Parse a single gitignore pattern + fn parse_pattern(pattern: &str, _root: &Path) -> Result { + let original = pattern.to_string(); + let mut pattern = pattern.to_string(); + + // Handle negation + let is_negation = pattern.starts_with('!'); + if is_negation { + pattern = pattern[1..].to_string(); + } + + // Handle directory-only patterns + let is_directory_only = pattern.ends_with('/'); + if is_directory_only { + pattern.pop(); + } + + // Handle absolute patterns (starting with /) + let is_absolute = pattern.starts_with('/'); + if is_absolute { + pattern = pattern[1..].to_string(); + } + + // Determine pattern type + let pattern_type = if pattern.contains('/') { + PatternType::Path + } else if pattern.contains('*') || pattern.contains('?') { + PatternType::Wildcard + } else if is_directory_only { + PatternType::Directory + } else { + PatternType::Exact + }; + + // Convert to regex + let regex_pattern = Self::gitignore_to_regex(&pattern, is_absolute, &pattern_type)?; + let regex = Regex::new(®ex_pattern)?; + + Ok(GitIgnorePattern { + original, + regex, + is_negation, + is_directory_only, + is_absolute, + pattern_type, + }) + } + + /// Convert gitignore pattern to regex + fn gitignore_to_regex(pattern: &str, is_absolute: bool, pattern_type: &PatternType) -> Result { + let mut regex = String::new(); + + // Start anchor + if is_absolute { + regex.push_str("^"); + } else { + // Can match anywhere in the path + regex.push_str("(?:^|/)"); + } + + // Process the pattern + for ch in pattern.chars() { + match ch { + '*' => { + // Check if this is a double star (**) + if pattern.contains("**") { + regex.push_str(".*"); + } else { + regex.push_str("[^/]*"); + } + } + '?' => regex.push_str("[^/]"), + '.' 
=> regex.push_str("\\."), + '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '+' | '|' | '\\' => { + regex.push('\\'); + regex.push(ch); + } + '/' => regex.push_str("/"), + _ => regex.push(ch), + } + } + + // Handle directory-only patterns + match pattern_type { + PatternType::Directory => { + regex.push_str("(?:/|$)"); + } + PatternType::Exact => { + regex.push_str("(?:/|$)"); + } + _ => { + regex.push_str("(?:/.*)?$"); + } + } + + Ok(regex) + } + + /// Check if a file path matches gitignore patterns + pub fn analyze_file(&self, file_path: &Path) -> GitIgnoreStatus { + let relative_path = match file_path.strip_prefix(&self.project_root) { + Ok(rel) => rel, + Err(_) => return GitIgnoreStatus { + is_ignored: false, + matched_pattern: None, + is_tracked: false, + should_be_ignored: false, + risk_level: GitIgnoreRisk::Safe, + }, + }; + + let path_str = relative_path.to_string_lossy(); + let file_name = file_path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or(""); + + // Check against patterns + let mut is_ignored = false; + let mut matched_pattern = None; + + for pattern in &self.patterns { + if pattern.regex.is_match(&path_str) { + if pattern.is_negation { + is_ignored = false; + matched_pattern = None; + } else { + is_ignored = true; + matched_pattern = Some(pattern.original.clone()); + } + } + } + + // Check if file is tracked by git + let is_tracked = if self.is_git_repo { + self.check_git_tracked(file_path) + } else { + false + }; + + // Determine if file should be ignored (contains secrets) + let should_be_ignored = self.should_file_be_ignored(file_path, file_name); + + // Assess risk level + let risk_level = self.assess_risk(is_ignored, is_tracked, should_be_ignored); + + GitIgnoreStatus { + is_ignored, + matched_pattern, + is_tracked, + should_be_ignored, + risk_level, + } + } + + /// Check if file is tracked by git + fn check_git_tracked(&self, file_path: &Path) -> bool { + use std::process::Command; + + Command::new("git") + .args(&["ls-files", 
"--error-unmatch"]) + .arg(file_path) + .current_dir(&self.project_root) + .output() + .map(|output| output.status.success()) + .unwrap_or(false) + } + + /// Check if a file should be ignored based on its name/path + fn should_file_be_ignored(&self, file_path: &Path, file_name: &str) -> bool { + // Common secret file patterns + let secret_indicators = [ + ".env", ".key", ".pem", ".p12", ".pfx", + "id_rsa", "id_dsa", "id_ecdsa", "id_ed25519", + "credentials", "secrets", "private" + ]; + + let path_str = file_path.to_string_lossy().to_lowercase(); + let file_name_lower = file_name.to_lowercase(); + + secret_indicators.iter().any(|indicator| { + file_name_lower.contains(indicator) || path_str.contains(indicator) + }) + } + + /// Assess the risk level for a file + fn assess_risk(&self, is_ignored: bool, is_tracked: bool, should_be_ignored: bool) -> GitIgnoreRisk { + match (should_be_ignored, is_ignored, is_tracked) { + // File contains secrets + (true, true, _) => GitIgnoreRisk::Protected, // Ignored (good) + (true, false, true) => GitIgnoreRisk::Tracked, // Not ignored AND tracked (critical) + (true, false, false) => GitIgnoreRisk::Exposed, // Not ignored but not tracked (high risk) + // File doesn't contain secrets (or we think it doesn't) + (false, _, _) => GitIgnoreRisk::Safe, + } + } + + /// Get all files that should be analyzed for secrets + pub fn get_files_to_analyze(&self, extensions: &[&str]) -> Result, std::io::Error> { + let mut files = Vec::new(); + self.collect_files_recursive(&self.project_root, extensions, &mut files)?; + + // Filter files that are definitely ignored + let files_to_analyze: Vec = files.into_iter() + .filter(|file| { + let status = self.analyze_file(file); + // Analyze files that are either: + // 1. Not ignored (need to check if they should be) + // 2. 
Ignored but we want to verify they don't contain secrets anyway + !status.is_ignored || status.should_be_ignored + }) + .collect(); + + info!("Found {} files to analyze for secrets", files_to_analyze.len()); + Ok(files_to_analyze) + } + + /// Recursively collect files with given extensions + fn collect_files_recursive( + &self, + dir: &Path, + extensions: &[&str], + files: &mut Vec + ) -> Result<(), std::io::Error> { + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + + if path.is_dir() { + // Skip obviously ignored directories + if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) { + if matches!(dir_name, ".git" | "node_modules" | "target" | "build" | "dist" | ".next") { + continue; + } + } + + // Check if directory is ignored + let status = self.analyze_file(&path); + if !status.is_ignored { + self.collect_files_recursive(&path, extensions, files)?; + } + } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) { + if extensions.is_empty() || extensions.contains(&ext) { + files.push(path); + } + } else { + // Files without extensions might still be secret files + files.push(path); + } + } + + Ok(()) + } + + /// Generate recommendations for improving gitignore coverage + pub fn generate_gitignore_recommendations(&self, secret_files: &[PathBuf]) -> Vec { + let mut recommendations = Vec::new(); + let mut patterns_to_add = HashSet::new(); + + for file in secret_files { + let status = self.analyze_file(file); + + if status.risk_level == GitIgnoreRisk::Exposed || status.risk_level == GitIgnoreRisk::Tracked { + if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) { + // Suggest specific patterns + if file_name.starts_with(".env") { + patterns_to_add.insert(".env*".to_string()); + } else if file_name.ends_with(".key") || file_name.ends_with(".pem") { + patterns_to_add.insert("*.key".to_string()); + patterns_to_add.insert("*.pem".to_string()); + } else { + 
patterns_to_add.insert(file_name.to_string()); + } + } + + if status.risk_level == GitIgnoreRisk::Tracked { + recommendations.push(format!( + "CRITICAL: '{}' contains secrets and is tracked by git! Remove from git history.", + file.display() + )); + } + } + } + + if !patterns_to_add.is_empty() { + recommendations.push("Add these patterns to your .gitignore:".to_string()); + for pattern in patterns_to_add { + recommendations.push(format!(" {}", pattern)); + } + } + + recommendations + } +} + +impl GitIgnoreStatus { + /// Get a human-readable description of the status + pub fn description(&self) -> String { + match self.risk_level { + GitIgnoreRisk::Safe => "File appears safe".to_string(), + GitIgnoreRisk::Protected => format!( + "File contains secrets but is protected (ignored by: {})", + self.matched_pattern.as_deref().unwrap_or("default pattern") + ), + GitIgnoreRisk::Exposed => "File contains secrets but is NOT in .gitignore!".to_string(), + GitIgnoreRisk::Tracked => "CRITICAL: File contains secrets and is tracked by git!".to_string(), + } + } + + /// Get recommended action for this file + pub fn recommended_action(&self) -> String { + match self.risk_level { + GitIgnoreRisk::Safe => "No action needed".to_string(), + GitIgnoreRisk::Protected => "Verify secrets are still necessary".to_string(), + GitIgnoreRisk::Exposed => "Add to .gitignore immediately".to_string(), + GitIgnoreRisk::Tracked => "Remove from git history and add to .gitignore".to_string(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_gitignore_pattern_parsing() { + let patterns = vec![ + ".env", + "*.log", + "/config.json", + "secrets/", + "!important.env", + ]; + + for pattern_str in patterns { + let pattern = GitIgnoreAnalyzer::parse_pattern(pattern_str, &PathBuf::from(".")); + assert!(pattern.is_ok(), "Failed to parse pattern: {}", pattern_str); + } + } + + #[test] + fn test_pattern_matching() { + let temp_dir = TempDir::new().unwrap(); + 
let analyzer = GitIgnoreAnalyzer::new(temp_dir.path()).unwrap(); + + // Test exact pattern matching + let env_pattern = GitIgnoreAnalyzer::parse_pattern(".env", &PathBuf::from(".")).unwrap(); + assert!(env_pattern.regex.is_match(".env")); + assert!(env_pattern.regex.is_match("subdir/.env")); + assert!(!env_pattern.regex.is_match("not-env")); + } + + #[test] + fn test_nested_directory_matching() { + let temp_dir = TempDir::new().unwrap(); + let analyzer = GitIgnoreAnalyzer::new(temp_dir.path()).unwrap(); + + // Create a pattern for .env files + let env_pattern = GitIgnoreAnalyzer::parse_pattern(".env*", &PathBuf::from(".")).unwrap(); + + // Test various nested scenarios + let test_paths = [ + ".env", + "secrets/.env", + "config/production/.env.local", + "deeply/nested/folder/.env.production", + ]; + + for path in &test_paths { + assert!(env_pattern.regex.is_match(path), "Pattern should match: {}", path); + } + } +} \ No newline at end of file diff --git a/src/analyzer/security/javascript.rs b/src/analyzer/security/javascript.rs new file mode 100644 index 00000000..2febc26c --- /dev/null +++ b/src/analyzer/security/javascript.rs @@ -0,0 +1,1013 @@ +//! # JavaScript/TypeScript Security Analyzer +//! +//! Specialized security analyzer for JavaScript and TypeScript applications. +//! +//! This analyzer focuses on: +//! - Framework-specific secret patterns (React, Vue, Angular, etc.) +//! - Environment variable misuse +//! - Hardcoded API keys in configuration objects +//! - Client-side secret exposure patterns +//! 
- Common JS/TS anti-patterns + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::fs; +use regex::Regex; +use log::{debug, info}; + +use super::{SecurityError, SecurityFinding, SecuritySeverity, SecurityCategory, SecurityReport, SecurityAnalysisConfig, GitIgnoreAnalyzer, GitIgnoreRisk}; + +/// JavaScript/TypeScript specific security analyzer +pub struct JavaScriptSecurityAnalyzer { + config: SecurityAnalysisConfig, + js_patterns: Vec, + framework_patterns: HashMap>, + env_var_patterns: Vec, + gitignore_analyzer: Option, +} + +/// JavaScript-specific secret pattern +#[derive(Debug, Clone)] +pub struct JavaScriptSecretPattern { + pub id: String, + pub name: String, + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub context_indicators: Vec, // Code context that increases confidence + pub false_positive_indicators: Vec, // Context that suggests false positive +} + +/// Framework-specific patterns +#[derive(Debug, Clone)] +pub struct FrameworkPattern { + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub file_extensions: Vec, +} + +/// Environment variable patterns +#[derive(Debug, Clone)] +pub struct EnvVarPattern { + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub public_prefixes: Vec, // Prefixes that indicate public env vars +} + +impl JavaScriptSecurityAnalyzer { + pub fn new() -> Result { + Self::with_config(SecurityAnalysisConfig::default()) + } + + pub fn with_config(config: SecurityAnalysisConfig) -> Result { + let js_patterns = Self::initialize_js_patterns()?; + let framework_patterns = Self::initialize_framework_patterns()?; + let env_var_patterns = Self::initialize_env_var_patterns()?; + + Ok(Self { + config, + js_patterns, + framework_patterns, + env_var_patterns, + gitignore_analyzer: None, // Will be initialized in analyze_project + }) + } + + /// Analyze a JavaScript/TypeScript project + pub fn analyze_project(&mut 
self, project_root: &Path) -> Result { + let mut findings = Vec::new(); + + // Initialize gitignore analyzer for comprehensive file protection assessment + let mut gitignore_analyzer = GitIgnoreAnalyzer::new(project_root) + .map_err(|e| SecurityError::AnalysisFailed(format!("Failed to initialize gitignore analyzer: {}", e)))?; + + info!("πŸ” Using gitignore-aware security analysis for {}", project_root.display()); + + // Get JS/TS files using gitignore-aware collection + let js_extensions = ["js", "jsx", "ts", "tsx", "vue", "svelte"]; + let js_files = gitignore_analyzer.get_files_to_analyze(&js_extensions) + .map_err(|e| SecurityError::Io(e))? + .into_iter() + .filter(|file| { + if let Some(ext) = file.extension().and_then(|e| e.to_str()) { + js_extensions.contains(&ext) + } else { + false + } + }) + .collect::>(); + + info!("Found {} JavaScript/TypeScript files to analyze (gitignore-filtered)", js_files.len()); + + // Analyze each file with gitignore context + for file_path in &js_files { + let gitignore_status = gitignore_analyzer.analyze_file(file_path); + let mut file_findings = self.analyze_js_file(file_path)?; + + // Enhance findings with gitignore risk assessment + for finding in &mut file_findings { + self.enhance_finding_with_gitignore_status(finding, &gitignore_status); + } + + findings.extend(file_findings); + } + + // Analyze package.json and other config files with gitignore awareness + findings.extend(self.analyze_config_files_with_gitignore(project_root, &mut gitignore_analyzer)?); + + // Comprehensive environment file analysis with gitignore risk assessment + findings.extend(self.analyze_env_files_with_gitignore(project_root, &mut gitignore_analyzer)?); + + // Generate gitignore recommendations for any secret files found + let secret_files: Vec = findings.iter() + .filter_map(|f| f.file_path.as_ref()) + .cloned() + .collect(); + + let gitignore_recommendations = gitignore_analyzer.generate_gitignore_recommendations(&secret_files); + + // Create 
report with enhanced recommendations + let mut report = SecurityReport::from_findings(findings); + report.recommendations.extend(gitignore_recommendations); + + Ok(report) + } + + /// Initialize JavaScript-specific secret patterns + fn initialize_js_patterns() -> Result, SecurityError> { + let patterns = vec![ + // Firebase config object + JavaScriptSecretPattern { + id: "js-firebase-config".to_string(), + name: "Firebase Configuration Object".to_string(), + pattern: Regex::new(r#"(?i)(?:const\s+|let\s+|var\s+)?firebaseConfig\s*[=:]\s*\{[^}]*apiKey\s*:\s*["']([^"']+)["'][^}]*\}"#)?, + severity: SecuritySeverity::Medium, + description: "Firebase configuration object with API key detected".to_string(), + context_indicators: vec!["initializeApp".to_string(), "firebase".to_string()], + false_positive_indicators: vec!["example".to_string(), "placeholder".to_string(), "your-api-key".to_string()], + }, + + // Stripe publishable key (less sensitive but should be noted) + JavaScriptSecretPattern { + id: "js-stripe-public-key".to_string(), + name: "Stripe Publishable Key".to_string(), + pattern: Regex::new(r#"(?i)pk_(?:test_|live_)[a-zA-Z0-9]{24,}"#)?, + severity: SecuritySeverity::Low, + description: "Stripe publishable key detected (public but should be environment variable)".to_string(), + context_indicators: vec!["stripe".to_string(), "payment".to_string()], + false_positive_indicators: vec![], + }, + + // Supabase anon key + JavaScriptSecretPattern { + id: "js-supabase-anon-key".to_string(), + name: "Supabase Anonymous Key".to_string(), + pattern: Regex::new(r#"(?i)(?:supabase|anon).*?["\']eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+["\']"#)?, + severity: SecuritySeverity::Medium, + description: "Supabase anonymous key detected".to_string(), + context_indicators: vec!["supabase".to_string(), "createClient".to_string()], + false_positive_indicators: vec!["example".to_string(), "placeholder".to_string()], + }, + + // Auth0 configuration + JavaScriptSecretPattern { + 
id: "js-auth0-config".to_string(), + name: "Auth0 Configuration".to_string(), + pattern: Regex::new(r#"(?i)(?:domain|clientId)\s*:\s*["']([a-zA-Z0-9.-]+\.auth0\.com|[a-zA-Z0-9]{32})["']"#)?, + severity: SecuritySeverity::Medium, + description: "Auth0 configuration detected".to_string(), + context_indicators: vec!["auth0".to_string(), "webAuth".to_string()], + false_positive_indicators: vec!["example".to_string(), "your-domain".to_string()], + }, + + // Process.env hardcoded values + JavaScriptSecretPattern { + id: "js-hardcoded-env".to_string(), + name: "Hardcoded process.env Assignment".to_string(), + pattern: Regex::new(r#"process\.env\.[A-Z_]+\s*=\s*["']([^"']+)["']"#)?, + severity: SecuritySeverity::High, + description: "Hardcoded assignment to process.env detected".to_string(), + context_indicators: vec![], + false_positive_indicators: vec!["development".to_string(), "test".to_string()], + }, + + // Clerk keys + JavaScriptSecretPattern { + id: "js-clerk-key".to_string(), + name: "Clerk API Key".to_string(), + pattern: Regex::new(r#"(?i)(?:clerk|pk_test_|pk_live_)[a-zA-Z0-9_-]{20,}"#)?, + severity: SecuritySeverity::Medium, + description: "Clerk API key detected".to_string(), + context_indicators: vec!["clerk".to_string(), "ClerkProvider".to_string()], + false_positive_indicators: vec![], + }, + + // Generic API key in object assignment + JavaScriptSecretPattern { + id: "js-api-key-object".to_string(), + name: "API Key in Object Assignment".to_string(), + pattern: Regex::new(r#"(?i)(?:apiKey|api_key|clientSecret|client_secret|accessToken|access_token|secretKey|secret_key)\s*:\s*["']([A-Za-z0-9_-]{20,})["']"#)?, + severity: SecuritySeverity::High, + description: "API key or secret assigned in object literal".to_string(), + context_indicators: vec!["fetch".to_string(), "axios".to_string(), "headers".to_string()], + false_positive_indicators: vec!["process.env".to_string(), "import.meta.env".to_string(), "placeholder".to_string()], + }, + + // Bearer tokens in 
fetch headers + JavaScriptSecretPattern { + id: "js-bearer-token".to_string(), + name: "Bearer Token in Code".to_string(), + pattern: Regex::new(r#"(?i)(?:authorization|bearer)\s*:\s*["'](?:bearer\s+)?([A-Za-z0-9_-]{20,})["']"#)?, + severity: SecuritySeverity::Critical, + description: "Bearer token hardcoded in authorization header".to_string(), + context_indicators: vec!["fetch".to_string(), "axios".to_string(), "headers".to_string()], + false_positive_indicators: vec!["${".to_string(), "process.env".to_string(), "import.meta.env".to_string()], + }, + + // Database connection strings + JavaScriptSecretPattern { + id: "js-database-url".to_string(), + name: "Database Connection URL".to_string(), + pattern: Regex::new(r#"(?i)(?:mongodb|postgres|mysql)://[^"'\s]+:[^"'\s]+@[^"'\s]+"#)?, + severity: SecuritySeverity::Critical, + description: "Database connection string with credentials detected".to_string(), + context_indicators: vec!["connect".to_string(), "mongoose".to_string(), "client".to_string()], + false_positive_indicators: vec!["localhost".to_string(), "example.com".to_string()], + }, + ]; + + Ok(patterns) + } + + /// Initialize framework-specific patterns + fn initialize_framework_patterns() -> Result>, SecurityError> { + let mut frameworks = HashMap::new(); + + // React patterns + frameworks.insert("react".to_string(), vec![ + FrameworkPattern { + pattern: Regex::new(r#"(?i)react_app_[a-z_]+\s*=\s*["']([^"']+)["']"#)?, + severity: SecuritySeverity::Medium, + description: "React environment variable potentially exposed in build".to_string(), + file_extensions: vec!["js".to_string(), "jsx".to_string(), "ts".to_string(), "tsx".to_string()], + }, + ]); + + // Next.js patterns + frameworks.insert("nextjs".to_string(), vec![ + FrameworkPattern { + pattern: Regex::new(r#"(?i)next_public_[a-z_]+\s*=\s*["']([^"']+)["']"#)?, + severity: SecuritySeverity::Low, + description: "Next.js public environment variable (ensure it should be public)".to_string(), + 
file_extensions: vec!["js".to_string(), "jsx".to_string(), "ts".to_string(), "tsx".to_string()], + }, + ]); + + // Vite patterns + frameworks.insert("vite".to_string(), vec![ + FrameworkPattern { + pattern: Regex::new(r#"(?i)vite_[a-z_]+\s*=\s*["']([^"']+)["']"#)?, + severity: SecuritySeverity::Medium, + description: "Vite environment variable potentially exposed in build".to_string(), + file_extensions: vec!["js".to_string(), "jsx".to_string(), "ts".to_string(), "tsx".to_string(), "vue".to_string()], + }, + ]); + + Ok(frameworks) + } + + /// Initialize environment variable patterns + fn initialize_env_var_patterns() -> Result, SecurityError> { + let patterns = vec![ + EnvVarPattern { + pattern: Regex::new(r#"process\.env\.([A-Z_]+)"#)?, + severity: SecuritySeverity::Info, + description: "Environment variable usage detected".to_string(), + public_prefixes: vec![ + "REACT_APP_".to_string(), + "NEXT_PUBLIC_".to_string(), + "VITE_".to_string(), + "VUE_APP_".to_string(), + "EXPO_PUBLIC_".to_string(), + "NUXT_PUBLIC_".to_string(), + ], + }, + EnvVarPattern { + pattern: Regex::new(r#"import\.meta\.env\.([A-Z_]+)"#)?, + severity: SecuritySeverity::Info, + description: "Vite environment variable usage detected".to_string(), + public_prefixes: vec!["VITE_".to_string()], + }, + ]; + + Ok(patterns) + } + + /// Collect all JavaScript/TypeScript files + fn collect_js_files(&self, project_root: &Path) -> Result, SecurityError> { + let extensions = ["js", "jsx", "ts", "tsx", "vue", "svelte"]; + let mut files = Vec::new(); + + fn collect_recursive(dir: &Path, extensions: &[&str], files: &mut Vec) -> Result<(), std::io::Error> { + for entry in fs::read_dir(dir)? 
{ + let entry = entry?; + let path = entry.path(); + + if path.is_dir() { + // Skip common build/dependency directories + if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) { + if matches!(dir_name, "node_modules" | ".git" | "build" | "dist" | ".next" | "coverage") { + continue; + } + } + collect_recursive(&path, extensions, files)?; + } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) { + if extensions.contains(&ext) { + files.push(path); + } + } + } + Ok(()) + } + + collect_recursive(project_root, &extensions, &mut files)?; + Ok(files) + } + + /// Analyze a single JavaScript/TypeScript file + fn analyze_js_file(&self, file_path: &Path) -> Result, SecurityError> { + let content = fs::read_to_string(file_path)?; + let mut findings = Vec::new(); + + // Check against JavaScript-specific patterns + for pattern in &self.js_patterns { + findings.extend(self.check_pattern_in_content(&content, pattern, file_path)?); + } + + // Check environment variable usage + findings.extend(self.check_env_var_usage(&content, file_path)?); + + Ok(findings) + } + + /// Check a specific pattern in file content + fn check_pattern_in_content( + &self, + content: &str, + pattern: &JavaScriptSecretPattern, + file_path: &Path, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + if let Some(captures) = pattern.pattern.captures(line) { + // Check for false positive indicators + if pattern.false_positive_indicators.iter().any(|indicator| { + line.to_lowercase().contains(&indicator.to_lowercase()) + }) { + debug!("Skipping potential false positive in {}: {}", file_path.display(), line.trim()); + continue; + } + + // Extract the secret value and position if captured + let (evidence, column_number) = if captures.len() > 1 { + if let Some(match_) = captures.get(1) { + (Some(match_.as_str().to_string()), Some(match_.start() + 1)) + } else { + (Some(line.trim().to_string()), None) + } + } else { + 
// For patterns without capture groups, use the full match + if let Some(match_) = captures.get(0) { + (Some(line.trim().to_string()), Some(match_.start() + 1)) + } else { + (Some(line.trim().to_string()), None) + } + }; + + // Check context for confidence scoring + let context_score = self.calculate_context_confidence(content, &pattern.context_indicators); + let adjusted_severity = self.adjust_severity_by_context(pattern.severity.clone(), context_score); + + findings.push(SecurityFinding { + id: format!("{}-{}", pattern.id, line_num), + title: format!("{} Detected", pattern.name), + description: format!("{} (Context confidence: {:.1})", pattern.description, context_score), + severity: adjusted_severity, + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number, + evidence, + remediation: self.generate_js_remediation(&pattern.id), + references: vec![ + "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(), + "https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], + }); + } + } + + Ok(findings) + } + + /// Check environment variable usage patterns with context-aware detection + fn check_env_var_usage(&self, content: &str, file_path: &Path) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Determine if this is likely server-side or client-side code + let is_server_side = self.is_server_side_file(file_path, content); + + for pattern in &self.env_var_patterns { + for (line_num, line) in content.lines().enumerate() { + if let Some(captures) = pattern.pattern.captures(line) { + if let Some(var_name) = captures.get(1) { + let var_name = var_name.as_str(); + + // Check if this is a public environment variable + let is_public = pattern.public_prefixes.iter().any(|prefix| 
var_name.starts_with(prefix)); + + // Context-aware detection: Only flag as problematic if: + // 1. It's a sensitive variable AND + // 2. It's in client-side code AND + // 3. It doesn't have a public prefix + if !is_public && self.is_sensitive_var_name(var_name) && !is_server_side { + // Extract column position from the pattern match + let column_number = captures.get(0) + .map(|m| m.start() + 1); + + findings.push(SecurityFinding { + id: format!("js-env-sensitive-{}", line_num), + title: "Sensitive Environment Variable in Client Code".to_string(), + description: format!("Environment variable '{}' appears sensitive and may be exposed to client in browser code", var_name), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number, + evidence: Some(line.trim().to_string()), + remediation: vec![ + "Move sensitive environment variables to server-side code".to_string(), + "Use public environment variable prefixes only for non-sensitive data".to_string(), + "Consider using a backend API endpoint to handle sensitive operations".to_string(), + ], + references: vec![ + "https://nextjs.org/docs/basic-features/environment-variables".to_string(), + "https://vitejs.dev/guide/env-and-mode.html".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }); + } + // For server-side code using environment variables, this is GOOD practice - don't flag it + } + } + } + } + + Ok(findings) + } + + /// Analyze configuration files (package.json, etc.) 
+ fn analyze_config_files(&self, project_root: &Path) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Check package.json for exposed scripts or configs + let package_json = project_root.join("package.json"); + if package_json.exists() { + findings.extend(self.analyze_package_json(&package_json)?); + } + + Ok(findings) + } + + /// Analyze package.json for security issues + fn analyze_package_json(&self, package_json: &Path) -> Result, SecurityError> { + let mut findings = Vec::new(); + let content = fs::read_to_string(package_json)?; + + // Look for hardcoded secrets in scripts or config + if content.contains("REACT_APP_") || content.contains("NEXT_PUBLIC_") || content.contains("VITE_") { + for (line_num, line) in content.lines().enumerate() { + if line.contains("sk_") || line.contains("pk_live_") || line.contains("eyJ") { + findings.push(SecurityFinding { + id: format!("package-json-secret-{}", line_num), + title: "Potential Secret in package.json".to_string(), + description: "Potential API key or token found in package.json".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + file_path: Some(package_json.to_path_buf()), + line_number: Some(line_num + 1), + column_number: None, + evidence: Some(line.trim().to_string()), + remediation: vec![ + "Remove secrets from package.json".to_string(), + "Use environment variables instead".to_string(), + "Add package.json to .gitignore if it contains secrets (not recommended)".to_string(), + ], + references: vec![ + "https://docs.npmjs.com/cli/v8/configuring-npm/package-json".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }); + } + } + } + + Ok(findings) + } + + /// Analyze environment files + fn analyze_env_files(&self, project_root: &Path) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Check for .env files that might be accidentally committed + let env_files = [".env", 
".env.local", ".env.production", ".env.development"]; + + for env_file in &env_files { + // Skip template/example files + if self.is_template_file(env_file) { + debug!("Skipping template env file: {}", env_file); + continue; + } + + let env_path = project_root.join(env_file); + if env_path.exists() { + // Check if this file should be tracked by git + findings.push(SecurityFinding { + id: format!("env-file-{}", env_file.replace('.', "-")), + title: "Environment File Detected".to_string(), + description: format!("Environment file '{}' found - ensure it's properly protected", env_file), + severity: SecuritySeverity::Medium, + category: SecurityCategory::SecretsExposure, + file_path: Some(env_path), + line_number: None, + column_number: None, + evidence: None, + remediation: vec![ + "Ensure environment files are in .gitignore".to_string(), + "Use .env.example files for documentation".to_string(), + "Never commit actual environment files to version control".to_string(), + ], + references: vec![ + "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }); + } + } + + Ok(findings) + } + + /// Calculate confidence score based on context indicators + fn calculate_context_confidence(&self, content: &str, indicators: &[String]) -> f32 { + let total_indicators = indicators.len() as f32; + if total_indicators == 0.0 { + return 0.5; // Neutral confidence + } + + let found_indicators = indicators.iter() + .filter(|indicator| content.to_lowercase().contains(&indicator.to_lowercase())) + .count() as f32; + + found_indicators / total_indicators + } + + /// Adjust severity based on context confidence + fn adjust_severity_by_context(&self, base_severity: SecuritySeverity, confidence: f32) -> SecuritySeverity { + match base_severity { + SecuritySeverity::Critical => base_severity, // Keep critical as-is + SecuritySeverity::High => { + if confidence < 0.3 { + 
SecuritySeverity::Medium + } else { + base_severity + } + } + SecuritySeverity::Medium => { + if confidence > 0.7 { + SecuritySeverity::High + } else if confidence < 0.3 { + SecuritySeverity::Low + } else { + base_severity + } + } + _ => base_severity, + } + } + + /// Check if a variable name appears sensitive + fn is_sensitive_var_name(&self, var_name: &str) -> bool { + let sensitive_keywords = [ + "SECRET", "KEY", "TOKEN", "PASSWORD", "PASS", "AUTH", "API", + "PRIVATE", "CREDENTIAL", "CERT", "SSL", "TLS", "OAUTH", + "CLIENT_SECRET", "ACCESS_TOKEN", "REFRESH_TOKEN", + ]; + + let var_upper = var_name.to_uppercase(); + sensitive_keywords.iter().any(|keyword| var_upper.contains(keyword)) + } + + /// Determine if a JavaScript file is likely server-side or client-side + fn is_server_side_file(&self, file_path: &Path, content: &str) -> bool { + // Check file path indicators + let path_str = file_path.to_string_lossy().to_lowercase(); + let server_path_indicators = [ + "/server/", "/backend/", "/api/", "/routes/", "/controllers/", + "/middleware/", "/models/", "/services/", "/utils/", "/lib/", + "server.js", "server.ts", "index.js", "index.ts", "app.js", "app.ts", + "/pages/api/", "/app/api/", // Next.js API routes + "server-side", "backend", "node_modules", // Clear server indicators + ]; + + let client_path_indicators = [ + "/client/", "/frontend/", "/public/", "/static/", "/assets/", + "/components/", "/views/", "/pages/", "/src/components/", + "client.js", "client.ts", "main.js", "main.ts", "app.tsx", "index.html", + ]; + + // Strong server-side path indicators + if server_path_indicators.iter().any(|indicator| path_str.contains(indicator)) { + return true; + } + + // Strong client-side path indicators + if client_path_indicators.iter().any(|indicator| path_str.contains(indicator)) { + return false; + } + + // Check content for server-side indicators + let server_content_indicators = [ + "require(", "module.exports", "exports.", "__dirname", "__filename", + 
"process.env", "process.exit", "process.argv", "fs.readFile", "fs.writeFile", + "http.createServer", "express(", "app.listen", "app.use", "app.get", "app.post", + "import express", "import fs", "import path", "import http", "import https", + "cors(", "bodyParser", "middleware", "mongoose.connect", "sequelize", + "jwt.sign", "bcrypt", "crypto.createHash", "nodemailer", "socket.io", + "console.log", // While not exclusive, very common in server code + ]; + + let client_content_indicators = [ + "document.", "window.", "navigator.", "localStorage", "sessionStorage", + "addEventListener", "querySelector", "getElementById", "fetch(", + "XMLHttpRequest", "React.", "ReactDOM", "useState", "useEffect", + "Vue.", "Angular", "svelte", "alert(", "confirm(", "prompt(", + "location.href", "history.push", "router.push", "browser", + ]; + + let server_matches = server_content_indicators.iter() + .filter(|&indicator| content.contains(indicator)) + .count(); + + let client_matches = client_content_indicators.iter() + .filter(|&indicator| content.contains(indicator)) + .count(); + + // If we have server indicators and no clear client indicators, assume server-side + if server_matches > 0 && client_matches == 0 { + return true; + } + + // If we have client indicators and no server indicators, assume client-side + if client_matches > 0 && server_matches == 0 { + return false; + } + + // If mixed or unclear, use a heuristic + if server_matches > client_matches { + return true; + } + + // Default to client-side for mixed/unclear files (safer for security) + false + } + + /// Generate JavaScript-specific remediation advice + fn generate_js_remediation(&self, pattern_id: &str) -> Vec { + match pattern_id { + id if id.contains("firebase") => vec![ + "Move Firebase configuration to environment variables".to_string(), + "Use Firebase App Check for additional security".to_string(), + "Implement proper Firebase security rules".to_string(), + ], + id if id.contains("stripe") => vec![ + "Use 
environment variables for Stripe keys".to_string(), + "Ensure you're using publishable keys in client-side code".to_string(), + "Keep secret keys on the server side only".to_string(), + ], + id if id.contains("bearer") => vec![ + "Never hardcode bearer tokens in client-side code".to_string(), + "Use secure token storage mechanisms".to_string(), + "Implement token refresh flows".to_string(), + ], + _ => vec![ + "Move secrets to environment variables".to_string(), + "Use server-side API routes for sensitive operations".to_string(), + "Implement proper secret management practices".to_string(), + ], + } + } + + /// Enhance a security finding with gitignore risk assessment + fn enhance_finding_with_gitignore_status( + &self, + finding: &mut SecurityFinding, + gitignore_status: &super::gitignore::GitIgnoreStatus, + ) { + // Adjust severity based on gitignore risk + finding.severity = match gitignore_status.risk_level { + GitIgnoreRisk::Tracked => SecuritySeverity::Critical, // Always critical if tracked + GitIgnoreRisk::Exposed => { + // Upgrade severity if exposed + match &finding.severity { + SecuritySeverity::Medium => SecuritySeverity::High, + SecuritySeverity::Low => SecuritySeverity::Medium, + other => other.clone(), + } + } + GitIgnoreRisk::Protected => { + // Downgrade slightly if protected + match &finding.severity { + SecuritySeverity::Critical => SecuritySeverity::High, + SecuritySeverity::High => SecuritySeverity::Medium, + other => other.clone(), + } + } + GitIgnoreRisk::Safe => finding.severity.clone(), + }; + + // Add gitignore context to description + finding.description.push_str(&format!(" (GitIgnore: {})", gitignore_status.description())); + + // Add gitignore-specific remediation + let gitignore_action = gitignore_status.recommended_action(); + if gitignore_action != "No action needed" { + finding.remediation.insert(0, format!("πŸ”’ GitIgnore: {}", gitignore_action)); + } + + // Add git history warning for tracked files + if gitignore_status.risk_level 
== GitIgnoreRisk::Tracked { + finding.remediation.insert(1, "⚠️ CRITICAL: Remove this file from git history using git-filter-branch or BFG Repo-Cleaner".to_string()); + finding.remediation.insert(2, "πŸ”‘ Rotate any exposed secrets immediately".to_string()); + } + } + + /// Analyze configuration files with gitignore awareness + fn analyze_config_files_with_gitignore( + &self, + project_root: &Path, + gitignore_analyzer: &mut GitIgnoreAnalyzer, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Check package.json with gitignore assessment + let package_json = project_root.join("package.json"); + if package_json.exists() { + let gitignore_status = gitignore_analyzer.analyze_file(&package_json); + let mut package_findings = self.analyze_package_json(&package_json)?; + + // Enhance findings with gitignore context + for finding in &mut package_findings { + self.enhance_finding_with_gitignore_status(finding, &gitignore_status); + } + + findings.extend(package_findings); + } + + // Check other common config files + let config_files = [ + "tsconfig.json", + "vite.config.js", + "vite.config.ts", + "next.config.js", + "next.config.ts", + "nuxt.config.js", + "nuxt.config.ts", + // Note: .env.example is now excluded as it's a template file + ]; + + for config_file in &config_files { + // Skip template/example files + if self.is_template_file(config_file) { + debug!("Skipping template config file: {}", config_file); + continue; + } + + let config_path = project_root.join(config_file); + if config_path.exists() { + let gitignore_status = gitignore_analyzer.analyze_file(&config_path); + + // Only analyze if file contains potential secrets or is not properly protected + if gitignore_status.should_be_ignored || !gitignore_status.is_ignored { + if let Ok(content) = fs::read_to_string(&config_path) { + // Basic secret pattern check for config files + if self.contains_potential_secrets(&content) { + let mut finding = SecurityFinding { + id: 
format!("config-file-{}", config_file.replace('.', "-")), + title: "Potential Secrets in Configuration File".to_string(), + description: format!("Configuration file '{}' may contain secrets", config_file), + severity: SecuritySeverity::Medium, + category: SecurityCategory::SecretsExposure, + file_path: Some(config_path.clone()), + line_number: None, + column_number: None, + evidence: None, + remediation: vec![ + "Review configuration file for hardcoded secrets".to_string(), + "Use environment variables for sensitive configuration".to_string(), + ], + references: vec![], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }; + + self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); + findings.push(finding); + } + } + } + } + } + + Ok(findings) + } + + /// Check if a file is a template/example file that should be excluded from security alerts + fn is_template_file(&self, file_name: &str) -> bool { + let template_indicators = [ + "sample", "example", "template", "template.env", "env.template", + "sample.env", "env.sample", "example.env", "env.example", + "examples", "samples", "templates", "demo", "test", + ".env.sample", ".env.example", ".env.template", ".env.demo", ".env.test" + ]; + + let file_name_lower = file_name.to_lowercase(); + + // Check for exact matches or contains patterns + template_indicators.iter().any(|indicator| { + file_name_lower == *indicator || + file_name_lower.contains(indicator) || + file_name_lower.ends_with(indicator) + }) + } + + /// Analyze environment files with comprehensive gitignore risk assessment + fn analyze_env_files_with_gitignore( + &self, + project_root: &Path, + gitignore_analyzer: &mut GitIgnoreAnalyzer, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Get all potential environment files using gitignore analyzer + let env_files = gitignore_analyzer.get_files_to_analyze(&[]) + .map_err(|e| SecurityError::Io(e))? 
+ .into_iter() + .filter(|file| { + if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) { + // Exclude template/example files from security alerts + if self.is_template_file(file_name) { + debug!("Skipping template file: {}", file_name); + return false; + } + + file_name.starts_with(".env") || + file_name.contains("credentials") || + file_name.contains("secrets") || + file_name.contains("config") || + file_name.ends_with(".key") || + file_name.ends_with(".pem") + } else { + false + } + }) + .collect::>(); + + for env_file in env_files { + let gitignore_status = gitignore_analyzer.analyze_file(&env_file); + let relative_path = env_file.strip_prefix(project_root) + .unwrap_or(&env_file); + + // Create finding based on gitignore risk assessment + let (severity, title, description) = match gitignore_status.risk_level { + GitIgnoreRisk::Tracked => ( + SecuritySeverity::Critical, + "Secret File Tracked by Git".to_string(), + format!("Secret file '{}' is tracked by git and may expose credentials in version history", relative_path.display()), + ), + GitIgnoreRisk::Exposed => ( + SecuritySeverity::High, + "Secret File Not in GitIgnore".to_string(), + format!("Secret file '{}' exists but is not protected by .gitignore", relative_path.display()), + ), + GitIgnoreRisk::Protected => ( + SecuritySeverity::Info, + "Secret File Properly Protected".to_string(), + format!("Secret file '{}' is properly ignored but detected for verification", relative_path.display()), + ), + GitIgnoreRisk::Safe => continue, // Skip files that appear safe + }; + + let mut finding = SecurityFinding { + id: format!("env-file-{}", relative_path.to_string_lossy().replace('/', "-").replace('.', "-")), + title, + description, + severity, + category: SecurityCategory::SecretsExposure, + file_path: Some(env_file.clone()), + line_number: None, + column_number: None, + evidence: None, + remediation: vec![ + "Ensure sensitive files are in .gitignore".to_string(), + "Use .env.example files for 
documentation".to_string(), + "Never commit actual environment files to version control".to_string(), + ], + references: vec![ + "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }; + + self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); + findings.push(finding); + } + + Ok(findings) + } + + /// Check if content contains potential secrets (basic patterns) + fn contains_potential_secrets(&self, content: &str) -> bool { + let secret_indicators = [ + "sk_", "pk_live_", "eyJ", "AKIA", "-----BEGIN", + "client_secret", "api_key", "access_token", + "private_key", "secret_key", "bearer", + ]; + + let content_lower = content.to_lowercase(); + secret_indicators.iter().any(|indicator| content_lower.contains(&indicator.to_lowercase())) + } +} + +impl SecurityReport { + /// Create a security report from a list of findings + pub fn from_findings(findings: Vec) -> Self { + let total_findings = findings.len(); + let mut findings_by_severity = HashMap::new(); + let mut findings_by_category = HashMap::new(); + + for finding in &findings { + *findings_by_severity.entry(finding.severity.clone()).or_insert(0) += 1; + *findings_by_category.entry(finding.category.clone()).or_insert(0) += 1; + } + + // Calculate overall score (simple implementation) + let score_penalty = findings.iter().map(|f| match f.severity { + SecuritySeverity::Critical => 25.0, + SecuritySeverity::High => 15.0, + SecuritySeverity::Medium => 8.0, + SecuritySeverity::Low => 3.0, + SecuritySeverity::Info => 1.0, + }).sum::(); + + let overall_score = (100.0 - score_penalty).max(0.0); + + // Determine risk level + let risk_level = if findings.iter().any(|f| f.severity == SecuritySeverity::Critical) { + SecuritySeverity::Critical + } else if findings.iter().any(|f| f.severity == SecuritySeverity::High) { + SecuritySeverity::High + } else if findings.iter().any(|f| f.severity 
== SecuritySeverity::Medium) { + SecuritySeverity::Medium + } else if !findings.is_empty() { + SecuritySeverity::Low + } else { + SecuritySeverity::Info + }; + + Self { + analyzed_at: chrono::Utc::now(), + overall_score, + risk_level, + total_findings, + findings_by_severity, + findings_by_category, + findings, + recommendations: vec![ + "Review all detected secrets and move them to environment variables".to_string(), + "Implement proper secret management practices".to_string(), + "Use framework-specific environment variable patterns correctly".to_string(), + ], + compliance_status: HashMap::new(), + } + } +} \ No newline at end of file diff --git a/src/analyzer/security/mod.rs b/src/analyzer/security/mod.rs new file mode 100644 index 00000000..d56cbab6 --- /dev/null +++ b/src/analyzer/security/mod.rs @@ -0,0 +1,77 @@ +//! # Security Analysis Module +//! +//! Modular security analysis with language-specific analyzers for better threat detection. +//! +//! This module provides a layered approach to security analysis: +//! - Core security patterns (generic) +//! - Language-specific analyzers (JS/TS, Python, etc.) +//! - Framework-specific detection +//! 
- Context-aware severity assessment + +use std::path::Path; +use thiserror::Error; + +pub mod core; +pub mod javascript; +pub mod patterns; +pub mod config; +pub mod gitignore; + +pub use core::{SecurityAnalyzer, SecurityReport, SecurityFinding, SecuritySeverity, SecurityCategory}; +pub use javascript::JavaScriptSecurityAnalyzer; +pub use patterns::SecretPatternManager; +pub use config::SecurityAnalysisConfig; +pub use gitignore::{GitIgnoreAnalyzer, GitIgnoreStatus, GitIgnoreRisk}; + +/// Modular security analyzer that delegates to language-specific analyzers +pub struct ModularSecurityAnalyzer { + javascript_analyzer: JavaScriptSecurityAnalyzer, + // TODO: Add other language analyzers + // python_analyzer: PythonSecurityAnalyzer, + // rust_analyzer: RustSecurityAnalyzer, +} + +impl ModularSecurityAnalyzer { + pub fn new() -> Result { + Ok(Self { + javascript_analyzer: JavaScriptSecurityAnalyzer::new()?, + }) + } + + pub fn with_config(config: SecurityAnalysisConfig) -> Result { + Ok(Self { + javascript_analyzer: JavaScriptSecurityAnalyzer::with_config(config.clone())?, + }) + } + + /// Analyze a project with appropriate language-specific analyzers + pub fn analyze_project(&mut self, project_root: &Path, languages: &[crate::analyzer::DetectedLanguage]) -> Result { + let mut all_findings = Vec::new(); + + // Analyze JavaScript/TypeScript files + if languages.iter().any(|lang| matches!(lang.name.as_str(), "JavaScript" | "TypeScript" | "JSX" | "TSX")) { + let js_report = self.javascript_analyzer.analyze_project(project_root)?; + all_findings.extend(js_report.findings); + } + + // TODO: Add other language analyzers based on detected languages + + // Combine results into a comprehensive report + Ok(SecurityReport::from_findings(all_findings)) + } +} + +#[derive(Debug, Error)] +pub enum SecurityError { + #[error("Security analysis failed: {0}")] + AnalysisFailed(String), + + #[error("Pattern compilation error: {0}")] + PatternError(#[from] regex::Error), + + #[error("IO 
error: {0}")] + Io(#[from] std::io::Error), + + #[error("JavaScript security analysis error: {0}")] + JavaScriptError(String), +} \ No newline at end of file diff --git a/src/analyzer/security/patterns.rs b/src/analyzer/security/patterns.rs new file mode 100644 index 00000000..8a00258a --- /dev/null +++ b/src/analyzer/security/patterns.rs @@ -0,0 +1,377 @@ +//! # Security Pattern Management +//! +//! Centralized management of security patterns for different tools and services. + +use std::collections::HashMap; +use regex::Regex; + +use super::{SecuritySeverity, SecurityCategory}; + +/// Manager for organizing security patterns by tool/service +pub struct SecretPatternManager { + patterns_by_tool: HashMap>, + generic_patterns: Vec, +} + +/// Tool-specific pattern (e.g., Firebase, Stripe, etc.) +#[derive(Debug, Clone)] +pub struct ToolPattern { + pub tool_name: String, + pub pattern_type: String, // e.g., "api_key", "config_object", "token" + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub public_safe: bool, // Whether this type of key is safe to expose publicly + pub context_keywords: Vec, // Keywords that increase confidence + pub false_positive_keywords: Vec, // Keywords that suggest false positive +} + +/// Generic patterns that apply across tools +#[derive(Debug, Clone)] +pub struct GenericPattern { + pub id: String, + pub name: String, + pub pattern: Regex, + pub severity: SecuritySeverity, + pub category: SecurityCategory, + pub description: String, +} + +impl SecretPatternManager { + pub fn new() -> Result { + let patterns_by_tool = Self::initialize_tool_patterns()?; + let generic_patterns = Self::initialize_generic_patterns()?; + + Ok(Self { + patterns_by_tool, + generic_patterns, + }) + } + + /// Initialize patterns for specific tools/services + fn initialize_tool_patterns() -> Result>, regex::Error> { + let mut patterns = HashMap::new(); + + // Firebase patterns + patterns.insert("firebase".to_string(), vec![ + 
ToolPattern { + tool_name: "Firebase".to_string(), + pattern_type: "api_key".to_string(), + pattern: Regex::new(r#"(?i)(?:firebase.*)?apiKey\s*[:=]\s*["']([A-Za-z0-9_-]{39})["']"#)?, + severity: SecuritySeverity::Medium, // Firebase API keys are safe to expose + description: "Firebase API key (safe to expose publicly)".to_string(), + public_safe: true, + context_keywords: vec!["firebase".to_string(), "initializeApp".to_string(), "getApps".to_string()], + false_positive_keywords: vec!["example".to_string(), "placeholder".to_string(), "your-api-key".to_string()], + }, + ToolPattern { + tool_name: "Firebase".to_string(), + pattern_type: "service_account".to_string(), + pattern: Regex::new(r#"(?i)(?:type|client_email|private_key).*firebase.*service_account"#)?, + severity: SecuritySeverity::Critical, + description: "Firebase service account credentials (CRITICAL - never expose)".to_string(), + public_safe: false, + context_keywords: vec!["service_account".to_string(), "private_key".to_string(), "client_email".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // Stripe patterns + patterns.insert("stripe".to_string(), vec![ + ToolPattern { + tool_name: "Stripe".to_string(), + pattern_type: "publishable_key".to_string(), + pattern: Regex::new(r#"pk_(?:test_|live_)[a-zA-Z0-9]{24,}"#)?, + severity: SecuritySeverity::Low, // Publishable keys are meant to be public + description: "Stripe publishable key (safe for client-side use)".to_string(), + public_safe: true, + context_keywords: vec!["stripe".to_string(), "publishable".to_string()], + false_positive_keywords: vec![], + }, + ToolPattern { + tool_name: "Stripe".to_string(), + pattern_type: "secret_key".to_string(), + pattern: Regex::new(r#"sk_(?:test_|live_)[a-zA-Z0-9]{24,}"#)?, + severity: SecuritySeverity::Critical, + description: "Stripe secret key (CRITICAL - server-side only)".to_string(), + public_safe: false, + context_keywords: vec!["stripe".to_string(), "secret".to_string()], + 
false_positive_keywords: vec![], + }, + ToolPattern { + tool_name: "Stripe".to_string(), + pattern_type: "webhook_secret".to_string(), + pattern: Regex::new(r#"whsec_[a-zA-Z0-9]{32,}"#)?, + severity: SecuritySeverity::High, + description: "Stripe webhook endpoint secret".to_string(), + public_safe: false, + context_keywords: vec!["webhook".to_string(), "endpoint".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // Supabase patterns + patterns.insert("supabase".to_string(), vec![ + ToolPattern { + tool_name: "Supabase".to_string(), + pattern_type: "anon_key".to_string(), + pattern: Regex::new(r#"(?i)supabase.*anon.*["\']eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+["\']"#)?, + severity: SecuritySeverity::Medium, // Anon keys are meant for client-side + description: "Supabase anonymous key (safe for client-side use with RLS)".to_string(), + public_safe: true, + context_keywords: vec!["supabase".to_string(), "anon".to_string(), "createClient".to_string()], + false_positive_keywords: vec!["example".to_string(), "placeholder".to_string()], + }, + ToolPattern { + tool_name: "Supabase".to_string(), + pattern_type: "service_role_key".to_string(), + pattern: Regex::new(r#"(?i)supabase.*service.*role.*["\']eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+["\']"#)?, + severity: SecuritySeverity::Critical, + description: "Supabase service role key (CRITICAL - server-side only)".to_string(), + public_safe: false, + context_keywords: vec!["service".to_string(), "role".to_string(), "bypass".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // Clerk patterns + patterns.insert("clerk".to_string(), vec![ + ToolPattern { + tool_name: "Clerk".to_string(), + pattern_type: "publishable_key".to_string(), + pattern: Regex::new(r#"pk_test_[a-zA-Z0-9_-]{60,}|pk_live_[a-zA-Z0-9_-]{60,}"#)?, + severity: SecuritySeverity::Low, + description: "Clerk publishable key (safe for client-side use)".to_string(), + public_safe: true, + context_keywords: 
vec!["clerk".to_string(), "publishable".to_string()], + false_positive_keywords: vec![], + }, + ToolPattern { + tool_name: "Clerk".to_string(), + pattern_type: "secret_key".to_string(), + pattern: Regex::new(r#"sk_test_[a-zA-Z0-9_-]{60,}|sk_live_[a-zA-Z0-9_-]{60,}"#)?, + severity: SecuritySeverity::Critical, + description: "Clerk secret key (CRITICAL - server-side only)".to_string(), + public_safe: false, + context_keywords: vec!["clerk".to_string(), "secret".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // Auth0 patterns + patterns.insert("auth0".to_string(), vec![ + ToolPattern { + tool_name: "Auth0".to_string(), + pattern_type: "domain".to_string(), + pattern: Regex::new(r#"[a-zA-Z0-9-]+\.auth0\.com"#)?, + severity: SecuritySeverity::Low, + description: "Auth0 domain (safe to expose)".to_string(), + public_safe: true, + context_keywords: vec!["auth0".to_string(), "domain".to_string()], + false_positive_keywords: vec!["example".to_string(), "your-domain".to_string()], + }, + ToolPattern { + tool_name: "Auth0".to_string(), + pattern_type: "client_id".to_string(), + pattern: Regex::new(r#"(?i)(?:client_?id|clientId)\s*[:=]\s*["']([a-zA-Z0-9]{32})["']"#)?, + severity: SecuritySeverity::Low, + description: "Auth0 client ID (safe for client-side use)".to_string(), + public_safe: true, + context_keywords: vec!["auth0".to_string(), "client".to_string()], + false_positive_keywords: vec![], + }, + ToolPattern { + tool_name: "Auth0".to_string(), + pattern_type: "client_secret".to_string(), + pattern: Regex::new(r#"(?i)(?:client_?secret|clientSecret)\s*[:=]\s*["']([a-zA-Z0-9_-]{64})["']"#)?, + severity: SecuritySeverity::Critical, + description: "Auth0 client secret (CRITICAL - server-side only)".to_string(), + public_safe: false, + context_keywords: vec!["auth0".to_string(), "secret".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // AWS patterns + patterns.insert("aws".to_string(), vec![ + ToolPattern { + tool_name: "AWS".to_string(), + 
pattern_type: "access_key".to_string(), + pattern: Regex::new(r#"AKIA[0-9A-Z]{16}"#)?, + severity: SecuritySeverity::Critical, + description: "AWS access key ID (CRITICAL)".to_string(), + public_safe: false, + context_keywords: vec!["aws".to_string(), "access".to_string(), "key".to_string()], + false_positive_keywords: vec![], + }, + ToolPattern { + tool_name: "AWS".to_string(), + pattern_type: "secret_key".to_string(), + pattern: Regex::new(r#"(?i)(?:aws[_-]?secret|secret[_-]?access[_-]?key)\s*[:=]\s*["']([A-Za-z0-9/+=]{40})["']"#)?, + severity: SecuritySeverity::Critical, + description: "AWS secret access key (CRITICAL)".to_string(), + public_safe: false, + context_keywords: vec!["aws".to_string(), "secret".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // OpenAI patterns + patterns.insert("openai".to_string(), vec![ + ToolPattern { + tool_name: "OpenAI".to_string(), + pattern_type: "api_key".to_string(), + pattern: Regex::new(r#"sk-[A-Za-z0-9]{48}"#)?, + severity: SecuritySeverity::High, + description: "OpenAI API key".to_string(), + public_safe: false, + context_keywords: vec!["openai".to_string(), "gpt".to_string(), "api".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // Vercel patterns + patterns.insert("vercel".to_string(), vec![ + ToolPattern { + tool_name: "Vercel".to_string(), + pattern_type: "token".to_string(), + pattern: Regex::new(r#"(?i)vercel.*token.*["\'][a-zA-Z0-9]{24,}["\']"#)?, + severity: SecuritySeverity::High, + description: "Vercel deployment token".to_string(), + public_safe: false, + context_keywords: vec!["vercel".to_string(), "deploy".to_string()], + false_positive_keywords: vec![], + }, + ]); + + // Netlify patterns + patterns.insert("netlify".to_string(), vec![ + ToolPattern { + tool_name: "Netlify".to_string(), + pattern_type: "access_token".to_string(), + pattern: Regex::new(r#"(?i)netlify.*token.*["\'][a-zA-Z0-9_-]{40,}["\']"#)?, + severity: SecuritySeverity::High, + description: "Netlify access 
token".to_string(), + public_safe: false, + context_keywords: vec!["netlify".to_string(), "deploy".to_string()], + false_positive_keywords: vec![], + }, + ]); + + Ok(patterns) + } + + /// Initialize generic patterns that apply across tools + fn initialize_generic_patterns() -> Result, regex::Error> { + let patterns = vec![ + GenericPattern { + id: "bearer-token".to_string(), + name: "Bearer Token".to_string(), + pattern: Regex::new(r#"(?i)(?:authorization|bearer)\s*[:=]\s*["'](?:bearer\s+)?([A-Za-z0-9_-]{20,})["']"#)?, + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "Bearer token in authorization header".to_string(), + }, + GenericPattern { + id: "jwt-token".to_string(), + name: "JWT Token".to_string(), + pattern: Regex::new(r#"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"#)?, + severity: SecuritySeverity::Medium, + category: SecurityCategory::SecretsExposure, + description: "JSON Web Token detected".to_string(), + }, + GenericPattern { + id: "database-url".to_string(), + name: "Database Connection URL".to_string(), + pattern: Regex::new(r#"(?i)(?:mongodb|postgres|mysql)://[^"'\s]+:[^"'\s]+@[^"'\s]+"#)?, + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "Database connection string with credentials".to_string(), + }, + GenericPattern { + id: "private-key".to_string(), + name: "Private Key".to_string(), + pattern: Regex::new(r#"-----BEGIN (?:RSA |OPENSSH |PGP )?PRIVATE KEY-----"#)?, + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "Private key detected".to_string(), + }, + GenericPattern { + id: "generic-api-key".to_string(), + name: "Generic API Key".to_string(), + pattern: Regex::new(r#"(?i)(?:api[_-]?key|apikey)\s*[:=]\s*["']([A-Za-z0-9_-]{20,})["']"#)?, + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + description: "Generic API key pattern".to_string(), + }, + ]; + + 
Ok(patterns) + } + + /// Get patterns for a specific tool + pub fn get_tool_patterns(&self, tool: &str) -> Option<&Vec> { + self.patterns_by_tool.get(tool) + } + + /// Get all generic patterns + pub fn get_generic_patterns(&self) -> &Vec { + &self.generic_patterns + } + + /// Get all supported tools + pub fn get_supported_tools(&self) -> Vec { + self.patterns_by_tool.keys().cloned().collect() + } + + /// Get patterns for JavaScript/TypeScript frameworks + pub fn get_js_framework_patterns(&self) -> Vec<&ToolPattern> { + let js_tools = ["firebase", "stripe", "supabase", "clerk", "auth0", "vercel", "netlify"]; + js_tools.iter() + .filter_map(|tool| self.patterns_by_tool.get(*tool)) + .flat_map(|patterns| patterns.iter()) + .collect() + } +} + +impl Default for SecretPatternManager { + fn default() -> Self { + Self::new().expect("Failed to initialize security patterns") + } +} + +impl ToolPattern { + /// Check if this pattern should be treated as a high-confidence match given the context + pub fn assess_confidence(&self, file_content: &str, line_content: &str) -> f32 { + let mut confidence: f32 = 0.5; // Base confidence + + // Increase confidence for context keywords + for keyword in &self.context_keywords { + if file_content.to_lowercase().contains(&keyword.to_lowercase()) { + confidence += 0.2; + } + } + + // Decrease confidence for false positive indicators + for indicator in &self.false_positive_keywords { + if line_content.to_lowercase().contains(&indicator.to_lowercase()) { + confidence -= 0.3; + } + } + + confidence.clamp(0.0, 1.0) + } + + /// Get severity adjusted for public safety + pub fn effective_severity(&self) -> SecuritySeverity { + if self.public_safe { + match &self.severity { + SecuritySeverity::Critical => SecuritySeverity::Medium, + SecuritySeverity::High => SecuritySeverity::Low, + other => other.clone(), + } + } else { + self.severity.clone() + } + } +} \ No newline at end of file diff --git a/src/analyzer/security_analyzer.rs 
b/src/analyzer/security_analyzer.rs index ce3929b1..39bbed7f 100644 --- a/src/analyzer/security_analyzer.rs +++ b/src/analyzer/security_analyzer.rs @@ -15,12 +15,16 @@ use std::process::Command; use regex::Regex; use serde::{Deserialize, Serialize}; use thiserror::Error; -use log::{info, debug, warn}; +use log::{info, debug}; use rayon::prelude::*; use indicatif::{ProgressBar, ProgressStyle, MultiProgress}; use crate::analyzer::{ProjectAnalysis, DetectedLanguage, DetectedTechnology, EnvVar}; use crate::analyzer::dependency_parser::Language; +use crate::analyzer::security::{ + ModularSecurityAnalyzer, SecurityAnalysisConfig as NewSecurityAnalysisConfig +}; +use crate::analyzer::security::core::SecurityReport as NewSecurityReport; #[derive(Debug, Error)] pub enum SecurityError { @@ -84,6 +88,7 @@ pub struct SecurityFinding { pub category: SecurityCategory, pub file_path: Option, pub line_number: Option, + pub column_number: Option, pub evidence: Option, pub remediation: Vec, pub references: Vec, @@ -209,6 +214,38 @@ impl SecurityAnalyzer { }) } + /// Enhanced security analysis using the new modular approach + pub fn analyze_security_enhanced(&mut self, analysis: &ProjectAnalysis) -> Result { + let start_time = Instant::now(); + info!("Starting enhanced modular security analysis"); + + // Create modular analyzer with JavaScript-specific configuration if JS/TS is detected + let has_javascript = analysis.languages.iter() + .any(|lang| matches!(lang.name.as_str(), "JavaScript" | "TypeScript" | "JSX" | "TSX")); + + let config = if has_javascript { + NewSecurityAnalysisConfig::for_javascript() + } else { + NewSecurityAnalysisConfig::default() + }; + + let mut modular_analyzer = ModularSecurityAnalyzer::with_config(config) + .map_err(|e| SecurityError::AnalysisFailed(e.to_string()))?; + + // Use the modular analyzer + let enhanced_report = modular_analyzer.analyze_project(&analysis.project_root, &analysis.languages) + .map_err(|e| 
SecurityError::AnalysisFailed(e.to_string()))?; + + // For now, just return the enhanced report as-is + // TODO: Combine with existing findings if needed + + // Build final report + let duration = start_time.elapsed().as_secs_f32(); + info!("Enhanced security analysis completed in {:.1}s - Found {} issues", duration, enhanced_report.total_findings); + + Ok(enhanced_report) + } + /// Perform comprehensive security analysis with appropriate progress for verbosity level pub fn analyze_security(&mut self, analysis: &ProjectAnalysis) -> Result { let start_time = Instant::now(); @@ -599,9 +636,9 @@ impl SecurityAnalyzer { ("Stripe API Key", r"sk_live_[0-9a-zA-Z]{24}", SecuritySeverity::Critical), ("Stripe Publishable Key", r"pk_live_[0-9a-zA-Z]{24}", SecuritySeverity::Medium), - // Database URLs and Passwords - ("Database URL", r#"(?i)(database_url|db_url)["']?\s*[:=]\s*["']?[^"'\s]+"#, SecuritySeverity::High), - ("Password", r#"(?i)(password|passwd|pwd)["']?\s*[:=]\s*["']?[^"']{6,}"#, SecuritySeverity::Medium), + // Database URLs and Passwords - Enhanced to avoid env var false positives + ("Hardcoded Database URL", r#"(?i)(database_url|db_url)["']?\s*[:=]\s*["']?(postgresql|mysql|mongodb)://[^"'\s]+"#, SecuritySeverity::Critical), + ("Hardcoded Password", r#"(?i)(password|passwd|pwd)["']?\s*[:=]\s*["']?[^"']{6,}["']?"#, SecuritySeverity::High), ("JWT Secret", r#"(?i)(jwt[_-]?secret)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{20,}"#, SecuritySeverity::High), // Private Keys @@ -613,9 +650,14 @@ impl SecurityAnalyzer { ("Google Cloud Service Account", r#""type":\s*"service_account""#, SecuritySeverity::High), ("Azure Storage Key", r"DefaultEndpointsProtocol=https;AccountName=", SecuritySeverity::High), - // Generic patterns last (lowest priority) - ("Generic API Key", r#"(?i)(api[_-]?key|apikey)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-]{20,}"#, SecuritySeverity::High), - ("Generic Secret", r#"(?i)(secret|token|key)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{24,}"#, SecuritySeverity::Medium), 
+ // Client-side exposed environment variables (these are the real security issues) + ("Client-side Exposed Secret", r#"(?i)(REACT_APP_|NEXT_PUBLIC_|VUE_APP_|VITE_)[A-Z_]*(?:SECRET|KEY|TOKEN|PASSWORD|API)[A-Z_]*["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{10,}"#, SecuritySeverity::High), + + // Hardcoded API keys (not environment variable access) + ("Hardcoded API Key", r#"(?i)(api[_-]?key|apikey)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-]{20,}["']?"#, SecuritySeverity::High), + + // Generic secrets that are clearly hardcoded (not env var access) + ("Hardcoded Secret", r#"(?i)(secret|token)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{24,}["']?"#, SecuritySeverity::Medium), ]; patterns.into_iter() @@ -1035,6 +1077,7 @@ impl SecurityAnalyzer { category: SecurityCategory::SecretsExposure, file_path: None, line_number: None, + column_number: None, evidence: Some(format!("Variable: {} = {:?}", env_var.name, env_var.default_value)), remediation: vec![ "Remove default value for sensitive environment variables".to_string(), @@ -1042,7 +1085,7 @@ impl SecurityAnalyzer { "Document required environment variables separately".to_string(), ], references: vec![ - "https://owasp.org/www-project-top-ten/2017/A3_2017-Sensitive_Data_Exposure".to_string(), + "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(), ], cwe_id: Some("CWE-200".to_string()), compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], @@ -1195,12 +1238,18 @@ impl SecurityAnalyzer { for (line_num, line) in content.lines().enumerate() { for pattern in &self.secret_patterns { - if let Some(_captures) = pattern.pattern.find(line) { + if let Some(match_) = pattern.pattern.find(line) { // Skip if it looks like a placeholder or example if self.is_likely_placeholder(line) { continue; } + // NEW: Skip if this is legitimate environment variable usage + if self.is_legitimate_env_var_usage(line, file_path) { + debug!("Skipping legitimate env var usage: {}", line.trim()); + continue; + } + // 
Determine severity based on git status let (severity, additional_remediation) = self.determine_secret_severity(file_path, pattern.severity.clone()); @@ -1241,6 +1290,7 @@ impl SecurityAnalyzer { category: SecurityCategory::SecretsExposure, file_path: Some(file_path.to_path_buf()), line_number: Some(line_num + 1), + column_number: Some(match_.start() + 1), // 1-indexed column position evidence: Some(format!("Line: {}", line.trim())), remediation, references: vec![ @@ -1256,6 +1306,180 @@ impl SecurityAnalyzer { Ok(findings) } + /// Check if a line represents legitimate environment variable usage (not a security issue) + fn is_legitimate_env_var_usage(&self, line: &str, file_path: &Path) -> bool { + let line_trimmed = line.trim(); + + // Check for common legitimate environment variable access patterns + let legitimate_env_patterns = [ + // Node.js/JavaScript patterns + r"process\.env\.[A-Z_]+", + r#"process\.env\[['""][A-Z_]+['"]\]"#, + + // Vite/Modern JS patterns + r"import\.meta\.env\.[A-Z_]+", + r#"import\.meta\.env\[['""][A-Z_]+['"]\]"#, + + // Python patterns + r#"os\.environ\.get\(["'][A-Z_]+["']\)"#, + r#"os\.environ\[["'][A-Z_]+["']\]"#, + r#"getenv\(["'][A-Z_]+["']\)"#, + + // Rust patterns + r#"env::var\("([A-Z_]+)"\)"#, + r#"std::env::var\("([A-Z_]+)"\)"#, + + // Go patterns + r#"os\.Getenv\(["'][A-Z_]+["']\)"#, + + // Java patterns + r#"System\.getenv\(["'][A-Z_]+["']\)"#, + + // Shell/Docker patterns + r"\$\{?[A-Z_]+\}?", + r"ENV [A-Z_]+", + + // Config file access patterns + r"config\.[a-z_]+\.[A-Z_]+", + r"settings\.[A-Z_]+", + r"env\.[A-Z_]+", + ]; + + // Check if the line matches any legitimate environment variable access pattern + for pattern_str in &legitimate_env_patterns { + if let Ok(pattern) = Regex::new(pattern_str) { + if pattern.is_match(line_trimmed) { + // Additional context checks to make sure this is really legitimate + + // Check if this is in a server-side context (not client-side) + if self.is_server_side_file(file_path) { + return 
true; + } + + // Check if this is NOT a client-side exposed variable + if !self.is_client_side_exposed_env_var(line_trimmed) { + return true; + } + } + } + } + + // Check for assignment vs access - assignments might be setting up environment variables + // which could be legitimate in certain contexts + if self.is_env_var_assignment_context(line_trimmed, file_path) { + return true; + } + + false + } + + /// Check if a file is likely server-side code (vs client-side) + fn is_server_side_file(&self, file_path: &Path) -> bool { + let path_str = file_path.to_string_lossy().to_lowercase(); + let file_name = file_path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or("") + .to_lowercase(); + + // Server-side indicators + let server_indicators = [ + "/server/", "/api/", "/backend/", "/src/app/api/", "/pages/api/", + "/routes/", "/controllers/", "/middleware/", "/models/", + "/lib/", "/utils/", "/services/", "/config/", + "server.js", "index.js", "app.js", "main.js", + ".env", "dockerfile", "docker-compose", + ]; + + // Client-side indicators (these should return false) + let client_indicators = [ + "/public/", "/static/", "/assets/", "/components/", "/pages/", + "/src/components/", "/src/pages/", "/client/", "/frontend/", + "index.html", ".html", "/dist/", "/build/", + "dist/", "build/", "public/", "static/", "assets/", + ]; + + // If it's clearly client-side, return false + if client_indicators.iter().any(|indicator| path_str.contains(indicator)) { + return false; + } + + // If it has server-side indicators, return true + if server_indicators.iter().any(|indicator| + path_str.contains(indicator) || file_name.contains(indicator) + ) { + return true; + } + + // Default to true for ambiguous cases (be conservative about flagging env var usage) + true + } + + /// Check if an environment variable is exposed to client-side (security issue) + fn is_client_side_exposed_env_var(&self, line: &str) -> bool { + let client_prefixes = [ + "REACT_APP_", "NEXT_PUBLIC_", "VUE_APP_", 
"VITE_", + "GATSBY_", "PUBLIC_", "NUXT_PUBLIC_", + ]; + + client_prefixes.iter().any(|prefix| line.contains(prefix)) + } + + /// Check if this is a legitimate environment variable assignment context + fn is_env_var_assignment_context(&self, line: &str, file_path: &Path) -> bool { + let path_str = file_path.to_string_lossy().to_lowercase(); + let file_name = file_path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or("") + .to_lowercase(); + + // Only very specific configuration files where env var assignments are expected + // Be more restrictive to avoid false positives + let env_config_files = [ + ".env", + "docker-compose.yml", "docker-compose.yaml", + ".env.example", ".env.sample", ".env.template", + ".env.local", ".env.development", ".env.production", ".env.staging", + ]; + + // Check for exact filename matches for .env files (most common legitimate case) + if env_config_files.iter().any(|pattern| file_name == *pattern) { + return true; + } + + // Docker files are also legitimate for environment variable assignment + if file_name.starts_with("dockerfile") || file_name == "dockerfile" { + return true; + } + + // Shell scripts or CI/CD files + if file_name.ends_with(".sh") || + file_name.ends_with(".bash") || + path_str.contains(".github/workflows/") || + path_str.contains(".gitlab-ci") { + return true; + } + + // Lines that are clearly setting up environment variables for child processes + // Only match very specific patterns that indicate legitimate environment setup + let setup_patterns = [ + r"export [A-Z_]+=", // Shell export + r"ENV [A-Z_]+=", // Dockerfile ENV + r"^\s*environment:\s*$", // Docker Compose environment section header + r"^\s*env:\s*$", // Kubernetes env section header + r"process\.env\.[A-Z_]+ =", // Explicitly setting process.env (rare but legitimate) + ]; + + for pattern_str in &setup_patterns { + if let Ok(pattern) = Regex::new(pattern_str) { + if pattern.is_match(line) { + return true; + } + } + } + + false + } + fn 
is_likely_placeholder(&self, line: &str) -> bool { let placeholder_indicators = [ "example", "placeholder", "your_", "insert_", "replace_", @@ -1559,8 +1783,6 @@ impl SecurityAnalyzer { recommendations.push("Address critical security findings immediately".to_string()); } - // Add more generic recommendations... - recommendations } } @@ -1584,6 +1806,7 @@ mod tests { category: SecurityCategory::SecretsExposure, file_path: None, line_number: None, + column_number: None, evidence: None, remediation: vec![], references: vec![], @@ -1732,4 +1955,149 @@ mod tests { assert!(!analyzer.matches_common_env_patterns("config.json")); assert!(!analyzer.matches_common_env_patterns("package.json")); } + + #[test] + fn test_legitimate_env_var_usage() { + let analyzer = SecurityAnalyzer::new().unwrap(); + + // Create mock file paths + let server_file = Path::new("src/server/config.js"); + let client_file = Path::new("src/components/MyComponent.js"); + + // Test legitimate server-side environment variable usage (should NOT be flagged) + assert!(analyzer.is_legitimate_env_var_usage("const apiKey = process.env.RESEND_API_KEY;", server_file)); + assert!(analyzer.is_legitimate_env_var_usage("const dbUrl = process.env.DATABASE_URL;", server_file)); + assert!(analyzer.is_legitimate_env_var_usage("api_key = os.environ.get('API_KEY')", server_file)); + assert!(analyzer.is_legitimate_env_var_usage("let secret = env::var(\"JWT_SECRET\")?;", server_file)); + + // Test client-side environment variable usage (legitimate if not exposed) + assert!(analyzer.is_legitimate_env_var_usage("const apiUrl = process.env.API_URL;", client_file)); + + // Test client-side exposed variables (these ARE client-side exposed - security issues) + assert!(analyzer.is_client_side_exposed_env_var("process.env.REACT_APP_SECRET_KEY")); + assert!(analyzer.is_client_side_exposed_env_var("process.env.NEXT_PUBLIC_API_SECRET")); + + // Test hardcoded secrets (should NOT be legitimate) + 
assert!(!analyzer.is_legitimate_env_var_usage("const apiKey = 'sk-1234567890abcdef';", server_file)); + assert!(!analyzer.is_legitimate_env_var_usage("password = 'hardcoded123'", server_file)); + } + + #[test] + fn test_server_vs_client_side_detection() { + let analyzer = SecurityAnalyzer::new().unwrap(); + + // Server-side files + assert!(analyzer.is_server_side_file(Path::new("src/server/app.js"))); + assert!(analyzer.is_server_side_file(Path::new("src/api/users.js"))); + assert!(analyzer.is_server_side_file(Path::new("pages/api/auth.js"))); + assert!(analyzer.is_server_side_file(Path::new("src/lib/database.js"))); + assert!(analyzer.is_server_side_file(Path::new(".env"))); + assert!(analyzer.is_server_side_file(Path::new("server.js"))); + + // Client-side files + assert!(!analyzer.is_server_side_file(Path::new("src/components/Button.jsx"))); + assert!(!analyzer.is_server_side_file(Path::new("public/index.html"))); + assert!(!analyzer.is_server_side_file(Path::new("src/pages/home.js"))); + assert!(!analyzer.is_server_side_file(Path::new("dist/bundle.js"))); + + // Ambiguous files (default to server-side for conservative detection) + assert!(analyzer.is_server_side_file(Path::new("src/utils/helper.js"))); + assert!(analyzer.is_server_side_file(Path::new("config/settings.js"))); + } + + #[test] + fn test_client_side_exposed_env_vars() { + let analyzer = SecurityAnalyzer::new().unwrap(); + + // These should be flagged as client-side exposed (security issues) + assert!(analyzer.is_client_side_exposed_env_var("process.env.REACT_APP_SECRET")); + assert!(analyzer.is_client_side_exposed_env_var("import.meta.env.VITE_API_KEY")); + assert!(analyzer.is_client_side_exposed_env_var("process.env.NEXT_PUBLIC_SECRET")); + assert!(analyzer.is_client_side_exposed_env_var("process.env.VUE_APP_TOKEN")); + + // These should NOT be flagged as client-side exposed + assert!(!analyzer.is_client_side_exposed_env_var("process.env.DATABASE_URL")); + 
assert!(!analyzer.is_client_side_exposed_env_var("process.env.JWT_SECRET")); + assert!(!analyzer.is_client_side_exposed_env_var("process.env.API_KEY")); + } + + #[test] + fn test_env_var_assignment_context() { + let analyzer = SecurityAnalyzer::new().unwrap(); + + // Configuration files where assignments are legitimate + assert!(analyzer.is_env_var_assignment_context("API_KEY=sk-test123", Path::new(".env"))); + assert!(analyzer.is_env_var_assignment_context("DATABASE_URL=postgres://", Path::new("docker-compose.yml"))); + assert!(analyzer.is_env_var_assignment_context("export SECRET=test", Path::new("setup.sh"))); + + // Regular source files where assignments might be suspicious + assert!(!analyzer.is_env_var_assignment_context("const secret = 'hardcoded'", Path::new("src/app.js"))); + } + + #[test] + fn test_enhanced_secret_patterns() { + let analyzer = SecurityAnalyzer::new().unwrap(); + + // Test that hardcoded secrets are still detected + let hardcoded_patterns = [ + "apikey = 'sk-1234567890abcdef1234567890abcdef12345678'", + "const secret = 'my-super-secret-token-12345678901234567890'", + "password = 'hardcoded123456'", + ]; + + for pattern in &hardcoded_patterns { + let has_secret = analyzer.secret_patterns.iter().any(|sp| sp.pattern.is_match(pattern)); + assert!(has_secret, "Should detect hardcoded secret in: {}", pattern); + } + + // Test that legitimate env var usage is NOT detected as secret + let legitimate_patterns = [ + "const apiKey = process.env.API_KEY;", + "const dbUrl = process.env.DATABASE_URL || 'fallback';", + "api_key = os.environ.get('API_KEY')", + "let secret = env::var(\"JWT_SECRET\")?;", + ]; + + for pattern in &legitimate_patterns { + // These should either not match any secret pattern, or be filtered out by context detection + let matches_old_generic_pattern = pattern.to_lowercase().contains("secret") || + pattern.to_lowercase().contains("key"); + + // Our new patterns should be more specific and not match env var access + let 
matches_new_patterns = analyzer.secret_patterns.iter() + .filter(|sp| sp.name.contains("Hardcoded")) + .any(|sp| sp.pattern.is_match(pattern)); + + assert!(!matches_new_patterns, "Should NOT detect legitimate env var usage as hardcoded secret: {}", pattern); + } + } + + #[test] + fn test_context_aware_false_positive_reduction() { + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let server_file = temp_dir.path().join("src/server/config.js"); + + // Create directory structure + std::fs::create_dir_all(server_file.parent().unwrap()).unwrap(); + + // Write a file with legitimate environment variable usage + let content = r#" +const config = { + apiKey: process.env.RESEND_API_KEY, + databaseUrl: process.env.DATABASE_URL, + jwtSecret: process.env.JWT_SECRET, + port: process.env.PORT || 3000 +}; +"#; + + std::fs::write(&server_file, content).unwrap(); + + let analyzer = SecurityAnalyzer::new().unwrap(); + let findings = analyzer.analyze_file_for_secrets(&server_file).unwrap(); + + // Should have zero findings because all are legitimate env var usage + assert_eq!(findings.len(), 0, "Should not flag legitimate environment variable usage as security issues"); + } } diff --git a/src/main.rs b/src/main.rs index aa6c9ce3..272b700d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,8 @@ use syncable_cli::{ analyzer::{ self, vulnerability_checker::VulnerabilitySeverity, DetectedTechnology, TechnologyCategory, LibraryType, analyze_monorepo, ProjectCategory, + // Import new modular security types + security::SecuritySeverity, }, cli::{Cli, Commands, ToolsCommand, OutputFormat, SeverityThreshold, DisplayFormat}, config, @@ -1157,10 +1159,10 @@ fn handle_security( // Step 8: Generating Report progress.set_message("Generating security report..."); progress.set_position(100); - let security_report = security_analyzer.analyze_security(&project_analysis) + let security_report = security_analyzer.analyze_security_enhanced(&project_analysis) .map_err(|e| 
syncable_cli::error::IaCGeneratorError::Analysis( syncable_cli::error::AnalysisError::InvalidStructure( - format!("Security analysis failed: {}", e) + format!("Enhanced security analysis failed: {}", e) ) ))?; @@ -1169,119 +1171,244 @@ fn handle_security( // Format output in the beautiful style requested let output_string = match format { OutputFormat::Table => { + use syncable_cli::analyzer::display::BoxDrawer; + use colored::*; + let mut output = String::new(); - // Beautiful Header - output.push_str("\nπŸ›‘οΈ Security Analysis Results\n"); - output.push_str(&format!("{}\n", "=".repeat(60))); + // Header + output.push_str(&format!("\n{}\n", "πŸ›‘οΈ Security Analysis Results".bright_white().bold())); + output.push_str(&format!("{}\n", "═".repeat(80).bright_blue())); - // Security Summary - output.push_str("\nπŸ“Š SECURITY SUMMARY\n"); - output.push_str(&format!("βœ… Security Score: {:.1}/100\n", security_report.overall_score)); + // Security Score Box + let mut score_box = BoxDrawer::new("Security Summary"); + score_box.add_line("Overall Score:", &format!("{:.0}/100", security_report.overall_score).bright_yellow(), true); + score_box.add_line("Risk Level:", &format!("{:?}", security_report.risk_level).color(match security_report.risk_level { + SecuritySeverity::Critical => "bright_red", + SecuritySeverity::High => "red", + SecuritySeverity::Medium => "yellow", + SecuritySeverity::Low => "green", + SecuritySeverity::Info => "blue", + }), true); + score_box.add_line("Total Findings:", &security_report.total_findings.to_string().cyan(), true); - // Analysis Scope - only show what's actually implemented - output.push_str("\nπŸ” ANALYSIS SCOPE\n"); + // Analysis scope let config_files = security_report.findings.iter() .filter_map(|f| f.file_path.as_ref()) .collect::>() .len(); - let code_files = security_report.findings.iter() - .filter(|f| matches!(f.category, syncable_cli::analyzer::SecurityCategory::CodeSecurityPattern)) - .filter_map(|f| f.file_path.as_ref()) - 
.collect::>() - .len(); - - output.push_str(&format!("βœ… Secret Detection ({} files analyzed)\n", config_files.max(1))); - output.push_str(&format!("βœ… Environment Variables ({} variables checked)\n", project_analysis.environment_variables.len())); - if code_files > 0 { - output.push_str(&format!("βœ… Code Security Patterns ({} files analyzed)\n", code_files)); - } else { - output.push_str("ℹ️ Code Security Patterns (no applicable files found)\n"); - } - output.push_str("🚧 Infrastructure Security (coming soon)\n"); - output.push_str("🚧 Compliance Frameworks (coming soon)\n"); - - // Findings by Category - output.push_str("\n🎯 FINDINGS BY CATEGORY\n"); + score_box.add_line("Files Analyzed:", &config_files.max(1).to_string().green(), true); + score_box.add_line("Env Variables:", &project_analysis.environment_variables.len().to_string().green(), true); - // Count findings by our categories - let mut secret_findings = 0; - let mut code_findings = 0; - let mut infrastructure_findings = 0; - let mut compliance_findings = 0; + output.push_str(&format!("\n{}\n", score_box.draw())); - for finding in &security_report.findings { - match finding.category { - syncable_cli::analyzer::SecurityCategory::SecretsExposure => secret_findings += 1, - syncable_cli::analyzer::SecurityCategory::CodeSecurityPattern | - syncable_cli::analyzer::SecurityCategory::AuthenticationSecurity | - syncable_cli::analyzer::SecurityCategory::DataProtection => code_findings += 1, - syncable_cli::analyzer::SecurityCategory::InfrastructureSecurity | - syncable_cli::analyzer::SecurityCategory::NetworkSecurity | - syncable_cli::analyzer::SecurityCategory::InsecureConfiguration => infrastructure_findings += 1, - syncable_cli::analyzer::SecurityCategory::Compliance => compliance_findings += 1, - } - } - - output.push_str(&format!("πŸ” Secret Detection: {} findings\n", secret_findings)); - output.push_str(&format!("πŸ”’ Code Security: {} finding{}\n", code_findings, if code_findings == 1 { "" } else { "s" 
})); - output.push_str(&format!("πŸ—οΈ Infrastructure: {} findings\n", infrastructure_findings)); - output.push_str(&format!("πŸ“‹ Compliance: {} finding{}\n", compliance_findings, if compliance_findings == 1 { "" } else { "s" })); - - // Recommendations - if !security_report.recommendations.is_empty() { - output.push_str("\nπŸ’‘ RECOMMENDATIONS\n"); - for recommendation in &security_report.recommendations { - output.push_str(&format!("β€’ {}\n", recommendation)); - } - } else { - // Add some default recommendations based on the analysis - output.push_str("\nπŸ’‘ RECOMMENDATIONS\n"); - output.push_str("β€’ Enable dependency vulnerability scanning in CI/CD\n"); - output.push_str("β€’ Consider implementing rate limiting for API endpoints\n"); - output.push_str("β€’ Review environment variable security practices\n"); - } - - // If there are actual findings, show them in detail + // Findings in Card Format if !security_report.findings.is_empty() { - output.push_str(&format!("\n{}\n", "=".repeat(60))); - output.push_str("πŸ” DETAILED FINDINGS\n\n"); + // Get terminal width to determine optimal display width + let terminal_width = if let Some((width, _)) = term_size::dimensions() { + width.saturating_sub(10) // Leave some margin + } else { + 120 // Fallback width + }; + + let mut findings_box = BoxDrawer::new("Security Findings"); for (i, finding) in security_report.findings.iter().enumerate() { - let severity_emoji = match finding.severity { - syncable_cli::analyzer::SecuritySeverity::Critical => "🚨", - syncable_cli::analyzer::SecuritySeverity::High => "⚠️ ", - syncable_cli::analyzer::SecuritySeverity::Medium => "⚑", - syncable_cli::analyzer::SecuritySeverity::Low => "ℹ️ ", - syncable_cli::analyzer::SecuritySeverity::Info => "πŸ’‘", + let severity_color = match finding.severity { + SecuritySeverity::Critical => "bright_red", + SecuritySeverity::High => "red", + SecuritySeverity::Medium => "yellow", + SecuritySeverity::Low => "blue", + SecuritySeverity::Info => 
"green", }; - output.push_str(&format!("{}. {} [{}] {}\n", i + 1, severity_emoji, finding.id, finding.title)); - output.push_str(&format!(" πŸ“ {}\n", finding.description)); - - if let Some(file) = &finding.file_path { - output.push_str(&format!(" πŸ“ File: {}", file.display())); - if let Some(line) = finding.line_number { - output.push_str(&format!(" (line {})", line)); + // Extract relative file path from project root + let file_display = if let Some(file_path) = &finding.file_path { + // Canonicalize both paths to handle symlinks and resolve properly + let canonical_file = file_path.canonicalize().unwrap_or_else(|_| file_path.clone()); + let canonical_project = path.canonicalize().unwrap_or_else(|_| path.clone()); + + // Try to calculate relative path from project root + if let Ok(relative_path) = canonical_file.strip_prefix(&canonical_project) { + format!("./{}", relative_path.display()) + } else { + // Fallback: try to find any common ancestor or use absolute path + let path_str = file_path.to_string_lossy(); + if path_str.starts_with('/') { + // For absolute paths, try to extract meaningful relative portion + if let Some(project_name) = path.file_name().and_then(|n| n.to_str()) { + if let Some(project_idx) = path_str.rfind(project_name) { + let relative_part = &path_str[project_idx + project_name.len()..]; + if relative_part.starts_with('/') { + format!(".{}", relative_part) + } else if !relative_part.is_empty() { + format!("./{}", relative_part) + } else { + format!("./{}", file_path.file_name().unwrap_or_default().to_string_lossy()) + } + } else { + // Last resort: show the full path + path_str.to_string() + } + } else { + // Show full path if we can't determine project context + path_str.to_string() + } + } else { + // For relative paths that don't strip properly, use as-is + if path_str.starts_with("./") { + path_str.to_string() + } else { + format!("./{}", path_str) + } + } } - output.push_str("\n"); - } + } else { + "N/A".to_string() + }; - if let 
Some(evidence) = &finding.evidence { - output.push_str(&format!(" πŸ” Evidence: {}\n", evidence)); - } + // Parse gitignore status from description (clean colored text) + let gitignore_status = if finding.description.contains("is tracked by git") { + "TRACKED".bright_red().bold() + } else if finding.description.contains("is NOT in .gitignore") { + "EXPOSED".yellow().bold() + } else if finding.description.contains("is protected") || finding.description.contains("properly ignored") { + "SAFE".bright_green().bold() + } else if finding.description.contains("appears safe") { + "OK".bright_blue().bold() + } else { + "UNKNOWN".dimmed() + }; + + // Determine finding type + let finding_type = if finding.title.contains("Environment Variable") { + "ENV VAR" + } else if finding.title.contains("Secret File") { + "SECRET FILE" + } else if finding.title.contains("API Key") || finding.title.contains("Stripe") || finding.title.contains("Firebase") { + "API KEY" + } else if finding.title.contains("Configuration") { + "CONFIG" + } else { + "OTHER" + }; + + // Format position as "line:column" or just "line" if no column info + let position_display = match (finding.line_number, finding.column_number) { + (Some(line), Some(col)) => format!("{}:{}", line, col), + (Some(line), None) => format!("{}", line), + _ => "β€”".to_string(), + }; + + // Card format: File path with intelligent display based on terminal width + let box_margin = 6; // Account for box borders and padding + let available_width = terminal_width.saturating_sub(box_margin); + let max_path_width = available_width.saturating_sub(20); // Leave space for numbering and spacing - if !finding.remediation.is_empty() { - output.push_str(" πŸ”§ Fix:\n"); - for remediation in &finding.remediation { - output.push_str(&format!(" β€’ {}\n", remediation)); + if file_display.len() + 3 <= max_path_width { + // Path fits on one line with numbering + findings_box.add_value_only(&format!("{}. 
{}", + format!("{}", i + 1).bright_white().bold(), + file_display.cyan().bold() + )); + } else if file_display.len() <= available_width.saturating_sub(4) { + // Path fits on its own line with indentation + findings_box.add_value_only(&format!("{}.", + format!("{}", i + 1).bright_white().bold() + )); + findings_box.add_value_only(&format!(" {}", + file_display.cyan().bold() + )); + } else { + // Path is extremely long - use smart wrapping + findings_box.add_value_only(&format!("{}.", + format!("{}", i + 1).bright_white().bold() + )); + + // Smart path wrapping - prefer breaking at directory separators + let wrap_width = available_width.saturating_sub(4); + let mut remaining = file_display.as_str(); + let mut first_line = true; + + while !remaining.is_empty() { + let prefix = if first_line { " " } else { " " }; + let line_width = wrap_width.saturating_sub(prefix.len()); + + if remaining.len() <= line_width { + // Last chunk fits entirely + findings_box.add_value_only(&format!("{}{}", + prefix, remaining.cyan().bold() + )); + break; + } else { + // Find a good break point (prefer directory separator) + let chunk = &remaining[..line_width]; + let break_point = chunk.rfind('/').unwrap_or(line_width.saturating_sub(1)); + + findings_box.add_value_only(&format!("{}{}", + prefix, chunk[..break_point].cyan().bold() + )); + remaining = &remaining[break_point..]; + if remaining.starts_with('/') { + remaining = &remaining[1..]; // Skip the separator + } + } + first_line = false; } } - output.push_str("\n"); + findings_box.add_value_only(&format!(" {} {} | {} {} | {} {} | {} {}", + "Type:".dimmed(), + finding_type.yellow(), + "Severity:".dimmed(), + format!("{:?}", finding.severity).color(severity_color).bold(), + "Position:".dimmed(), + position_display.bright_cyan(), + "Status:".dimmed(), + gitignore_status + )); + + // Add spacing between findings (except for the last one) + if i < security_report.findings.len() - 1 { + findings_box.add_value_only(""); + } } + + 
output.push_str(&format!("\n{}\n", findings_box.draw())); + + // GitIgnore Status Legend + let mut legend_box = BoxDrawer::new("Git Status Legend"); + legend_box.add_line(&"TRACKED:".bright_red().bold().to_string(), "File is tracked by git - CRITICAL RISK", false); + legend_box.add_line(&"EXPOSED:".yellow().bold().to_string(), "File contains secrets but not in .gitignore", false); + legend_box.add_line(&"SAFE:".bright_green().bold().to_string(), "File is properly ignored by .gitignore", false); + legend_box.add_line(&"OK:".bright_blue().bold().to_string(), "File appears safe for version control", false); + output.push_str(&format!("\n{}\n", legend_box.draw())); + } else { + let mut no_findings_box = BoxDrawer::new("Security Status"); + no_findings_box.add_value_only(&"βœ… No security issues detected".green()); + no_findings_box.add_value_only("πŸ’‘ Regular security scanning recommended"); + output.push_str(&format!("\n{}\n", no_findings_box.draw())); } + // Recommendations Box + let mut rec_box = BoxDrawer::new("Key Recommendations"); + if !security_report.recommendations.is_empty() { + for (i, rec) in security_report.recommendations.iter().take(5).enumerate() { + // Clean up recommendation text + let clean_rec = rec.replace("Add these patterns to your .gitignore:", "Add to .gitignore:"); + rec_box.add_value_only(&format!("{}. {}", i + 1, clean_rec)); + } + if security_report.recommendations.len() > 5 { + rec_box.add_value_only(&format!("... 
and {} more recommendations", + security_report.recommendations.len() - 5).dimmed()); + } + } else { + rec_box.add_value_only("βœ… No immediate security concerns detected"); + rec_box.add_value_only("πŸ’‘ Consider implementing dependency scanning"); + rec_box.add_value_only("πŸ’‘ Review environment variable security practices"); + } + output.push_str(&format!("\n{}\n", rec_box.draw())); + output } OutputFormat::Json => { @@ -1300,10 +1427,10 @@ fn handle_security( // Exit with error code if requested and findings exist if fail_on_findings && security_report.total_findings > 0 { let critical_count = security_report.findings_by_severity - .get(&syncable_cli::analyzer::SecuritySeverity::Critical) + .get(&SecuritySeverity::Critical) .unwrap_or(&0); let high_count = security_report.findings_by_severity - .get(&syncable_cli::analyzer::SecuritySeverity::High) + .get(&SecuritySeverity::High) .unwrap_or(&0); if *critical_count > 0 { @@ -1328,7 +1455,7 @@ async fn handle_tools(command: ToolsCommand) -> syncable_cli::Result<()> { match command { ToolsCommand::Status { format, languages } => { - let mut installer = ToolInstaller::new(); + let installer = ToolInstaller::new(); // Determine which languages to check let langs_to_check = if let Some(lang_names) = languages { @@ -1504,7 +1631,7 @@ async fn handle_tools(command: ToolsCommand) -> syncable_cli::Result<()> { } ToolsCommand::Verify { languages, verbose } => { - let mut installer = ToolInstaller::new(); + let installer = ToolInstaller::new(); // Determine which languages to verify let langs_to_verify = if let Some(lang_names) = languages { From 0b5e55e52533c00e90f3490fe7a2d62630961810 Mon Sep 17 00:00:00 2001 From: Alex Holmberg Date: Sun, 8 Jun 2025 21:37:27 +0200 Subject: [PATCH 4/6] feat added python security scanning catching generat exposure secrets similar to javascript version --- src/analyzer/security/core.rs | 4 + src/analyzer/security/mod.rs | 2 + src/analyzer/security/python.rs | 1423 
+++++++++++++++++++++++++++++++ 3 files changed, 1429 insertions(+) create mode 100644 src/analyzer/security/python.rs diff --git a/src/analyzer/security/core.rs b/src/analyzer/security/core.rs index edba639f..6f219669 100644 --- a/src/analyzer/security/core.rs +++ b/src/analyzer/security/core.rs @@ -35,6 +35,10 @@ pub enum SecurityCategory { NetworkSecurity, /// Compliance and regulatory requirements Compliance, + /// Code injection vulnerabilities (eval, exec, etc.) + CodeInjection, + /// Command injection vulnerabilities (subprocess, os.system, etc.) + CommandInjection, } /// A security finding with details and remediation diff --git a/src/analyzer/security/mod.rs b/src/analyzer/security/mod.rs index d56cbab6..e65719c5 100644 --- a/src/analyzer/security/mod.rs +++ b/src/analyzer/security/mod.rs @@ -13,12 +13,14 @@ use thiserror::Error; pub mod core; pub mod javascript; +pub mod python; pub mod patterns; pub mod config; pub mod gitignore; pub use core::{SecurityAnalyzer, SecurityReport, SecurityFinding, SecuritySeverity, SecurityCategory}; pub use javascript::JavaScriptSecurityAnalyzer; +pub use python::PythonSecurityAnalyzer; pub use patterns::SecretPatternManager; pub use config::SecurityAnalysisConfig; pub use gitignore::{GitIgnoreAnalyzer, GitIgnoreStatus, GitIgnoreRisk}; diff --git a/src/analyzer/security/python.rs b/src/analyzer/security/python.rs new file mode 100644 index 00000000..03c42ed8 --- /dev/null +++ b/src/analyzer/security/python.rs @@ -0,0 +1,1423 @@ +//! # Python Security Analyzer +//! +//! Specialized security analyzer for Python applications. +//! +//! This analyzer focuses on: +//! - Python web frameworks (Django, Flask, FastAPI, etc.) +//! - AI/ML services and tools (OpenAI, Anthropic, Hugging Face, etc.) +//! - Cloud services commonly used with Python (AWS, GCP, Azure) +//! - Database connections and ORMs (SQLAlchemy, Django ORM, etc.) +//! - Environment variable misuse in Python applications +//! 
- Common Python anti-patterns and secret exposure patterns +//! - Python package managers and dependency files + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::fs; +use regex::Regex; +use log::{debug, info, warn}; + +use super::{SecurityError, SecurityFinding, SecuritySeverity, SecurityCategory, SecurityReport, SecurityAnalysisConfig, GitIgnoreAnalyzer, GitIgnoreRisk}; + +/// Python-specific security analyzer +pub struct PythonSecurityAnalyzer { + config: SecurityAnalysisConfig, + python_patterns: Vec, + framework_patterns: HashMap>, + ai_ml_patterns: Vec, + cloud_patterns: Vec, + database_patterns: Vec, + env_var_patterns: Vec, + gitignore_analyzer: Option, +} + +/// Python-specific secret pattern +#[derive(Debug, Clone)] +pub struct PythonSecretPattern { + pub id: String, + pub name: String, + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub context_indicators: Vec, + pub false_positive_indicators: Vec, + pub remediation_hints: Vec, +} + +/// Framework-specific patterns for Python web frameworks +#[derive(Debug, Clone)] +pub struct FrameworkPattern { + pub framework: String, + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub file_extensions: Vec, +} + +/// AI/ML service patterns +#[derive(Debug, Clone)] +pub struct AiMlPattern { + pub service: String, + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub api_key_format: String, +} + +/// Cloud service patterns +#[derive(Debug, Clone)] +pub struct CloudPattern { + pub provider: String, + pub service: String, + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, +} + +/// Database connection patterns +#[derive(Debug, Clone)] +pub struct DatabasePattern { + pub database_type: String, + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, +} + +/// Environment variable patterns specific to Python +#[derive(Debug, Clone)] 
+pub struct EnvVarPattern { + pub pattern: Regex, + pub severity: SecuritySeverity, + pub description: String, + pub sensitive_prefixes: Vec, +} + +impl PythonSecurityAnalyzer { + pub fn new() -> Result { + Self::with_config(SecurityAnalysisConfig::default()) + } + + pub fn with_config(config: SecurityAnalysisConfig) -> Result { + let python_patterns = Self::initialize_python_patterns()?; + let framework_patterns = Self::initialize_framework_patterns()?; + let ai_ml_patterns = Self::initialize_ai_ml_patterns()?; + let cloud_patterns = Self::initialize_cloud_patterns()?; + let database_patterns = Self::initialize_database_patterns()?; + let env_var_patterns = Self::initialize_env_var_patterns()?; + + Ok(Self { + config, + python_patterns, + framework_patterns, + ai_ml_patterns, + cloud_patterns, + database_patterns, + env_var_patterns, + gitignore_analyzer: None, + }) + } + + /// Analyze a Python project for security vulnerabilities + pub fn analyze_project(&mut self, project_root: &Path) -> Result { + let mut findings = Vec::new(); + + // Initialize gitignore analyzer for comprehensive file protection assessment + let mut gitignore_analyzer = GitIgnoreAnalyzer::new(project_root) + .map_err(|e| SecurityError::AnalysisFailed(format!("Failed to initialize gitignore analyzer: {}", e)))?; + + info!("πŸ” Using gitignore-aware security analysis for Python project at {}", project_root.display()); + + // Get Python files using gitignore-aware collection + let python_extensions = ["py", "pyx", "pyi", "pyw"]; + let python_files = gitignore_analyzer.get_files_to_analyze(&python_extensions) + .map_err(|e| SecurityError::Io(e))? 
+ .into_iter() + .filter(|file| { + if let Some(ext) = file.extension().and_then(|e| e.to_str()) { + python_extensions.contains(&ext) + } else { + false + } + }) + .collect::>(); + + info!("Found {} Python files to analyze (gitignore-filtered)", python_files.len()); + + // Analyze each Python file with gitignore context + for file_path in &python_files { + let gitignore_status = gitignore_analyzer.analyze_file(file_path); + let mut file_findings = self.analyze_python_file(file_path)?; + + // Enhance findings with gitignore risk assessment + for finding in &mut file_findings { + self.enhance_finding_with_gitignore_status(finding, &gitignore_status); + } + + findings.extend(file_findings); + } + + // Analyze Python configuration files with gitignore awareness + findings.extend(self.analyze_config_files_with_gitignore(project_root, &mut gitignore_analyzer)?); + + // Comprehensive environment file analysis with gitignore risk assessment + findings.extend(self.analyze_env_files_with_gitignore(project_root, &mut gitignore_analyzer)?); + + // Analyze Python-specific dependency files + findings.extend(self.analyze_dependency_files_with_gitignore(project_root, &mut gitignore_analyzer)?); + + // Generate gitignore recommendations for any secret files found + let secret_files: Vec = findings.iter() + .filter_map(|f| f.file_path.as_ref()) + .cloned() + .collect(); + + let gitignore_recommendations = gitignore_analyzer.generate_gitignore_recommendations(&secret_files); + + // Create report with enhanced recommendations + let mut report = SecurityReport::from_findings(findings); + report.recommendations.extend(gitignore_recommendations); + + // Add Python-specific security recommendations + report.recommendations.extend(self.generate_python_security_recommendations()); + + Ok(report) + } + + /// Analyze a single Python file for security vulnerabilities + fn analyze_python_file(&self, file_path: &Path) -> Result, SecurityError> { + let content = fs::read_to_string(file_path)?; + 
let mut findings = Vec::new(); + + // Check against Python-specific patterns + for pattern in &self.python_patterns { + findings.extend(self.check_python_pattern_in_content(&content, pattern, file_path)?); + } + + // Check against AI/ML service patterns + for pattern in &self.ai_ml_patterns { + findings.extend(self.check_ai_ml_pattern_in_content(&content, pattern, file_path)?); + } + + // Check against cloud service patterns + for pattern in &self.cloud_patterns { + findings.extend(self.check_cloud_pattern_in_content(&content, pattern, file_path)?); + } + + // Check against database patterns + for pattern in &self.database_patterns { + findings.extend(self.check_database_pattern_in_content(&content, pattern, file_path)?); + } + + // Check framework-specific patterns based on file content + let detected_framework = self.detect_python_framework(&content); + if let Some(framework) = detected_framework { + if let Some(framework_patterns) = self.framework_patterns.get(&framework) { + for pattern in framework_patterns { + findings.extend(self.check_framework_pattern_in_content(&content, pattern, file_path)?); + } + } + } + + // Check environment variable usage + findings.extend(self.check_env_var_usage(&content, file_path)?); + + // Check for insecure Python practices + findings.extend(self.check_insecure_python_practices(&content, file_path)?); + + Ok(findings) + } + + /// Check a Python-specific pattern in file content + fn check_python_pattern_in_content( + &self, + content: &str, + pattern: &PythonSecretPattern, + file_path: &Path, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + if let Some(captures) = pattern.pattern.captures(line) { + // Check for false positive indicators + if pattern.false_positive_indicators.iter().any(|indicator| { + line.to_lowercase().contains(&indicator.to_lowercase()) + }) { + debug!("Skipping potential false positive in {}: {}", file_path.display(), line.trim()); 
+ continue; + } + + // Extract the secret value and position if captured + let (evidence, column_number) = if captures.len() > 1 { + if let Some(match_) = captures.get(1) { + (Some(self.mask_secret(match_.as_str())), Some(match_.start() + 1)) + } else { + (Some(line.trim().to_string()), None) + } + } else { + if let Some(match_) = captures.get(0) { + (Some(line.trim().to_string()), Some(match_.start() + 1)) + } else { + (Some(line.trim().to_string()), None) + } + }; + + // Check context for confidence scoring + let context_score = self.calculate_context_confidence(content, &pattern.context_indicators); + let adjusted_severity = self.adjust_severity_by_context(pattern.severity.clone(), context_score); + + findings.push(SecurityFinding { + id: format!("{}-{}", pattern.id, line_num), + title: format!("{} Detected", pattern.name), + description: format!("{} (Context confidence: {:.1})", pattern.description, context_score), + severity: adjusted_severity, + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number, + evidence, + remediation: pattern.remediation_hints.clone(), + references: vec![ + "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(), + "https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html".to_string(), + "https://docs.python.org/3/library/os.html#os.environ".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], + }); + } + } + + Ok(findings) + } + + /// Check AI/ML service patterns + fn check_ai_ml_pattern_in_content( + &self, + content: &str, + pattern: &AiMlPattern, + file_path: &Path, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + if let Some(captures) = pattern.pattern.captures(line) { + let evidence = if captures.len() > 1 { + captures.get(1).map(|m| 
self.mask_secret(m.as_str())) + } else { + Some(line.trim().to_string()) + }; + + let column_number = captures.get(0).map(|m| m.start() + 1); + + findings.push(SecurityFinding { + id: format!("ai-ml-{}-{}", pattern.service.to_lowercase().replace(" ", "-"), line_num), + title: format!("{} API Key Detected", pattern.service), + description: format!("{} (Expected format: {})", pattern.description, pattern.api_key_format), + severity: pattern.severity.clone(), + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number, + evidence, + remediation: vec![ + format!("Store {} API key in environment variables", pattern.service), + "Use a secrets management service for production".to_string(), + "Implement API key rotation policies".to_string(), + "Monitor API key usage for anomalies".to_string(), + ], + references: vec![ + "https://owasp.org/www-project-api-security/".to_string(), + format!("https://platform.openai.com/docs/quickstart/account-setup"), + ], + cwe_id: Some("CWE-798".to_string()), + compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], + }); + } + } + + Ok(findings) + } + + /// Check cloud service patterns + fn check_cloud_pattern_in_content( + &self, + content: &str, + pattern: &CloudPattern, + file_path: &Path, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + if let Some(captures) = pattern.pattern.captures(line) { + let evidence = if captures.len() > 1 { + captures.get(1).map(|m| self.mask_secret(m.as_str())) + } else { + Some(line.trim().to_string()) + }; + + let column_number = captures.get(0).map(|m| m.start() + 1); + + findings.push(SecurityFinding { + id: format!("cloud-{}-{}-{}", + pattern.provider.to_lowercase(), + pattern.service.to_lowercase().replace(" ", "-"), + line_num), + title: format!("{} {} Detected", pattern.provider, pattern.service), + description: 
pattern.description.clone(), + severity: pattern.severity.clone(), + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number, + evidence, + remediation: vec![ + format!("Use {} managed identity or role-based access", pattern.provider), + "Store credentials in secure key management service".to_string(), + "Implement credential rotation policies".to_string(), + "Use least-privilege access principles".to_string(), + ], + references: vec![ + "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(), + format!("https://docs.aws.amazon.com/security/"), + ], + cwe_id: Some("CWE-522".to_string()), + compliance_frameworks: vec!["SOC2".to_string(), "PCI-DSS".to_string()], + }); + } + } + + Ok(findings) + } + + /// Check database patterns + fn check_database_pattern_in_content( + &self, + content: &str, + pattern: &DatabasePattern, + file_path: &Path, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + if pattern.pattern.is_match(line) { + // Mask the connection string for evidence + let masked_line = self.mask_database_connection(line); + + findings.push(SecurityFinding { + id: format!("database-{}-{}", pattern.database_type.to_lowercase(), line_num), + title: format!("{} Connection String with Credentials", pattern.database_type), + description: pattern.description.clone(), + severity: pattern.severity.clone(), + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number: None, + evidence: Some(masked_line), + remediation: vec![ + "Use environment variables for database credentials".to_string(), + "Implement connection pooling with credential management".to_string(), + "Use database authentication mechanisms like IAM roles".to_string(), + "Consider using encrypted connection 
strings".to_string(), + ], + references: vec![ + "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(), + "https://cheatsheetseries.owasp.org/cheatsheets/Database_Security_Cheat_Sheet.html".to_string(), + ], + cwe_id: Some("CWE-798".to_string()), + compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string(), "PCI-DSS".to_string()], + }); + } + } + + Ok(findings) + } + + /// Check framework-specific patterns + fn check_framework_pattern_in_content( + &self, + content: &str, + pattern: &FrameworkPattern, + file_path: &Path, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + if let Some(captures) = pattern.pattern.captures(line) { + let evidence = if captures.len() > 1 { + captures.get(1).map(|m| self.mask_secret(m.as_str())) + } else { + Some(line.trim().to_string()) + }; + + findings.push(SecurityFinding { + id: format!("framework-{}-{}", pattern.framework.to_lowercase(), line_num), + title: format!("{} Security Issue", pattern.framework), + description: pattern.description.clone(), + severity: pattern.severity.clone(), + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number: None, + evidence, + remediation: self.generate_framework_remediation(&pattern.framework), + references: vec![ + format!("https://docs.djangoproject.com/en/stable/topics/security/"), + "https://owasp.org/www-project-top-ten/".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }); + } + } + + Ok(findings) + } + + /// Initialize Python-specific secret patterns + fn initialize_python_patterns() -> Result, SecurityError> { + let patterns = vec![ + // Django SECRET_KEY pattern + PythonSecretPattern { + id: "python-django-secret-key".to_string(), + name: "Django SECRET_KEY".to_string(), + pattern: 
Regex::new(r#"(?i)SECRET_KEY\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{40,})["']"#)?, + severity: SecuritySeverity::Critical, + description: "Django SECRET_KEY found in source code".to_string(), + context_indicators: vec!["django".to_string(), "settings".to_string(), "SECRET_KEY".to_string()], + false_positive_indicators: vec!["example".to_string(), "your-secret-key".to_string(), "fake".to_string()], + remediation_hints: vec![ + "Move SECRET_KEY to environment variables".to_string(), + "Use python-decouple or similar library".to_string(), + "Never commit SECRET_KEY to version control".to_string(), + ], + }, + + // Flask SECRET_KEY pattern + PythonSecretPattern { + id: "python-flask-secret-key".to_string(), + name: "Flask SECRET_KEY".to_string(), + pattern: Regex::new(r#"(?i)app\.secret_key\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?, + severity: SecuritySeverity::High, + description: "Flask SECRET_KEY hardcoded in application".to_string(), + context_indicators: vec!["flask".to_string(), "app".to_string(), "secret_key".to_string()], + false_positive_indicators: vec!["example".to_string(), "your-secret".to_string()], + remediation_hints: vec![ + "Use os.environ.get('SECRET_KEY')".to_string(), + "Store in environment variables".to_string(), + ], + }, + + // FastAPI JWT secret + PythonSecretPattern { + id: "python-fastapi-jwt-secret".to_string(), + name: "FastAPI JWT Secret".to_string(), + pattern: Regex::new(r#"(?i)(?:jwt_secret|jwt_key|secret_key)\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?, + severity: SecuritySeverity::High, + description: "FastAPI JWT secret hardcoded in source".to_string(), + context_indicators: vec!["fastapi".to_string(), "jwt".to_string(), "token".to_string()], + false_positive_indicators: vec!["example".to_string(), "test".to_string()], + remediation_hints: vec![ + "Use Pydantic Settings for configuration".to_string(), + "Store JWT secrets in environment variables".to_string(), + 
], + }, + + // Database connection strings + PythonSecretPattern { + id: "python-database-url".to_string(), + name: "Database Connection String".to_string(), + pattern: Regex::new(r#"(?i)(?:database_url|db_url|sqlalchemy_database_uri)\s*=\s*["'](?:postgresql|mysql|sqlite|mongodb)://[^"']*:[^"']*@[^"']+["']"#)?, + severity: SecuritySeverity::Critical, + description: "Database connection string with credentials detected".to_string(), + context_indicators: vec!["database".to_string(), "sqlalchemy".to_string(), "connect".to_string()], + false_positive_indicators: vec!["localhost".to_string(), "example.com".to_string(), "user:pass".to_string()], + remediation_hints: vec![ + "Use environment variables for database credentials".to_string(), + "Consider using connection pooling and secrets management".to_string(), + ], + }, + + // Generic API key pattern + PythonSecretPattern { + id: "python-api-key-assignment".to_string(), + name: "API Key Assignment".to_string(), + pattern: Regex::new(r#"(?i)(?:api_key|apikey|access_key|secret_key|private_key|auth_token|bearer_token)\s*=\s*["']([A-Za-z0-9_-]{20,})["']"#)?, + severity: SecuritySeverity::High, + description: "API key hardcoded in variable assignment".to_string(), + context_indicators: vec!["requests".to_string(), "api".to_string(), "client".to_string()], + false_positive_indicators: vec!["os.environ".to_string(), "config".to_string(), "settings".to_string()], + remediation_hints: vec![ + "Use environment variables or config files".to_string(), + "Consider using secrets management services".to_string(), + ], + }, + ]; + + Ok(patterns) + } + + /// Initialize AI/ML service patterns + fn initialize_ai_ml_patterns() -> Result, SecurityError> { + let patterns = vec![ + // OpenAI API keys + AiMlPattern { + service: "OpenAI".to_string(), + pattern: Regex::new(r#"(?i)(?:openai[_-]?api[_-]?key|openai[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "OpenAI API key 
detected".to_string(), + api_key_format: "sk-[32+ alphanumeric characters]".to_string(), + }, + + // OpenAI Organization ID + AiMlPattern { + service: "OpenAI Organization".to_string(), + pattern: Regex::new(r#"(?i)(?:openai[_-]?org[_-]?id|openai[_-]?organization)\s*[=:]\s*["']?(org-[A-Za-z0-9]{20,})["']?"#)?, + severity: SecuritySeverity::Medium, + description: "OpenAI organization ID detected".to_string(), + api_key_format: "org-[20+ alphanumeric characters]".to_string(), + }, + + // Anthropic Claude API keys + AiMlPattern { + service: "Anthropic Claude".to_string(), + pattern: Regex::new(r#"(?i)(?:anthropic[_-]?api[_-]?key|claude[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-ant-[A-Za-z0-9]{40,})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "Anthropic Claude API key detected".to_string(), + api_key_format: "sk-ant-[40+ alphanumeric characters]".to_string(), + }, + + // Hugging Face API tokens + AiMlPattern { + service: "Hugging Face".to_string(), + pattern: Regex::new(r#"(?i)(?:huggingface[_-]?api[_-]?key|huggingface[_-]?token|hf[_-]?token)\s*[=:]\s*["']?(hf_[A-Za-z0-9]{30,})["']?"#)?, + severity: SecuritySeverity::High, + description: "Hugging Face API token detected".to_string(), + api_key_format: "hf_[30+ alphanumeric characters]".to_string(), + }, + + // Google AI/Gemini API keys + AiMlPattern { + service: "Google AI/Gemini".to_string(), + pattern: Regex::new(r#"(?i)(?:google[_-]?ai[_-]?api[_-]?key|gemini[_-]?api[_-]?key)\s*[=:]\s*["']?(AIza[A-Za-z0-9_-]{35,})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "Google AI/Gemini API key detected".to_string(), + api_key_format: "AIza[35+ alphanumeric characters with underscores/dashes]".to_string(), + }, + + // Cohere API keys + AiMlPattern { + service: "Cohere".to_string(), + pattern: Regex::new(r#"(?i)(?:cohere[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?, + severity: SecuritySeverity::High, + description: "Cohere API key detected".to_string(), + api_key_format: "[40+ 
alphanumeric characters]".to_string(), + }, + + // Replicate API tokens + AiMlPattern { + service: "Replicate".to_string(), + pattern: Regex::new(r#"(?i)(?:replicate[_-]?api[_-]?token|replicate[_-]?token)\s*[=:]\s*["']?(r8_[A-Za-z0-9]{30,})["']?"#)?, + severity: SecuritySeverity::High, + description: "Replicate API token detected".to_string(), + api_key_format: "r8_[30+ alphanumeric characters]".to_string(), + }, + + // Stability AI API keys + AiMlPattern { + service: "Stability AI".to_string(), + pattern: Regex::new(r#"(?i)(?:stability[_-]?ai[_-]?api[_-]?key|stable[_-]?diffusion[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{40,})["']?"#)?, + severity: SecuritySeverity::High, + description: "Stability AI API key detected".to_string(), + api_key_format: "sk-[40+ alphanumeric characters]".to_string(), + }, + + // DeepSeek API keys + AiMlPattern { + service: "DeepSeek".to_string(), + pattern: Regex::new(r#"(?i)(?:deepseek[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?, + severity: SecuritySeverity::High, + description: "DeepSeek API key detected".to_string(), + api_key_format: "sk-[32+ alphanumeric characters]".to_string(), + }, + + // Mistral AI API keys + AiMlPattern { + service: "Mistral AI".to_string(), + pattern: Regex::new(r#"(?i)(?:mistral[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{32,})["']?"#)?, + severity: SecuritySeverity::High, + description: "Mistral AI API key detected".to_string(), + api_key_format: "[32+ alphanumeric characters]".to_string(), + }, + + // Together AI API keys + AiMlPattern { + service: "Together AI".to_string(), + pattern: Regex::new(r#"(?i)(?:together[_-]?ai[_-]?api[_-]?key|together[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?, + severity: SecuritySeverity::High, + description: "Together AI API key detected".to_string(), + api_key_format: "[40+ alphanumeric characters]".to_string(), + }, + + // Weights & Biases API keys + AiMlPattern { + service: "Weights & Biases".to_string(), + pattern: 
Regex::new(r#"(?i)(?:wandb[_-]?api[_-]?key|wandb[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?, + severity: SecuritySeverity::Medium, + description: "Weights & Biases API key detected".to_string(), + api_key_format: "[40+ alphanumeric characters]".to_string(), + }, + + // MLflow tracking server credentials + AiMlPattern { + service: "MLflow".to_string(), + pattern: Regex::new(r#"(?i)(?:mlflow[_-]?tracking[_-]?username|mlflow[_-]?tracking[_-]?password)\s*[=:]\s*["']?([A-Za-z0-9]{8,})["']?"#)?, + severity: SecuritySeverity::Medium, + description: "MLflow tracking credentials detected".to_string(), + api_key_format: "[8+ alphanumeric characters]".to_string(), + }, + ]; + + Ok(patterns) + } + + /// Initialize cloud service patterns + fn initialize_cloud_patterns() -> Result, SecurityError> { + let patterns = vec![ + // AWS Access Keys + CloudPattern { + provider: "AWS".to_string(), + service: "IAM Access Key".to_string(), + pattern: Regex::new(r#"(?i)(?:aws[_-]?access[_-]?key[_-]?id)\s*[=:]\s*["']?(AKIA[A-Z0-9]{16})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "AWS Access Key ID detected".to_string(), + }, + + // AWS Secret Access Keys + CloudPattern { + provider: "AWS".to_string(), + service: "IAM Secret Key".to_string(), + pattern: Regex::new(r#"(?i)(?:aws[_-]?secret[_-]?access[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{40})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "AWS Secret Access Key detected".to_string(), + }, + + // AWS Session Tokens + CloudPattern { + provider: "AWS".to_string(), + service: "Session Token".to_string(), + pattern: Regex::new(r#"(?i)(?:aws[_-]?session[_-]?token)\s*[=:]\s*["']?([A-Za-z0-9/+=]{100,})["']?"#)?, + severity: SecuritySeverity::High, + description: "AWS Session Token detected".to_string(), + }, + + // Google Cloud Service Account Keys + CloudPattern { + provider: "GCP".to_string(), + service: "Service Account Key".to_string(), + pattern: 
Regex::new(r#"(?i)(?:google[_-]?application[_-]?credentials|gcp[_-]?service[_-]?account)\s*[=:]\s*["']?([A-Za-z0-9/+=]{50,})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "Google Cloud Service Account key detected".to_string(), + }, + + // Azure Storage Account Keys + CloudPattern { + provider: "Azure".to_string(), + service: "Storage Account Key".to_string(), + pattern: Regex::new(r#"(?i)(?:azure[_-]?storage[_-]?account[_-]?key|azure[_-]?storage[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{88})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "Azure Storage Account key detected".to_string(), + }, + + // Azure Service Principal + CloudPattern { + provider: "Azure".to_string(), + service: "Service Principal".to_string(), + pattern: Regex::new(r#"(?i)(?:azure[_-]?client[_-]?secret|azure[_-]?tenant[_-]?id)\s*[=:]\s*["']?([A-Za-z0-9-]{32,})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "Azure Service Principal credentials detected".to_string(), + }, + + // DigitalOcean API tokens + CloudPattern { + provider: "DigitalOcean".to_string(), + service: "API Token".to_string(), + pattern: Regex::new(r#"(?i)(?:digitalocean[_-]?api[_-]?token|do[_-]?api[_-]?token)\s*[=:]\s*["']?(dop_v1_[A-Za-z0-9]{64})["']?"#)?, + severity: SecuritySeverity::High, + description: "DigitalOcean API token detected".to_string(), + }, + + // Heroku API keys + CloudPattern { + provider: "Heroku".to_string(), + service: "API Key".to_string(), + pattern: Regex::new(r#"(?i)(?:heroku[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9-]{36})["']?"#)?, + severity: SecuritySeverity::High, + description: "Heroku API key detected".to_string(), + }, + + // Stripe API keys + CloudPattern { + provider: "Stripe".to_string(), + service: "API Key".to_string(), + pattern: Regex::new(r#"(?i)(?:stripe[_-]?api[_-]?key|stripe[_-]?secret[_-]?key)\s*[=:]\s*["']?(sk_live_[A-Za-z0-9]{24}|sk_test_[A-Za-z0-9]{24})["']?"#)?, + severity: SecuritySeverity::Critical, + description: "Stripe 
API key detected".to_string(), + }, + + // Twilio credentials + CloudPattern { + provider: "Twilio".to_string(), + service: "Auth Token".to_string(), + pattern: Regex::new(r#"(?i)(?:twilio[_-]?auth[_-]?token|twilio[_-]?account[_-]?sid)\s*[=:]\s*["']?([A-Za-z0-9]{32,34})["']?"#)?, + severity: SecuritySeverity::High, + description: "Twilio credentials detected".to_string(), + }, + ]; + + Ok(patterns) + } + + /// Initialize framework-specific patterns + fn initialize_framework_patterns() -> Result>, SecurityError> { + let mut frameworks = HashMap::new(); + + // Django patterns + frameworks.insert("django".to_string(), vec![ + FrameworkPattern { + framework: "Django".to_string(), + pattern: Regex::new(r#"(?i)(?:database|databases)\s*=\s*\{[^}]*['"']password['"']\s*:\s*['"']([^'"']+)['"'][^}]*\}"#)?, + severity: SecuritySeverity::Critical, + description: "Django database password in settings".to_string(), + file_extensions: vec!["py".to_string()], + }, + FrameworkPattern { + framework: "Django".to_string(), + pattern: Regex::new(r#"(?i)email[_-]?host[_-]?password\s*=\s*["']([^"']+)["']"#)?, + severity: SecuritySeverity::High, + description: "Django email password in settings".to_string(), + file_extensions: vec!["py".to_string()], + }, + ]); + + // Flask patterns + frameworks.insert("flask".to_string(), vec![ + FrameworkPattern { + framework: "Flask".to_string(), + pattern: Regex::new(r#"(?i)app\.config\[['"']([A-Z_]*(?:SECRET|KEY|PASSWORD|TOKEN)[A-Z_]*)['"']\]\s*=\s*["']([^"']+)["']"#)?, + severity: SecuritySeverity::High, + description: "Flask configuration with potential secret".to_string(), + file_extensions: vec!["py".to_string()], + }, + ]); + + // FastAPI patterns + frameworks.insert("fastapi".to_string(), vec![ + FrameworkPattern { + framework: "FastAPI".to_string(), + pattern: Regex::new(r#"(?i)class\s+Settings\([^)]*\):[^}]*([A-Z_]*(?:SECRET|KEY|PASSWORD|TOKEN)[A-Z_]*)\s*:\s*str\s*=\s*["']([^"']+)["']"#)?, + severity: SecuritySeverity::High, + description: 
"FastAPI Settings class with hardcoded secret".to_string(), + file_extensions: vec!["py".to_string()], + }, + ]); + + Ok(frameworks) + } + + /// Initialize database patterns + fn initialize_database_patterns() -> Result, SecurityError> { + let patterns = vec![ + // PostgreSQL connection strings + DatabasePattern { + database_type: "PostgreSQL".to_string(), + pattern: Regex::new(r#"(?i)postgresql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?, + severity: SecuritySeverity::Critical, + description: "PostgreSQL connection string with credentials".to_string(), + }, + + // MySQL connection strings + DatabasePattern { + database_type: "MySQL".to_string(), + pattern: Regex::new(r#"(?i)mysql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?, + severity: SecuritySeverity::Critical, + description: "MySQL connection string with credentials".to_string(), + }, + + // MongoDB connection strings + DatabasePattern { + database_type: "MongoDB".to_string(), + pattern: Regex::new(r#"(?i)mongodb://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?, + severity: SecuritySeverity::Critical, + description: "MongoDB connection string with credentials".to_string(), + }, + + // Redis connection strings + DatabasePattern { + database_type: "Redis".to_string(), + pattern: Regex::new(r#"(?i)redis://[^:]*:[^@]+@[^/]+/[^"'\s]*"#)?, + severity: SecuritySeverity::High, + description: "Redis connection string with password".to_string(), + }, + + // SQLAlchemy database URLs + DatabasePattern { + database_type: "SQLAlchemy".to_string(), + pattern: Regex::new(r#"(?i)sqlalchemy_database_uri\s*=\s*["'][^"']*://[^:]+:[^@]+@[^"']+"#)?, + severity: SecuritySeverity::Critical, + description: "SQLAlchemy database URI with credentials".to_string(), + }, + ]; + + Ok(patterns) + } + + /// Initialize environment variable patterns specific to Python + fn initialize_env_var_patterns() -> Result, SecurityError> { + let patterns = vec![ + EnvVarPattern { + pattern: Regex::new(r#"os\.environ(?:\.get)?\(['"']([A-Z_]+)['"']\)"#)?, + severity: SecuritySeverity::Info, + 
description: "Environment variable usage detected".to_string(), + sensitive_prefixes: vec![ + "SECRET".to_string(), + "KEY".to_string(), + "PASSWORD".to_string(), + "TOKEN".to_string(), + "API".to_string(), + "AUTH".to_string(), + "PRIVATE".to_string(), + "CREDENTIAL".to_string(), + ], + }, + EnvVarPattern { + pattern: Regex::new(r#"getenv\(['"']([A-Z_]+)['"']\)"#)?, + severity: SecuritySeverity::Info, + description: "Environment variable access via getenv".to_string(), + sensitive_prefixes: vec![ + "SECRET".to_string(), + "KEY".to_string(), + "PASSWORD".to_string(), + "TOKEN".to_string(), + ], + }, + ]; + + Ok(patterns) + } + + /// Check environment variable usage patterns + fn check_env_var_usage(&self, content: &str, file_path: &Path) -> Result, SecurityError> { + let mut findings = Vec::new(); + + for pattern in &self.env_var_patterns { + for (line_num, line) in content.lines().enumerate() { + if let Some(captures) = pattern.pattern.captures(line) { + if let Some(var_name) = captures.get(1) { + let var_name = var_name.as_str(); + + // Check if this appears to be a sensitive variable + let is_sensitive = pattern.sensitive_prefixes.iter().any(|prefix| { + var_name.to_uppercase().contains(prefix) + }); + + if is_sensitive { + // Check if this is properly protected (not hardcoded) + if !line.contains("=") || line.contains("os.environ") || line.contains("getenv") { + // This is good practice - environment variable usage + continue; + } + + let column_number = captures.get(0).map(|m| m.start() + 1); + + findings.push(SecurityFinding { + id: format!("env-var-misuse-{}", line_num), + title: "Potential Environment Variable Misuse".to_string(), + description: format!("Sensitive environment variable '{}' usage detected", var_name), + severity: SecuritySeverity::Medium, + category: SecurityCategory::SecretsExposure, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number, + evidence: Some(line.trim().to_string()), + remediation: vec![ 
+ "Ensure sensitive environment variables are properly protected".to_string(), + "Use python-decouple or similar libraries for configuration".to_string(), + "Document required environment variables".to_string(), + ], + references: vec![ + "https://12factor.net/config".to_string(), + "https://docs.python.org/3/library/os.html#os.environ".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }); + } + } + } + } + } + + Ok(findings) + } + + /// Check for insecure Python practices + fn check_insecure_python_practices(&self, content: &str, file_path: &Path) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Check for eval() usage + if let Ok(eval_pattern) = Regex::new(r#"eval\s*\("#) { + for (line_num, line) in content.lines().enumerate() { + if eval_pattern.is_match(line) { + findings.push(SecurityFinding { + id: format!("insecure-eval-{}", line_num), + title: "Dangerous eval() Usage".to_string(), + description: "Use of eval() function detected - potential code injection risk".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::CodeInjection, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number: None, + evidence: Some(line.trim().to_string()), + remediation: vec![ + "Avoid using eval() with user input".to_string(), + "Use ast.literal_eval() for safe evaluation of literals".to_string(), + "Consider using json.loads() for JSON data".to_string(), + ], + references: vec![ + "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(), + ], + cwe_id: Some("CWE-95".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }); + } + } + } + + // Check for shell injection via subprocess + if let Ok(subprocess_pattern) = Regex::new(r#"subprocess\.(call|run|Popen)\([^)]*shell\s*=\s*True"#) { + for (line_num, line) in content.lines().enumerate() { + if subprocess_pattern.is_match(line) { + 
findings.push(SecurityFinding { + id: format!("shell-injection-{}", line_num), + title: "Potential Shell Injection".to_string(), + description: "subprocess call with shell=True detected - potential command injection risk".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::CommandInjection, + file_path: Some(file_path.to_path_buf()), + line_number: Some(line_num + 1), + column_number: None, + evidence: Some(line.trim().to_string()), + remediation: vec![ + "Avoid using shell=True with user input".to_string(), + "Use subprocess with list arguments instead".to_string(), + "Validate and sanitize all user inputs".to_string(), + ], + references: vec![ + "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(), + ], + cwe_id: Some("CWE-78".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }); + } + } + } + + Ok(findings) + } + + /// Detect Python framework based on content + fn detect_python_framework(&self, content: &str) -> Option { + if content.contains("django") || content.contains("Django") { + Some("django".to_string()) + } else if content.contains("flask") || content.contains("Flask") { + Some("flask".to_string()) + } else if content.contains("fastapi") || content.contains("FastAPI") { + Some("fastapi".to_string()) + } else { + None + } + } + + /// Mask sensitive information in evidence + fn mask_secret(&self, secret: &str) -> String { + if secret.len() <= 8 { + "*".repeat(secret.len()) + } else { + format!("{}***{}", &secret[..4], &secret[secret.len()-4..]) + } + } + + /// Mask database connection string + fn mask_database_connection(&self, connection_str: &str) -> String { + // Replace password in connection string with asterisks + if let Ok(re) = Regex::new(r"://([^:]+):([^@]+)@") { + re.replace(connection_str, "://$1:***@").to_string() + } else { + connection_str.to_string() + } + } + + /// Calculate confidence score based on context indicators + fn calculate_context_confidence(&self, content: 
&str, indicators: &[String]) -> f32 { + let total_indicators = indicators.len() as f32; + if total_indicators == 0.0 { + return 0.5; // Neutral confidence + } + + let found_indicators = indicators.iter() + .filter(|indicator| content.to_lowercase().contains(&indicator.to_lowercase())) + .count() as f32; + + found_indicators / total_indicators + } + + /// Adjust severity based on context confidence + fn adjust_severity_by_context(&self, base_severity: SecuritySeverity, confidence: f32) -> SecuritySeverity { + match base_severity { + SecuritySeverity::Critical => base_severity, // Keep critical as-is + SecuritySeverity::High => { + if confidence < 0.3 { + SecuritySeverity::Medium + } else { + base_severity + } + } + SecuritySeverity::Medium => { + if confidence > 0.7 { + SecuritySeverity::High + } else if confidence < 0.3 { + SecuritySeverity::Low + } else { + base_severity + } + } + _ => base_severity, + } + } + + /// Generate framework-specific remediation advice + fn generate_framework_remediation(&self, framework: &str) -> Vec { + match framework.to_lowercase().as_str() { + "django" => vec![ + "Use Django's built-in security features".to_string(), + "Store SECRET_KEY in environment variables".to_string(), + "Use django-environ for configuration management".to_string(), + "Enable Django's security middleware".to_string(), + ], + "flask" => vec![ + "Use Flask-Security for authentication".to_string(), + "Store secrets in environment variables".to_string(), + "Use Flask-Talisman for security headers".to_string(), + "Implement proper session management".to_string(), + ], + "fastapi" => vec![ + "Use Pydantic Settings for configuration".to_string(), + "Implement proper JWT token management".to_string(), + "Use dependency injection for secrets".to_string(), + "Enable HTTPS and security headers".to_string(), + ], + _ => vec![ + "Follow framework-specific security best practices".to_string(), + "Use environment variables for sensitive data".to_string(), + ], + } + } + + 
/// Enhance a security finding with gitignore risk assessment + fn enhance_finding_with_gitignore_status( + &self, + finding: &mut SecurityFinding, + gitignore_status: &super::gitignore::GitIgnoreStatus, + ) { + // Adjust severity based on gitignore risk + finding.severity = match gitignore_status.risk_level { + GitIgnoreRisk::Tracked => SecuritySeverity::Critical, // Always critical if tracked + GitIgnoreRisk::Exposed => { + // Upgrade severity if exposed + match &finding.severity { + SecuritySeverity::Medium => SecuritySeverity::High, + SecuritySeverity::Low => SecuritySeverity::Medium, + other => other.clone(), + } + } + GitIgnoreRisk::Protected => { + // Downgrade slightly if protected + match &finding.severity { + SecuritySeverity::Critical => SecuritySeverity::High, + SecuritySeverity::High => SecuritySeverity::Medium, + other => other.clone(), + } + } + GitIgnoreRisk::Safe => finding.severity.clone(), + }; + + // Add gitignore context to description + finding.description.push_str(&format!(" (GitIgnore: {})", gitignore_status.description())); + + // Add git history warning for tracked files + if gitignore_status.risk_level == GitIgnoreRisk::Tracked { + finding.remediation.insert(0, "⚠️ CRITICAL: Remove this file from git history using git-filter-branch or BFG Repo-Cleaner".to_string()); + finding.remediation.insert(1, "πŸ”‘ Rotate any exposed secrets immediately".to_string()); + } + } + + /// Analyze Python configuration files with gitignore awareness + fn analyze_config_files_with_gitignore( + &self, + project_root: &Path, + gitignore_analyzer: &mut GitIgnoreAnalyzer, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Python configuration files to check + let config_files = [ + "settings.py", // Django settings + "config.py", // Flask/general config + "main.py", // FastAPI main + "app.py", // Flask app + "manage.py", // Django management + "wsgi.py", // WSGI config + "asgi.py", // ASGI config + ]; + + for config_file in &config_files { + 
let config_path = project_root.join(config_file); + if config_path.exists() { + let gitignore_status = gitignore_analyzer.analyze_file(&config_path); + + if let Ok(content) = fs::read_to_string(&config_path) { + // Basic secret pattern check for config files + if self.contains_potential_python_secrets(&content) { + let mut finding = SecurityFinding { + id: format!("config-file-{}", config_file.replace('.', "-")), + title: "Potential Secrets in Python Configuration File".to_string(), + description: format!("Python configuration file '{}' may contain secrets", config_file), + severity: SecuritySeverity::Medium, + category: SecurityCategory::SecretsExposure, + file_path: Some(config_path.clone()), + line_number: None, + column_number: None, + evidence: None, + remediation: vec![ + "Review configuration file for hardcoded secrets".to_string(), + "Use environment variables for sensitive configuration".to_string(), + "Consider using python-decouple or similar libraries".to_string(), + ], + references: vec![ + "https://12factor.net/config".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }; + + self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); + findings.push(finding); + } + } + } + } + + Ok(findings) + } + + /// Analyze Python dependency files with gitignore awareness + fn analyze_dependency_files_with_gitignore( + &self, + project_root: &Path, + gitignore_analyzer: &mut GitIgnoreAnalyzer, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Python dependency files to check + let dependency_files = [ + "requirements.txt", + "requirements-dev.txt", + "requirements-prod.txt", + "Pipfile", + "Pipfile.lock", + "pyproject.toml", + "poetry.lock", + "conda-requirements.txt", + "environment.yml", + ]; + + for dep_file in &dependency_files { + let dep_path = project_root.join(dep_file); + if dep_path.exists() { + let gitignore_status = 
gitignore_analyzer.analyze_file(&dep_path); + + // Generally, dependency files should be tracked, but check for any embedded secrets + if let Ok(content) = fs::read_to_string(&dep_path) { + if self.contains_potential_python_secrets(&content) { + let mut finding = SecurityFinding { + id: format!("dependency-file-{}", dep_file.replace('.', "-").replace('-', "_")), + title: "Potential Secrets in Python Dependency File".to_string(), + description: format!("Python dependency file '{}' may contain secrets", dep_file), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + file_path: Some(dep_path.clone()), + line_number: None, + column_number: None, + evidence: None, + remediation: vec![ + "Remove any secrets from dependency files".to_string(), + "Use environment variables for configuration".to_string(), + "Review dependency sources for security".to_string(), + ], + references: vec![ + "https://pip.pypa.io/en/stable/topics/secure-installs/".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }; + + self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); + findings.push(finding); + } + } + } + } + + Ok(findings) + } + + /// Analyze environment files with comprehensive gitignore risk assessment + fn analyze_env_files_with_gitignore( + &self, + project_root: &Path, + gitignore_analyzer: &mut GitIgnoreAnalyzer, + ) -> Result, SecurityError> { + let mut findings = Vec::new(); + + // Get all potential environment files using gitignore analyzer + let env_files = gitignore_analyzer.get_files_to_analyze(&[]) + .map_err(|e| SecurityError::Io(e))? 
+ .into_iter() + .filter(|file| { + if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) { + // Exclude template/example files from security alerts + if self.is_template_file(file_name) { + debug!("Skipping template file: {}", file_name); + return false; + } + + file_name.starts_with(".env") || + file_name.contains("credentials") || + file_name.contains("secrets") || + file_name.ends_with(".key") || + file_name.ends_with(".pem") || + file_name == "secret.json" || + file_name == "service-account.json" + } else { + false + } + }) + .collect::>(); + + for env_file in env_files { + let gitignore_status = gitignore_analyzer.analyze_file(&env_file); + let relative_path = env_file.strip_prefix(project_root) + .unwrap_or(&env_file); + + // Create finding based on gitignore risk assessment + let (severity, title, description) = match gitignore_status.risk_level { + GitIgnoreRisk::Tracked => ( + SecuritySeverity::Critical, + "Python Secret File Tracked by Git".to_string(), + format!("Python secret file '{}' is tracked by git and may expose credentials in version history", relative_path.display()), + ), + GitIgnoreRisk::Exposed => ( + SecuritySeverity::High, + "Python Secret File Not in GitIgnore".to_string(), + format!("Python secret file '{}' exists but is not protected by .gitignore", relative_path.display()), + ), + GitIgnoreRisk::Protected => ( + SecuritySeverity::Info, + "Python Secret File Properly Protected".to_string(), + format!("Python secret file '{}' is properly ignored but detected for verification", relative_path.display()), + ), + GitIgnoreRisk::Safe => continue, // Skip files that appear safe + }; + + let mut finding = SecurityFinding { + id: format!("python-env-file-{}", relative_path.to_string_lossy().replace('/', "-").replace('.', "-")), + title, + description, + severity, + category: SecurityCategory::SecretsExposure, + file_path: Some(env_file.clone()), + line_number: None, + column_number: None, + evidence: None, + remediation: vec![ + 
"Ensure sensitive files are in .gitignore".to_string(), + "Use .env.example files for documentation".to_string(), + "Never commit actual environment files to version control".to_string(), + "Use python-decouple for environment variable management".to_string(), + ], + references: vec![ + "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(), + "https://pypi.org/project/python-decouple/".to_string(), + ], + cwe_id: Some("CWE-200".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + }; + + self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); + findings.push(finding); + } + + Ok(findings) + } + + /// Check if a file is a template/example file that should be excluded from security alerts + fn is_template_file(&self, file_name: &str) -> bool { + let template_indicators = [ + "sample", "example", "template", "template.env", "env.template", + "sample.env", "env.sample", "example.env", "env.example", + "examples", "samples", "templates", "demo", "test", + ".env.sample", ".env.example", ".env.template", ".env.demo", ".env.test", + "example.json", "sample.json", "template.json" + ]; + + let file_name_lower = file_name.to_lowercase(); + + // Check for exact matches or contains patterns + template_indicators.iter().any(|indicator| { + file_name_lower == *indicator || + file_name_lower.contains(indicator) || + file_name_lower.ends_with(indicator) + }) + } + + /// Check if content contains potential Python secrets (basic patterns) + fn contains_potential_python_secrets(&self, content: &str) -> bool { + let secret_indicators = [ + "sk_", "pk_live_", "eyJ", "AKIA", "-----BEGIN", + "client_secret", "api_key", "access_token", "SECRET_KEY", + "private_key", "secret_key", "bearer", "password", + "token", "credentials", "auth" + ]; + + let content_lower = content.to_lowercase(); + secret_indicators.iter().any(|indicator| content_lower.contains(&indicator.to_lowercase())) + } + + /// Generate Python-specific security 
recommendations + fn generate_python_security_recommendations(&self) -> Vec { + vec![ + "🐍 Python Security Best Practices:".to_string(), + " β€’ Use environment variables for all secrets and configuration".to_string(), + " β€’ Install python-decouple or python-dotenv for configuration management".to_string(), + " β€’ Keep requirements.txt and poetry.lock files up to date".to_string(), + " β€’ Use virtual environments to isolate dependencies".to_string(), + " β€’ Run 'pip-audit' or 'safety check' to scan for vulnerable packages".to_string(), + " β€’ Enable Django's security middleware if using Django".to_string(), + " β€’ Use parameterized queries to prevent SQL injection".to_string(), + " β€’ Validate and sanitize all user inputs".to_string(), + " β€’ Use HTTPS in production environments".to_string(), + " β€’ Implement proper error handling and logging".to_string(), + " β€’ Consider using tools like bandit for static security analysis".to_string(), + ] + } +} \ No newline at end of file From 52454ae4045cdfccd07dda59ffc1b144e751a30e Mon Sep 17 00:00:00 2001 From: Alex Holmberg Date: Mon, 9 Jun 2025 15:11:51 +0200 Subject: [PATCH 5/6] patch: README.md duplicate phrases updated --- .github/workflows/release-plz.yml | 2 +- README.md | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/release-plz.yml b/.github/workflows/release-plz.yml index 7e0b996a..4fca2af2 100644 --- a/.github/workflows/release-plz.yml +++ b/.github/workflows/release-plz.yml @@ -52,4 +52,4 @@ jobs: command: release-pr env: GITHUB_TOKEN: ${{ secrets.RELEASE_PLZ_TOKEN }} - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} \ No newline at end of file + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index b92997c6..5e22286b 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,8 @@ ```bash # Install -# Install cargo install syncable-cli -# Analyze any project - # Analyze any project
sync-ctl analyze /path/to/your/project @@ -213,8 +210,6 @@ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. # Run tests cargo test -# Check code quality -cargo clippy # Check code quality cargo clippy From 2a27bf624a9b791af318b45a14d2f4ca42c2551a Mon Sep 17 00:00:00 2001 From: Alex Holmberg Date: Mon, 9 Jun 2025 23:58:24 +0200 Subject: [PATCH 6/6] feat: huge improvements towards security scanning and performance With next update, we've added caching, performance improvements and concurrency updates. The security scan is now performing from 1 min -> 8.4 avg scannings, marking a way faster scanning experience. --- Cargo.lock | 66 + Cargo.toml | 15 + README.md | 221 ++- examples/enhanced_security.rs | 123 -- src/analyzer/mod.rs | 3 +- src/analyzer/security/gitignore.rs | 531 ------ src/analyzer/security/javascript.rs | 1013 ------------ src/analyzer/security/mod.rs | 47 +- src/analyzer/security/python.rs | 1423 ----------------- src/analyzer/security/turbo/README.md | 184 +++ src/analyzer/security/turbo/cache.rs | 369 +++++ src/analyzer/security/turbo/file_discovery.rs | 558 +++++++ src/analyzer/security/turbo/mod.rs | 390 +++++ src/analyzer/security/turbo/pattern_engine.rs | 552 +++++++ src/analyzer/security/turbo/results.rs | 403 +++++ src/analyzer/security/turbo/scanner.rs | 447 ++++++ src/analyzer/security_analyzer.rs | 37 +- src/cli.rs | 18 + src/main.rs | 162 +- 19 files changed, 3272 insertions(+), 3290 deletions(-) delete mode 100644 examples/enhanced_security.rs delete mode 100644 src/analyzer/security/gitignore.rs delete mode 100644 src/analyzer/security/javascript.rs delete mode 100644 src/analyzer/security/python.rs create mode 100644 src/analyzer/security/turbo/README.md create mode 100644 src/analyzer/security/turbo/cache.rs create mode 100644 src/analyzer/security/turbo/file_discovery.rs create mode 100644 src/analyzer/security/turbo/mod.rs create mode 100644 src/analyzer/security/turbo/pattern_engine.rs create mode 
100644 src/analyzer/security/turbo/results.rs create mode 100644 src/analyzer/security/turbo/scanner.rs diff --git a/Cargo.lock b/Cargo.lock index dff1ebae..936c783e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", + "getrandom 0.3.3", "once_cell", "version_check", "zerocopy", @@ -115,6 +116,12 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -204,6 +211,19 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "blake3" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -387,6 +407,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "core-foundation" version = "0.9.4" @@ -527,6 +553,19 @@ dependencies = [ "serde", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + 
"cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "deranged" version = "0.4.0" @@ -2303,6 +2342,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "num_threads" version = "0.1.7" @@ -3237,6 +3286,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "1.0.1" @@ -3341,26 +3396,37 @@ dependencies = [ name = "syncable-cli" version = "0.8.0" dependencies = [ + "ahash", + "aho-corasick", "assert_cmd", + "blake3", + "bstr", "chrono", "clap", "colored", + "crossbeam", + "dashmap", "dirs", "env_logger", "glob", "indicatif", "log", + "memmap2", + "num_cpus", "once_cell", + "parking_lot", "predicates", "prettytable", "proptest", "rayon", "regex", + "regex-automata", "reqwest", "rustsec", "serde", "serde_json", "serde_yaml", + "simdutf8", "tempfile", "tera", "term_size", diff --git a/Cargo.toml b/Cargo.toml index 0546fc3d..afc87c88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,8 @@ categories = ["command-line-utilities", "development-tools"] name = "sync-ctl" path = "src/main.rs" + + [dependencies] clap = { version = "4", features = ["derive", "env", "cargo"] } serde = { version = "1", features = ["derive"] } @@ -43,6 +45,19 @@ textwrap = "0.16" tempfile = "3" dirs = "6" +# Performance dependencies for turbo security analyzer +aho-corasick = "1.1" # Multi-pattern string matching +memmap2 = "0.9" # Memory-mapped file I/O 
+dashmap = "5" # Concurrent hashmap for caching +crossbeam = { version = "0.8", features = ["crossbeam-channel"] } # High-performance channels +blake3 = "1.5" # Fast hashing for cache keys +regex-automata = "0.4" # Compiled regex sets +num_cpus = "1.16" # CPU count detection +parking_lot = "0.12" # Faster mutex/rwlock +ahash = "0.8" # Fast hash function +bstr = "1.9" # Byte string utilities +simdutf8 = "0.1" # SIMD UTF-8 validation + [dev-dependencies] assert_cmd = "2" predicates = "3" diff --git a/README.md b/README.md index 5e22286b..5eb08eb0 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ ## ⚑ Quick Start + ```bash # Install cargo install syncable-cli @@ -23,11 +24,20 @@ sync-ctl analyze /path/to/your/project # Check for vulnerabilities sync-ctl vulnerabilities -# Run security analysis -sync-ctl security +# Run security analysis (multiple modes available) +sync-ctl security # Thorough scan (default) +sync-ctl security --mode lightning # Ultra-fast critical files only +sync-ctl security --mode paranoid # Most comprehensive scan # Force update check (clears cache) sync-ctl --clear-update-cache analyze . + + +# Get help with any command +sync-ctl --help # Show all available commands +sync-ctl analyze --help # Show analyze command options +sync-ctl security --help # Show security scanning options +sync-ctl vulnerabilities --help # Show vulnerability check options ``` That's it! The CLI will detect your languages, frameworks, dependencies, and provide detailed insights about your project structure. The tool includes smart update notifications to keep you on the latest version. 
@@ -70,11 +80,12 @@ $ sync-ctl analyze ./my-express-app - **Architecture detection** - Monolithic, microservices, serverless, and more - **Monorepo support** - Analyzes complex multi-project repositories -### πŸ›‘οΈ Security & Compliance -- **Vulnerability scanning** - Integrated security checks for all dependencies -- **Secret detection** - Finds exposed API keys and credentials -- **Security scoring** - Get actionable security recommendations -- **Compliance checks** - SOC2, GDPR, HIPAA support (coming soon) +### πŸ›‘οΈ Turbo Security Engine (Covering Javascript / Python ---- Rust-, Go- & Java- Coming soon) +- **10-100x faster scanning** - Rust-powered multi-pattern matching with smart file discovery +- **5 scan modes** - From lightning-fast critical checks to comprehensive audits +- **Smart gitignore analysis** - Understands git status and provides risk assessments +- **260+ secret patterns** - Detects API keys, tokens, certificates, and credentials +- **Zero false positives** - Advanced context-aware filtering excludes test data and documentation ### 🐳 Docker Intelligence - **Dockerfile analysis** - Understand existing Docker configurations @@ -112,8 +123,15 @@ sync-ctl analyze # Matrix view (default) sync-ctl analyze --display detailed # Detailed view sync-ctl analyze --json # JSON output -# Security & vulnerability checks -sync-ctl security # Comprehensive security analysis +# Vulnerabilities analysis +sync-ctl vulnerabilities # Dependency vulnerability scan + +# Security analysis with turbo engine (10-100x faster) +sync-ctl security # Thorough scan (default) +sync-ctl security --mode lightning # Critical files only (.env, configs) +sync-ctl security --mode fast # Smart sampling with priority patterns +sync-ctl security --mode balanced # Good coverage with optimizations +sync-ctl security --mode paranoid # Most comprehensive including low-severity sync-ctl vulnerabilities # Dependency vulnerability scan # Dependency analysis @@ -121,6 +139,98 @@ sync-ctl 
dependencies --licenses # Show license information sync-ctl dependencies --vulnerabilities # Check for known CVEs ``` +### Security Scan Modes + +The turbo security engine offers 5 scan modes optimized for different use cases: + +| Mode | Speed | Coverage | Use Case | Typical Time | +|------|-------|----------|----------|--------------| +| **Lightning** | πŸš€ Fastest | Critical files only | Pre-commit hooks, CI checks +| **Fast** | ⚑ Very Fast | Smart sampling | Development workflow +| **Balanced** | 🎯 Optimized | Good coverage | Regular security checks +| **Thorough** | πŸ” Complete | Comprehensive | Security audits (default) +| **Paranoid** | πŸ•΅οΈ Maximum | Everything + low severity | Compliance, releases + +## πŸ›‘οΈ Security Detection Deep Dive + +### What We Detect + +The turbo security engine scans for 260+ secret patterns across multiple categories: + +#### πŸ”‘ API Keys & Tokens +- **Cloud Providers**: AWS Access Keys, GCP Service Account Keys, Azure Storage Keys +- **Services**: Stripe API Keys, Twilio Auth Tokens, GitHub Personal Access Tokens +- **Databases**: MongoDB Connection Strings, Redis URLs, PostgreSQL passwords +- **CI/CD**: Jenkins API Tokens, CircleCI Keys, GitLab CI Variables + +#### πŸ” Cryptographic Material +- **Private Keys**: RSA, ECDSA, Ed25519 private keys (.pem, .key files) +- **Certificates**: X.509 certificates, SSL/TLS certs +- **Keystores**: Java KeyStore files, PKCS#12 files +- **SSH Keys**: OpenSSH private keys, SSH certificates + +#### πŸ“§ Authentication Secrets +- **JWT Secrets**: JSON Web Token signing keys +- **OAuth**: Client secrets, refresh tokens +- **SMTP**: Email server credentials, SendGrid API keys +- **LDAP**: Bind credentials, directory service passwords + +#### 🌐 Environment Variables +- **Suspicious Names**: Any variable containing "password", "secret", "key", "token" +- **Base64 Encoded**: Automatically detects encoded secrets +- **URLs with Auth**: Database URLs, API endpoints with embedded credentials 
+ +### Smart Git Status Analysis + +Our security engine provides intelligent risk assessment based on git status: + +| Status | Risk Level | Meaning | Action Needed | +|--------|------------|---------|---------------| +| 🟒 **SAFE** | Low | File properly ignored by .gitignore | βœ… No action needed | +| πŸ”΅ **OK** | Low | File appears safe for version control | βœ… Monitor for changes | +| 🟑 **EXPOSED** | High | Contains secrets but NOT in .gitignore | ⚠️ Add to .gitignore immediately | +| πŸ”΄ **TRACKED** | Critical | Contains secrets AND tracked by git | 🚨 Remove from git history | + +#### Why Some Files Are "OK" Despite Not Being Gitignored + +Files are marked as **OK** when they contain patterns that look like secrets but are actually safe: + +- **Documentation**: Code in README files, API examples, tutorials +- **Test Data**: Mock API keys, placeholder values, example configurations +- **Source Code**: String literals that match patterns but aren't real secrets +- **Lock Files**: Package hashes in `package-lock.json`, `pnpm-lock.yaml`, `cargo.lock` +- **Build Artifacts**: Compiled code, minified files, generated documentation + +### Advanced False Positive Filtering + +Our engine uses sophisticated techniques to minimize false positives: + +#### 🎯 Context-Aware Detection +```bash +# ❌ FALSE POSITIVE - Will be ignored +const API_KEY = "your_api_key_here"; // Documentation example +const EXAMPLE_TOKEN = "sk-example123"; // Clearly a placeholder + +# βœ… REAL SECRET - Will be detected +const STRIPE_KEY = "sk_live_4eC39HqLyjWDarjtT1zdp7dc"; +``` + +#### πŸ“ Documentation Exclusions +- Comments in any language (`//`, `#`, `/* */`, ``) +- Markdown code blocks and documentation files +- README files, CHANGELOG, API docs +- Example configurations and sample files + +#### πŸ§ͺ Test Data Recognition +- Files in `/test/`, `/tests/`, `/spec/`, `__test__` directories +- Filenames containing "test", "spec", "mock", "fixture", "example" +- Common test patterns like 
"test123", "dummy", "fake" + +#### πŸ“¦ Dependency File Intelligence +- Automatically excludes: `node_modules/`, `vendor/`, `target/` +- Recognizes lock files: `yarn.lock`, `pnpm-lock.yaml`, `go.sum` +- Skips binary files, images, and compiled artifacts + ### Display Modes Choose the output format that works best for you: @@ -130,19 +240,43 @@ Choose the output format that works best for you: - **Summary** - Brief overview for CI/CD - **JSON** - Machine-readable format -### Advanced Configuration -# Analyze with different display formats -sync-ctl analyze # Matrix view (default) -sync-ctl analyze --display detailed # Detailed view -sync-ctl analyze --json # JSON output +### Example Security Output + +```bash +$ sync-ctl security --mode thorough + +πŸ›‘οΈ Security Analysis Results +════════════════════════════════════════════════════════════════════════════════ + +β”Œβ”€ Security Summary ───────────────────────────────────────┐ +β”‚ Overall Score: 85/100 β”‚ +β”‚ Risk Level: High β”‚ +β”‚ Total Findings: 3 β”‚ +β”‚ Files Analyzed: 47 β”‚ +β”‚ Scan Mode: Thorough β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + +β”Œβ”€ Security Findings ────────────────────────────────────────────────────────┐ +β”‚ 1. ./.env.local β”‚ +β”‚ Type: ENV VAR | Severity: Critical | Position: 3:15 | Status: EXPOSED β”‚ +β”‚ β”‚ +β”‚ 2. ./config/database.js β”‚ +β”‚ Type: API KEY | Severity: High | Position: 12:23 | Status: TRACKED β”‚ +β”‚ β”‚ +β”‚ 3. 
./docs/api-example.md β”‚ +β”‚ Type: API KEY | Severity: Critical | Position: 45:8 | Status: OK β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + +β”Œβ”€ Key Recommendations ───────────────────────────────────────────────────────┐ +β”‚ 1. 🚨 Add .env.local to .gitignore immediately β”‚ +β”‚ 2. πŸ” Move database credentials to environment variables β”‚ +β”‚ 3. βœ… API example in docs is safely documented β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + +════════════════════════════════════════════════════════════════════════════════ +``` -# Security & vulnerability checks -sync-ctl security # Comprehensive security analysis -sync-ctl vulnerabilities # Dependency vulnerability scan -# Dependency analysis -sync-ctl dependencies --licenses # Show license information -sync-ctl dependencies --vulnerabilities # Check for known CVEs ### Advanced Configuration @@ -154,8 +288,43 @@ include_dev_dependencies = true ignore_patterns = ["vendor", "node_modules", "target"] [security] -fail_on_high_severity = true -check_secrets = true +# Scan configuration +default_mode = "thorough" # Default scan mode +fail_on_high_severity = true # Exit with error on high/critical findings +check_secrets = true # Enable secret detection +check_code_patterns = true # Enable code security pattern analysis + +# Performance tuning +max_file_size_mb = 10 # Skip files larger than 10MB +worker_threads = 0 # Auto-detect CPU cores (0 = auto) +enable_cache = true # Enable result caching +cache_size_mb = 100 # Cache size limit + +# Pattern filtering +priority_extensions = [ # Scan these extensions first + "env", "key", "pem", 
"json", "yml", "yaml", + "toml", "ini", "conf", "config" +] +``` + +#### Command-Line Options + +```bash +# Scan mode selection +sync-ctl security --mode lightning # Fastest, critical files only +sync-ctl security --mode paranoid # Slowest, most comprehensive + +# Output control +sync-ctl security --json # JSON output for automation +sync-ctl security --output report.json # Save to file + +# Filtering options +sync-ctl security --include-low # Include low-severity findings +sync-ctl security --no-secrets # Skip secret detection +sync-ctl security --no-code-patterns # Skip code pattern analysis + +# CI/CD integration +sync-ctl security --fail-on-findings # Exit with error code if issues found ``` ## 🌟 Technology Coverage @@ -184,8 +353,8 @@ check_secrets = true ### βœ… Phase 1: Analysis Engine (Complete) - Project analysis and technology detection -- Vulnerability scanning -- Basic security analysis +- Vulnerability scanning with 260+ supported packages +- Turbo Security Engine turbo-fast scanning with 5 modes ### πŸ”„ Phase 2: AI-Powered Generation (In Progress) - Smart Dockerfile generation @@ -198,10 +367,6 @@ check_secrets = true - CI/CD pipeline generation - Real-time monitoring setup -[security] -fail_on_high_severity = true -check_secrets = true - ## 🀝 Contributing We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. diff --git a/examples/enhanced_security.rs b/examples/enhanced_security.rs deleted file mode 100644 index 3402ac6d..00000000 --- a/examples/enhanced_security.rs +++ /dev/null @@ -1,123 +0,0 @@ -//! Example: Enhanced Security Analysis -//! -//! This example demonstrates the enhanced security analysis capabilities -//! including the new modular JavaScript/TypeScript security analyzer. 
- -use std::path::Path; -use syncable_cli::analyzer::{analyze_project, SecurityAnalyzer}; - -fn main() -> Result<(), Box> { - env_logger::init(); - - // For this example, analyze the current directory or a provided path - let project_path = std::env::args() - .nth(1) - .map(|p| Path::new(&p).to_path_buf()) - .unwrap_or_else(|| std::env::current_dir().unwrap()); - - println!("πŸ” Analyzing project security for: {}", project_path.display()); - - // First, perform regular project analysis to detect languages - let analysis = analyze_project(&project_path)?; - - println!("\nπŸ“‹ Detected Languages:"); - for lang in &analysis.languages { - println!(" β€’ {} (confidence: {:.1}%)", lang.name, lang.confidence * 100.0); - } - - println!("\nπŸ”§ Detected Technologies:"); - for tech in &analysis.technologies { - println!(" β€’ {} v{} ({:?})", - tech.name, - tech.version.as_deref().unwrap_or("unknown"), - tech.category - ); - } - - // Check if this is a JavaScript/TypeScript project - let has_js = analysis.languages.iter() - .any(|lang| matches!(lang.name.as_str(), "JavaScript" | "TypeScript" | "JSX" | "TSX")); - - if has_js { - println!("\nβœ… JavaScript/TypeScript project detected! 
Using enhanced security analysis..."); - } else { - println!("\nπŸ“„ Using general security analysis..."); - } - - // Run enhanced security analysis - println!("\nπŸ›‘οΈ Starting enhanced security analysis..."); - - let mut security_analyzer = SecurityAnalyzer::new()?; - let security_report = security_analyzer.analyze_security_enhanced(&analysis)?; - - // Display results - println!("\nπŸ“Š Security Analysis Results:"); - println!(" Overall Score: {:.1}/100", security_report.overall_score); - println!(" Risk Level: {:?}", security_report.risk_level); - println!(" Total Findings: {}", security_report.total_findings); - - if security_report.total_findings > 0 { - println!("\n🚨 Security Findings:"); - - // Group findings by severity - for severity in [ - syncable_cli::analyzer::security::core::SecuritySeverity::Critical, - syncable_cli::analyzer::security::core::SecuritySeverity::High, - syncable_cli::analyzer::security::core::SecuritySeverity::Medium, - syncable_cli::analyzer::security::core::SecuritySeverity::Low, - ] { - let findings: Vec<_> = security_report.findings.iter() - .filter(|f| f.severity == severity) - .collect(); - - if !findings.is_empty() { - let severity_icon = match severity { - syncable_cli::analyzer::security::core::SecuritySeverity::Critical => "πŸ”΄", - syncable_cli::analyzer::security::core::SecuritySeverity::High => "🟠", - syncable_cli::analyzer::security::core::SecuritySeverity::Medium => "🟑", - syncable_cli::analyzer::security::core::SecuritySeverity::Low => "πŸ”΅", - _ => "βšͺ", - }; - - println!("\n{} {:?} Severity ({} findings):", severity_icon, severity, findings.len()); - - for finding in findings.iter().take(3) { // Show first 3 of each severity - println!(" πŸ“ {}", finding.title); - if let Some(ref file_path) = finding.file_path { - let relative_path = file_path.strip_prefix(&project_path) - .unwrap_or(file_path); - print!(" πŸ“„ {}", relative_path.display()); - if let Some(line) = finding.line_number { - print!(":{}", line); - } - 
println!(); - } - println!(" πŸ’‘ {}", finding.description); - - if !finding.remediation.is_empty() { - println!(" πŸ”§ Remediation: {}", finding.remediation[0]); - } - println!(); - } - - if findings.len() > 3 { - println!(" ... and {} more findings", findings.len() - 3); - } - } - } - - // Show recommendations - if !security_report.recommendations.is_empty() { - println!("\nπŸ’‘ Recommendations:"); - for (i, recommendation) in security_report.recommendations.iter().enumerate() { - println!(" {}. {}", i + 1, recommendation); - } - } - } else { - println!("βœ… No security issues detected!"); - } - - println!("\n✨ Enhanced security analysis complete!"); - - Ok(()) -} \ No newline at end of file diff --git a/src/analyzer/mod.rs b/src/analyzer/mod.rs index 4951c81a..e89a4290 100644 --- a/src/analyzer/mod.rs +++ b/src/analyzer/mod.rs @@ -37,9 +37,8 @@ pub use security_analyzer::{ SecurityCategory, ComplianceStatus, SecurityAnalysisConfig }; -// Re-export new modular security analysis types +// Re-export security analysis types pub use security::{ - ModularSecurityAnalyzer, JavaScriptSecurityAnalyzer, SecretPatternManager }; pub use security::config::SecurityConfigPreset; diff --git a/src/analyzer/security/gitignore.rs b/src/analyzer/security/gitignore.rs deleted file mode 100644 index da70a500..00000000 --- a/src/analyzer/security/gitignore.rs +++ /dev/null @@ -1,531 +0,0 @@ -//! # GitIgnore-Aware Security Analysis -//! -//! Comprehensive gitignore parsing and pattern matching for security analysis. -//! This module ensures that secret detection is gitignore-aware and can properly -//! assess whether sensitive files are appropriately protected. 
- -use std::collections::HashSet; -use std::path::{Path, PathBuf}; -use std::fs; -use log::{info, warn}; -use regex::Regex; - -/// GitIgnore pattern matcher for security analysis -pub struct GitIgnoreAnalyzer { - patterns: Vec, - project_root: PathBuf, - is_git_repo: bool, -} - -/// A parsed gitignore pattern with matching logic -#[derive(Debug, Clone)] -pub struct GitIgnorePattern { - pub original: String, - pub regex: Regex, - pub is_negation: bool, - pub is_directory_only: bool, - pub is_absolute: bool, // Starts with / - pub pattern_type: PatternType, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum PatternType { - /// Exact filename match (e.g., ".env") - Exact, - /// Wildcard pattern (e.g., "*.log") - Wildcard, - /// Directory pattern (e.g., "node_modules/") - Directory, - /// Path pattern (e.g., "config/*.env") - Path, -} - -/// Result of gitignore analysis for a file -#[derive(Debug, Clone)] -pub struct GitIgnoreStatus { - pub is_ignored: bool, - pub matched_pattern: Option, - pub is_tracked: bool, // Whether file is tracked by git - pub should_be_ignored: bool, // Whether file contains secrets and should be ignored - pub risk_level: GitIgnoreRisk, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum GitIgnoreRisk { - /// File is properly ignored and contains no secrets - Safe, - /// File contains secrets but is properly ignored - Protected, - /// File contains secrets and is NOT ignored (high risk) - Exposed, - /// File contains secrets, not ignored, and is tracked by git (critical risk) - Tracked, -} - -impl GitIgnoreAnalyzer { - pub fn new(project_root: &Path) -> Result { - let project_root = project_root.canonicalize()?; - let is_git_repo = project_root.join(".git").exists(); - - let patterns = if is_git_repo { - Self::parse_gitignore_files(&project_root)? 
- } else { - Self::create_default_patterns() - }; - - info!("Initialized GitIgnore analyzer with {} patterns for {}", - patterns.len(), project_root.display()); - - Ok(Self { - patterns, - project_root, - is_git_repo, - }) - } - - /// Parse all relevant .gitignore files - fn parse_gitignore_files(project_root: &Path) -> Result, std::io::Error> { - let mut patterns = Vec::new(); - - // Global gitignore patterns for common secret files - patterns.extend(Self::create_default_patterns()); - - // Parse project .gitignore - let gitignore_path = project_root.join(".gitignore"); - if gitignore_path.exists() { - let content = fs::read_to_string(&gitignore_path)?; - patterns.extend(Self::parse_gitignore_content(&content, project_root)?); - info!("Parsed {} patterns from .gitignore", patterns.len()); - } - - // TODO: Parse global gitignore (~/.gitignore_global) - // TODO: Parse .git/info/exclude - - Ok(patterns) - } - - /// Create default patterns for common secret files - fn create_default_patterns() -> Vec { - let default_patterns = [ - ".env", - ".env.local", - ".env.*.local", - ".env.production", - ".env.development", - ".env.staging", - ".env.test", - "*.pem", - "*.key", - "*.p12", - "*.pfx", - "id_rsa", - "id_dsa", - "id_ecdsa", - "id_ed25519", - ".aws/credentials", - ".ssh/", - "secrets/", - "private/", - ]; - - default_patterns.iter() - .filter_map(|pattern| Self::parse_pattern(pattern, &PathBuf::from(".")).ok()) - .collect() - } - - /// Parse gitignore content into patterns - fn parse_gitignore_content(content: &str, _root: &Path) -> Result, std::io::Error> { - let mut patterns = Vec::new(); - - for (line_num, line) in content.lines().enumerate() { - let line = line.trim(); - - // Skip empty lines and comments - if line.is_empty() || line.starts_with('#') { - continue; - } - - match Self::parse_pattern(line, &PathBuf::from(".")) { - Ok(pattern) => patterns.push(pattern), - Err(e) => { - warn!("Failed to parse gitignore pattern on line {}: '{}' - {}", line_num + 1, 
line, e); - } - } - } - - Ok(patterns) - } - - /// Parse a single gitignore pattern - fn parse_pattern(pattern: &str, _root: &Path) -> Result { - let original = pattern.to_string(); - let mut pattern = pattern.to_string(); - - // Handle negation - let is_negation = pattern.starts_with('!'); - if is_negation { - pattern = pattern[1..].to_string(); - } - - // Handle directory-only patterns - let is_directory_only = pattern.ends_with('/'); - if is_directory_only { - pattern.pop(); - } - - // Handle absolute patterns (starting with /) - let is_absolute = pattern.starts_with('/'); - if is_absolute { - pattern = pattern[1..].to_string(); - } - - // Determine pattern type - let pattern_type = if pattern.contains('/') { - PatternType::Path - } else if pattern.contains('*') || pattern.contains('?') { - PatternType::Wildcard - } else if is_directory_only { - PatternType::Directory - } else { - PatternType::Exact - }; - - // Convert to regex - let regex_pattern = Self::gitignore_to_regex(&pattern, is_absolute, &pattern_type)?; - let regex = Regex::new(®ex_pattern)?; - - Ok(GitIgnorePattern { - original, - regex, - is_negation, - is_directory_only, - is_absolute, - pattern_type, - }) - } - - /// Convert gitignore pattern to regex - fn gitignore_to_regex(pattern: &str, is_absolute: bool, pattern_type: &PatternType) -> Result { - let mut regex = String::new(); - - // Start anchor - if is_absolute { - regex.push_str("^"); - } else { - // Can match anywhere in the path - regex.push_str("(?:^|/)"); - } - - // Process the pattern - for ch in pattern.chars() { - match ch { - '*' => { - // Check if this is a double star (**) - if pattern.contains("**") { - regex.push_str(".*"); - } else { - regex.push_str("[^/]*"); - } - } - '?' => regex.push_str("[^/]"), - '.' 
=> regex.push_str("\\."), - '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '+' | '|' | '\\' => { - regex.push('\\'); - regex.push(ch); - } - '/' => regex.push_str("/"), - _ => regex.push(ch), - } - } - - // Handle directory-only patterns - match pattern_type { - PatternType::Directory => { - regex.push_str("(?:/|$)"); - } - PatternType::Exact => { - regex.push_str("(?:/|$)"); - } - _ => { - regex.push_str("(?:/.*)?$"); - } - } - - Ok(regex) - } - - /// Check if a file path matches gitignore patterns - pub fn analyze_file(&self, file_path: &Path) -> GitIgnoreStatus { - let relative_path = match file_path.strip_prefix(&self.project_root) { - Ok(rel) => rel, - Err(_) => return GitIgnoreStatus { - is_ignored: false, - matched_pattern: None, - is_tracked: false, - should_be_ignored: false, - risk_level: GitIgnoreRisk::Safe, - }, - }; - - let path_str = relative_path.to_string_lossy(); - let file_name = file_path.file_name() - .and_then(|n| n.to_str()) - .unwrap_or(""); - - // Check against patterns - let mut is_ignored = false; - let mut matched_pattern = None; - - for pattern in &self.patterns { - if pattern.regex.is_match(&path_str) { - if pattern.is_negation { - is_ignored = false; - matched_pattern = None; - } else { - is_ignored = true; - matched_pattern = Some(pattern.original.clone()); - } - } - } - - // Check if file is tracked by git - let is_tracked = if self.is_git_repo { - self.check_git_tracked(file_path) - } else { - false - }; - - // Determine if file should be ignored (contains secrets) - let should_be_ignored = self.should_file_be_ignored(file_path, file_name); - - // Assess risk level - let risk_level = self.assess_risk(is_ignored, is_tracked, should_be_ignored); - - GitIgnoreStatus { - is_ignored, - matched_pattern, - is_tracked, - should_be_ignored, - risk_level, - } - } - - /// Check if file is tracked by git - fn check_git_tracked(&self, file_path: &Path) -> bool { - use std::process::Command; - - Command::new("git") - .args(&["ls-files", 
"--error-unmatch"]) - .arg(file_path) - .current_dir(&self.project_root) - .output() - .map(|output| output.status.success()) - .unwrap_or(false) - } - - /// Check if a file should be ignored based on its name/path - fn should_file_be_ignored(&self, file_path: &Path, file_name: &str) -> bool { - // Common secret file patterns - let secret_indicators = [ - ".env", ".key", ".pem", ".p12", ".pfx", - "id_rsa", "id_dsa", "id_ecdsa", "id_ed25519", - "credentials", "secrets", "private" - ]; - - let path_str = file_path.to_string_lossy().to_lowercase(); - let file_name_lower = file_name.to_lowercase(); - - secret_indicators.iter().any(|indicator| { - file_name_lower.contains(indicator) || path_str.contains(indicator) - }) - } - - /// Assess the risk level for a file - fn assess_risk(&self, is_ignored: bool, is_tracked: bool, should_be_ignored: bool) -> GitIgnoreRisk { - match (should_be_ignored, is_ignored, is_tracked) { - // File contains secrets - (true, true, _) => GitIgnoreRisk::Protected, // Ignored (good) - (true, false, true) => GitIgnoreRisk::Tracked, // Not ignored AND tracked (critical) - (true, false, false) => GitIgnoreRisk::Exposed, // Not ignored but not tracked (high risk) - // File doesn't contain secrets (or we think it doesn't) - (false, _, _) => GitIgnoreRisk::Safe, - } - } - - /// Get all files that should be analyzed for secrets - pub fn get_files_to_analyze(&self, extensions: &[&str]) -> Result, std::io::Error> { - let mut files = Vec::new(); - self.collect_files_recursive(&self.project_root, extensions, &mut files)?; - - // Filter files that are definitely ignored - let files_to_analyze: Vec = files.into_iter() - .filter(|file| { - let status = self.analyze_file(file); - // Analyze files that are either: - // 1. Not ignored (need to check if they should be) - // 2. 
Ignored but we want to verify they don't contain secrets anyway - !status.is_ignored || status.should_be_ignored - }) - .collect(); - - info!("Found {} files to analyze for secrets", files_to_analyze.len()); - Ok(files_to_analyze) - } - - /// Recursively collect files with given extensions - fn collect_files_recursive( - &self, - dir: &Path, - extensions: &[&str], - files: &mut Vec - ) -> Result<(), std::io::Error> { - for entry in fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - - if path.is_dir() { - // Skip obviously ignored directories - if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) { - if matches!(dir_name, ".git" | "node_modules" | "target" | "build" | "dist" | ".next") { - continue; - } - } - - // Check if directory is ignored - let status = self.analyze_file(&path); - if !status.is_ignored { - self.collect_files_recursive(&path, extensions, files)?; - } - } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) { - if extensions.is_empty() || extensions.contains(&ext) { - files.push(path); - } - } else { - // Files without extensions might still be secret files - files.push(path); - } - } - - Ok(()) - } - - /// Generate recommendations for improving gitignore coverage - pub fn generate_gitignore_recommendations(&self, secret_files: &[PathBuf]) -> Vec { - let mut recommendations = Vec::new(); - let mut patterns_to_add = HashSet::new(); - - for file in secret_files { - let status = self.analyze_file(file); - - if status.risk_level == GitIgnoreRisk::Exposed || status.risk_level == GitIgnoreRisk::Tracked { - if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) { - // Suggest specific patterns - if file_name.starts_with(".env") { - patterns_to_add.insert(".env*".to_string()); - } else if file_name.ends_with(".key") || file_name.ends_with(".pem") { - patterns_to_add.insert("*.key".to_string()); - patterns_to_add.insert("*.pem".to_string()); - } else { - 
patterns_to_add.insert(file_name.to_string()); - } - } - - if status.risk_level == GitIgnoreRisk::Tracked { - recommendations.push(format!( - "CRITICAL: '{}' contains secrets and is tracked by git! Remove from git history.", - file.display() - )); - } - } - } - - if !patterns_to_add.is_empty() { - recommendations.push("Add these patterns to your .gitignore:".to_string()); - for pattern in patterns_to_add { - recommendations.push(format!(" {}", pattern)); - } - } - - recommendations - } -} - -impl GitIgnoreStatus { - /// Get a human-readable description of the status - pub fn description(&self) -> String { - match self.risk_level { - GitIgnoreRisk::Safe => "File appears safe".to_string(), - GitIgnoreRisk::Protected => format!( - "File contains secrets but is protected (ignored by: {})", - self.matched_pattern.as_deref().unwrap_or("default pattern") - ), - GitIgnoreRisk::Exposed => "File contains secrets but is NOT in .gitignore!".to_string(), - GitIgnoreRisk::Tracked => "CRITICAL: File contains secrets and is tracked by git!".to_string(), - } - } - - /// Get recommended action for this file - pub fn recommended_action(&self) -> String { - match self.risk_level { - GitIgnoreRisk::Safe => "No action needed".to_string(), - GitIgnoreRisk::Protected => "Verify secrets are still necessary".to_string(), - GitIgnoreRisk::Exposed => "Add to .gitignore immediately".to_string(), - GitIgnoreRisk::Tracked => "Remove from git history and add to .gitignore".to_string(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::TempDir; - - #[test] - fn test_gitignore_pattern_parsing() { - let patterns = vec![ - ".env", - "*.log", - "/config.json", - "secrets/", - "!important.env", - ]; - - for pattern_str in patterns { - let pattern = GitIgnoreAnalyzer::parse_pattern(pattern_str, &PathBuf::from(".")); - assert!(pattern.is_ok(), "Failed to parse pattern: {}", pattern_str); - } - } - - #[test] - fn test_pattern_matching() { - let temp_dir = TempDir::new().unwrap(); - 
let analyzer = GitIgnoreAnalyzer::new(temp_dir.path()).unwrap(); - - // Test exact pattern matching - let env_pattern = GitIgnoreAnalyzer::parse_pattern(".env", &PathBuf::from(".")).unwrap(); - assert!(env_pattern.regex.is_match(".env")); - assert!(env_pattern.regex.is_match("subdir/.env")); - assert!(!env_pattern.regex.is_match("not-env")); - } - - #[test] - fn test_nested_directory_matching() { - let temp_dir = TempDir::new().unwrap(); - let analyzer = GitIgnoreAnalyzer::new(temp_dir.path()).unwrap(); - - // Create a pattern for .env files - let env_pattern = GitIgnoreAnalyzer::parse_pattern(".env*", &PathBuf::from(".")).unwrap(); - - // Test various nested scenarios - let test_paths = [ - ".env", - "secrets/.env", - "config/production/.env.local", - "deeply/nested/folder/.env.production", - ]; - - for path in &test_paths { - assert!(env_pattern.regex.is_match(path), "Pattern should match: {}", path); - } - } -} \ No newline at end of file diff --git a/src/analyzer/security/javascript.rs b/src/analyzer/security/javascript.rs deleted file mode 100644 index 2febc26c..00000000 --- a/src/analyzer/security/javascript.rs +++ /dev/null @@ -1,1013 +0,0 @@ -//! # JavaScript/TypeScript Security Analyzer -//! -//! Specialized security analyzer for JavaScript and TypeScript applications. -//! -//! This analyzer focuses on: -//! - Framework-specific secret patterns (React, Vue, Angular, etc.) -//! - Environment variable misuse -//! - Hardcoded API keys in configuration objects -//! - Client-side secret exposure patterns -//! 
- Common JS/TS anti-patterns - -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::fs; -use regex::Regex; -use log::{debug, info}; - -use super::{SecurityError, SecurityFinding, SecuritySeverity, SecurityCategory, SecurityReport, SecurityAnalysisConfig, GitIgnoreAnalyzer, GitIgnoreRisk}; - -/// JavaScript/TypeScript specific security analyzer -pub struct JavaScriptSecurityAnalyzer { - config: SecurityAnalysisConfig, - js_patterns: Vec, - framework_patterns: HashMap>, - env_var_patterns: Vec, - gitignore_analyzer: Option, -} - -/// JavaScript-specific secret pattern -#[derive(Debug, Clone)] -pub struct JavaScriptSecretPattern { - pub id: String, - pub name: String, - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, - pub context_indicators: Vec, // Code context that increases confidence - pub false_positive_indicators: Vec, // Context that suggests false positive -} - -/// Framework-specific patterns -#[derive(Debug, Clone)] -pub struct FrameworkPattern { - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, - pub file_extensions: Vec, -} - -/// Environment variable patterns -#[derive(Debug, Clone)] -pub struct EnvVarPattern { - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, - pub public_prefixes: Vec, // Prefixes that indicate public env vars -} - -impl JavaScriptSecurityAnalyzer { - pub fn new() -> Result { - Self::with_config(SecurityAnalysisConfig::default()) - } - - pub fn with_config(config: SecurityAnalysisConfig) -> Result { - let js_patterns = Self::initialize_js_patterns()?; - let framework_patterns = Self::initialize_framework_patterns()?; - let env_var_patterns = Self::initialize_env_var_patterns()?; - - Ok(Self { - config, - js_patterns, - framework_patterns, - env_var_patterns, - gitignore_analyzer: None, // Will be initialized in analyze_project - }) - } - - /// Analyze a JavaScript/TypeScript project - pub fn analyze_project(&mut 
self, project_root: &Path) -> Result { - let mut findings = Vec::new(); - - // Initialize gitignore analyzer for comprehensive file protection assessment - let mut gitignore_analyzer = GitIgnoreAnalyzer::new(project_root) - .map_err(|e| SecurityError::AnalysisFailed(format!("Failed to initialize gitignore analyzer: {}", e)))?; - - info!("πŸ” Using gitignore-aware security analysis for {}", project_root.display()); - - // Get JS/TS files using gitignore-aware collection - let js_extensions = ["js", "jsx", "ts", "tsx", "vue", "svelte"]; - let js_files = gitignore_analyzer.get_files_to_analyze(&js_extensions) - .map_err(|e| SecurityError::Io(e))? - .into_iter() - .filter(|file| { - if let Some(ext) = file.extension().and_then(|e| e.to_str()) { - js_extensions.contains(&ext) - } else { - false - } - }) - .collect::>(); - - info!("Found {} JavaScript/TypeScript files to analyze (gitignore-filtered)", js_files.len()); - - // Analyze each file with gitignore context - for file_path in &js_files { - let gitignore_status = gitignore_analyzer.analyze_file(file_path); - let mut file_findings = self.analyze_js_file(file_path)?; - - // Enhance findings with gitignore risk assessment - for finding in &mut file_findings { - self.enhance_finding_with_gitignore_status(finding, &gitignore_status); - } - - findings.extend(file_findings); - } - - // Analyze package.json and other config files with gitignore awareness - findings.extend(self.analyze_config_files_with_gitignore(project_root, &mut gitignore_analyzer)?); - - // Comprehensive environment file analysis with gitignore risk assessment - findings.extend(self.analyze_env_files_with_gitignore(project_root, &mut gitignore_analyzer)?); - - // Generate gitignore recommendations for any secret files found - let secret_files: Vec = findings.iter() - .filter_map(|f| f.file_path.as_ref()) - .cloned() - .collect(); - - let gitignore_recommendations = gitignore_analyzer.generate_gitignore_recommendations(&secret_files); - - // Create 
report with enhanced recommendations - let mut report = SecurityReport::from_findings(findings); - report.recommendations.extend(gitignore_recommendations); - - Ok(report) - } - - /// Initialize JavaScript-specific secret patterns - fn initialize_js_patterns() -> Result, SecurityError> { - let patterns = vec![ - // Firebase config object - JavaScriptSecretPattern { - id: "js-firebase-config".to_string(), - name: "Firebase Configuration Object".to_string(), - pattern: Regex::new(r#"(?i)(?:const\s+|let\s+|var\s+)?firebaseConfig\s*[=:]\s*\{[^}]*apiKey\s*:\s*["']([^"']+)["'][^}]*\}"#)?, - severity: SecuritySeverity::Medium, - description: "Firebase configuration object with API key detected".to_string(), - context_indicators: vec!["initializeApp".to_string(), "firebase".to_string()], - false_positive_indicators: vec!["example".to_string(), "placeholder".to_string(), "your-api-key".to_string()], - }, - - // Stripe publishable key (less sensitive but should be noted) - JavaScriptSecretPattern { - id: "js-stripe-public-key".to_string(), - name: "Stripe Publishable Key".to_string(), - pattern: Regex::new(r#"(?i)pk_(?:test_|live_)[a-zA-Z0-9]{24,}"#)?, - severity: SecuritySeverity::Low, - description: "Stripe publishable key detected (public but should be environment variable)".to_string(), - context_indicators: vec!["stripe".to_string(), "payment".to_string()], - false_positive_indicators: vec![], - }, - - // Supabase anon key - JavaScriptSecretPattern { - id: "js-supabase-anon-key".to_string(), - name: "Supabase Anonymous Key".to_string(), - pattern: Regex::new(r#"(?i)(?:supabase|anon).*?["\']eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+["\']"#)?, - severity: SecuritySeverity::Medium, - description: "Supabase anonymous key detected".to_string(), - context_indicators: vec!["supabase".to_string(), "createClient".to_string()], - false_positive_indicators: vec!["example".to_string(), "placeholder".to_string()], - }, - - // Auth0 configuration - JavaScriptSecretPattern { - 
id: "js-auth0-config".to_string(), - name: "Auth0 Configuration".to_string(), - pattern: Regex::new(r#"(?i)(?:domain|clientId)\s*:\s*["']([a-zA-Z0-9.-]+\.auth0\.com|[a-zA-Z0-9]{32})["']"#)?, - severity: SecuritySeverity::Medium, - description: "Auth0 configuration detected".to_string(), - context_indicators: vec!["auth0".to_string(), "webAuth".to_string()], - false_positive_indicators: vec!["example".to_string(), "your-domain".to_string()], - }, - - // Process.env hardcoded values - JavaScriptSecretPattern { - id: "js-hardcoded-env".to_string(), - name: "Hardcoded process.env Assignment".to_string(), - pattern: Regex::new(r#"process\.env\.[A-Z_]+\s*=\s*["']([^"']+)["']"#)?, - severity: SecuritySeverity::High, - description: "Hardcoded assignment to process.env detected".to_string(), - context_indicators: vec![], - false_positive_indicators: vec!["development".to_string(), "test".to_string()], - }, - - // Clerk keys - JavaScriptSecretPattern { - id: "js-clerk-key".to_string(), - name: "Clerk API Key".to_string(), - pattern: Regex::new(r#"(?i)(?:clerk|pk_test_|pk_live_)[a-zA-Z0-9_-]{20,}"#)?, - severity: SecuritySeverity::Medium, - description: "Clerk API key detected".to_string(), - context_indicators: vec!["clerk".to_string(), "ClerkProvider".to_string()], - false_positive_indicators: vec![], - }, - - // Generic API key in object assignment - JavaScriptSecretPattern { - id: "js-api-key-object".to_string(), - name: "API Key in Object Assignment".to_string(), - pattern: Regex::new(r#"(?i)(?:apiKey|api_key|clientSecret|client_secret|accessToken|access_token|secretKey|secret_key)\s*:\s*["']([A-Za-z0-9_-]{20,})["']"#)?, - severity: SecuritySeverity::High, - description: "API key or secret assigned in object literal".to_string(), - context_indicators: vec!["fetch".to_string(), "axios".to_string(), "headers".to_string()], - false_positive_indicators: vec!["process.env".to_string(), "import.meta.env".to_string(), "placeholder".to_string()], - }, - - // Bearer tokens in 
fetch headers - JavaScriptSecretPattern { - id: "js-bearer-token".to_string(), - name: "Bearer Token in Code".to_string(), - pattern: Regex::new(r#"(?i)(?:authorization|bearer)\s*:\s*["'](?:bearer\s+)?([A-Za-z0-9_-]{20,})["']"#)?, - severity: SecuritySeverity::Critical, - description: "Bearer token hardcoded in authorization header".to_string(), - context_indicators: vec!["fetch".to_string(), "axios".to_string(), "headers".to_string()], - false_positive_indicators: vec!["${".to_string(), "process.env".to_string(), "import.meta.env".to_string()], - }, - - // Database connection strings - JavaScriptSecretPattern { - id: "js-database-url".to_string(), - name: "Database Connection URL".to_string(), - pattern: Regex::new(r#"(?i)(?:mongodb|postgres|mysql)://[^"'\s]+:[^"'\s]+@[^"'\s]+"#)?, - severity: SecuritySeverity::Critical, - description: "Database connection string with credentials detected".to_string(), - context_indicators: vec!["connect".to_string(), "mongoose".to_string(), "client".to_string()], - false_positive_indicators: vec!["localhost".to_string(), "example.com".to_string()], - }, - ]; - - Ok(patterns) - } - - /// Initialize framework-specific patterns - fn initialize_framework_patterns() -> Result>, SecurityError> { - let mut frameworks = HashMap::new(); - - // React patterns - frameworks.insert("react".to_string(), vec![ - FrameworkPattern { - pattern: Regex::new(r#"(?i)react_app_[a-z_]+\s*=\s*["']([^"']+)["']"#)?, - severity: SecuritySeverity::Medium, - description: "React environment variable potentially exposed in build".to_string(), - file_extensions: vec!["js".to_string(), "jsx".to_string(), "ts".to_string(), "tsx".to_string()], - }, - ]); - - // Next.js patterns - frameworks.insert("nextjs".to_string(), vec![ - FrameworkPattern { - pattern: Regex::new(r#"(?i)next_public_[a-z_]+\s*=\s*["']([^"']+)["']"#)?, - severity: SecuritySeverity::Low, - description: "Next.js public environment variable (ensure it should be public)".to_string(), - 
file_extensions: vec!["js".to_string(), "jsx".to_string(), "ts".to_string(), "tsx".to_string()], - }, - ]); - - // Vite patterns - frameworks.insert("vite".to_string(), vec![ - FrameworkPattern { - pattern: Regex::new(r#"(?i)vite_[a-z_]+\s*=\s*["']([^"']+)["']"#)?, - severity: SecuritySeverity::Medium, - description: "Vite environment variable potentially exposed in build".to_string(), - file_extensions: vec!["js".to_string(), "jsx".to_string(), "ts".to_string(), "tsx".to_string(), "vue".to_string()], - }, - ]); - - Ok(frameworks) - } - - /// Initialize environment variable patterns - fn initialize_env_var_patterns() -> Result, SecurityError> { - let patterns = vec![ - EnvVarPattern { - pattern: Regex::new(r#"process\.env\.([A-Z_]+)"#)?, - severity: SecuritySeverity::Info, - description: "Environment variable usage detected".to_string(), - public_prefixes: vec![ - "REACT_APP_".to_string(), - "NEXT_PUBLIC_".to_string(), - "VITE_".to_string(), - "VUE_APP_".to_string(), - "EXPO_PUBLIC_".to_string(), - "NUXT_PUBLIC_".to_string(), - ], - }, - EnvVarPattern { - pattern: Regex::new(r#"import\.meta\.env\.([A-Z_]+)"#)?, - severity: SecuritySeverity::Info, - description: "Vite environment variable usage detected".to_string(), - public_prefixes: vec!["VITE_".to_string()], - }, - ]; - - Ok(patterns) - } - - /// Collect all JavaScript/TypeScript files - fn collect_js_files(&self, project_root: &Path) -> Result, SecurityError> { - let extensions = ["js", "jsx", "ts", "tsx", "vue", "svelte"]; - let mut files = Vec::new(); - - fn collect_recursive(dir: &Path, extensions: &[&str], files: &mut Vec) -> Result<(), std::io::Error> { - for entry in fs::read_dir(dir)? 
{ - let entry = entry?; - let path = entry.path(); - - if path.is_dir() { - // Skip common build/dependency directories - if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) { - if matches!(dir_name, "node_modules" | ".git" | "build" | "dist" | ".next" | "coverage") { - continue; - } - } - collect_recursive(&path, extensions, files)?; - } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) { - if extensions.contains(&ext) { - files.push(path); - } - } - } - Ok(()) - } - - collect_recursive(project_root, &extensions, &mut files)?; - Ok(files) - } - - /// Analyze a single JavaScript/TypeScript file - fn analyze_js_file(&self, file_path: &Path) -> Result, SecurityError> { - let content = fs::read_to_string(file_path)?; - let mut findings = Vec::new(); - - // Check against JavaScript-specific patterns - for pattern in &self.js_patterns { - findings.extend(self.check_pattern_in_content(&content, pattern, file_path)?); - } - - // Check environment variable usage - findings.extend(self.check_env_var_usage(&content, file_path)?); - - Ok(findings) - } - - /// Check a specific pattern in file content - fn check_pattern_in_content( - &self, - content: &str, - pattern: &JavaScriptSecretPattern, - file_path: &Path, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - for (line_num, line) in content.lines().enumerate() { - if let Some(captures) = pattern.pattern.captures(line) { - // Check for false positive indicators - if pattern.false_positive_indicators.iter().any(|indicator| { - line.to_lowercase().contains(&indicator.to_lowercase()) - }) { - debug!("Skipping potential false positive in {}: {}", file_path.display(), line.trim()); - continue; - } - - // Extract the secret value and position if captured - let (evidence, column_number) = if captures.len() > 1 { - if let Some(match_) = captures.get(1) { - (Some(match_.as_str().to_string()), Some(match_.start() + 1)) - } else { - (Some(line.trim().to_string()), None) - } - } else { - 
// For patterns without capture groups, use the full match - if let Some(match_) = captures.get(0) { - (Some(line.trim().to_string()), Some(match_.start() + 1)) - } else { - (Some(line.trim().to_string()), None) - } - }; - - // Check context for confidence scoring - let context_score = self.calculate_context_confidence(content, &pattern.context_indicators); - let adjusted_severity = self.adjust_severity_by_context(pattern.severity.clone(), context_score); - - findings.push(SecurityFinding { - id: format!("{}-{}", pattern.id, line_num), - title: format!("{} Detected", pattern.name), - description: format!("{} (Context confidence: {:.1})", pattern.description, context_score), - severity: adjusted_severity, - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number, - evidence, - remediation: self.generate_js_remediation(&pattern.id), - references: vec![ - "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(), - "https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], - }); - } - } - - Ok(findings) - } - - /// Check environment variable usage patterns with context-aware detection - fn check_env_var_usage(&self, content: &str, file_path: &Path) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Determine if this is likely server-side or client-side code - let is_server_side = self.is_server_side_file(file_path, content); - - for pattern in &self.env_var_patterns { - for (line_num, line) in content.lines().enumerate() { - if let Some(captures) = pattern.pattern.captures(line) { - if let Some(var_name) = captures.get(1) { - let var_name = var_name.as_str(); - - // Check if this is a public environment variable - let is_public = pattern.public_prefixes.iter().any(|prefix| 
var_name.starts_with(prefix)); - - // Context-aware detection: Only flag as problematic if: - // 1. It's a sensitive variable AND - // 2. It's in client-side code AND - // 3. It doesn't have a public prefix - if !is_public && self.is_sensitive_var_name(var_name) && !is_server_side { - // Extract column position from the pattern match - let column_number = captures.get(0) - .map(|m| m.start() + 1); - - findings.push(SecurityFinding { - id: format!("js-env-sensitive-{}", line_num), - title: "Sensitive Environment Variable in Client Code".to_string(), - description: format!("Environment variable '{}' appears sensitive and may be exposed to client in browser code", var_name), - severity: SecuritySeverity::High, - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number, - evidence: Some(line.trim().to_string()), - remediation: vec![ - "Move sensitive environment variables to server-side code".to_string(), - "Use public environment variable prefixes only for non-sensitive data".to_string(), - "Consider using a backend API endpoint to handle sensitive operations".to_string(), - ], - references: vec![ - "https://nextjs.org/docs/basic-features/environment-variables".to_string(), - "https://vitejs.dev/guide/env-and-mode.html".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }); - } - // For server-side code using environment variables, this is GOOD practice - don't flag it - } - } - } - } - - Ok(findings) - } - - /// Analyze configuration files (package.json, etc.) 
- fn analyze_config_files(&self, project_root: &Path) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Check package.json for exposed scripts or configs - let package_json = project_root.join("package.json"); - if package_json.exists() { - findings.extend(self.analyze_package_json(&package_json)?); - } - - Ok(findings) - } - - /// Analyze package.json for security issues - fn analyze_package_json(&self, package_json: &Path) -> Result, SecurityError> { - let mut findings = Vec::new(); - let content = fs::read_to_string(package_json)?; - - // Look for hardcoded secrets in scripts or config - if content.contains("REACT_APP_") || content.contains("NEXT_PUBLIC_") || content.contains("VITE_") { - for (line_num, line) in content.lines().enumerate() { - if line.contains("sk_") || line.contains("pk_live_") || line.contains("eyJ") { - findings.push(SecurityFinding { - id: format!("package-json-secret-{}", line_num), - title: "Potential Secret in package.json".to_string(), - description: "Potential API key or token found in package.json".to_string(), - severity: SecuritySeverity::High, - category: SecurityCategory::SecretsExposure, - file_path: Some(package_json.to_path_buf()), - line_number: Some(line_num + 1), - column_number: None, - evidence: Some(line.trim().to_string()), - remediation: vec![ - "Remove secrets from package.json".to_string(), - "Use environment variables instead".to_string(), - "Add package.json to .gitignore if it contains secrets (not recommended)".to_string(), - ], - references: vec![ - "https://docs.npmjs.com/cli/v8/configuring-npm/package-json".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }); - } - } - } - - Ok(findings) - } - - /// Analyze environment files - fn analyze_env_files(&self, project_root: &Path) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Check for .env files that might be accidentally committed - let env_files = [".env", 
".env.local", ".env.production", ".env.development"]; - - for env_file in &env_files { - // Skip template/example files - if self.is_template_file(env_file) { - debug!("Skipping template env file: {}", env_file); - continue; - } - - let env_path = project_root.join(env_file); - if env_path.exists() { - // Check if this file should be tracked by git - findings.push(SecurityFinding { - id: format!("env-file-{}", env_file.replace('.', "-")), - title: "Environment File Detected".to_string(), - description: format!("Environment file '{}' found - ensure it's properly protected", env_file), - severity: SecuritySeverity::Medium, - category: SecurityCategory::SecretsExposure, - file_path: Some(env_path), - line_number: None, - column_number: None, - evidence: None, - remediation: vec![ - "Ensure environment files are in .gitignore".to_string(), - "Use .env.example files for documentation".to_string(), - "Never commit actual environment files to version control".to_string(), - ], - references: vec![ - "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }); - } - } - - Ok(findings) - } - - /// Calculate confidence score based on context indicators - fn calculate_context_confidence(&self, content: &str, indicators: &[String]) -> f32 { - let total_indicators = indicators.len() as f32; - if total_indicators == 0.0 { - return 0.5; // Neutral confidence - } - - let found_indicators = indicators.iter() - .filter(|indicator| content.to_lowercase().contains(&indicator.to_lowercase())) - .count() as f32; - - found_indicators / total_indicators - } - - /// Adjust severity based on context confidence - fn adjust_severity_by_context(&self, base_severity: SecuritySeverity, confidence: f32) -> SecuritySeverity { - match base_severity { - SecuritySeverity::Critical => base_severity, // Keep critical as-is - SecuritySeverity::High => { - if confidence < 0.3 { - 
SecuritySeverity::Medium - } else { - base_severity - } - } - SecuritySeverity::Medium => { - if confidence > 0.7 { - SecuritySeverity::High - } else if confidence < 0.3 { - SecuritySeverity::Low - } else { - base_severity - } - } - _ => base_severity, - } - } - - /// Check if a variable name appears sensitive - fn is_sensitive_var_name(&self, var_name: &str) -> bool { - let sensitive_keywords = [ - "SECRET", "KEY", "TOKEN", "PASSWORD", "PASS", "AUTH", "API", - "PRIVATE", "CREDENTIAL", "CERT", "SSL", "TLS", "OAUTH", - "CLIENT_SECRET", "ACCESS_TOKEN", "REFRESH_TOKEN", - ]; - - let var_upper = var_name.to_uppercase(); - sensitive_keywords.iter().any(|keyword| var_upper.contains(keyword)) - } - - /// Determine if a JavaScript file is likely server-side or client-side - fn is_server_side_file(&self, file_path: &Path, content: &str) -> bool { - // Check file path indicators - let path_str = file_path.to_string_lossy().to_lowercase(); - let server_path_indicators = [ - "/server/", "/backend/", "/api/", "/routes/", "/controllers/", - "/middleware/", "/models/", "/services/", "/utils/", "/lib/", - "server.js", "server.ts", "index.js", "index.ts", "app.js", "app.ts", - "/pages/api/", "/app/api/", // Next.js API routes - "server-side", "backend", "node_modules", // Clear server indicators - ]; - - let client_path_indicators = [ - "/client/", "/frontend/", "/public/", "/static/", "/assets/", - "/components/", "/views/", "/pages/", "/src/components/", - "client.js", "client.ts", "main.js", "main.ts", "app.tsx", "index.html", - ]; - - // Strong server-side path indicators - if server_path_indicators.iter().any(|indicator| path_str.contains(indicator)) { - return true; - } - - // Strong client-side path indicators - if client_path_indicators.iter().any(|indicator| path_str.contains(indicator)) { - return false; - } - - // Check content for server-side indicators - let server_content_indicators = [ - "require(", "module.exports", "exports.", "__dirname", "__filename", - 
"process.env", "process.exit", "process.argv", "fs.readFile", "fs.writeFile", - "http.createServer", "express(", "app.listen", "app.use", "app.get", "app.post", - "import express", "import fs", "import path", "import http", "import https", - "cors(", "bodyParser", "middleware", "mongoose.connect", "sequelize", - "jwt.sign", "bcrypt", "crypto.createHash", "nodemailer", "socket.io", - "console.log", // While not exclusive, very common in server code - ]; - - let client_content_indicators = [ - "document.", "window.", "navigator.", "localStorage", "sessionStorage", - "addEventListener", "querySelector", "getElementById", "fetch(", - "XMLHttpRequest", "React.", "ReactDOM", "useState", "useEffect", - "Vue.", "Angular", "svelte", "alert(", "confirm(", "prompt(", - "location.href", "history.push", "router.push", "browser", - ]; - - let server_matches = server_content_indicators.iter() - .filter(|&indicator| content.contains(indicator)) - .count(); - - let client_matches = client_content_indicators.iter() - .filter(|&indicator| content.contains(indicator)) - .count(); - - // If we have server indicators and no clear client indicators, assume server-side - if server_matches > 0 && client_matches == 0 { - return true; - } - - // If we have client indicators and no server indicators, assume client-side - if client_matches > 0 && server_matches == 0 { - return false; - } - - // If mixed or unclear, use a heuristic - if server_matches > client_matches { - return true; - } - - // Default to client-side for mixed/unclear files (safer for security) - false - } - - /// Generate JavaScript-specific remediation advice - fn generate_js_remediation(&self, pattern_id: &str) -> Vec { - match pattern_id { - id if id.contains("firebase") => vec![ - "Move Firebase configuration to environment variables".to_string(), - "Use Firebase App Check for additional security".to_string(), - "Implement proper Firebase security rules".to_string(), - ], - id if id.contains("stripe") => vec![ - "Use 
environment variables for Stripe keys".to_string(), - "Ensure you're using publishable keys in client-side code".to_string(), - "Keep secret keys on the server side only".to_string(), - ], - id if id.contains("bearer") => vec![ - "Never hardcode bearer tokens in client-side code".to_string(), - "Use secure token storage mechanisms".to_string(), - "Implement token refresh flows".to_string(), - ], - _ => vec![ - "Move secrets to environment variables".to_string(), - "Use server-side API routes for sensitive operations".to_string(), - "Implement proper secret management practices".to_string(), - ], - } - } - - /// Enhance a security finding with gitignore risk assessment - fn enhance_finding_with_gitignore_status( - &self, - finding: &mut SecurityFinding, - gitignore_status: &super::gitignore::GitIgnoreStatus, - ) { - // Adjust severity based on gitignore risk - finding.severity = match gitignore_status.risk_level { - GitIgnoreRisk::Tracked => SecuritySeverity::Critical, // Always critical if tracked - GitIgnoreRisk::Exposed => { - // Upgrade severity if exposed - match &finding.severity { - SecuritySeverity::Medium => SecuritySeverity::High, - SecuritySeverity::Low => SecuritySeverity::Medium, - other => other.clone(), - } - } - GitIgnoreRisk::Protected => { - // Downgrade slightly if protected - match &finding.severity { - SecuritySeverity::Critical => SecuritySeverity::High, - SecuritySeverity::High => SecuritySeverity::Medium, - other => other.clone(), - } - } - GitIgnoreRisk::Safe => finding.severity.clone(), - }; - - // Add gitignore context to description - finding.description.push_str(&format!(" (GitIgnore: {})", gitignore_status.description())); - - // Add gitignore-specific remediation - let gitignore_action = gitignore_status.recommended_action(); - if gitignore_action != "No action needed" { - finding.remediation.insert(0, format!("πŸ”’ GitIgnore: {}", gitignore_action)); - } - - // Add git history warning for tracked files - if gitignore_status.risk_level 
== GitIgnoreRisk::Tracked { - finding.remediation.insert(1, "⚠️ CRITICAL: Remove this file from git history using git-filter-branch or BFG Repo-Cleaner".to_string()); - finding.remediation.insert(2, "πŸ”‘ Rotate any exposed secrets immediately".to_string()); - } - } - - /// Analyze configuration files with gitignore awareness - fn analyze_config_files_with_gitignore( - &self, - project_root: &Path, - gitignore_analyzer: &mut GitIgnoreAnalyzer, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Check package.json with gitignore assessment - let package_json = project_root.join("package.json"); - if package_json.exists() { - let gitignore_status = gitignore_analyzer.analyze_file(&package_json); - let mut package_findings = self.analyze_package_json(&package_json)?; - - // Enhance findings with gitignore context - for finding in &mut package_findings { - self.enhance_finding_with_gitignore_status(finding, &gitignore_status); - } - - findings.extend(package_findings); - } - - // Check other common config files - let config_files = [ - "tsconfig.json", - "vite.config.js", - "vite.config.ts", - "next.config.js", - "next.config.ts", - "nuxt.config.js", - "nuxt.config.ts", - // Note: .env.example is now excluded as it's a template file - ]; - - for config_file in &config_files { - // Skip template/example files - if self.is_template_file(config_file) { - debug!("Skipping template config file: {}", config_file); - continue; - } - - let config_path = project_root.join(config_file); - if config_path.exists() { - let gitignore_status = gitignore_analyzer.analyze_file(&config_path); - - // Only analyze if file contains potential secrets or is not properly protected - if gitignore_status.should_be_ignored || !gitignore_status.is_ignored { - if let Ok(content) = fs::read_to_string(&config_path) { - // Basic secret pattern check for config files - if self.contains_potential_secrets(&content) { - let mut finding = SecurityFinding { - id: 
format!("config-file-{}", config_file.replace('.', "-")), - title: "Potential Secrets in Configuration File".to_string(), - description: format!("Configuration file '{}' may contain secrets", config_file), - severity: SecuritySeverity::Medium, - category: SecurityCategory::SecretsExposure, - file_path: Some(config_path.clone()), - line_number: None, - column_number: None, - evidence: None, - remediation: vec![ - "Review configuration file for hardcoded secrets".to_string(), - "Use environment variables for sensitive configuration".to_string(), - ], - references: vec![], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }; - - self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); - findings.push(finding); - } - } - } - } - } - - Ok(findings) - } - - /// Check if a file is a template/example file that should be excluded from security alerts - fn is_template_file(&self, file_name: &str) -> bool { - let template_indicators = [ - "sample", "example", "template", "template.env", "env.template", - "sample.env", "env.sample", "example.env", "env.example", - "examples", "samples", "templates", "demo", "test", - ".env.sample", ".env.example", ".env.template", ".env.demo", ".env.test" - ]; - - let file_name_lower = file_name.to_lowercase(); - - // Check for exact matches or contains patterns - template_indicators.iter().any(|indicator| { - file_name_lower == *indicator || - file_name_lower.contains(indicator) || - file_name_lower.ends_with(indicator) - }) - } - - /// Analyze environment files with comprehensive gitignore risk assessment - fn analyze_env_files_with_gitignore( - &self, - project_root: &Path, - gitignore_analyzer: &mut GitIgnoreAnalyzer, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Get all potential environment files using gitignore analyzer - let env_files = gitignore_analyzer.get_files_to_analyze(&[]) - .map_err(|e| SecurityError::Io(e))? 
- .into_iter() - .filter(|file| { - if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) { - // Exclude template/example files from security alerts - if self.is_template_file(file_name) { - debug!("Skipping template file: {}", file_name); - return false; - } - - file_name.starts_with(".env") || - file_name.contains("credentials") || - file_name.contains("secrets") || - file_name.contains("config") || - file_name.ends_with(".key") || - file_name.ends_with(".pem") - } else { - false - } - }) - .collect::>(); - - for env_file in env_files { - let gitignore_status = gitignore_analyzer.analyze_file(&env_file); - let relative_path = env_file.strip_prefix(project_root) - .unwrap_or(&env_file); - - // Create finding based on gitignore risk assessment - let (severity, title, description) = match gitignore_status.risk_level { - GitIgnoreRisk::Tracked => ( - SecuritySeverity::Critical, - "Secret File Tracked by Git".to_string(), - format!("Secret file '{}' is tracked by git and may expose credentials in version history", relative_path.display()), - ), - GitIgnoreRisk::Exposed => ( - SecuritySeverity::High, - "Secret File Not in GitIgnore".to_string(), - format!("Secret file '{}' exists but is not protected by .gitignore", relative_path.display()), - ), - GitIgnoreRisk::Protected => ( - SecuritySeverity::Info, - "Secret File Properly Protected".to_string(), - format!("Secret file '{}' is properly ignored but detected for verification", relative_path.display()), - ), - GitIgnoreRisk::Safe => continue, // Skip files that appear safe - }; - - let mut finding = SecurityFinding { - id: format!("env-file-{}", relative_path.to_string_lossy().replace('/', "-").replace('.', "-")), - title, - description, - severity, - category: SecurityCategory::SecretsExposure, - file_path: Some(env_file.clone()), - line_number: None, - column_number: None, - evidence: None, - remediation: vec![ - "Ensure sensitive files are in .gitignore".to_string(), - "Use .env.example files for 
documentation".to_string(), - "Never commit actual environment files to version control".to_string(), - ], - references: vec![ - "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }; - - self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); - findings.push(finding); - } - - Ok(findings) - } - - /// Check if content contains potential secrets (basic patterns) - fn contains_potential_secrets(&self, content: &str) -> bool { - let secret_indicators = [ - "sk_", "pk_live_", "eyJ", "AKIA", "-----BEGIN", - "client_secret", "api_key", "access_token", - "private_key", "secret_key", "bearer", - ]; - - let content_lower = content.to_lowercase(); - secret_indicators.iter().any(|indicator| content_lower.contains(&indicator.to_lowercase())) - } -} - -impl SecurityReport { - /// Create a security report from a list of findings - pub fn from_findings(findings: Vec) -> Self { - let total_findings = findings.len(); - let mut findings_by_severity = HashMap::new(); - let mut findings_by_category = HashMap::new(); - - for finding in &findings { - *findings_by_severity.entry(finding.severity.clone()).or_insert(0) += 1; - *findings_by_category.entry(finding.category.clone()).or_insert(0) += 1; - } - - // Calculate overall score (simple implementation) - let score_penalty = findings.iter().map(|f| match f.severity { - SecuritySeverity::Critical => 25.0, - SecuritySeverity::High => 15.0, - SecuritySeverity::Medium => 8.0, - SecuritySeverity::Low => 3.0, - SecuritySeverity::Info => 1.0, - }).sum::(); - - let overall_score = (100.0 - score_penalty).max(0.0); - - // Determine risk level - let risk_level = if findings.iter().any(|f| f.severity == SecuritySeverity::Critical) { - SecuritySeverity::Critical - } else if findings.iter().any(|f| f.severity == SecuritySeverity::High) { - SecuritySeverity::High - } else if findings.iter().any(|f| f.severity 
== SecuritySeverity::Medium) { - SecuritySeverity::Medium - } else if !findings.is_empty() { - SecuritySeverity::Low - } else { - SecuritySeverity::Info - }; - - Self { - analyzed_at: chrono::Utc::now(), - overall_score, - risk_level, - total_findings, - findings_by_severity, - findings_by_category, - findings, - recommendations: vec![ - "Review all detected secrets and move them to environment variables".to_string(), - "Implement proper secret management practices".to_string(), - "Use framework-specific environment variable patterns correctly".to_string(), - ], - compliance_status: HashMap::new(), - } - } -} \ No newline at end of file diff --git a/src/analyzer/security/mod.rs b/src/analyzer/security/mod.rs index e65719c5..e883b270 100644 --- a/src/analyzer/security/mod.rs +++ b/src/analyzer/security/mod.rs @@ -8,60 +8,19 @@ //! - Framework-specific detection //! - Context-aware severity assessment -use std::path::Path; use thiserror::Error; +pub mod config; pub mod core; -pub mod javascript; -pub mod python; pub mod patterns; -pub mod config; -pub mod gitignore; +pub mod turbo; pub use core::{SecurityAnalyzer, SecurityReport, SecurityFinding, SecuritySeverity, SecurityCategory}; -pub use javascript::JavaScriptSecurityAnalyzer; -pub use python::PythonSecurityAnalyzer; +pub use turbo::{TurboSecurityAnalyzer, TurboConfig, ScanMode}; pub use patterns::SecretPatternManager; pub use config::SecurityAnalysisConfig; -pub use gitignore::{GitIgnoreAnalyzer, GitIgnoreStatus, GitIgnoreRisk}; -/// Modular security analyzer that delegates to language-specific analyzers -pub struct ModularSecurityAnalyzer { - javascript_analyzer: JavaScriptSecurityAnalyzer, - // TODO: Add other language analyzers - // python_analyzer: PythonSecurityAnalyzer, - // rust_analyzer: RustSecurityAnalyzer, -} -impl ModularSecurityAnalyzer { - pub fn new() -> Result { - Ok(Self { - javascript_analyzer: JavaScriptSecurityAnalyzer::new()?, - }) - } - - pub fn with_config(config: SecurityAnalysisConfig) 
-> Result { - Ok(Self { - javascript_analyzer: JavaScriptSecurityAnalyzer::with_config(config.clone())?, - }) - } - - /// Analyze a project with appropriate language-specific analyzers - pub fn analyze_project(&mut self, project_root: &Path, languages: &[crate::analyzer::DetectedLanguage]) -> Result { - let mut all_findings = Vec::new(); - - // Analyze JavaScript/TypeScript files - if languages.iter().any(|lang| matches!(lang.name.as_str(), "JavaScript" | "TypeScript" | "JSX" | "TSX")) { - let js_report = self.javascript_analyzer.analyze_project(project_root)?; - all_findings.extend(js_report.findings); - } - - // TODO: Add other language analyzers based on detected languages - - // Combine results into a comprehensive report - Ok(SecurityReport::from_findings(all_findings)) - } -} #[derive(Debug, Error)] pub enum SecurityError { diff --git a/src/analyzer/security/python.rs b/src/analyzer/security/python.rs deleted file mode 100644 index 03c42ed8..00000000 --- a/src/analyzer/security/python.rs +++ /dev/null @@ -1,1423 +0,0 @@ -//! # Python Security Analyzer -//! -//! Specialized security analyzer for Python applications. -//! -//! This analyzer focuses on: -//! - Python web frameworks (Django, Flask, FastAPI, etc.) -//! - AI/ML services and tools (OpenAI, Anthropic, Hugging Face, etc.) -//! - Cloud services commonly used with Python (AWS, GCP, Azure) -//! - Database connections and ORMs (SQLAlchemy, Django ORM, etc.) -//! - Environment variable misuse in Python applications -//! - Common Python anti-patterns and secret exposure patterns -//! 
- Python package managers and dependency files - -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::fs; -use regex::Regex; -use log::{debug, info, warn}; - -use super::{SecurityError, SecurityFinding, SecuritySeverity, SecurityCategory, SecurityReport, SecurityAnalysisConfig, GitIgnoreAnalyzer, GitIgnoreRisk}; - -/// Python-specific security analyzer -pub struct PythonSecurityAnalyzer { - config: SecurityAnalysisConfig, - python_patterns: Vec, - framework_patterns: HashMap>, - ai_ml_patterns: Vec, - cloud_patterns: Vec, - database_patterns: Vec, - env_var_patterns: Vec, - gitignore_analyzer: Option, -} - -/// Python-specific secret pattern -#[derive(Debug, Clone)] -pub struct PythonSecretPattern { - pub id: String, - pub name: String, - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, - pub context_indicators: Vec, - pub false_positive_indicators: Vec, - pub remediation_hints: Vec, -} - -/// Framework-specific patterns for Python web frameworks -#[derive(Debug, Clone)] -pub struct FrameworkPattern { - pub framework: String, - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, - pub file_extensions: Vec, -} - -/// AI/ML service patterns -#[derive(Debug, Clone)] -pub struct AiMlPattern { - pub service: String, - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, - pub api_key_format: String, -} - -/// Cloud service patterns -#[derive(Debug, Clone)] -pub struct CloudPattern { - pub provider: String, - pub service: String, - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, -} - -/// Database connection patterns -#[derive(Debug, Clone)] -pub struct DatabasePattern { - pub database_type: String, - pub pattern: Regex, - pub severity: SecuritySeverity, - pub description: String, -} - -/// Environment variable patterns specific to Python -#[derive(Debug, Clone)] -pub struct EnvVarPattern { - pub pattern: Regex, - pub severity: 
SecuritySeverity, - pub description: String, - pub sensitive_prefixes: Vec, -} - -impl PythonSecurityAnalyzer { - pub fn new() -> Result { - Self::with_config(SecurityAnalysisConfig::default()) - } - - pub fn with_config(config: SecurityAnalysisConfig) -> Result { - let python_patterns = Self::initialize_python_patterns()?; - let framework_patterns = Self::initialize_framework_patterns()?; - let ai_ml_patterns = Self::initialize_ai_ml_patterns()?; - let cloud_patterns = Self::initialize_cloud_patterns()?; - let database_patterns = Self::initialize_database_patterns()?; - let env_var_patterns = Self::initialize_env_var_patterns()?; - - Ok(Self { - config, - python_patterns, - framework_patterns, - ai_ml_patterns, - cloud_patterns, - database_patterns, - env_var_patterns, - gitignore_analyzer: None, - }) - } - - /// Analyze a Python project for security vulnerabilities - pub fn analyze_project(&mut self, project_root: &Path) -> Result { - let mut findings = Vec::new(); - - // Initialize gitignore analyzer for comprehensive file protection assessment - let mut gitignore_analyzer = GitIgnoreAnalyzer::new(project_root) - .map_err(|e| SecurityError::AnalysisFailed(format!("Failed to initialize gitignore analyzer: {}", e)))?; - - info!("πŸ” Using gitignore-aware security analysis for Python project at {}", project_root.display()); - - // Get Python files using gitignore-aware collection - let python_extensions = ["py", "pyx", "pyi", "pyw"]; - let python_files = gitignore_analyzer.get_files_to_analyze(&python_extensions) - .map_err(|e| SecurityError::Io(e))? 
- .into_iter() - .filter(|file| { - if let Some(ext) = file.extension().and_then(|e| e.to_str()) { - python_extensions.contains(&ext) - } else { - false - } - }) - .collect::>(); - - info!("Found {} Python files to analyze (gitignore-filtered)", python_files.len()); - - // Analyze each Python file with gitignore context - for file_path in &python_files { - let gitignore_status = gitignore_analyzer.analyze_file(file_path); - let mut file_findings = self.analyze_python_file(file_path)?; - - // Enhance findings with gitignore risk assessment - for finding in &mut file_findings { - self.enhance_finding_with_gitignore_status(finding, &gitignore_status); - } - - findings.extend(file_findings); - } - - // Analyze Python configuration files with gitignore awareness - findings.extend(self.analyze_config_files_with_gitignore(project_root, &mut gitignore_analyzer)?); - - // Comprehensive environment file analysis with gitignore risk assessment - findings.extend(self.analyze_env_files_with_gitignore(project_root, &mut gitignore_analyzer)?); - - // Analyze Python-specific dependency files - findings.extend(self.analyze_dependency_files_with_gitignore(project_root, &mut gitignore_analyzer)?); - - // Generate gitignore recommendations for any secret files found - let secret_files: Vec = findings.iter() - .filter_map(|f| f.file_path.as_ref()) - .cloned() - .collect(); - - let gitignore_recommendations = gitignore_analyzer.generate_gitignore_recommendations(&secret_files); - - // Create report with enhanced recommendations - let mut report = SecurityReport::from_findings(findings); - report.recommendations.extend(gitignore_recommendations); - - // Add Python-specific security recommendations - report.recommendations.extend(self.generate_python_security_recommendations()); - - Ok(report) - } - - /// Analyze a single Python file for security vulnerabilities - fn analyze_python_file(&self, file_path: &Path) -> Result, SecurityError> { - let content = fs::read_to_string(file_path)?; - 
let mut findings = Vec::new(); - - // Check against Python-specific patterns - for pattern in &self.python_patterns { - findings.extend(self.check_python_pattern_in_content(&content, pattern, file_path)?); - } - - // Check against AI/ML service patterns - for pattern in &self.ai_ml_patterns { - findings.extend(self.check_ai_ml_pattern_in_content(&content, pattern, file_path)?); - } - - // Check against cloud service patterns - for pattern in &self.cloud_patterns { - findings.extend(self.check_cloud_pattern_in_content(&content, pattern, file_path)?); - } - - // Check against database patterns - for pattern in &self.database_patterns { - findings.extend(self.check_database_pattern_in_content(&content, pattern, file_path)?); - } - - // Check framework-specific patterns based on file content - let detected_framework = self.detect_python_framework(&content); - if let Some(framework) = detected_framework { - if let Some(framework_patterns) = self.framework_patterns.get(&framework) { - for pattern in framework_patterns { - findings.extend(self.check_framework_pattern_in_content(&content, pattern, file_path)?); - } - } - } - - // Check environment variable usage - findings.extend(self.check_env_var_usage(&content, file_path)?); - - // Check for insecure Python practices - findings.extend(self.check_insecure_python_practices(&content, file_path)?); - - Ok(findings) - } - - /// Check a Python-specific pattern in file content - fn check_python_pattern_in_content( - &self, - content: &str, - pattern: &PythonSecretPattern, - file_path: &Path, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - for (line_num, line) in content.lines().enumerate() { - if let Some(captures) = pattern.pattern.captures(line) { - // Check for false positive indicators - if pattern.false_positive_indicators.iter().any(|indicator| { - line.to_lowercase().contains(&indicator.to_lowercase()) - }) { - debug!("Skipping potential false positive in {}: {}", file_path.display(), line.trim()); 
- continue; - } - - // Extract the secret value and position if captured - let (evidence, column_number) = if captures.len() > 1 { - if let Some(match_) = captures.get(1) { - (Some(self.mask_secret(match_.as_str())), Some(match_.start() + 1)) - } else { - (Some(line.trim().to_string()), None) - } - } else { - if let Some(match_) = captures.get(0) { - (Some(line.trim().to_string()), Some(match_.start() + 1)) - } else { - (Some(line.trim().to_string()), None) - } - }; - - // Check context for confidence scoring - let context_score = self.calculate_context_confidence(content, &pattern.context_indicators); - let adjusted_severity = self.adjust_severity_by_context(pattern.severity.clone(), context_score); - - findings.push(SecurityFinding { - id: format!("{}-{}", pattern.id, line_num), - title: format!("{} Detected", pattern.name), - description: format!("{} (Context confidence: {:.1})", pattern.description, context_score), - severity: adjusted_severity, - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number, - evidence, - remediation: pattern.remediation_hints.clone(), - references: vec![ - "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(), - "https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html".to_string(), - "https://docs.python.org/3/library/os.html#os.environ".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], - }); - } - } - - Ok(findings) - } - - /// Check AI/ML service patterns - fn check_ai_ml_pattern_in_content( - &self, - content: &str, - pattern: &AiMlPattern, - file_path: &Path, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - for (line_num, line) in content.lines().enumerate() { - if let Some(captures) = pattern.pattern.captures(line) { - let evidence = if captures.len() > 1 { - captures.get(1).map(|m| 
self.mask_secret(m.as_str())) - } else { - Some(line.trim().to_string()) - }; - - let column_number = captures.get(0).map(|m| m.start() + 1); - - findings.push(SecurityFinding { - id: format!("ai-ml-{}-{}", pattern.service.to_lowercase().replace(" ", "-"), line_num), - title: format!("{} API Key Detected", pattern.service), - description: format!("{} (Expected format: {})", pattern.description, pattern.api_key_format), - severity: pattern.severity.clone(), - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number, - evidence, - remediation: vec![ - format!("Store {} API key in environment variables", pattern.service), - "Use a secrets management service for production".to_string(), - "Implement API key rotation policies".to_string(), - "Monitor API key usage for anomalies".to_string(), - ], - references: vec![ - "https://owasp.org/www-project-api-security/".to_string(), - format!("https://platform.openai.com/docs/quickstart/account-setup"), - ], - cwe_id: Some("CWE-798".to_string()), - compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], - }); - } - } - - Ok(findings) - } - - /// Check cloud service patterns - fn check_cloud_pattern_in_content( - &self, - content: &str, - pattern: &CloudPattern, - file_path: &Path, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - for (line_num, line) in content.lines().enumerate() { - if let Some(captures) = pattern.pattern.captures(line) { - let evidence = if captures.len() > 1 { - captures.get(1).map(|m| self.mask_secret(m.as_str())) - } else { - Some(line.trim().to_string()) - }; - - let column_number = captures.get(0).map(|m| m.start() + 1); - - findings.push(SecurityFinding { - id: format!("cloud-{}-{}-{}", - pattern.provider.to_lowercase(), - pattern.service.to_lowercase().replace(" ", "-"), - line_num), - title: format!("{} {} Detected", pattern.provider, pattern.service), - description: 
pattern.description.clone(), - severity: pattern.severity.clone(), - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number, - evidence, - remediation: vec![ - format!("Use {} managed identity or role-based access", pattern.provider), - "Store credentials in secure key management service".to_string(), - "Implement credential rotation policies".to_string(), - "Use least-privilege access principles".to_string(), - ], - references: vec![ - "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(), - format!("https://docs.aws.amazon.com/security/"), - ], - cwe_id: Some("CWE-522".to_string()), - compliance_frameworks: vec!["SOC2".to_string(), "PCI-DSS".to_string()], - }); - } - } - - Ok(findings) - } - - /// Check database patterns - fn check_database_pattern_in_content( - &self, - content: &str, - pattern: &DatabasePattern, - file_path: &Path, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - for (line_num, line) in content.lines().enumerate() { - if pattern.pattern.is_match(line) { - // Mask the connection string for evidence - let masked_line = self.mask_database_connection(line); - - findings.push(SecurityFinding { - id: format!("database-{}-{}", pattern.database_type.to_lowercase(), line_num), - title: format!("{} Connection String with Credentials", pattern.database_type), - description: pattern.description.clone(), - severity: pattern.severity.clone(), - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number: None, - evidence: Some(masked_line), - remediation: vec![ - "Use environment variables for database credentials".to_string(), - "Implement connection pooling with credential management".to_string(), - "Use database authentication mechanisms like IAM roles".to_string(), - "Consider using encrypted connection 
strings".to_string(), - ], - references: vec![ - "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(), - "https://cheatsheetseries.owasp.org/cheatsheets/Database_Security_Cheat_Sheet.html".to_string(), - ], - cwe_id: Some("CWE-798".to_string()), - compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string(), "PCI-DSS".to_string()], - }); - } - } - - Ok(findings) - } - - /// Check framework-specific patterns - fn check_framework_pattern_in_content( - &self, - content: &str, - pattern: &FrameworkPattern, - file_path: &Path, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - for (line_num, line) in content.lines().enumerate() { - if let Some(captures) = pattern.pattern.captures(line) { - let evidence = if captures.len() > 1 { - captures.get(1).map(|m| self.mask_secret(m.as_str())) - } else { - Some(line.trim().to_string()) - }; - - findings.push(SecurityFinding { - id: format!("framework-{}-{}", pattern.framework.to_lowercase(), line_num), - title: format!("{} Security Issue", pattern.framework), - description: pattern.description.clone(), - severity: pattern.severity.clone(), - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number: None, - evidence, - remediation: self.generate_framework_remediation(&pattern.framework), - references: vec![ - format!("https://docs.djangoproject.com/en/stable/topics/security/"), - "https://owasp.org/www-project-top-ten/".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }); - } - } - - Ok(findings) - } - - /// Initialize Python-specific secret patterns - fn initialize_python_patterns() -> Result, SecurityError> { - let patterns = vec![ - // Django SECRET_KEY pattern - PythonSecretPattern { - id: "python-django-secret-key".to_string(), - name: "Django SECRET_KEY".to_string(), - pattern: 
Regex::new(r#"(?i)SECRET_KEY\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{40,})["']"#)?, - severity: SecuritySeverity::Critical, - description: "Django SECRET_KEY found in source code".to_string(), - context_indicators: vec!["django".to_string(), "settings".to_string(), "SECRET_KEY".to_string()], - false_positive_indicators: vec!["example".to_string(), "your-secret-key".to_string(), "fake".to_string()], - remediation_hints: vec![ - "Move SECRET_KEY to environment variables".to_string(), - "Use python-decouple or similar library".to_string(), - "Never commit SECRET_KEY to version control".to_string(), - ], - }, - - // Flask SECRET_KEY pattern - PythonSecretPattern { - id: "python-flask-secret-key".to_string(), - name: "Flask SECRET_KEY".to_string(), - pattern: Regex::new(r#"(?i)app\.secret_key\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?, - severity: SecuritySeverity::High, - description: "Flask SECRET_KEY hardcoded in application".to_string(), - context_indicators: vec!["flask".to_string(), "app".to_string(), "secret_key".to_string()], - false_positive_indicators: vec!["example".to_string(), "your-secret".to_string()], - remediation_hints: vec![ - "Use os.environ.get('SECRET_KEY')".to_string(), - "Store in environment variables".to_string(), - ], - }, - - // FastAPI JWT secret - PythonSecretPattern { - id: "python-fastapi-jwt-secret".to_string(), - name: "FastAPI JWT Secret".to_string(), - pattern: Regex::new(r#"(?i)(?:jwt_secret|jwt_key|secret_key)\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?, - severity: SecuritySeverity::High, - description: "FastAPI JWT secret hardcoded in source".to_string(), - context_indicators: vec!["fastapi".to_string(), "jwt".to_string(), "token".to_string()], - false_positive_indicators: vec!["example".to_string(), "test".to_string()], - remediation_hints: vec![ - "Use Pydantic Settings for configuration".to_string(), - "Store JWT secrets in environment variables".to_string(), - 
], - }, - - // Database connection strings - PythonSecretPattern { - id: "python-database-url".to_string(), - name: "Database Connection String".to_string(), - pattern: Regex::new(r#"(?i)(?:database_url|db_url|sqlalchemy_database_uri)\s*=\s*["'](?:postgresql|mysql|sqlite|mongodb)://[^"']*:[^"']*@[^"']+["']"#)?, - severity: SecuritySeverity::Critical, - description: "Database connection string with credentials detected".to_string(), - context_indicators: vec!["database".to_string(), "sqlalchemy".to_string(), "connect".to_string()], - false_positive_indicators: vec!["localhost".to_string(), "example.com".to_string(), "user:pass".to_string()], - remediation_hints: vec![ - "Use environment variables for database credentials".to_string(), - "Consider using connection pooling and secrets management".to_string(), - ], - }, - - // Generic API key pattern - PythonSecretPattern { - id: "python-api-key-assignment".to_string(), - name: "API Key Assignment".to_string(), - pattern: Regex::new(r#"(?i)(?:api_key|apikey|access_key|secret_key|private_key|auth_token|bearer_token)\s*=\s*["']([A-Za-z0-9_-]{20,})["']"#)?, - severity: SecuritySeverity::High, - description: "API key hardcoded in variable assignment".to_string(), - context_indicators: vec!["requests".to_string(), "api".to_string(), "client".to_string()], - false_positive_indicators: vec!["os.environ".to_string(), "config".to_string(), "settings".to_string()], - remediation_hints: vec![ - "Use environment variables or config files".to_string(), - "Consider using secrets management services".to_string(), - ], - }, - ]; - - Ok(patterns) - } - - /// Initialize AI/ML service patterns - fn initialize_ai_ml_patterns() -> Result, SecurityError> { - let patterns = vec![ - // OpenAI API keys - AiMlPattern { - service: "OpenAI".to_string(), - pattern: Regex::new(r#"(?i)(?:openai[_-]?api[_-]?key|openai[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "OpenAI API key 
detected".to_string(), - api_key_format: "sk-[32+ alphanumeric characters]".to_string(), - }, - - // OpenAI Organization ID - AiMlPattern { - service: "OpenAI Organization".to_string(), - pattern: Regex::new(r#"(?i)(?:openai[_-]?org[_-]?id|openai[_-]?organization)\s*[=:]\s*["']?(org-[A-Za-z0-9]{20,})["']?"#)?, - severity: SecuritySeverity::Medium, - description: "OpenAI organization ID detected".to_string(), - api_key_format: "org-[20+ alphanumeric characters]".to_string(), - }, - - // Anthropic Claude API keys - AiMlPattern { - service: "Anthropic Claude".to_string(), - pattern: Regex::new(r#"(?i)(?:anthropic[_-]?api[_-]?key|claude[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-ant-[A-Za-z0-9]{40,})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "Anthropic Claude API key detected".to_string(), - api_key_format: "sk-ant-[40+ alphanumeric characters]".to_string(), - }, - - // Hugging Face API tokens - AiMlPattern { - service: "Hugging Face".to_string(), - pattern: Regex::new(r#"(?i)(?:huggingface[_-]?api[_-]?key|huggingface[_-]?token|hf[_-]?token)\s*[=:]\s*["']?(hf_[A-Za-z0-9]{30,})["']?"#)?, - severity: SecuritySeverity::High, - description: "Hugging Face API token detected".to_string(), - api_key_format: "hf_[30+ alphanumeric characters]".to_string(), - }, - - // Google AI/Gemini API keys - AiMlPattern { - service: "Google AI/Gemini".to_string(), - pattern: Regex::new(r#"(?i)(?:google[_-]?ai[_-]?api[_-]?key|gemini[_-]?api[_-]?key)\s*[=:]\s*["']?(AIza[A-Za-z0-9_-]{35,})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "Google AI/Gemini API key detected".to_string(), - api_key_format: "AIza[35+ alphanumeric characters with underscores/dashes]".to_string(), - }, - - // Cohere API keys - AiMlPattern { - service: "Cohere".to_string(), - pattern: Regex::new(r#"(?i)(?:cohere[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?, - severity: SecuritySeverity::High, - description: "Cohere API key detected".to_string(), - api_key_format: "[40+ 
alphanumeric characters]".to_string(), - }, - - // Replicate API tokens - AiMlPattern { - service: "Replicate".to_string(), - pattern: Regex::new(r#"(?i)(?:replicate[_-]?api[_-]?token|replicate[_-]?token)\s*[=:]\s*["']?(r8_[A-Za-z0-9]{30,})["']?"#)?, - severity: SecuritySeverity::High, - description: "Replicate API token detected".to_string(), - api_key_format: "r8_[30+ alphanumeric characters]".to_string(), - }, - - // Stability AI API keys - AiMlPattern { - service: "Stability AI".to_string(), - pattern: Regex::new(r#"(?i)(?:stability[_-]?ai[_-]?api[_-]?key|stable[_-]?diffusion[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{40,})["']?"#)?, - severity: SecuritySeverity::High, - description: "Stability AI API key detected".to_string(), - api_key_format: "sk-[40+ alphanumeric characters]".to_string(), - }, - - // DeepSeek API keys - AiMlPattern { - service: "DeepSeek".to_string(), - pattern: Regex::new(r#"(?i)(?:deepseek[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?, - severity: SecuritySeverity::High, - description: "DeepSeek API key detected".to_string(), - api_key_format: "sk-[32+ alphanumeric characters]".to_string(), - }, - - // Mistral AI API keys - AiMlPattern { - service: "Mistral AI".to_string(), - pattern: Regex::new(r#"(?i)(?:mistral[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{32,})["']?"#)?, - severity: SecuritySeverity::High, - description: "Mistral AI API key detected".to_string(), - api_key_format: "[32+ alphanumeric characters]".to_string(), - }, - - // Together AI API keys - AiMlPattern { - service: "Together AI".to_string(), - pattern: Regex::new(r#"(?i)(?:together[_-]?ai[_-]?api[_-]?key|together[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?, - severity: SecuritySeverity::High, - description: "Together AI API key detected".to_string(), - api_key_format: "[40+ alphanumeric characters]".to_string(), - }, - - // Weights & Biases API keys - AiMlPattern { - service: "Weights & Biases".to_string(), - pattern: 
Regex::new(r#"(?i)(?:wandb[_-]?api[_-]?key|wandb[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?, - severity: SecuritySeverity::Medium, - description: "Weights & Biases API key detected".to_string(), - api_key_format: "[40+ alphanumeric characters]".to_string(), - }, - - // MLflow tracking server credentials - AiMlPattern { - service: "MLflow".to_string(), - pattern: Regex::new(r#"(?i)(?:mlflow[_-]?tracking[_-]?username|mlflow[_-]?tracking[_-]?password)\s*[=:]\s*["']?([A-Za-z0-9]{8,})["']?"#)?, - severity: SecuritySeverity::Medium, - description: "MLflow tracking credentials detected".to_string(), - api_key_format: "[8+ alphanumeric characters]".to_string(), - }, - ]; - - Ok(patterns) - } - - /// Initialize cloud service patterns - fn initialize_cloud_patterns() -> Result, SecurityError> { - let patterns = vec![ - // AWS Access Keys - CloudPattern { - provider: "AWS".to_string(), - service: "IAM Access Key".to_string(), - pattern: Regex::new(r#"(?i)(?:aws[_-]?access[_-]?key[_-]?id)\s*[=:]\s*["']?(AKIA[A-Z0-9]{16})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "AWS Access Key ID detected".to_string(), - }, - - // AWS Secret Access Keys - CloudPattern { - provider: "AWS".to_string(), - service: "IAM Secret Key".to_string(), - pattern: Regex::new(r#"(?i)(?:aws[_-]?secret[_-]?access[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{40})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "AWS Secret Access Key detected".to_string(), - }, - - // AWS Session Tokens - CloudPattern { - provider: "AWS".to_string(), - service: "Session Token".to_string(), - pattern: Regex::new(r#"(?i)(?:aws[_-]?session[_-]?token)\s*[=:]\s*["']?([A-Za-z0-9/+=]{100,})["']?"#)?, - severity: SecuritySeverity::High, - description: "AWS Session Token detected".to_string(), - }, - - // Google Cloud Service Account Keys - CloudPattern { - provider: "GCP".to_string(), - service: "Service Account Key".to_string(), - pattern: 
Regex::new(r#"(?i)(?:google[_-]?application[_-]?credentials|gcp[_-]?service[_-]?account)\s*[=:]\s*["']?([A-Za-z0-9/+=]{50,})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "Google Cloud Service Account key detected".to_string(), - }, - - // Azure Storage Account Keys - CloudPattern { - provider: "Azure".to_string(), - service: "Storage Account Key".to_string(), - pattern: Regex::new(r#"(?i)(?:azure[_-]?storage[_-]?account[_-]?key|azure[_-]?storage[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{88})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "Azure Storage Account key detected".to_string(), - }, - - // Azure Service Principal - CloudPattern { - provider: "Azure".to_string(), - service: "Service Principal".to_string(), - pattern: Regex::new(r#"(?i)(?:azure[_-]?client[_-]?secret|azure[_-]?tenant[_-]?id)\s*[=:]\s*["']?([A-Za-z0-9-]{32,})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "Azure Service Principal credentials detected".to_string(), - }, - - // DigitalOcean API tokens - CloudPattern { - provider: "DigitalOcean".to_string(), - service: "API Token".to_string(), - pattern: Regex::new(r#"(?i)(?:digitalocean[_-]?api[_-]?token|do[_-]?api[_-]?token)\s*[=:]\s*["']?(dop_v1_[A-Za-z0-9]{64})["']?"#)?, - severity: SecuritySeverity::High, - description: "DigitalOcean API token detected".to_string(), - }, - - // Heroku API keys - CloudPattern { - provider: "Heroku".to_string(), - service: "API Key".to_string(), - pattern: Regex::new(r#"(?i)(?:heroku[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9-]{36})["']?"#)?, - severity: SecuritySeverity::High, - description: "Heroku API key detected".to_string(), - }, - - // Stripe API keys - CloudPattern { - provider: "Stripe".to_string(), - service: "API Key".to_string(), - pattern: Regex::new(r#"(?i)(?:stripe[_-]?api[_-]?key|stripe[_-]?secret[_-]?key)\s*[=:]\s*["']?(sk_live_[A-Za-z0-9]{24}|sk_test_[A-Za-z0-9]{24})["']?"#)?, - severity: SecuritySeverity::Critical, - description: "Stripe 
API key detected".to_string(), - }, - - // Twilio credentials - CloudPattern { - provider: "Twilio".to_string(), - service: "Auth Token".to_string(), - pattern: Regex::new(r#"(?i)(?:twilio[_-]?auth[_-]?token|twilio[_-]?account[_-]?sid)\s*[=:]\s*["']?([A-Za-z0-9]{32,34})["']?"#)?, - severity: SecuritySeverity::High, - description: "Twilio credentials detected".to_string(), - }, - ]; - - Ok(patterns) - } - - /// Initialize framework-specific patterns - fn initialize_framework_patterns() -> Result>, SecurityError> { - let mut frameworks = HashMap::new(); - - // Django patterns - frameworks.insert("django".to_string(), vec![ - FrameworkPattern { - framework: "Django".to_string(), - pattern: Regex::new(r#"(?i)(?:database|databases)\s*=\s*\{[^}]*['"']password['"']\s*:\s*['"']([^'"']+)['"'][^}]*\}"#)?, - severity: SecuritySeverity::Critical, - description: "Django database password in settings".to_string(), - file_extensions: vec!["py".to_string()], - }, - FrameworkPattern { - framework: "Django".to_string(), - pattern: Regex::new(r#"(?i)email[_-]?host[_-]?password\s*=\s*["']([^"']+)["']"#)?, - severity: SecuritySeverity::High, - description: "Django email password in settings".to_string(), - file_extensions: vec!["py".to_string()], - }, - ]); - - // Flask patterns - frameworks.insert("flask".to_string(), vec![ - FrameworkPattern { - framework: "Flask".to_string(), - pattern: Regex::new(r#"(?i)app\.config\[['"']([A-Z_]*(?:SECRET|KEY|PASSWORD|TOKEN)[A-Z_]*)['"']\]\s*=\s*["']([^"']+)["']"#)?, - severity: SecuritySeverity::High, - description: "Flask configuration with potential secret".to_string(), - file_extensions: vec!["py".to_string()], - }, - ]); - - // FastAPI patterns - frameworks.insert("fastapi".to_string(), vec![ - FrameworkPattern { - framework: "FastAPI".to_string(), - pattern: Regex::new(r#"(?i)class\s+Settings\([^)]*\):[^}]*([A-Z_]*(?:SECRET|KEY|PASSWORD|TOKEN)[A-Z_]*)\s*:\s*str\s*=\s*["']([^"']+)["']"#)?, - severity: SecuritySeverity::High, - description: 
"FastAPI Settings class with hardcoded secret".to_string(), - file_extensions: vec!["py".to_string()], - }, - ]); - - Ok(frameworks) - } - - /// Initialize database patterns - fn initialize_database_patterns() -> Result, SecurityError> { - let patterns = vec![ - // PostgreSQL connection strings - DatabasePattern { - database_type: "PostgreSQL".to_string(), - pattern: Regex::new(r#"(?i)postgresql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?, - severity: SecuritySeverity::Critical, - description: "PostgreSQL connection string with credentials".to_string(), - }, - - // MySQL connection strings - DatabasePattern { - database_type: "MySQL".to_string(), - pattern: Regex::new(r#"(?i)mysql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?, - severity: SecuritySeverity::Critical, - description: "MySQL connection string with credentials".to_string(), - }, - - // MongoDB connection strings - DatabasePattern { - database_type: "MongoDB".to_string(), - pattern: Regex::new(r#"(?i)mongodb://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?, - severity: SecuritySeverity::Critical, - description: "MongoDB connection string with credentials".to_string(), - }, - - // Redis connection strings - DatabasePattern { - database_type: "Redis".to_string(), - pattern: Regex::new(r#"(?i)redis://[^:]*:[^@]+@[^/]+/[^"'\s]*"#)?, - severity: SecuritySeverity::High, - description: "Redis connection string with password".to_string(), - }, - - // SQLAlchemy database URLs - DatabasePattern { - database_type: "SQLAlchemy".to_string(), - pattern: Regex::new(r#"(?i)sqlalchemy_database_uri\s*=\s*["'][^"']*://[^:]+:[^@]+@[^"']+"#)?, - severity: SecuritySeverity::Critical, - description: "SQLAlchemy database URI with credentials".to_string(), - }, - ]; - - Ok(patterns) - } - - /// Initialize environment variable patterns specific to Python - fn initialize_env_var_patterns() -> Result, SecurityError> { - let patterns = vec![ - EnvVarPattern { - pattern: Regex::new(r#"os\.environ(?:\.get)?\(['"']([A-Z_]+)['"']\)"#)?, - severity: SecuritySeverity::Info, - 
description: "Environment variable usage detected".to_string(), - sensitive_prefixes: vec![ - "SECRET".to_string(), - "KEY".to_string(), - "PASSWORD".to_string(), - "TOKEN".to_string(), - "API".to_string(), - "AUTH".to_string(), - "PRIVATE".to_string(), - "CREDENTIAL".to_string(), - ], - }, - EnvVarPattern { - pattern: Regex::new(r#"getenv\(['"']([A-Z_]+)['"']\)"#)?, - severity: SecuritySeverity::Info, - description: "Environment variable access via getenv".to_string(), - sensitive_prefixes: vec![ - "SECRET".to_string(), - "KEY".to_string(), - "PASSWORD".to_string(), - "TOKEN".to_string(), - ], - }, - ]; - - Ok(patterns) - } - - /// Check environment variable usage patterns - fn check_env_var_usage(&self, content: &str, file_path: &Path) -> Result, SecurityError> { - let mut findings = Vec::new(); - - for pattern in &self.env_var_patterns { - for (line_num, line) in content.lines().enumerate() { - if let Some(captures) = pattern.pattern.captures(line) { - if let Some(var_name) = captures.get(1) { - let var_name = var_name.as_str(); - - // Check if this appears to be a sensitive variable - let is_sensitive = pattern.sensitive_prefixes.iter().any(|prefix| { - var_name.to_uppercase().contains(prefix) - }); - - if is_sensitive { - // Check if this is properly protected (not hardcoded) - if !line.contains("=") || line.contains("os.environ") || line.contains("getenv") { - // This is good practice - environment variable usage - continue; - } - - let column_number = captures.get(0).map(|m| m.start() + 1); - - findings.push(SecurityFinding { - id: format!("env-var-misuse-{}", line_num), - title: "Potential Environment Variable Misuse".to_string(), - description: format!("Sensitive environment variable '{}' usage detected", var_name), - severity: SecuritySeverity::Medium, - category: SecurityCategory::SecretsExposure, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number, - evidence: Some(line.trim().to_string()), - remediation: vec![ 
- "Ensure sensitive environment variables are properly protected".to_string(), - "Use python-decouple or similar libraries for configuration".to_string(), - "Document required environment variables".to_string(), - ], - references: vec![ - "https://12factor.net/config".to_string(), - "https://docs.python.org/3/library/os.html#os.environ".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }); - } - } - } - } - } - - Ok(findings) - } - - /// Check for insecure Python practices - fn check_insecure_python_practices(&self, content: &str, file_path: &Path) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Check for eval() usage - if let Ok(eval_pattern) = Regex::new(r#"eval\s*\("#) { - for (line_num, line) in content.lines().enumerate() { - if eval_pattern.is_match(line) { - findings.push(SecurityFinding { - id: format!("insecure-eval-{}", line_num), - title: "Dangerous eval() Usage".to_string(), - description: "Use of eval() function detected - potential code injection risk".to_string(), - severity: SecuritySeverity::High, - category: SecurityCategory::CodeInjection, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number: None, - evidence: Some(line.trim().to_string()), - remediation: vec![ - "Avoid using eval() with user input".to_string(), - "Use ast.literal_eval() for safe evaluation of literals".to_string(), - "Consider using json.loads() for JSON data".to_string(), - ], - references: vec![ - "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(), - ], - cwe_id: Some("CWE-95".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }); - } - } - } - - // Check for shell injection via subprocess - if let Ok(subprocess_pattern) = Regex::new(r#"subprocess\.(call|run|Popen)\([^)]*shell\s*=\s*True"#) { - for (line_num, line) in content.lines().enumerate() { - if subprocess_pattern.is_match(line) { - 
findings.push(SecurityFinding { - id: format!("shell-injection-{}", line_num), - title: "Potential Shell Injection".to_string(), - description: "subprocess call with shell=True detected - potential command injection risk".to_string(), - severity: SecuritySeverity::High, - category: SecurityCategory::CommandInjection, - file_path: Some(file_path.to_path_buf()), - line_number: Some(line_num + 1), - column_number: None, - evidence: Some(line.trim().to_string()), - remediation: vec![ - "Avoid using shell=True with user input".to_string(), - "Use subprocess with list arguments instead".to_string(), - "Validate and sanitize all user inputs".to_string(), - ], - references: vec![ - "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(), - ], - cwe_id: Some("CWE-78".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }); - } - } - } - - Ok(findings) - } - - /// Detect Python framework based on content - fn detect_python_framework(&self, content: &str) -> Option { - if content.contains("django") || content.contains("Django") { - Some("django".to_string()) - } else if content.contains("flask") || content.contains("Flask") { - Some("flask".to_string()) - } else if content.contains("fastapi") || content.contains("FastAPI") { - Some("fastapi".to_string()) - } else { - None - } - } - - /// Mask sensitive information in evidence - fn mask_secret(&self, secret: &str) -> String { - if secret.len() <= 8 { - "*".repeat(secret.len()) - } else { - format!("{}***{}", &secret[..4], &secret[secret.len()-4..]) - } - } - - /// Mask database connection string - fn mask_database_connection(&self, connection_str: &str) -> String { - // Replace password in connection string with asterisks - if let Ok(re) = Regex::new(r"://([^:]+):([^@]+)@") { - re.replace(connection_str, "://$1:***@").to_string() - } else { - connection_str.to_string() - } - } - - /// Calculate confidence score based on context indicators - fn calculate_context_confidence(&self, content: 
&str, indicators: &[String]) -> f32 { - let total_indicators = indicators.len() as f32; - if total_indicators == 0.0 { - return 0.5; // Neutral confidence - } - - let found_indicators = indicators.iter() - .filter(|indicator| content.to_lowercase().contains(&indicator.to_lowercase())) - .count() as f32; - - found_indicators / total_indicators - } - - /// Adjust severity based on context confidence - fn adjust_severity_by_context(&self, base_severity: SecuritySeverity, confidence: f32) -> SecuritySeverity { - match base_severity { - SecuritySeverity::Critical => base_severity, // Keep critical as-is - SecuritySeverity::High => { - if confidence < 0.3 { - SecuritySeverity::Medium - } else { - base_severity - } - } - SecuritySeverity::Medium => { - if confidence > 0.7 { - SecuritySeverity::High - } else if confidence < 0.3 { - SecuritySeverity::Low - } else { - base_severity - } - } - _ => base_severity, - } - } - - /// Generate framework-specific remediation advice - fn generate_framework_remediation(&self, framework: &str) -> Vec { - match framework.to_lowercase().as_str() { - "django" => vec![ - "Use Django's built-in security features".to_string(), - "Store SECRET_KEY in environment variables".to_string(), - "Use django-environ for configuration management".to_string(), - "Enable Django's security middleware".to_string(), - ], - "flask" => vec![ - "Use Flask-Security for authentication".to_string(), - "Store secrets in environment variables".to_string(), - "Use Flask-Talisman for security headers".to_string(), - "Implement proper session management".to_string(), - ], - "fastapi" => vec![ - "Use Pydantic Settings for configuration".to_string(), - "Implement proper JWT token management".to_string(), - "Use dependency injection for secrets".to_string(), - "Enable HTTPS and security headers".to_string(), - ], - _ => vec![ - "Follow framework-specific security best practices".to_string(), - "Use environment variables for sensitive data".to_string(), - ], - } - } - - 
/// Enhance a security finding with gitignore risk assessment - fn enhance_finding_with_gitignore_status( - &self, - finding: &mut SecurityFinding, - gitignore_status: &super::gitignore::GitIgnoreStatus, - ) { - // Adjust severity based on gitignore risk - finding.severity = match gitignore_status.risk_level { - GitIgnoreRisk::Tracked => SecuritySeverity::Critical, // Always critical if tracked - GitIgnoreRisk::Exposed => { - // Upgrade severity if exposed - match &finding.severity { - SecuritySeverity::Medium => SecuritySeverity::High, - SecuritySeverity::Low => SecuritySeverity::Medium, - other => other.clone(), - } - } - GitIgnoreRisk::Protected => { - // Downgrade slightly if protected - match &finding.severity { - SecuritySeverity::Critical => SecuritySeverity::High, - SecuritySeverity::High => SecuritySeverity::Medium, - other => other.clone(), - } - } - GitIgnoreRisk::Safe => finding.severity.clone(), - }; - - // Add gitignore context to description - finding.description.push_str(&format!(" (GitIgnore: {})", gitignore_status.description())); - - // Add git history warning for tracked files - if gitignore_status.risk_level == GitIgnoreRisk::Tracked { - finding.remediation.insert(0, "⚠️ CRITICAL: Remove this file from git history using git-filter-branch or BFG Repo-Cleaner".to_string()); - finding.remediation.insert(1, "πŸ”‘ Rotate any exposed secrets immediately".to_string()); - } - } - - /// Analyze Python configuration files with gitignore awareness - fn analyze_config_files_with_gitignore( - &self, - project_root: &Path, - gitignore_analyzer: &mut GitIgnoreAnalyzer, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Python configuration files to check - let config_files = [ - "settings.py", // Django settings - "config.py", // Flask/general config - "main.py", // FastAPI main - "app.py", // Flask app - "manage.py", // Django management - "wsgi.py", // WSGI config - "asgi.py", // ASGI config - ]; - - for config_file in &config_files { - 
let config_path = project_root.join(config_file); - if config_path.exists() { - let gitignore_status = gitignore_analyzer.analyze_file(&config_path); - - if let Ok(content) = fs::read_to_string(&config_path) { - // Basic secret pattern check for config files - if self.contains_potential_python_secrets(&content) { - let mut finding = SecurityFinding { - id: format!("config-file-{}", config_file.replace('.', "-")), - title: "Potential Secrets in Python Configuration File".to_string(), - description: format!("Python configuration file '{}' may contain secrets", config_file), - severity: SecuritySeverity::Medium, - category: SecurityCategory::SecretsExposure, - file_path: Some(config_path.clone()), - line_number: None, - column_number: None, - evidence: None, - remediation: vec![ - "Review configuration file for hardcoded secrets".to_string(), - "Use environment variables for sensitive configuration".to_string(), - "Consider using python-decouple or similar libraries".to_string(), - ], - references: vec![ - "https://12factor.net/config".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }; - - self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); - findings.push(finding); - } - } - } - } - - Ok(findings) - } - - /// Analyze Python dependency files with gitignore awareness - fn analyze_dependency_files_with_gitignore( - &self, - project_root: &Path, - gitignore_analyzer: &mut GitIgnoreAnalyzer, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Python dependency files to check - let dependency_files = [ - "requirements.txt", - "requirements-dev.txt", - "requirements-prod.txt", - "Pipfile", - "Pipfile.lock", - "pyproject.toml", - "poetry.lock", - "conda-requirements.txt", - "environment.yml", - ]; - - for dep_file in &dependency_files { - let dep_path = project_root.join(dep_file); - if dep_path.exists() { - let gitignore_status = 
gitignore_analyzer.analyze_file(&dep_path); - - // Generally, dependency files should be tracked, but check for any embedded secrets - if let Ok(content) = fs::read_to_string(&dep_path) { - if self.contains_potential_python_secrets(&content) { - let mut finding = SecurityFinding { - id: format!("dependency-file-{}", dep_file.replace('.', "-").replace('-', "_")), - title: "Potential Secrets in Python Dependency File".to_string(), - description: format!("Python dependency file '{}' may contain secrets", dep_file), - severity: SecuritySeverity::High, - category: SecurityCategory::SecretsExposure, - file_path: Some(dep_path.clone()), - line_number: None, - column_number: None, - evidence: None, - remediation: vec![ - "Remove any secrets from dependency files".to_string(), - "Use environment variables for configuration".to_string(), - "Review dependency sources for security".to_string(), - ], - references: vec![ - "https://pip.pypa.io/en/stable/topics/secure-installs/".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }; - - self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); - findings.push(finding); - } - } - } - } - - Ok(findings) - } - - /// Analyze environment files with comprehensive gitignore risk assessment - fn analyze_env_files_with_gitignore( - &self, - project_root: &Path, - gitignore_analyzer: &mut GitIgnoreAnalyzer, - ) -> Result, SecurityError> { - let mut findings = Vec::new(); - - // Get all potential environment files using gitignore analyzer - let env_files = gitignore_analyzer.get_files_to_analyze(&[]) - .map_err(|e| SecurityError::Io(e))? 
- .into_iter() - .filter(|file| { - if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) { - // Exclude template/example files from security alerts - if self.is_template_file(file_name) { - debug!("Skipping template file: {}", file_name); - return false; - } - - file_name.starts_with(".env") || - file_name.contains("credentials") || - file_name.contains("secrets") || - file_name.ends_with(".key") || - file_name.ends_with(".pem") || - file_name == "secret.json" || - file_name == "service-account.json" - } else { - false - } - }) - .collect::>(); - - for env_file in env_files { - let gitignore_status = gitignore_analyzer.analyze_file(&env_file); - let relative_path = env_file.strip_prefix(project_root) - .unwrap_or(&env_file); - - // Create finding based on gitignore risk assessment - let (severity, title, description) = match gitignore_status.risk_level { - GitIgnoreRisk::Tracked => ( - SecuritySeverity::Critical, - "Python Secret File Tracked by Git".to_string(), - format!("Python secret file '{}' is tracked by git and may expose credentials in version history", relative_path.display()), - ), - GitIgnoreRisk::Exposed => ( - SecuritySeverity::High, - "Python Secret File Not in GitIgnore".to_string(), - format!("Python secret file '{}' exists but is not protected by .gitignore", relative_path.display()), - ), - GitIgnoreRisk::Protected => ( - SecuritySeverity::Info, - "Python Secret File Properly Protected".to_string(), - format!("Python secret file '{}' is properly ignored but detected for verification", relative_path.display()), - ), - GitIgnoreRisk::Safe => continue, // Skip files that appear safe - }; - - let mut finding = SecurityFinding { - id: format!("python-env-file-{}", relative_path.to_string_lossy().replace('/', "-").replace('.', "-")), - title, - description, - severity, - category: SecurityCategory::SecretsExposure, - file_path: Some(env_file.clone()), - line_number: None, - column_number: None, - evidence: None, - remediation: vec![ - 
"Ensure sensitive files are in .gitignore".to_string(), - "Use .env.example files for documentation".to_string(), - "Never commit actual environment files to version control".to_string(), - "Use python-decouple for environment variable management".to_string(), - ], - references: vec![ - "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(), - "https://pypi.org/project/python-decouple/".to_string(), - ], - cwe_id: Some("CWE-200".to_string()), - compliance_frameworks: vec!["SOC2".to_string()], - }; - - self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status); - findings.push(finding); - } - - Ok(findings) - } - - /// Check if a file is a template/example file that should be excluded from security alerts - fn is_template_file(&self, file_name: &str) -> bool { - let template_indicators = [ - "sample", "example", "template", "template.env", "env.template", - "sample.env", "env.sample", "example.env", "env.example", - "examples", "samples", "templates", "demo", "test", - ".env.sample", ".env.example", ".env.template", ".env.demo", ".env.test", - "example.json", "sample.json", "template.json" - ]; - - let file_name_lower = file_name.to_lowercase(); - - // Check for exact matches or contains patterns - template_indicators.iter().any(|indicator| { - file_name_lower == *indicator || - file_name_lower.contains(indicator) || - file_name_lower.ends_with(indicator) - }) - } - - /// Check if content contains potential Python secrets (basic patterns) - fn contains_potential_python_secrets(&self, content: &str) -> bool { - let secret_indicators = [ - "sk_", "pk_live_", "eyJ", "AKIA", "-----BEGIN", - "client_secret", "api_key", "access_token", "SECRET_KEY", - "private_key", "secret_key", "bearer", "password", - "token", "credentials", "auth" - ]; - - let content_lower = content.to_lowercase(); - secret_indicators.iter().any(|indicator| content_lower.contains(&indicator.to_lowercase())) - } - - /// Generate Python-specific security 
recommendations - fn generate_python_security_recommendations(&self) -> Vec { - vec![ - "🐍 Python Security Best Practices:".to_string(), - " β€’ Use environment variables for all secrets and configuration".to_string(), - " β€’ Install python-decouple or python-dotenv for configuration management".to_string(), - " β€’ Keep requirements.txt and poetry.lock files up to date".to_string(), - " β€’ Use virtual environments to isolate dependencies".to_string(), - " β€’ Run 'pip-audit' or 'safety check' to scan for vulnerable packages".to_string(), - " β€’ Enable Django's security middleware if using Django".to_string(), - " β€’ Use parameterized queries to prevent SQL injection".to_string(), - " β€’ Validate and sanitize all user inputs".to_string(), - " β€’ Use HTTPS in production environments".to_string(), - " β€’ Implement proper error handling and logging".to_string(), - " β€’ Consider using tools like bandit for static security analysis".to_string(), - ] - } -} \ No newline at end of file diff --git a/src/analyzer/security/turbo/README.md b/src/analyzer/security/turbo/README.md new file mode 100644 index 00000000..4472c64d --- /dev/null +++ b/src/analyzer/security/turbo/README.md @@ -0,0 +1,184 @@ +# πŸš€ Turbo Security Analyzer + +Ultra-fast security scanning that's 10-100x faster than traditional approaches. + +## Overview + +The Turbo Security Analyzer is a high-performance security scanner that utilizes Rust's full capabilities for blazing fast analysis. 
It achieves dramatic speedups through: + +- **Smart File Selection**: Eliminates 80-90% of work upfront using gitignore-aware discovery +- **Multi-Pattern Matching**: Aho-Corasick algorithm for simultaneous pattern search +- **Memory-Mapped I/O**: Zero-copy file reading for large files +- **Parallel Processing**: Work-stealing thread pool with early termination +- **Intelligent Caching**: Concurrent caching with LRU eviction +- **Specialized Scanners**: Optimized for common file types + +## Key Features + +### 🎯 Smart File Discovery +- Git-aware file discovery using `git ls-files` +- Automatically skips ignored files +- Prioritizes critical files (.env, configs, secrets) + +### ⚑ High-Performance Scanning +- Aho-Corasick multi-pattern matching +- Memory-mapped I/O for large files +- Work-stealing parallelism across CPU cores +- Early termination on critical findings + +### 🧠 Intelligent Detection +- Advanced false positive reduction +- Context-aware confidence scoring +- GitIgnore risk assessment +- Template/example file exclusion + +## Usage + +### Integration with CLI + +The turbo analyzer is integrated into the main security command: + +```bash +# Fast security scan +sync-ctl security /path/to/project + +# Include low severity findings (thorough mode) +sync-ctl security --include-low /path/to/project + +# Skip secret detection (lightning mode) +sync-ctl security --no-secrets /path/to/project +``` + +### Scan Modes + +The analyzer automatically chooses the best mode based on your flags: + +- **Lightning**: Critical files only (.env, configs), basic patterns +- **Fast**: Smart sampling, priority patterns, skip large files +- **Balanced**: Good coverage with performance optimizations (default) +- **Thorough**: Full scan with all patterns (still optimized) +- **Paranoid**: Everything including low-severity findings + +## Architecture + +### Core Components + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ File Discovery β”‚ ← 
Git-aware, smart filtering +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Priority Scoring β”‚ ← Critical files first +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Pattern Engine β”‚ ← Aho-Corasick matching +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Parallel Scanner β”‚ ← Work-stealing threads +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Result Cache β”‚ ← Concurrent caching +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Report Generator β”‚ ← Aggregation & scoring +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Pattern Categories + +- **Secrets**: API keys, passwords, tokens +- **Environment Variables**: Sensitive config values +- **Cryptographic Material**: Private keys, certificates +- **Cloud Credentials**: AWS, GCP, Azure keys +- **Database Connections**: Connection strings with credentials + +## Performance + +Typical performance improvements over traditional scanning: + +- **Lightning Mode**: 50-100x faster (critical files only) +- **Fast Mode**: 20-50x faster (smart sampling) +- **Balanced Mode**: 10-25x faster (default, good coverage) +- **Thorough Mode**: 5-10x faster (comprehensive scan) + +## Implementation Details + +### File Discovery Optimization + +```rust +// Git-aware discovery (50x faster than walkdir) +git ls-files -z | parallel_process + +// Smart filtering pipeline +files -> priority_score -> sort -> filter_by_mode +``` + +### Pattern Matching + +```rust +// Aho-Corasick for 
multi-pattern search +let patterns = ["password", "api_key", "secret", ...]; +let matcher = AhoCorasick::new(patterns); + +// Single pass through content +for match in matcher.find_iter(content) { + // Process match with confidence scoring +} +``` + +### Memory Mapping + +```rust +// Zero-copy file reading for large files +let mmap = unsafe { MmapOptions::new().map(&file)? }; +let content = simdutf8::from_utf8(&mmap)?; +``` + +### Concurrent Caching + +```rust +// Thread-safe cache with DashMap +cache: DashMap + +// LRU eviction when reaching size limit +if size > limit * 0.9 { + evict_least_recently_used(); +} +``` + +## Security Features + +### GitIgnore Risk Assessment + +The analyzer provides comprehensive gitignore status for all findings: + +- **TRACKED**: File is tracked by git (CRITICAL RISK) +- **EXPOSED**: File contains secrets but not in .gitignore (HIGH RISK) +- **PROTECTED**: File is properly ignored (GOOD) +- **SAFE**: File appears safe for version control + +### False Positive Reduction + +Advanced techniques to minimize false positives: + +- Skip documentation and comment lines +- Exclude template/example files +- Ignore placeholder values +- Context-aware confidence scoring + +## Contributing + +The turbo analyzer is designed for extensibility: + +- Add new pattern sets in `pattern_engine.rs` +- Extend file discovery logic in `file_discovery.rs` +- Implement additional scanners in `scanner.rs` + +## License + +Same as the parent project. \ No newline at end of file diff --git a/src/analyzer/security/turbo/cache.rs b/src/analyzer/security/turbo/cache.rs new file mode 100644 index 00000000..659d8e5e --- /dev/null +++ b/src/analyzer/security/turbo/cache.rs @@ -0,0 +1,369 @@ +//! # Cache Module +//! +//! High-performance caching for security scan results using DashMap and blake3. 
+ +use std::path::PathBuf; +use std::time::{SystemTime, Duration}; +use std::sync::Arc; + +use dashmap::DashMap; + +use log::{debug, trace}; + +use crate::analyzer::security::SecurityFinding; + +/// Cache key for file content +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct CacheKey { + pub file_path: PathBuf, +} + +/// Cached scan result +#[derive(Debug, Clone)] +pub struct CachedResult { + pub findings: Vec, + pub cached_at: SystemTime, + pub access_count: u32, +} + +/// High-performance security cache +pub struct SecurityCache { + // Main cache storage + cache: Arc>, + + // Cache configuration + max_size_bytes: usize, + current_size_bytes: Arc>, + eviction_threshold: f64, + + // Statistics + hits: Arc>, + misses: Arc>, +} + +/// Internal cache entry +#[derive(Debug, Clone)] +struct CachedEntry { + key: CacheKey, + result: CachedResult, + size_bytes: usize, + last_accessed: SystemTime, +} + +impl SecurityCache { + /// Create a new cache with specified size in MB + pub fn new(size_mb: usize) -> Self { + let max_size_bytes = size_mb * 1024 * 1024; + let hasher = ahash::RandomState::new(); + + Self { + cache: Arc::new(DashMap::with_hasher(hasher)), + max_size_bytes, + current_size_bytes: Arc::new(parking_lot::Mutex::new(0)), + eviction_threshold: 0.9, // Start eviction at 90% capacity + hits: Arc::new(parking_lot::Mutex::new(0)), + misses: Arc::new(parking_lot::Mutex::new(0)), + } + } + + /// Get cached result for a file + pub fn get(&self, file_path: &PathBuf) -> Option> { + let entry = self.cache.get_mut(file_path)?; + + // Update access statistics + let mut entry = entry; + entry.last_accessed = SystemTime::now(); + entry.result.access_count += 1; + + *self.hits.lock() += 1; + trace!("Cache hit for: {}", file_path.display()); + + Some(entry.result.findings.clone()) + } + + /// Insert a scan result into cache + pub fn insert(&self, file_path: PathBuf, findings: Vec) { + // Calculate entry size + let size_bytes = Self::estimate_size(&findings); + + // 
Check if we need to evict entries + let current_size = *self.current_size_bytes.lock(); + if current_size + size_bytes > (self.max_size_bytes as f64 * self.eviction_threshold) as usize { + self.evict_lru(); + } + + // Create cache key + let key = CacheKey { + file_path: file_path.clone(), + }; + + // Create cache entry + let entry = CachedEntry { + key, + result: CachedResult { + findings, + cached_at: SystemTime::now(), + access_count: 1, + }, + size_bytes, + last_accessed: SystemTime::now(), + }; + + // Insert into cache + if let Some(old_entry) = self.cache.insert(file_path, entry) { + // Subtract old entry size + *self.current_size_bytes.lock() -= old_entry.size_bytes; + } + + // Add new entry size + *self.current_size_bytes.lock() += size_bytes; + + debug!("Cached result, current size: {} MB", + *self.current_size_bytes.lock() / (1024 * 1024)); + } + + /// Clear the entire cache + pub fn clear(&self) { + self.cache.clear(); + *self.current_size_bytes.lock() = 0; + *self.hits.lock() = 0; + *self.misses.lock() = 0; + debug!("Cache cleared"); + } + + /// Get cache statistics + pub fn stats(&self) -> CacheStats { + let hits = *self.hits.lock(); + let misses = *self.misses.lock(); + let total = hits + misses; + + CacheStats { + hits, + misses, + hit_rate: if total > 0 { hits as f64 / total as f64 } else { 0.0 }, + entries: self.cache.len(), + size_bytes: *self.current_size_bytes.lock(), + capacity_bytes: self.max_size_bytes, + } + } + + /// Evict least recently used entries + fn evict_lru(&self) { + let target_size = (self.max_size_bytes as f64 * 0.7) as usize; // Evict to 70% capacity + let mut entries_to_remove = Vec::new(); + + // Collect entries sorted by last access time + let mut entries: Vec<(PathBuf, SystemTime, usize)> = self.cache.iter() + .map(|entry| (entry.key().clone(), entry.last_accessed, entry.size_bytes)) + .collect(); + + // Sort by last accessed (oldest first) + entries.sort_by_key(|(_, last_accessed, _)| *last_accessed); + + // Determine which 
entries to remove + let mut current_size = *self.current_size_bytes.lock(); + for (path, _, size) in entries { + if current_size <= target_size { + break; + } + + entries_to_remove.push(path); + current_size -= size; + } + + // Count entries to remove + let entries_removed = entries_to_remove.len(); + + // Remove entries + for path in entries_to_remove { + if let Some((_, entry)) = self.cache.remove(&path) { + *self.current_size_bytes.lock() -= entry.size_bytes; + } + } + + debug!("Evicted {} entries, new size: {} MB", + entries_removed, + *self.current_size_bytes.lock() / (1024 * 1024)); + } + + + + /// Estimate memory size of findings + fn estimate_size(findings: &[SecurityFinding]) -> usize { + // Base size for the vector + let mut size = std::mem::size_of::>(); + + // Add size for each finding + for finding in findings { + size += std::mem::size_of::(); + + // Add string sizes + size += finding.id.len(); + size += finding.title.len(); + size += finding.description.len(); + + if let Some(ref path) = finding.file_path { + size += path.to_string_lossy().len(); + } + + if let Some(ref evidence) = finding.evidence { + size += evidence.len(); + } + + // Add vector sizes + size += finding.remediation.iter().map(|s| s.len()).sum::(); + size += finding.references.iter().map(|s| s.len()).sum::(); + size += finding.compliance_frameworks.iter().map(|s| s.len()).sum::(); + + if let Some(ref cwe) = finding.cwe_id { + size += cwe.len(); + } + } + + size + } + + /// Invalidate cache entries older than duration + pub fn invalidate_older_than(&self, duration: Duration) { + let cutoff = SystemTime::now() - duration; + let mut removed = 0; + + self.cache.retain(|_, entry| { + if entry.result.cached_at < cutoff { + *self.current_size_bytes.lock() -= entry.size_bytes; + removed += 1; + false + } else { + true + } + }); + + if removed > 0 { + debug!("Invalidated {} stale cache entries", removed); + } + } +} + +/// Cache statistics +#[derive(Debug, Clone)] +pub struct CacheStats { + 
pub hits: u64, + pub misses: u64, + pub hit_rate: f64, + pub entries: usize, + pub size_bytes: usize, + pub capacity_bytes: usize, +} + +impl CacheStats { + /// Get human-readable size + pub fn size_mb(&self) -> f64 { + self.size_bytes as f64 / (1024.0 * 1024.0) + } + + /// Get capacity utilization percentage + pub fn utilization(&self) -> f64 { + if self.capacity_bytes == 0 { + 0.0 + } else { + (self.size_bytes as f64 / self.capacity_bytes as f64) * 100.0 + } + } +} + + + +#[cfg(test)] +mod tests { + use super::*; + use crate::analyzer::security::{SecuritySeverity, SecurityCategory}; + + #[test] + fn test_cache_basic_operations() { + let cache = SecurityCache::new(10); // 10MB cache + + let path = PathBuf::from("/test/file.js"); + let findings = vec![ + SecurityFinding { + id: "test-1".to_string(), + title: "Test Finding".to_string(), + description: "Test description".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + file_path: Some(path.clone()), + line_number: Some(10), + column_number: Some(5), + evidence: Some("evidence".to_string()), + remediation: vec!["Fix it".to_string()], + references: vec!["https://example.com".to_string()], + cwe_id: Some("CWE-798".to_string()), + compliance_frameworks: vec!["SOC2".to_string()], + } + ]; + + // Test insert + cache.insert(path.clone(), findings.clone()); + + // Test get + let cached = cache.get(&path); + assert!(cached.is_some()); + assert_eq!(cached.unwrap().len(), 1); + + // Test stats + let stats = cache.stats(); + assert_eq!(stats.hits, 1); + assert_eq!(stats.misses, 0); + assert_eq!(stats.entries, 1); + } + + #[test] + fn test_cache_eviction() { + let cache = SecurityCache::new(1); // 1MB cache (small for testing) + + // Insert many entries to trigger eviction + for i in 0..1000 { + let path = PathBuf::from(format!("/test/file{}.js", i)); + let findings = vec![ + SecurityFinding { + id: format!("test-{}", i), + title: "Test Finding with very long title to consume 
memory".to_string(), + description: "Test description that is also quite long to use up cache space".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + file_path: Some(path.clone()), + line_number: Some(10), + column_number: Some(5), + evidence: Some("evidence with long content to test memory usage".to_string()), + remediation: vec!["Fix it with a long remediation message".to_string()], + references: vec!["https://example.com/very/long/url/path".to_string()], + cwe_id: Some("CWE-798".to_string()), + compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], + } + ]; + + cache.insert(path, findings); + } + + // Cache should have evicted some entries + let stats = cache.stats(); + assert!(stats.entries < 1000); + assert!(stats.utilization() <= 90.0); + } + + #[test] + fn test_cache_invalidation() { + let cache = SecurityCache::new(10); + + let path = PathBuf::from("/test/file.js"); + let findings = vec![]; + + cache.insert(path.clone(), findings); + + // Invalidate entries older than 0 seconds (all entries) + cache.invalidate_older_than(Duration::from_secs(0)); + + // Cache should be empty + assert!(cache.get(&path).is_none()); + assert_eq!(cache.stats().entries, 0); + } +} \ No newline at end of file diff --git a/src/analyzer/security/turbo/file_discovery.rs b/src/analyzer/security/turbo/file_discovery.rs new file mode 100644 index 00000000..6bf9eb3f --- /dev/null +++ b/src/analyzer/security/turbo/file_discovery.rs @@ -0,0 +1,558 @@ +//! # File Discovery Module +//! +//! Ultra-fast file discovery with git-aware filtering and smart prioritization. 
+ +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::fs; +use std::time::SystemTime; + +use ahash::AHashSet; +use rayon::prelude::*; +use walkdir::WalkDir; +use log::{debug, trace}; + +use super::{ScanMode, SecurityError}; + +/// File metadata for efficient filtering +#[derive(Debug, Clone)] +pub struct FileMetadata { + pub path: PathBuf, + pub size: usize, + pub extension: Option, + pub is_gitignored: bool, + pub modified: SystemTime, + pub priority_hints: PriorityHints, +} + +/// Priority hints for file scoring +#[derive(Debug, Clone, Default)] +pub struct PriorityHints { + pub is_env_file: bool, + pub is_config_file: bool, + pub is_secret_file: bool, + pub is_source_file: bool, + pub has_secret_keywords: bool, +} + +/// Configuration for file discovery +#[derive(Debug, Clone)] +pub struct DiscoveryConfig { + pub use_git: bool, + pub max_file_size: usize, + pub priority_extensions: Vec, + pub scan_mode: ScanMode, +} + +/// High-performance file discovery +pub struct FileDiscovery { + config: DiscoveryConfig, + ignored_dirs: AHashSet, + secret_keywords: Vec<&'static str>, +} + +impl FileDiscovery { + pub fn new(config: DiscoveryConfig) -> Self { + let ignored_dirs = Self::get_ignored_dirs(&config.scan_mode); + let secret_keywords = Self::get_secret_keywords(); + + Self { + config, + ignored_dirs, + secret_keywords, + } + } + + /// Discover files with ultra-fast git-aware filtering + pub fn discover_files(&self, project_root: &Path) -> Result, SecurityError> { + let is_git_repo = project_root.join(".git").exists(); + + if is_git_repo && self.config.use_git { + self.git_aware_discovery(project_root) + } else { + self.filesystem_discovery(project_root) + } + } + + /// Git-aware file discovery (fastest method) + fn git_aware_discovery(&self, project_root: &Path) -> Result, SecurityError> { + debug!("Using git-aware file discovery"); + + // Get all tracked files using git ls-files + let tracked_files = self.get_git_tracked_files(project_root)?; + + 
// Get untracked files that might contain secrets + let untracked_files = self.get_untracked_secret_files(project_root)?; + + // Combine and process in parallel + let all_paths: Vec = tracked_files.into_iter() + .chain(untracked_files) + .collect(); + + // Process files in parallel to build metadata + let files: Vec = all_paths + .par_iter() + .filter_map(|path| self.build_file_metadata(path, project_root).ok()) + .filter(|meta| self.should_include_file(meta)) + .collect(); + + Ok(files) + } + + /// Get tracked files from git + fn get_git_tracked_files(&self, project_root: &Path) -> Result, SecurityError> { + let output = Command::new("git") + .args(&["ls-files", "-z"]) // -z for null-terminated output + .current_dir(project_root) + .output() + .map_err(|e| SecurityError::FileDiscovery(format!("Git ls-files failed: {}", e)))?; + + if !output.status.success() { + return Err(SecurityError::FileDiscovery("Git ls-files failed".to_string())); + } + + // Parse null-terminated paths + let paths: Vec = output.stdout + .split(|&b| b == 0) + .filter(|path| !path.is_empty()) + .filter_map(|path| std::str::from_utf8(path).ok()) + .map(|path| project_root.join(path)) + .collect(); + + Ok(paths) + } + + /// Get untracked files that might contain secrets + fn get_untracked_secret_files(&self, project_root: &Path) -> Result, SecurityError> { + // Common secret file patterns that might not be tracked + let secret_patterns = vec![ + ".env*", + "*.key", + "*.pem", + "*.p12", + "*credentials*", + "*secret*", + "config/*.json", + "config/*.yml", + ]; + + let mut untracked_files = Vec::new(); + + for pattern in secret_patterns { + let output = Command::new("git") + .args(&["ls-files", "--others", "--exclude-standard", pattern]) + .current_dir(project_root) + .output(); + + if let Ok(output) = output { + if output.status.success() { + let paths: Vec = String::from_utf8_lossy(&output.stdout) + .lines() + .map(|line| project_root.join(line)) + .collect(); + untracked_files.extend(paths); + 
} + } + } + + Ok(untracked_files) + } + + /// Fallback filesystem discovery + fn filesystem_discovery(&self, project_root: &Path) -> Result, SecurityError> { + debug!("Using filesystem discovery"); + + let walker = WalkDir::new(project_root) + .follow_links(false) + .max_depth(20) + .into_iter() + .filter_entry(|entry| { + // Skip ignored directories + if entry.file_type().is_dir() { + let dir_name = entry.file_name().to_string_lossy(); + return !self.ignored_dirs.contains(dir_name.as_ref()); + } + true + }); + + let files: Vec = walker + .par_bridge() + .filter_map(|entry| entry.ok()) + .filter(|entry| entry.file_type().is_file()) + .filter_map(|entry| self.build_file_metadata(entry.path(), project_root).ok()) + .filter(|meta| self.should_include_file(meta)) + .collect(); + + Ok(files) + } + + /// Build file metadata with priority hints + fn build_file_metadata(&self, path: &Path, project_root: &Path) -> Result { + let metadata = fs::metadata(path)?; + let size = metadata.len() as usize; + let modified = metadata.modified()?; + + let extension = path.extension() + .and_then(|ext| ext.to_str()) + .map(|s| s.to_lowercase()); + + let file_name = path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or(""); + + let file_name_lower = file_name.to_lowercase(); + + // Check gitignore status efficiently + let is_gitignored = if project_root.join(".git").exists() { + self.check_gitignore_batch(path, project_root) + } else { + false + }; + + // Build priority hints + let priority_hints = PriorityHints { + is_env_file: file_name_lower.starts_with(".env") || file_name_lower.ends_with(".env"), + is_config_file: self.is_config_file(&file_name_lower, &extension), + is_secret_file: self.is_secret_file(&file_name_lower, path), + is_source_file: self.is_source_file(&extension), + has_secret_keywords: self.has_secret_keywords(&file_name_lower), + }; + + Ok(FileMetadata { + path: path.to_path_buf(), + size, + extension, + is_gitignored, + modified, + priority_hints, + }) + } + + /// 
Batch check gitignore status + fn check_gitignore_batch(&self, path: &Path, project_root: &Path) -> bool { + // Quick check using git check-ignore + let output = Command::new("git") + .args(&["check-ignore", path.to_str().unwrap_or("")]) + .current_dir(project_root) + .output(); + + match output { + Ok(output) => output.status.success(), + Err(_) => false, + } + } + + /// Check if file should be included based on filters + fn should_include_file(&self, meta: &FileMetadata) -> bool { + // Size filter + if meta.size > self.config.max_file_size { + trace!("Skipping large file: {} ({} bytes)", meta.path.display(), meta.size); + return false; + } + + // Binary file detection (simple heuristic) + if let Some(ext) = &meta.extension { + let binary_extensions = ["exe", "dll", "so", "dylib", "jpg", "png", "gif", "mp4", "zip", "tar", "gz"]; + if binary_extensions.contains(&ext.as_str()) { + return false; + } + } + + // Exclude files that are unlikely to contain real secrets + if self.should_exclude_from_security_scan(meta) { + trace!("Excluding from security scan: {}", meta.path.display()); + return false; + } + + // Critical files always included + if meta.is_critical() { + return true; + } + + // Scan mode specific filtering + match self.config.scan_mode { + ScanMode::Lightning => { + // Only critical files (already handled above) + false + } + ScanMode::Fast => { + // Priority files or small source files + meta.is_priority() || (meta.priority_hints.is_source_file && meta.size < 50_000) + } + _ => true, // Include all for other modes + } + } + + /// Check if file should be excluded from security scanning + fn should_exclude_from_security_scan(&self, meta: &FileMetadata) -> bool { + let path_str = meta.path.to_string_lossy().to_lowercase(); + + // DEPENDENCY LOCK FILES - These contain package hashes/metadata, not secrets + if self.is_dependency_lock_file(meta) { + return true; + } + + // Documentation and non-code files that rarely contain real secrets + let exclude_patterns 
= [ + ".md", ".txt", ".rst", ".adoc", ".asciidoc", + "readme", "changelog", "license", "todo", + "roadmap", "contributing", "authors", + // Test files (often contain fake/example data) + "/test/", "/tests/", "/spec/", "/specs/", + "__test__", "__spec__", ".test.", ".spec.", + "_test.", "_spec.", "fixtures", "mocks", "examples", + // Documentation directories + "/docs/", "/doc/", "/documentation/", + // Framework/library detection files (they contain patterns but not secrets) + "frameworks/", "detector", "rules", "patterns", + // Build artifacts + "target/", "build/", "dist/", ".next/", "coverage/", + ]; + + // Check patterns + if exclude_patterns.iter().any(|&pattern| path_str.contains(pattern)) { + return true; + } + + // Documentation file extensions + if let Some(ext) = &meta.extension { + let doc_extensions = ["md", "txt", "rst", "adoc", "asciidoc"]; + if doc_extensions.contains(&ext.as_str()) { + return true; + } + } + + // Check if filename suggests it's documentation or examples + let filename = meta.path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or("") + .to_lowercase(); + + let doc_filenames = [ + "readme", "changelog", "license", "authors", "contributing", + "roadmap", "todo", "examples", "demo", "sample", + ]; + + if doc_filenames.iter().any(|&name| filename.contains(name)) { + return true; + } + + false + } + + /// Get ignored directories based on scan mode + fn get_ignored_dirs(scan_mode: &ScanMode) -> AHashSet { + let mut dirs = AHashSet::new(); + + // Always ignore these + let always_ignore = vec![ + ".git", "node_modules", "target", "build", "dist", ".next", + "coverage", "__pycache__", ".pytest_cache", ".mypy_cache", + "vendor", "packages", ".bundle", "bower_components", + ]; + + for dir in always_ignore { + dirs.insert(dir.to_string()); + } + + // Additional ignores for faster modes + if matches!(scan_mode, ScanMode::Lightning | ScanMode::Fast) { + let fast_ignore = vec!["test", "tests", "spec", "specs", "docs", "documentation"]; + for dir 
in fast_ignore { + dirs.insert(dir.to_string()); + } + } + + dirs + } + + /// Get secret keywords for detection + fn get_secret_keywords() -> Vec<&'static str> { + vec![ + "secret", "key", "token", "password", "credential", + "auth", "api", "private", "access", "bearer", + ] + } + + fn is_config_file(&self, name: &str, extension: &Option) -> bool { + let config_extensions = ["json", "yml", "yaml", "toml", "ini", "conf", "config", "xml"]; + let config_names = ["config", "settings", "configuration", ".env"]; + + if let Some(ext) = extension { + if config_extensions.contains(&ext.as_str()) { + return true; + } + } + + config_names.iter().any(|&n| name.contains(n)) + } + + fn is_secret_file(&self, name: &str, path: &Path) -> bool { + let secret_patterns = [ + ".env", ".key", ".pem", ".p12", ".pfx", + "credentials", "secret", "private", "cert", + ]; + + // Check filename + if secret_patterns.iter().any(|&p| name.contains(p)) { + return true; + } + + // Check path components + let path_str = path.to_string_lossy().to_lowercase(); + secret_patterns.iter().any(|&p| path_str.contains(p)) + } + + fn is_source_file(&self, extension: &Option) -> bool { + if let Some(ext) = extension { + let source_extensions = [ + "js", "jsx", "ts", "tsx", "py", "java", "kt", "go", + "rs", "rb", "php", "cs", "cpp", "c", "h", "swift", + "scala", "clj", "ex", "exs", + ]; + source_extensions.contains(&ext.as_str()) + } else { + false + } + } + + fn has_secret_keywords(&self, name: &str) -> bool { + self.secret_keywords.iter().any(|&keyword| name.contains(keyword)) + } + + /// Check if file is a dependency lock file (contains hashes/metadata, not secrets) + fn is_dependency_lock_file(&self, meta: &FileMetadata) -> bool { + let filename = meta.path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or("") + .to_lowercase(); + + // Common dependency lock files that contain package hashes and metadata + let lock_files = [ + // JavaScript/Node.js + "package-lock.json", + "yarn.lock", + 
"pnpm-lock.yaml", // <-- This was missing! + "shrinkwrap.yaml", + "npm-shrinkwrap.json", + // Python + "poetry.lock", + "pipfile.lock", + "pip-lock.txt", + // Rust + "cargo.lock", + // Go + "go.sum", + "go.mod", + // Java + "gradle.lockfile", + "maven-dependency-plugin.log", + // Ruby + "gemfile.lock", + // PHP + "composer.lock", + // .NET + "packages.lock.json", + "paket.lock", + // Others + "mix.lock", // Elixir + "pubspec.lock", // Dart + ]; + + // Check if filename matches any lock file pattern + lock_files.iter().any(|&pattern| filename == pattern) || + // Also check for common lock file patterns + filename.ends_with(".lock") || + filename.ends_with("-lock.json") || + filename.ends_with("-lock.yaml") || + filename.ends_with("-lock.yml") || + filename.contains("shrinkwrap") || + filename.contains("lockfile") + } +} + +impl FileMetadata { + /// Check if file is critical (must scan) + pub fn is_critical(&self) -> bool { + self.priority_hints.is_env_file || + self.priority_hints.is_secret_file || + self.extension.as_deref() == Some("pem") || + self.extension.as_deref() == Some("key") + } + + /// Check if file is high priority + pub fn is_priority(&self) -> bool { + self.is_critical() || + self.priority_hints.is_config_file || + self.priority_hints.has_secret_keywords + } + + /// Calculate priority score (higher = more important) + pub fn priority_score(&self) -> u32 { + let mut score: u32 = 0; + + if self.priority_hints.is_env_file { score += 1000; } + if self.priority_hints.is_secret_file { score += 900; } + if self.priority_hints.is_config_file { score += 500; } + if self.priority_hints.has_secret_keywords { score += 300; } + if !self.is_gitignored { score += 200; } + if self.priority_hints.is_source_file { score += 100; } + + // Penalize large files + if self.size > 1_000_000 { score = score.saturating_sub(100); } + + score + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_file_priority_scoring() { + let meta = 
FileMetadata { + path: PathBuf::from(".env"), + size: 100, + extension: Some("env".to_string()), + is_gitignored: false, + modified: SystemTime::now(), + priority_hints: PriorityHints { + is_env_file: true, + is_config_file: true, + is_secret_file: true, + is_source_file: false, + has_secret_keywords: true, + }, + }; + + assert!(meta.is_critical()); + assert!(meta.is_priority()); + assert!(meta.priority_score() > 2000); + } + + #[test] + fn test_file_discovery() { + let temp_dir = TempDir::new().unwrap(); + fs::write(temp_dir.path().join(".env"), "SECRET=123").unwrap(); + fs::write(temp_dir.path().join("config.json"), "{}").unwrap(); + fs::create_dir(temp_dir.path().join("node_modules")).unwrap(); + fs::write(temp_dir.path().join("node_modules/test.js"), "code").unwrap(); + + let config = DiscoveryConfig { + use_git: false, + max_file_size: 1024 * 1024, + priority_extensions: vec!["env".to_string()], + scan_mode: ScanMode::Fast, + }; + + let discovery = FileDiscovery::new(config); + let files = discovery.discover_files(temp_dir.path()).unwrap(); + + // Should find .env and config.json but not node_modules/test.js + assert_eq!(files.len(), 2); + assert!(files.iter().any(|f| f.path.ends_with(".env"))); + assert!(files.iter().any(|f| f.path.ends_with("config.json"))); + } +} \ No newline at end of file diff --git a/src/analyzer/security/turbo/mod.rs b/src/analyzer/security/turbo/mod.rs new file mode 100644 index 00000000..707e395a --- /dev/null +++ b/src/analyzer/security/turbo/mod.rs @@ -0,0 +1,390 @@ +//! # Turbo Security Analyzer +//! +//! High-performance security analyzer that's 10-100x faster than traditional approaches. +//! Uses advanced techniques like multi-pattern matching, memory-mapped I/O, and intelligent filtering. 
+ +use std::path::Path; +use std::sync::Arc; +use std::time::Instant; + +use crossbeam::channel::bounded; + +use rayon::prelude::*; +use log::{info, debug, trace}; + +pub mod file_discovery; +pub mod pattern_engine; +pub mod cache; +pub mod scanner; +pub mod results; + +use file_discovery::{FileDiscovery, FileMetadata, DiscoveryConfig}; +use pattern_engine::PatternEngine; +use cache::SecurityCache; +use scanner::{FileScanner, ScanTask, ScanResult}; +use results::{ResultAggregator, SecurityReport}; + +use crate::analyzer::security::SecurityFinding; + +/// Turbo security analyzer configuration +#[derive(Debug, Clone)] +pub struct TurboConfig { + /// Scanning mode determines speed vs thoroughness tradeoff + pub scan_mode: ScanMode, + + /// Maximum file size to scan (in bytes) + pub max_file_size: usize, + + /// Number of worker threads (0 = auto-detect) + pub worker_threads: usize, + + /// Enable memory mapping for large files + pub use_mmap: bool, + + /// Cache configuration + pub enable_cache: bool, + pub cache_size_mb: usize, + + /// Early termination + pub max_critical_findings: Option, + pub timeout_seconds: Option, + + /// File filtering + pub skip_gitignored: bool, + pub priority_extensions: Vec, + + /// Pattern configuration + pub pattern_sets: Vec, +} + +/// Scanning modes with different speed/accuracy tradeoffs +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ScanMode { + /// Ultra-fast: Critical files only (.env, configs), basic patterns + Lightning, + + /// Fast: Smart sampling, priority patterns, skip large files + Fast, + + /// Balanced: Good coverage with performance optimizations + Balanced, + + /// Thorough: Full scan with all patterns (still optimized) + Thorough, + + /// Paranoid: Everything including experimental patterns + Paranoid, +} + +impl Default for TurboConfig { + fn default() -> Self { + Self { + scan_mode: ScanMode::Balanced, + max_file_size: 10 * 1024 * 1024, // 10MB + worker_threads: 0, // Auto-detect + use_mmap: true, + 
enable_cache: true, + cache_size_mb: 100, + max_critical_findings: None, + timeout_seconds: None, + skip_gitignored: true, + priority_extensions: vec![ + "env".to_string(), + "key".to_string(), + "pem".to_string(), + "json".to_string(), + "yml".to_string(), + "yaml".to_string(), + "toml".to_string(), + "ini".to_string(), + "conf".to_string(), + "config".to_string(), + ], + pattern_sets: vec!["default".to_string()], + } + } +} + +/// High-performance security analyzer +pub struct TurboSecurityAnalyzer { + config: TurboConfig, + pattern_engine: Arc, + cache: Arc, + file_discovery: Arc, +} + +impl TurboSecurityAnalyzer { + /// Create a new turbo security analyzer + pub fn new(config: TurboConfig) -> Result { + let start = Instant::now(); + + // Initialize pattern engine with compiled patterns + let pattern_engine = Arc::new(PatternEngine::new(&config)?); + info!("Pattern engine initialized with {} patterns in {:?}", + pattern_engine.pattern_count(), start.elapsed()); + + // Initialize cache + let cache = Arc::new(SecurityCache::new(config.cache_size_mb)); + + // Initialize file discovery + let discovery_config = DiscoveryConfig { + use_git: config.skip_gitignored, + max_file_size: config.max_file_size, + priority_extensions: config.priority_extensions.clone(), + scan_mode: config.scan_mode, + }; + let file_discovery = Arc::new(FileDiscovery::new(discovery_config)); + + Ok(Self { + config, + pattern_engine, + cache, + file_discovery, + }) + } + + /// Analyze a project with turbo performance + pub fn analyze_project(&self, project_root: &Path) -> Result { + let start = Instant::now(); + info!("πŸš€ Starting turbo security analysis for: {}", project_root.display()); + + // Phase 1: Ultra-fast file discovery + let discovery_start = Instant::now(); + let files = self.file_discovery.discover_files(project_root)?; + info!("πŸ“ Discovered {} files in {:?}", files.len(), discovery_start.elapsed()); + + // Early exit if no files + if files.is_empty() { + return 
Ok(SecurityReport::empty()); + } + + // Phase 2: Intelligent filtering and prioritization + let filtered_files = self.filter_and_prioritize_files(files); + info!("🎯 Filtered to {} high-priority files", filtered_files.len()); + + // Phase 3: Parallel scanning with work-stealing + let scan_start = Instant::now(); + let findings = self.parallel_scan(filtered_files)?; + info!("πŸ” Scanned files in {:?}, found {} findings", + scan_start.elapsed(), findings.len()); + + // Phase 4: Result aggregation and report generation + let report = ResultAggregator::aggregate(findings, start.elapsed()); + + info!("βœ… Turbo analysis completed in {:?}", start.elapsed()); + Ok(report) + } + + /// Filter and prioritize files based on scan mode and heuristics + fn filter_and_prioritize_files(&self, files: Vec) -> Vec { + use ScanMode::*; + + let mut filtered: Vec = match self.config.scan_mode { + Lightning => { + // Ultra-fast: Only critical files + files.into_iter() + .filter(|f| f.is_critical()) + .take(100) // Hard limit for speed + .collect() + } + Fast => { + // Fast: Priority files + sample of others + let (priority, others): (Vec<_>, Vec<_>) = files.into_iter() + .partition(|f| f.is_priority()); + + let mut result = priority; + // Sample 20% of other files + let sample_size = others.len() / 5; + result.extend(others.into_iter().take(sample_size)); + result + } + Balanced => { + // Balanced: All priority files + 50% of others + let (priority, others): (Vec<_>, Vec<_>) = files.into_iter() + .partition(|f| f.is_priority()); + + let mut result = priority; + let sample_size = others.len() / 2; + result.extend(others.into_iter().take(sample_size)); + result + } + Thorough => { + // Thorough: All files except huge ones + files.into_iter() + .filter(|f| f.size < self.config.max_file_size) + .collect() + } + Paranoid => { + // Paranoid: Everything + files + } + }; + + // Sort by priority score (critical files first) + filtered.par_sort_by_key(|f| std::cmp::Reverse(f.priority_score())); + 
filtered + } + + /// Parallel scan with work-stealing and early termination + fn parallel_scan(&self, files: Vec) -> Result, SecurityError> { + let thread_count = if self.config.worker_threads == 0 { + num_cpus::get() + } else { + self.config.worker_threads + }; + + // Create channels for work distribution + let (task_sender, task_receiver) = bounded::(thread_count * 10); + let (result_sender, result_receiver) = bounded::(thread_count * 10); + + // Atomic counter for early termination + let critical_count = Arc::new(parking_lot::Mutex::new(0)); + let should_terminate = Arc::new(parking_lot::RwLock::new(false)); + + // Spawn scanner threads + let scanner_handles: Vec<_> = (0..thread_count) + .map(|thread_id| { + let scanner = FileScanner::new( + thread_id, + Arc::clone(&self.pattern_engine), + Arc::clone(&self.cache), + self.config.use_mmap, + ); + + let task_receiver = task_receiver.clone(); + let result_sender = result_sender.clone(); + let critical_count = Arc::clone(&critical_count); + let should_terminate = Arc::clone(&should_terminate); + let max_critical = self.config.max_critical_findings; + + std::thread::spawn(move || { + scanner.run( + task_receiver, + result_sender, + critical_count, + should_terminate, + max_critical, + ) + }) + }) + .collect(); + + // Drop original receiver to signal completion + drop(task_receiver); + + // Send scan tasks + let task_sender_thread = { + let task_sender = task_sender.clone(); + let should_terminate = Arc::clone(&should_terminate); + + std::thread::spawn(move || { + for (idx, file) in files.into_iter().enumerate() { + // Check for early termination + if *should_terminate.read() { + debug!("Early termination triggered, stopping task distribution"); + break; + } + + let task = ScanTask { + id: idx, + file, + quick_reject: idx > 1000, // Quick reject for files after first 1000 + }; + + if task_sender.send(task).is_err() { + break; // Channel closed + } + } + }) + }; + + // Drop original sender to signal completion + 
drop(task_sender); + drop(result_sender); + + // Collect results + let mut all_findings = Vec::new(); + let mut files_scanned = 0; + let mut files_skipped = 0; + + while let Ok(result) = result_receiver.recv() { + match result { + ScanResult::Findings(findings) => { + all_findings.extend(findings); + files_scanned += 1; + } + ScanResult::Skipped => { + files_skipped += 1; + } + ScanResult::Error(err) => { + debug!("Scan error: {}", err); + } + } + + // Progress reporting every 100 files + if (files_scanned + files_skipped) % 100 == 0 { + trace!("Progress: {} scanned, {} skipped", files_scanned, files_skipped); + } + } + + // Wait for threads to complete + task_sender_thread.join().unwrap(); + for handle in scanner_handles { + handle.join().unwrap(); + } + + info!("Scan complete: {} files scanned, {} skipped, {} findings", + files_scanned, files_skipped, all_findings.len()); + + Ok(all_findings) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum SecurityError { + #[error("Pattern engine error: {0}")] + PatternEngine(String), + + #[error("File discovery error: {0}")] + FileDiscovery(String), + + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("Cache error: {0}")] + Cache(String), +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + use std::fs; + + #[test] + fn test_turbo_analyzer_creation() { + let config = TurboConfig::default(); + let analyzer = TurboSecurityAnalyzer::new(config); + assert!(analyzer.is_ok()); + } + + #[test] + fn test_scan_modes() { + let temp_dir = TempDir::new().unwrap(); + + // Create test files + fs::write(temp_dir.path().join(".env"), "API_KEY=secret123").unwrap(); + fs::write(temp_dir.path().join("config.json"), r#"{"key": "value"}"#).unwrap(); + fs::write(temp_dir.path().join("main.rs"), "fn main() {}").unwrap(); + + // Test Lightning mode (should only scan critical files) + let mut config = TurboConfig::default(); + config.scan_mode = ScanMode::Lightning; + + let analyzer = 
TurboSecurityAnalyzer::new(config).unwrap(); + let report = analyzer.analyze_project(temp_dir.path()).unwrap(); + + // Should find the .env file + assert!(report.total_findings > 0); + } +} \ No newline at end of file diff --git a/src/analyzer/security/turbo/pattern_engine.rs b/src/analyzer/security/turbo/pattern_engine.rs new file mode 100644 index 00000000..95629636 --- /dev/null +++ b/src/analyzer/security/turbo/pattern_engine.rs @@ -0,0 +1,552 @@ +//! # Pattern Engine Module +//! +//! Ultra-fast multi-pattern matching using Aho-Corasick algorithm and compiled regex sets. + +use std::sync::Arc; +use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; +use regex::Regex; +use ahash::AHashMap; +use log::debug; + +use super::{TurboConfig, SecurityError}; +use crate::analyzer::security::{SecuritySeverity, SecurityCategory}; + +/// A compiled pattern for ultra-fast matching +#[derive(Debug, Clone)] +pub struct CompiledPattern { + pub id: String, + pub name: String, + pub severity: SecuritySeverity, + pub category: SecurityCategory, + pub description: String, + pub remediation: Vec, + pub references: Vec, + pub cwe_id: Option, + pub confidence_boost_keywords: Vec, + pub false_positive_keywords: Vec, +} + +/// Pattern match result +#[derive(Debug, Clone)] +pub struct PatternMatch { + pub pattern: Arc, + pub line_number: usize, + pub column_number: usize, + pub evidence: String, + pub confidence: f32, +} + +/// High-performance pattern matching engine +pub struct PatternEngine { + // Multi-pattern matchers + secret_matcher: AhoCorasick, + env_var_matcher: AhoCorasick, + api_key_matcher: AhoCorasick, + + // Pattern lookup maps + secret_patterns: AHashMap>, + env_var_patterns: AHashMap>, + api_key_patterns: AHashMap>, + + // Specialized matchers for complex patterns + complex_patterns: Vec<(Regex, Arc)>, + + // Performance counters + total_patterns: usize, +} + +impl PatternEngine { + pub fn new(config: &TurboConfig) -> Result { + debug!("Initializing pattern 
engine with pattern sets: {:?}", config.pattern_sets); + + // Load patterns based on configuration + let (secret_patterns, env_var_patterns, api_key_patterns, complex_patterns) = + Self::load_patterns(&config.pattern_sets)?; + + // Build Aho-Corasick matchers + let secret_matcher = Self::build_matcher(&secret_patterns)?; + let env_var_matcher = Self::build_matcher(&env_var_patterns)?; + let api_key_matcher = Self::build_matcher(&api_key_patterns)?; + + let total_patterns = secret_patterns.len() + env_var_patterns.len() + + api_key_patterns.len() + complex_patterns.len(); + + debug!("Pattern engine initialized with {} total patterns", total_patterns); + + Ok(Self { + secret_matcher, + env_var_matcher, + api_key_matcher, + secret_patterns: Self::create_pattern_map(secret_patterns), + env_var_patterns: Self::create_pattern_map(env_var_patterns), + api_key_patterns: Self::create_pattern_map(api_key_patterns), + complex_patterns, + total_patterns, + }) + } + + /// Get total pattern count + pub fn pattern_count(&self) -> usize { + self.total_patterns + } + + /// Scan content for all patterns + pub fn scan_content(&self, content: &str, quick_reject: bool) -> Vec { + // Quick reject using Boyer-Moore substring search + if quick_reject && !self.quick_contains_secrets(content) { + return Vec::new(); + } + + let mut matches = Vec::new(); + + // Split content into lines for line number tracking + let lines: Vec<&str> = content.lines().collect(); + let mut line_offsets = vec![0]; + let mut offset = 0; + + for line in &lines { + offset += line.len() + 1; // +1 for newline + line_offsets.push(offset); + } + + // Run multi-pattern matchers + matches.extend(self.run_matcher(&self.secret_matcher, content, &self.secret_patterns, &lines, &line_offsets)); + matches.extend(self.run_matcher(&self.env_var_matcher, content, &self.env_var_patterns, &lines, &line_offsets)); + matches.extend(self.run_matcher(&self.api_key_matcher, content, &self.api_key_patterns, &lines, &line_offsets)); + + 
// Run complex patterns (regex-based) + for (line_num, line) in lines.iter().enumerate() { + for (regex, pattern) in &self.complex_patterns { + if let Some(mat) = regex.find(line) { + let confidence = self.calculate_confidence(line, content, &pattern); + + matches.push(PatternMatch { + pattern: Arc::clone(pattern), + line_number: line_num + 1, + column_number: mat.start() + 1, + evidence: self.extract_evidence(line, mat.start(), mat.end()), + confidence, + }); + } + } + } + + // Intelligent confidence filtering - adaptive threshold based on pattern type + matches.retain(|m| { + let threshold = match m.pattern.id.as_str() { + id if id.contains("aws-access-key") || id.contains("openai-api-key") => 0.3, // High-confidence patterns + id if id.contains("jwt-token") || id.contains("database-url") => 0.5, // Medium confidence patterns + id if id.contains("generic") => 0.7, // Generic patterns need higher confidence + _ => 0.6, // Default threshold + }; + m.confidence > threshold + }); + + matches + } + + /// Quick check if content might contain secrets + fn quick_contains_secrets(&self, content: &str) -> bool { + // Common secret indicators (optimized for speed) + const QUICK_PATTERNS: &[&str] = &[ + "api", "key", "secret", "token", "password", "credential", + "auth", "private", "-----BEGIN", "sk_", "pk_", "eyJ", + ]; + + let content_lower = content.to_lowercase(); + QUICK_PATTERNS.iter().any(|&pattern| content_lower.contains(pattern)) + } + + /// Run Aho-Corasick matcher and collect results + fn run_matcher( + &self, + matcher: &AhoCorasick, + content: &str, + patterns: &AHashMap>, + lines: &[&str], + line_offsets: &[usize], + ) -> Vec { + let mut matches = Vec::new(); + + for mat in matcher.find_iter(content) { + let pattern_id = mat.pattern().as_usize(); + if let Some(pattern) = patterns.get(&pattern_id) { + // Find line and column + let (line_num, col_num) = self.offset_to_line_col(mat.start(), line_offsets); + let line = 
lines.get(line_num.saturating_sub(1)).unwrap_or(&""); + + let confidence = self.calculate_confidence(line, content, pattern); + + matches.push(PatternMatch { + pattern: Arc::clone(pattern), + line_number: line_num, + column_number: col_num, + evidence: self.extract_evidence(line, mat.start(), mat.end()), + confidence, + }); + } + } + + matches + } + + /// Convert byte offset to line and column numbers + fn offset_to_line_col(&self, offset: usize, line_offsets: &[usize]) -> (usize, usize) { + let line_num = line_offsets.binary_search(&offset) + .unwrap_or_else(|i| i.saturating_sub(1)); + + let line_start = line_offsets.get(line_num).copied().unwrap_or(0); + let col_num = offset - line_start + 1; + + (line_num + 1, col_num) + } + + /// Calculate confidence score for a match + fn calculate_confidence(&self, line: &str, content: &str, pattern: &CompiledPattern) -> f32 { + let mut confidence: f32 = 0.6; + + let line_lower = line.to_lowercase(); + let content_lower = content.to_lowercase(); + + // Basic false positive detection + if line_lower.starts_with("//") || line_lower.starts_with("#") || line_lower.contains("example") || + line_lower.contains("placeholder") || line_lower.contains("your_") || line_lower.contains("todo") { + return 0.0; // Skip obvious examples/docs + } + + // Boost confidence for actual assignments + if line.contains("=") || line.contains(":") { + confidence += 0.2; + } + + // Check pattern-specific keywords + for keyword in &pattern.confidence_boost_keywords { + if content_lower.contains(&keyword.to_lowercase()) { + confidence += 0.1; + } + } + + for keyword in &pattern.false_positive_keywords { + if line_lower.contains(&keyword.to_lowercase()) { + confidence -= 0.4; + } + } + + confidence.clamp(0.0, 1.0) + } + + + + /// Extract evidence with context + fn extract_evidence(&self, line: &str, start: usize, end: usize) -> String { + // Mask the actual secret value + let prefix = &line[..start.min(line.len())]; + let suffix = 
&line[end.min(line.len())..]; + let masked = "*".repeat((end - start).min(20)); + + format!("{}{}{}", prefix, masked, suffix).trim().to_string() + } + + /// Build Aho-Corasick matcher from patterns + fn build_matcher(patterns: &[(String, Arc)]) -> Result { + let strings: Vec<&str> = patterns.iter().map(|(s, _)| s.as_str()).collect(); + + let matcher = AhoCorasickBuilder::new() + .match_kind(MatchKind::LeftmostFirst) + .ascii_case_insensitive(true) + .build(&strings) + .map_err(|e| SecurityError::PatternEngine(format!("Failed to build matcher: {}", e)))?; + + Ok(matcher) + } + + /// Create pattern lookup map + fn create_pattern_map(patterns: Vec<(String, Arc)>) -> AHashMap> { + patterns.into_iter() + .enumerate() + .map(|(id, (_, pattern))| (id, pattern)) + .collect() + } + + /// Load patterns based on pattern sets + fn load_patterns(pattern_sets: &[String]) -> Result<( + Vec<(String, Arc)>, + Vec<(String, Arc)>, + Vec<(String, Arc)>, + Vec<(Regex, Arc)>, + ), SecurityError> { + let mut secret_patterns = Vec::new(); + let mut env_var_patterns = Vec::new(); + let mut api_key_patterns = Vec::new(); + let mut complex_patterns = Vec::new(); + + // Load default patterns + if pattern_sets.contains(&"default".to_string()) { + Self::load_default_patterns(&mut secret_patterns, &mut env_var_patterns, + &mut api_key_patterns, &mut complex_patterns)?; + } + + // Load additional pattern sets + for set in pattern_sets { + match set.as_str() { + "aws" => Self::load_aws_patterns(&mut api_key_patterns)?, + "gcp" => Self::load_gcp_patterns(&mut api_key_patterns)?, + "azure" => Self::load_azure_patterns(&mut api_key_patterns)?, + "crypto" => Self::load_crypto_patterns(&mut secret_patterns)?, + _ => {} + } + } + + Ok((secret_patterns, env_var_patterns, api_key_patterns, complex_patterns)) + } + + /// Load default security patterns - focused on ACTUAL secrets, not references + fn load_default_patterns( + secret_patterns: &mut Vec<(String, Arc)>, + env_var_patterns: &mut Vec<(String, 
Arc)>, + api_key_patterns: &mut Vec<(String, Arc)>, + complex_patterns: &mut Vec<(Regex, Arc)>, + ) -> Result<(), SecurityError> { + // ONLY detect actual API key values, not variable names + + // OpenAI API Keys - actual key format + api_key_patterns.push(( + "sk-".to_string(), + Arc::new(CompiledPattern { + id: "openai-api-key".to_string(), + name: "OpenAI API Key".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "OpenAI API key detected".to_string(), + remediation: vec![ + "Remove API key from source code".to_string(), + "Use environment variables".to_string(), + ], + references: vec!["https://platform.openai.com/docs/api-reference".to_string()], + cwe_id: Some("CWE-798".to_string()), + confidence_boost_keywords: vec!["openai".to_string(), "gpt".to_string()], + false_positive_keywords: vec![ + "sk-xxxxxxxx".to_string(), "sk-...".to_string(), "sk_test".to_string(), + "example".to_string(), "placeholder".to_string(), "your_".to_string(), + "TODO".to_string(), "FIXME".to_string(), "XXX".to_string(), + ], + }), + )); + + // Complex regex patterns for ACTUAL secret assignments with values + complex_patterns.push(( + // Only match when there's an actual long value, not just variable names + Regex::new(r#"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token)\s*[:=]\s*['"]([a-zA-Z0-9+/=]{32,})['"]"#) + .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?, + Arc::new(CompiledPattern { + id: "long-secret-value".to_string(), + name: "Hardcoded Secret Value".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "Long secret value hardcoded in source code".to_string(), + remediation: vec![ + "Use environment variables for secrets".to_string(), + "Implement proper secret management".to_string(), + ], + references: vec![], + cwe_id: Some("CWE-798".to_string()), + confidence_boost_keywords: vec!["bearer".to_string(), 
"auth".to_string()], + false_positive_keywords: vec![ + "process.env".to_string(), "getenv".to_string(), "example".to_string(), + "placeholder".to_string(), "your_".to_string(), "TODO".to_string(), + "test".to_string(), "demo".to_string(), "fake".to_string(), + ], + }), + )); + + // JWT tokens (actual token format) + complex_patterns.push(( + Regex::new(r#"\beyJ[a-zA-Z0-9+/=]{100,}\b"#) + .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?, + Arc::new(CompiledPattern { + id: "jwt-token".to_string(), + name: "JWT Token".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + description: "JWT token detected in source code".to_string(), + remediation: vec![ + "Never hardcode JWT tokens".to_string(), + "Use secure token storage".to_string(), + ], + references: vec![], + cwe_id: Some("CWE-798".to_string()), + confidence_boost_keywords: vec!["bearer".to_string(), "authorization".to_string()], + false_positive_keywords: vec!["example".to_string(), "demo".to_string()], + }), + )); + + // Database connection strings with embedded credentials + complex_patterns.push(( + Regex::new(r#"(?i)(?:postgres|mysql|mongodb)://[^:\s]+:[^@\s]+@[^/\s]+/[^\s]*"#) + .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?, + Arc::new(CompiledPattern { + id: "database-url-with-creds".to_string(), + name: "Database URL with Credentials".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "Database connection string with embedded credentials".to_string(), + remediation: vec![ + "Use environment variables for database credentials".to_string(), + "Use connection string without embedded passwords".to_string(), + ], + references: vec![], + cwe_id: Some("CWE-798".to_string()), + confidence_boost_keywords: vec!["connection".to_string(), "database".to_string()], + false_positive_keywords: vec![ + "example.com".to_string(), "localhost".to_string(), 
"placeholder".to_string(), + "your_".to_string(), "user:pass".to_string(), + ], + }), + )); + + // Private SSH/SSL keys + secret_patterns.push(( + "-----BEGIN".to_string(), + Arc::new(CompiledPattern { + id: "private-key-header".to_string(), + name: "Private Key".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "Private key detected".to_string(), + remediation: vec![ + "Never commit private keys to version control".to_string(), + "Use secure key storage solutions".to_string(), + ], + references: vec![], + cwe_id: Some("CWE-321".to_string()), + confidence_boost_keywords: vec!["PRIVATE".to_string(), "RSA".to_string(), "DSA".to_string()], + false_positive_keywords: vec!["PUBLIC".to_string(), "CERTIFICATE".to_string()], + }), + )); + + Ok(()) + } + + /// Load AWS-specific patterns + fn load_aws_patterns(api_key_patterns: &mut Vec<(String, Arc)>) -> Result<(), SecurityError> { + api_key_patterns.push(( + "AKIA".to_string(), + Arc::new(CompiledPattern { + id: "aws-access-key".to_string(), + name: "AWS Access Key".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "AWS Access Key ID detected".to_string(), + remediation: vec![ + "Remove AWS credentials from source code".to_string(), + "Use IAM roles or environment variables".to_string(), + "Rotate the exposed key immediately".to_string(), + ], + references: vec!["https://docs.aws.amazon.com/security/".to_string()], + cwe_id: Some("CWE-798".to_string()), + confidence_boost_keywords: vec!["aws".to_string(), "s3".to_string(), "ec2".to_string()], + false_positive_keywords: vec!["AKIA00000000".to_string()], + }), + )); + + Ok(()) + } + + /// Load GCP-specific patterns + fn load_gcp_patterns(api_key_patterns: &mut Vec<(String, Arc)>) -> Result<(), SecurityError> { + api_key_patterns.push(( + "AIza".to_string(), + Arc::new(CompiledPattern { + id: "gcp-api-key".to_string(), + name: "Google Cloud API 
Key".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + description: "Google Cloud API key detected".to_string(), + remediation: vec![ + "Use service accounts instead of API keys".to_string(), + "Restrict API key usage by IP/referrer".to_string(), + ], + references: vec!["https://cloud.google.com/security/".to_string()], + cwe_id: Some("CWE-798".to_string()), + confidence_boost_keywords: vec!["google".to_string(), "gcp".to_string(), "firebase".to_string()], + false_positive_keywords: vec![], + }), + )); + + Ok(()) + } + + /// Load Azure-specific patterns + fn load_azure_patterns(_api_key_patterns: &mut Vec<(String, Arc)>) -> Result<(), SecurityError> { + // Azure patterns would go here + Ok(()) + } + + /// Load cryptocurrency-related patterns + fn load_crypto_patterns(secret_patterns: &mut Vec<(String, Arc)>) -> Result<(), SecurityError> { + secret_patterns.push(( + "-----BEGIN".to_string(), + Arc::new(CompiledPattern { + id: "private-key".to_string(), + name: "Private Key".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + description: "Private key detected".to_string(), + remediation: vec![ + "Never commit private keys to version control".to_string(), + "Use secure key storage solutions".to_string(), + ], + references: vec![], + cwe_id: Some("CWE-321".to_string()), + confidence_boost_keywords: vec!["RSA".to_string(), "PRIVATE".to_string()], + false_positive_keywords: vec!["PUBLIC".to_string()], + }), + )); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pattern_engine_creation() { + let config = TurboConfig::default(); + let engine = PatternEngine::new(&config); + assert!(engine.is_ok()); + + let engine = engine.unwrap(); + assert!(engine.pattern_count() > 0); + } + + #[test] + fn test_pattern_matching() { + let config = TurboConfig::default(); + let engine = PatternEngine::new(&config).unwrap(); + + let content = r#" + const 
apiKey = "sk-1234567890abcdef"; + password = "super_secret_password"; + process.env.DATABASE_URL + "#; + + let matches = engine.scan_content(content, false); + assert!(!matches.is_empty()); + + // Should find API key and password + assert!(matches.iter().any(|m| m.pattern.id == "openai-api-key")); + assert!(matches.iter().any(|m| m.pattern.id == "generic-password")); + } + + #[test] + fn test_quick_reject() { + let config = TurboConfig::default(); + let engine = PatternEngine::new(&config).unwrap(); + + let safe_content = "fn main() { println!(\"Hello, world!\"); }"; + let matches = engine.scan_content(safe_content, true); + assert!(matches.is_empty()); + } +} \ No newline at end of file diff --git a/src/analyzer/security/turbo/results.rs b/src/analyzer/security/turbo/results.rs new file mode 100644 index 00000000..24ba80dd --- /dev/null +++ b/src/analyzer/security/turbo/results.rs @@ -0,0 +1,403 @@ +//! # Results Module +//! +//! Aggregation and processing of security scan results. + +use std::collections::HashMap; +use std::time::Duration; + +use ahash::AHashMap; +use chrono::{DateTime, Utc}; +use serde::{Serialize, Deserialize}; + +use crate::analyzer::security::{SecurityFinding, SecuritySeverity, SecurityCategory}; +use super::SecurityError; + +/// Security analysis report with comprehensive metrics +#[derive(Debug, Serialize, Deserialize)] +pub struct SecurityReport { + pub analyzed_at: DateTime, + pub scan_duration: Duration, + pub overall_score: f32, + pub risk_level: SecuritySeverity, + pub total_findings: usize, + pub files_scanned: usize, + pub findings_by_severity: HashMap, + pub findings_by_category: HashMap, + pub findings: Vec, + pub recommendations: Vec, + pub performance_metrics: PerformanceMetrics, +} + +/// Performance metrics for the scan +#[derive(Debug, Serialize, Deserialize)] +pub struct PerformanceMetrics { + pub total_duration: Duration, + pub file_discovery_time: Duration, + pub pattern_matching_time: Duration, + pub files_per_second: f64, 
+ pub cache_hit_rate: f64, + pub memory_usage_mb: f64, +} + +/// Result aggregator for combining and processing findings +pub struct ResultAggregator; + +impl ResultAggregator { + /// Aggregate findings into a comprehensive report + pub fn aggregate(mut findings: Vec, scan_duration: Duration) -> SecurityReport { + // Deduplicate findings + findings = Self::deduplicate_findings(findings); + + // Sort by severity (critical first) + findings.sort_by_key(|f| std::cmp::Reverse(severity_to_number(&f.severity))); + + // Calculate metrics + let total_findings = findings.len(); + let findings_by_severity = Self::count_by_severity(&findings); + let findings_by_category = Self::count_by_category(&findings); + let overall_score = Self::calculate_security_score(&findings); + let risk_level = Self::determine_risk_level(&findings); + + // Generate recommendations + let recommendations = Self::generate_recommendations(&findings); + + // Create performance metrics (placeholder values for now) + let performance_metrics = PerformanceMetrics { + total_duration: scan_duration, + file_discovery_time: Duration::from_millis(0), // TODO: Track actual time + pattern_matching_time: Duration::from_millis(0), // TODO: Track actual time + files_per_second: 0.0, // TODO: Calculate actual rate + cache_hit_rate: 0.0, // TODO: Get from cache stats + memory_usage_mb: 0.0, // TODO: Track memory usage + }; + + SecurityReport { + analyzed_at: Utc::now(), + scan_duration, + overall_score, + risk_level, + total_findings, + files_scanned: 0, // TODO: Track actual count + findings_by_severity, + findings_by_category, + findings, + recommendations, + performance_metrics, + } + } + + /// Create an empty report + pub fn empty() -> SecurityReport { + SecurityReport { + analyzed_at: Utc::now(), + scan_duration: Duration::from_secs(0), + overall_score: 100.0, + risk_level: SecuritySeverity::Info, + total_findings: 0, + files_scanned: 0, + findings_by_severity: HashMap::new(), + findings_by_category: 
HashMap::new(), + findings: Vec::new(), + recommendations: vec!["No security issues detected.".to_string()], + performance_metrics: PerformanceMetrics { + total_duration: Duration::from_secs(0), + file_discovery_time: Duration::from_secs(0), + pattern_matching_time: Duration::from_secs(0), + files_per_second: 0.0, + cache_hit_rate: 0.0, + memory_usage_mb: 0.0, + }, + } + } + + /// Deduplicate findings based on content similarity + fn deduplicate_findings(findings: Vec) -> Vec { + let mut seen: AHashMap = AHashMap::new(); + + for finding in findings { + // Create a deduplication key + let key = format!( + "{}-{}-{}-{}", + finding.id, + finding.file_path.as_ref().map(|p| p.display().to_string()).unwrap_or_default(), + finding.line_number.unwrap_or(0), + finding.title + ); + + // Keep the finding with the highest severity + match seen.get(&key) { + Some(existing) if severity_to_number(&existing.severity) >= severity_to_number(&finding.severity) => { + // Keep existing + } + _ => { + seen.insert(key, finding); + } + } + } + + seen.into_values().collect() + } + + /// Count findings by severity + fn count_by_severity(findings: &[SecurityFinding]) -> HashMap { + let mut counts = HashMap::new(); + for finding in findings { + *counts.entry(finding.severity.clone()).or_insert(0) += 1; + } + counts + } + + /// Count findings by category + fn count_by_category(findings: &[SecurityFinding]) -> HashMap { + let mut counts = HashMap::new(); + for finding in findings { + *counts.entry(finding.category.clone()).or_insert(0) += 1; + } + counts + } + + /// Calculate overall security score (0-100) + fn calculate_security_score(findings: &[SecurityFinding]) -> f32 { + if findings.is_empty() { + return 100.0; + } + + let total_penalty: f32 = findings.iter().map(|f| match f.severity { + SecuritySeverity::Critical => 25.0, + SecuritySeverity::High => 15.0, + SecuritySeverity::Medium => 8.0, + SecuritySeverity::Low => 3.0, + SecuritySeverity::Info => 1.0, + }).sum(); + + (100.0 - 
total_penalty).max(0.0) + } + + /// Determine overall risk level + fn determine_risk_level(findings: &[SecurityFinding]) -> SecuritySeverity { + if findings.iter().any(|f| f.severity == SecuritySeverity::Critical) { + SecuritySeverity::Critical + } else if findings.iter().any(|f| f.severity == SecuritySeverity::High) { + SecuritySeverity::High + } else if findings.iter().any(|f| f.severity == SecuritySeverity::Medium) { + SecuritySeverity::Medium + } else if !findings.is_empty() { + SecuritySeverity::Low + } else { + SecuritySeverity::Info + } + } + + /// Generate recommendations based on findings + fn generate_recommendations(findings: &[SecurityFinding]) -> Vec { + let mut recommendations = Vec::new(); + + // Check for unprotected secrets + if findings.iter().any(|f| f.category == SecurityCategory::SecretsExposure && !f.file_path.as_ref().map(|p| p.to_string_lossy().contains(".gitignore")).unwrap_or(false)) { + recommendations.push("πŸ” Implement comprehensive secret management:".to_string()); + recommendations.push(" β€’ Add sensitive files to .gitignore immediately".to_string()); + recommendations.push(" β€’ Use environment variables for all secrets".to_string()); + recommendations.push(" β€’ Consider using a secure vault service (e.g., HashiCorp Vault)".to_string()); + } + + // Check for critical findings + let critical_count = findings.iter().filter(|f| f.severity == SecuritySeverity::Critical).count(); + if critical_count > 0 { + recommendations.push(format!("🚨 Address {} CRITICAL security issues immediately", critical_count)); + recommendations.push(" β€’ Review and rotate any exposed credentials".to_string()); + recommendations.push(" β€’ Check git history for committed secrets".to_string()); + } + + // Framework-specific recommendations + if findings.iter().any(|f| f.description.contains("React") || f.description.contains("Next.js")) { + recommendations.push("βš›οΈ React/Next.js Security:".to_string()); + recommendations.push(" β€’ Use NEXT_PUBLIC_ 
prefix only for truly public values".to_string()); + recommendations.push(" β€’ Keep sensitive API keys server-side only".to_string()); + } + + // Database security + if findings.iter().any(|f| f.title.contains("Database") || f.title.contains("SQL")) { + recommendations.push("πŸ—„οΈ Database Security:".to_string()); + recommendations.push(" β€’ Use connection pooling with encrypted credentials".to_string()); + recommendations.push(" β€’ Implement least-privilege database access".to_string()); + recommendations.push(" β€’ Enable SSL/TLS for database connections".to_string()); + } + + // General best practices + recommendations.push("\nπŸ“‹ General Security Best Practices:".to_string()); + recommendations.push(" β€’ Enable automated security scanning in CI/CD".to_string()); + recommendations.push(" β€’ Regularly update dependencies".to_string()); + recommendations.push(" β€’ Implement security headers".to_string()); + recommendations.push(" β€’ Use HTTPS everywhere".to_string()); + + recommendations + } +} + +/// Convert severity to numeric value for sorting +fn severity_to_number(severity: &SecuritySeverity) -> u8 { + match severity { + SecuritySeverity::Critical => 5, + SecuritySeverity::High => 4, + SecuritySeverity::Medium => 3, + SecuritySeverity::Low => 2, + SecuritySeverity::Info => 1, + } +} + +impl SecurityReport { + /// Create an empty report + pub fn empty() -> Self { + ResultAggregator::empty() + } + + /// Get a summary of the report + pub fn summary(&self) -> String { + format!( + "Security Score: {:.0}/100 | Risk: {:?} | Findings: {} | Duration: {:.1}s", + self.overall_score, + self.risk_level, + self.total_findings, + self.scan_duration.as_secs_f64() + ) + } + + /// Check if the scan found any critical issues + pub fn has_critical_issues(&self) -> bool { + self.findings_by_severity.get(&SecuritySeverity::Critical) + .map(|&count| count > 0) + .unwrap_or(false) + } + + /// Get findings filtered by severity + pub fn findings_by_severity_level(&self, 
severity: SecuritySeverity) -> Vec<&SecurityFinding> { + self.findings.iter() + .filter(|f| f.severity == severity) + .collect() + } + + /// Export report as JSON + pub fn to_json(&self) -> Result { + serde_json::to_string_pretty(&self) + .map_err(|e| SecurityError::Cache(format!("Failed to serialize report: {}", e))) + } + + /// Export report as SARIF (Static Analysis Results Interchange Format) + pub fn to_sarif(&self) -> Result { + // TODO: Implement SARIF export for GitHub integration + Err(SecurityError::Cache("SARIF export not yet implemented".to_string())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn test_result_aggregation() { + let findings = vec![ + SecurityFinding { + id: "test-1".to_string(), + title: "Critical Finding".to_string(), + description: "Test critical".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + file_path: Some(PathBuf::from("test.js")), + line_number: Some(10), + column_number: Some(5), + evidence: None, + remediation: vec![], + references: vec![], + cwe_id: None, + compliance_frameworks: vec![], + }, + SecurityFinding { + id: "test-2".to_string(), + title: "Medium Finding".to_string(), + description: "Test medium".to_string(), + severity: SecuritySeverity::Medium, + category: SecurityCategory::InsecureConfiguration, + file_path: Some(PathBuf::from("config.json")), + line_number: Some(20), + column_number: Some(1), + evidence: None, + remediation: vec![], + references: vec![], + cwe_id: None, + compliance_frameworks: vec![], + }, + ]; + + let report = ResultAggregator::aggregate(findings, Duration::from_secs(5)); + + assert_eq!(report.total_findings, 2); + assert_eq!(report.risk_level, SecuritySeverity::Critical); + assert!(report.overall_score < 100.0); + assert!(!report.recommendations.is_empty()); + } + + #[test] + fn test_deduplication() { + let findings = vec![ + SecurityFinding { + id: "dup-1".to_string(), + title: "Duplicate 
Finding".to_string(), + description: "Test".to_string(), + severity: SecuritySeverity::High, + category: SecurityCategory::SecretsExposure, + file_path: Some(PathBuf::from("test.js")), + line_number: Some(10), + column_number: Some(5), + evidence: None, + remediation: vec![], + references: vec![], + cwe_id: None, + compliance_frameworks: vec![], + }, + SecurityFinding { + id: "dup-1".to_string(), + title: "Duplicate Finding".to_string(), + description: "Test".to_string(), + severity: SecuritySeverity::Medium, // Lower severity + category: SecurityCategory::SecretsExposure, + file_path: Some(PathBuf::from("test.js")), + line_number: Some(10), + column_number: Some(5), + evidence: None, + remediation: vec![], + references: vec![], + cwe_id: None, + compliance_frameworks: vec![], + }, + ]; + + let deduplicated = ResultAggregator::deduplicate_findings(findings); + assert_eq!(deduplicated.len(), 1); + assert_eq!(deduplicated[0].severity, SecuritySeverity::High); // Should keep higher severity + } + + #[test] + fn test_security_score_calculation() { + let findings = vec![ + SecurityFinding { + id: "test".to_string(), + title: "Test".to_string(), + description: "Test".to_string(), + severity: SecuritySeverity::Critical, + category: SecurityCategory::SecretsExposure, + file_path: None, + line_number: None, + column_number: None, + evidence: None, + remediation: vec![], + references: vec![], + cwe_id: None, + compliance_frameworks: vec![], + }, + ]; + + let score = ResultAggregator::calculate_security_score(&findings); + assert_eq!(score, 75.0); // 100 - 25 (critical penalty) + } +} \ No newline at end of file diff --git a/src/analyzer/security/turbo/scanner.rs b/src/analyzer/security/turbo/scanner.rs new file mode 100644 index 00000000..86179a5a --- /dev/null +++ b/src/analyzer/security/turbo/scanner.rs @@ -0,0 +1,447 @@ +//! # Scanner Module +//! +//! High-performance file scanning with memory-mapped I/O and parallel processing. 
+ +use std::path::PathBuf; +use std::sync::Arc; +use std::fs::File; +use std::io::{self, Read, BufReader}; + +use memmap2::MmapOptions; +use crossbeam::channel::{Receiver, Sender}; +use parking_lot::{Mutex, RwLock}; +use log::{debug, trace, warn}; + +use super::file_discovery::FileMetadata; +use super::pattern_engine::{PatternEngine, PatternMatch}; +use super::cache::SecurityCache; +use crate::analyzer::security::{SecurityFinding, SecuritySeverity, SecurityCategory}; + +/// Scan task for a worker thread +#[derive(Debug)] +pub struct ScanTask { + pub id: usize, + pub file: FileMetadata, + pub quick_reject: bool, +} + +/// Scan result from a worker thread +#[derive(Debug)] +pub enum ScanResult { + Findings(Vec), + Skipped, + Error(String), +} + +/// File scanner worker +pub struct FileScanner { + thread_id: usize, + pattern_engine: Arc, + cache: Arc, + use_mmap: bool, +} + +impl FileScanner { + pub fn new( + thread_id: usize, + pattern_engine: Arc, + cache: Arc, + use_mmap: bool, + ) -> Self { + Self { + thread_id, + pattern_engine, + cache, + use_mmap, + } + } + + /// Run the scanner worker + pub fn run( + &self, + task_receiver: Receiver, + result_sender: Sender, + critical_count: Arc>, + should_terminate: Arc>, + max_critical: Option, + ) { + debug!("Scanner thread {} started", self.thread_id); + + while let Ok(task) = task_receiver.recv() { + // Check for early termination + if *should_terminate.read() { + debug!("Scanner thread {} terminating early", self.thread_id); + break; + } + + // Process the scan task + let result = self.scan_file(task); + + // Check for critical findings + if let ScanResult::Findings(ref findings) = result { + let critical_findings = findings.iter() + .filter(|f| f.severity == SecuritySeverity::Critical) + .count(); + + if critical_findings > 0 { + let mut count = critical_count.lock(); + *count += critical_findings; + + if let Some(max) = max_critical { + if *count >= max { + *should_terminate.write() = true; + debug!("Critical findings 
limit reached, triggering early termination"); + } + } + } + } + + // Send result + if result_sender.send(result).is_err() { + break; // Channel closed + } + } + + debug!("Scanner thread {} finished", self.thread_id); + } + + /// Scan a single file + fn scan_file(&self, task: ScanTask) -> ScanResult { + trace!("Thread {} scanning: {}", self.thread_id, task.file.path.display()); + + // Check cache first + if let Some(cached_result) = self.cache.get(&task.file.path) { + trace!("Cache hit for: {}", task.file.path.display()); + return ScanResult::Findings(cached_result); + } + + // Read file content + let content = match self.read_file_content(&task.file) { + Ok(content) => content, + Err(e) => { + warn!("Failed to read file {}: {}", task.file.path.display(), e); + return ScanResult::Error(e.to_string()); + } + }; + + // Skip if content is empty + if content.is_empty() { + return ScanResult::Skipped; + } + + // Scan content for patterns + let matches = self.pattern_engine.scan_content(&content, task.quick_reject); + + // Convert matches to findings + let findings = self.convert_matches_to_findings(matches, &task.file); + + // Cache the result + self.cache.insert(task.file.path.clone(), findings.clone()); + + ScanResult::Findings(findings) + } + + /// Read file content with optimal method + fn read_file_content(&self, file_meta: &FileMetadata) -> io::Result { + // Use memory mapping for larger files if enabled + if self.use_mmap && file_meta.size > 4096 { + self.read_file_mmap(&file_meta.path) + } else { + self.read_file_buffered(&file_meta.path) + } + } + + /// Read file using memory mapping + fn read_file_mmap(&self, path: &PathBuf) -> io::Result { + let file = File::open(path)?; + let mmap = unsafe { MmapOptions::new().map(&file)? 
}; + + // Validate UTF-8 using SIMD if available + match simdutf8::basic::from_utf8(&mmap) { + Ok(content) => Ok(content.to_string()), + Err(_) => { + // Fallback to lossy conversion for non-UTF8 files + Ok(String::from_utf8_lossy(&mmap).to_string()) + } + } + } + + /// Read file using buffered I/O + fn read_file_buffered(&self, path: &PathBuf) -> io::Result { + let file = File::open(path)?; + let mut reader = BufReader::with_capacity(8192, file); + let mut content = String::new(); + reader.read_to_string(&mut content)?; + Ok(content) + } + + /// Convert pattern matches to security findings + fn convert_matches_to_findings(&self, matches: Vec, file_meta: &FileMetadata) -> Vec { + matches.into_iter() + .map(|match_| { + SecurityFinding { + id: format!("{}-{}-{}", match_.pattern.id, file_meta.path.display(), match_.line_number), + title: match_.pattern.name.clone(), + description: self.enhance_description(&match_.pattern.description, file_meta), + severity: self.adjust_severity(&match_.pattern.severity, file_meta, match_.confidence), + category: match_.pattern.category.clone(), + file_path: Some(file_meta.path.clone()), + line_number: Some(match_.line_number), + column_number: Some(match_.column_number), + evidence: Some(match_.evidence), + remediation: match_.pattern.remediation.clone(), + references: match_.pattern.references.clone(), + cwe_id: match_.pattern.cwe_id.clone(), + compliance_frameworks: self.get_compliance_frameworks(&match_.pattern.category), + } + }) + .collect() + } + + /// Enhance description with file context and proper gitignore status + fn enhance_description(&self, base_description: &str, file_meta: &FileMetadata) -> String { + let mut description = base_description.to_string(); + + // Add comprehensive gitignore context for status determination + if file_meta.is_gitignored { + // File is properly protected + if file_meta.priority_hints.is_env_file || + file_meta.priority_hints.is_config_file || + 
base_description.to_lowercase().contains("secret") || + base_description.to_lowercase().contains("key") || + base_description.to_lowercase().contains("token") { + description.push_str(" (File is protected by .gitignore)"); + } else { + description.push_str(" (File appears safe for version control)"); + } + } else { + // File is NOT gitignored - determine risk level + if self.file_contains_secrets(file_meta) { + // Check if tracked by git using git command + if self.is_file_tracked_by_git(&file_meta.path) { + description.push_str(" (File is tracked by git and may expose secrets in version history - CRITICAL RISK)"); + } else { + description.push_str(" (File is NOT in .gitignore but contains secrets - HIGH RISK)"); + } + } else { + description.push_str(" (File appears safe for version control)"); + } + } + + // Add file type context + if file_meta.priority_hints.is_env_file { + description.push_str(" [Environment file]"); + } else if file_meta.priority_hints.is_config_file { + description.push_str(" [Configuration file]"); + } + + description + } + + /// Check if file likely contains secrets based on patterns + fn file_contains_secrets(&self, file_meta: &FileMetadata) -> bool { + // Check file name patterns + if let Some(file_name) = file_meta.path.file_name().and_then(|n| n.to_str()) { + let file_name_lower = file_name.to_lowercase(); + let secret_file_patterns = [ + ".env", ".key", ".pem", ".p12", ".pfx", + "id_rsa", "id_dsa", "id_ecdsa", "id_ed25519", + "credentials", "secrets", "private", "secret.json", + "service-account", "auth.json", "config.json" + ]; + + if secret_file_patterns.iter().any(|pattern| file_name_lower.contains(pattern)) { + return true; + } + } + + // Check if it's a priority file (likely to contain secrets) + file_meta.priority_hints.is_env_file || + file_meta.priority_hints.is_config_file || + file_meta.is_critical() + } + + /// Check if file is tracked by git + fn is_file_tracked_by_git(&self, file_path: &std::path::PathBuf) -> bool { + use 
std::process::Command; + + Command::new("git") + .args(&["ls-files", "--error-unmatch"]) + .arg(file_path) + .output() + .map(|output| output.status.success()) + .unwrap_or(false) + } + + /// Adjust severity based on context + fn adjust_severity(&self, base_severity: &SecuritySeverity, file_meta: &FileMetadata, confidence: f32) -> SecuritySeverity { + let mut severity = base_severity.clone(); + + // Upgrade severity for unprotected files + if !file_meta.is_gitignored && matches!(severity, SecuritySeverity::Medium | SecuritySeverity::High) { + severity = match severity { + SecuritySeverity::Medium => SecuritySeverity::High, + SecuritySeverity::High => SecuritySeverity::Critical, + _ => severity, + }; + } + + // Downgrade for low confidence + if confidence < 0.5 && matches!(severity, SecuritySeverity::High | SecuritySeverity::Critical) { + severity = match severity { + SecuritySeverity::Critical => SecuritySeverity::High, + SecuritySeverity::High => SecuritySeverity::Medium, + _ => severity, + }; + } + + severity + } + + /// Get compliance frameworks based on category + fn get_compliance_frameworks(&self, category: &SecurityCategory) -> Vec { + match category { + SecurityCategory::SecretsExposure => vec!["SOC2".to_string(), "GDPR".to_string(), "PCI-DSS".to_string()], + SecurityCategory::InsecureConfiguration => vec!["SOC2".to_string(), "OWASP".to_string()], + SecurityCategory::AuthenticationSecurity => vec!["SOC2".to_string(), "OWASP".to_string()], + SecurityCategory::DataProtection => vec!["GDPR".to_string(), "CCPA".to_string()], + _ => vec!["SOC2".to_string()], + } + } +} + +/// Specialized scanner for .env files +pub struct EnvFileScanner; + +impl EnvFileScanner { + /// Fast scan of .env files without regex + pub fn scan_env_file(path: &PathBuf) -> Result, io::Error> { + let content = std::fs::read_to_string(path)?; + let mut findings = Vec::new(); + + for (line_num, line) in content.lines().enumerate() { + let line = line.trim(); + + // Skip comments and empty 
lines + if line.is_empty() || line.starts_with('#') { + continue; + } + + // Parse key=value pairs + if let Some(eq_pos) = line.find('=') { + let key = &line[..eq_pos].trim(); + let value = &line[eq_pos + 1..].trim_matches('"').trim_matches('\''); + + // Check for sensitive keys with actual values + if is_sensitive_env_key(key) && !value.is_empty() && !is_placeholder_value(value) { + findings.push(SecurityFinding { + id: format!("env-secret-{}-{}", path.display(), line_num), + title: format!("Sensitive Environment Variable: {}", key), + description: format!("Environment variable '{}' contains a potentially sensitive value", key), + severity: determine_env_severity(key, value), + category: SecurityCategory::SecretsExposure, + file_path: Some(path.clone()), + line_number: Some(line_num + 1), + column_number: Some(eq_pos + 1), + evidence: Some(format!("{}=***", key)), + remediation: vec![ + "Ensure .env files are in .gitignore".to_string(), + "Use .env.example for documentation".to_string(), + "Consider using a secure secret management service".to_string(), + ], + references: vec![ + "https://12factor.net/config".to_string(), + ], + cwe_id: Some("CWE-798".to_string()), + compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()], + }); + } + } + } + + Ok(findings) + } +} + +/// Check if an environment variable key is sensitive +fn is_sensitive_env_key(key: &str) -> bool { + let key_upper = key.to_uppercase(); + let sensitive_patterns = [ + "PASSWORD", "SECRET", "KEY", "TOKEN", "API", "AUTH", + "PRIVATE", "CREDENTIAL", "ACCESS", "CLIENT", "STRIPE", + "AWS", "GOOGLE", "AZURE", "DATABASE", "DB_", "JWT", + ]; + + sensitive_patterns.iter().any(|pattern| key_upper.contains(pattern)) +} + +/// Check if a value is likely a placeholder +fn is_placeholder_value(value: &str) -> bool { + let placeholders = [ + "your_", "change_me", "xxx", "placeholder", "example", + "test", "demo", "fake", "dummy", "<", ">", "${", "}", + ]; + + let value_lower = value.to_lowercase(); + 
placeholders.iter().any(|p| value_lower.contains(p)) +} + +/// Determine severity based on the type of secret +fn determine_env_severity(key: &str, _value: &str) -> SecuritySeverity { + let key_upper = key.to_uppercase(); + + // Critical: API keys, database credentials + if key_upper.contains("DATABASE") || key_upper.contains("DB_PASS") || + key_upper.contains("AWS_SECRET") || key_upper.contains("STRIPE_SECRET") { + return SecuritySeverity::Critical; + } + + // High: Most API keys and secrets + if key_upper.contains("API") || key_upper.contains("SECRET") || + key_upper.contains("PRIVATE") || key_upper.contains("TOKEN") { + return SecuritySeverity::High; + } + + // Medium: General passwords and auth + if key_upper.contains("PASSWORD") || key_upper.contains("AUTH") { + return SecuritySeverity::Medium; + } + + SecuritySeverity::Low +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + use std::fs; + + #[test] + fn test_env_file_scanner() { + let temp_dir = TempDir::new().unwrap(); + let env_file = temp_dir.path().join(".env"); + + fs::write(&env_file, r#" +# Database config +DATABASE_URL=postgres://user:password@localhost/db +API_KEY=sk-1234567890abcdef +PUBLIC_URL=https://example.com +TEST_VAR=placeholder_value +"#).unwrap(); + + let findings = EnvFileScanner::scan_env_file(&env_file).unwrap(); + + // Should find DATABASE_URL and API_KEY but not PUBLIC_URL or TEST_VAR + assert_eq!(findings.len(), 2); + assert!(findings.iter().any(|f| f.title.contains("DATABASE_URL"))); + assert!(findings.iter().any(|f| f.title.contains("API_KEY"))); + } + + #[test] + fn test_placeholder_detection() { + assert!(is_placeholder_value("your_api_key_here")); + assert!(is_placeholder_value("")); + assert!(is_placeholder_value("xxx")); + assert!(!is_placeholder_value("sk-1234567890")); + } +} \ No newline at end of file diff --git a/src/analyzer/security_analyzer.rs b/src/analyzer/security_analyzer.rs index 39bbed7f..6e0e6a4c 100644 --- 
a/src/analyzer/security_analyzer.rs +++ b/src/analyzer/security_analyzer.rs @@ -21,10 +21,7 @@ use indicatif::{ProgressBar, ProgressStyle, MultiProgress}; use crate::analyzer::{ProjectAnalysis, DetectedLanguage, DetectedTechnology, EnvVar}; use crate::analyzer::dependency_parser::Language; -use crate::analyzer::security::{ - ModularSecurityAnalyzer, SecurityAnalysisConfig as NewSecurityAnalysisConfig -}; -use crate::analyzer::security::core::SecurityReport as NewSecurityReport; + #[derive(Debug, Error)] pub enum SecurityError { @@ -214,37 +211,7 @@ impl SecurityAnalyzer { }) } - /// Enhanced security analysis using the new modular approach - pub fn analyze_security_enhanced(&mut self, analysis: &ProjectAnalysis) -> Result { - let start_time = Instant::now(); - info!("Starting enhanced modular security analysis"); - - // Create modular analyzer with JavaScript-specific configuration if JS/TS is detected - let has_javascript = analysis.languages.iter() - .any(|lang| matches!(lang.name.as_str(), "JavaScript" | "TypeScript" | "JSX" | "TSX")); - - let config = if has_javascript { - NewSecurityAnalysisConfig::for_javascript() - } else { - NewSecurityAnalysisConfig::default() - }; - - let mut modular_analyzer = ModularSecurityAnalyzer::with_config(config) - .map_err(|e| SecurityError::AnalysisFailed(e.to_string()))?; - - // Use the modular analyzer - let enhanced_report = modular_analyzer.analyze_project(&analysis.project_root, &analysis.languages) - .map_err(|e| SecurityError::AnalysisFailed(e.to_string()))?; - - // For now, just return the enhanced report as-is - // TODO: Combine with existing findings if needed - - // Build final report - let duration = start_time.elapsed().as_secs_f32(); - info!("Enhanced security analysis completed in {:.1}s - Found {} issues", duration, enhanced_report.total_findings); - - Ok(enhanced_report) - } + /// Perform comprehensive security analysis with appropriate progress for verbosity level pub fn analyze_security(&mut self, analysis: 
&ProjectAnalysis) -> Result { diff --git a/src/cli.rs b/src/cli.rs index 36a9813f..7e1f2d8a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -173,6 +173,10 @@ pub enum Commands { #[arg(value_name = "PROJECT_PATH", default_value = ".")] path: PathBuf, + /// Security scan mode (lightning, fast, balanced, thorough, paranoid) + #[arg(long, value_enum, default_value = "thorough")] + mode: SecurityScanMode, + /// Include low severity findings #[arg(long)] include_low: bool, @@ -296,6 +300,20 @@ pub enum SeverityThreshold { Critical, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] +pub enum SecurityScanMode { + /// Lightning fast scan - critical files only (.env, configs) + Lightning, + /// Fast scan - smart sampling with priority patterns + Fast, + /// Balanced scan - good coverage with performance optimizations (recommended) + Balanced, + /// Thorough scan - comprehensive analysis of all files + Thorough, + /// Paranoid scan - most comprehensive including low-severity findings + Paranoid, +} + impl Cli { /// Initialize logging based on verbosity level pub fn init_logging(&self) { diff --git a/src/main.rs b/src/main.rs index 272b700d..53ca6c06 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,12 +4,15 @@ use syncable_cli::{ self, vulnerability_checker::VulnerabilitySeverity, DetectedTechnology, TechnologyCategory, LibraryType, analyze_monorepo, ProjectCategory, // Import new modular security types - security::SecuritySeverity, + security::{TurboSecurityAnalyzer, TurboConfig, ScanMode}, }, - cli::{Cli, Commands, ToolsCommand, OutputFormat, SeverityThreshold, DisplayFormat}, + cli::{Cli, Commands, ToolsCommand, OutputFormat, SeverityThreshold, DisplayFormat, SecurityScanMode}, config, generator, }; + +// Use alias for the turbo SecuritySeverity to avoid conflicts +use syncable_cli::analyzer::security::SecuritySeverity as TurboSecuritySeverity; use syncable_cli::analyzer::display::{display_analysis, DisplayMode, BoxDrawer}; use std::process; use 
std::collections::HashMap; @@ -80,6 +83,7 @@ async fn run() -> syncable_cli::Result<()> { } Commands::Security { path, + mode, include_low, no_secrets, no_code_patterns, @@ -92,6 +96,7 @@ async fn run() -> syncable_cli::Result<()> { } => { handle_security( path, + mode, include_low, no_secrets, no_code_patterns, @@ -1065,6 +1070,7 @@ fn display_technologies_summary(technologies: &[DetectedTechnology]) { fn handle_security( path: std::path::PathBuf, + mode: SecurityScanMode, include_low: bool, no_secrets: bool, no_code_patterns: bool, @@ -1075,98 +1081,72 @@ fn handle_security( output: Option, fail_on_findings: bool, ) -> syncable_cli::Result<()> { - use syncable_cli::analyzer::{SecurityAnalyzer, SecurityAnalysisConfig}; - use indicatif::{ProgressBar, ProgressStyle}; - use std::time::Duration; - use std::thread; - let project_path = path.canonicalize() .unwrap_or_else(|_| path.clone()); - // Create beautiful progress indicator - let progress = ProgressBar::new(100); - progress.set_style( - ProgressStyle::default_bar() - .template("πŸ›‘οΈ {msg} [{elapsed_precise}] {bar:40.cyan/blue} {pos:>3}/{len:3} {percent}%") - .unwrap() - .progress_chars("β–°β–±") - ); - - // Step 1: Project Analysis - progress.set_message("Analyzing project structure..."); - progress.set_position(10); - let project_analysis = analyzer::analyze_project(&project_path)?; - thread::sleep(Duration::from_millis(200)); - - // Step 2: Security Configuration - progress.set_message("Configuring security scanners..."); - progress.set_position(20); - let config = SecurityAnalysisConfig { - include_low_severity: include_low, - check_secrets: !no_secrets, - check_code_patterns: !no_code_patterns, - check_infrastructure: !no_infrastructure, - check_compliance: !no_compliance, - frameworks_to_check: frameworks.clone(), - ignore_patterns: vec![ - "node_modules".to_string(), - ".git".to_string(), - "target".to_string(), - "build".to_string(), - ".next".to_string(), - "dist".to_string(), + println!("πŸ›‘οΈ 
Running security analysis on: {}", project_path.display()); + + // Convert CLI mode to internal ScanMode, with flag overrides + let scan_mode = if no_secrets && no_code_patterns { + // Override: if both secrets and code patterns are disabled, use lightning + ScanMode::Lightning + } else if include_low { + // Override: if including low findings, force paranoid mode + ScanMode::Paranoid + } else { + // Use the requested mode from CLI + match mode { + SecurityScanMode::Lightning => ScanMode::Lightning, + SecurityScanMode::Fast => ScanMode::Fast, + SecurityScanMode::Balanced => ScanMode::Balanced, + SecurityScanMode::Thorough => ScanMode::Thorough, + SecurityScanMode::Paranoid => ScanMode::Paranoid, + } + }; + + // Configure turbo analyzer + let config = TurboConfig { + scan_mode, + max_file_size: 10 * 1024 * 1024, // 10MB + worker_threads: 0, // Auto-detect + use_mmap: true, + enable_cache: true, + cache_size_mb: 100, + max_critical_findings: if fail_on_findings { Some(1) } else { None }, + timeout_seconds: Some(60), + skip_gitignored: true, + priority_extensions: vec![ + "env".to_string(), "key".to_string(), "pem".to_string(), + "json".to_string(), "yml".to_string(), "yaml".to_string(), + "toml".to_string(), "ini".to_string(), "conf".to_string(), + "config".to_string(), "js".to_string(), "ts".to_string(), + "py".to_string(), "rs".to_string(), "go".to_string(), ], - skip_gitignored_files: true, - downgrade_gitignored_severity: false, + pattern_sets: if no_secrets { + vec![] + } else { + vec!["default".to_string(), "aws".to_string(), "gcp".to_string()] + }, }; - thread::sleep(Duration::from_millis(300)); - // Step 3: Security Scanner Initialization - progress.set_message("Initializing security analyzer..."); - progress.set_position(30); - let mut security_analyzer = SecurityAnalyzer::with_config(config) + // Initialize and run analyzer + let analyzer = TurboSecurityAnalyzer::new(config) .map_err(|e| syncable_cli::error::IaCGeneratorError::Analysis( 
syncable_cli::error::AnalysisError::InvalidStructure( - format!("Failed to create security analyzer: {}", e) + format!("Failed to create turbo security analyzer: {}", e) ) ))?; - thread::sleep(Duration::from_millis(200)); - - // Step 4: Secret Detection - if !no_secrets { - progress.set_message("Scanning for exposed secrets..."); - progress.set_position(50); - thread::sleep(Duration::from_millis(500)); - } - // Step 5: Code Pattern Analysis - if !no_code_patterns { - progress.set_message("Analyzing code security patterns..."); - progress.set_position(70); - thread::sleep(Duration::from_millis(400)); - } - - // Step 6: Environment Variables (always runs) - progress.set_message("Analyzing environment variables..."); - progress.set_position(85); - thread::sleep(Duration::from_millis(200)); - - // Step 7: Final processing - progress.set_message("Finalizing analysis..."); - progress.set_position(95); - thread::sleep(Duration::from_millis(200)); - - // Step 8: Generating Report - progress.set_message("Generating security report..."); - progress.set_position(100); - let security_report = security_analyzer.analyze_security_enhanced(&project_analysis) + let start_time = std::time::Instant::now(); + let security_report = analyzer.analyze_project(&project_path) .map_err(|e| syncable_cli::error::IaCGeneratorError::Analysis( syncable_cli::error::AnalysisError::InvalidStructure( - format!("Enhanced security analysis failed: {}", e) + format!("Turbo security analysis failed: {}", e) ) ))?; + let scan_duration = start_time.elapsed(); - progress.finish_and_clear(); + println!("⚑ Scan completed in {:.2}s", scan_duration.as_secs_f64()); // Format output in the beautiful style requested let output_string = match format { @@ -1184,11 +1164,11 @@ fn handle_security( let mut score_box = BoxDrawer::new("Security Summary"); score_box.add_line("Overall Score:", &format!("{:.0}/100", security_report.overall_score).bright_yellow(), true); score_box.add_line("Risk Level:", &format!("{:?}", 
security_report.risk_level).color(match security_report.risk_level { - SecuritySeverity::Critical => "bright_red", - SecuritySeverity::High => "red", - SecuritySeverity::Medium => "yellow", - SecuritySeverity::Low => "green", - SecuritySeverity::Info => "blue", + TurboSecuritySeverity::Critical => "bright_red", + TurboSecuritySeverity::High => "red", + TurboSecuritySeverity::Medium => "yellow", + TurboSecuritySeverity::Low => "green", + TurboSecuritySeverity::Info => "blue", }), true); score_box.add_line("Total Findings:", &security_report.total_findings.to_string().cyan(), true); @@ -1198,7 +1178,7 @@ fn handle_security( .collect::>() .len(); score_box.add_line("Files Analyzed:", &config_files.max(1).to_string().green(), true); - score_box.add_line("Env Variables:", &project_analysis.environment_variables.len().to_string().green(), true); + score_box.add_line("Scan Mode:", &format!("{:?}", scan_mode).green(), true); output.push_str(&format!("\n{}\n", score_box.draw())); @@ -1215,11 +1195,11 @@ fn handle_security( for (i, finding) in security_report.findings.iter().enumerate() { let severity_color = match finding.severity { - SecuritySeverity::Critical => "bright_red", - SecuritySeverity::High => "red", - SecuritySeverity::Medium => "yellow", - SecuritySeverity::Low => "blue", - SecuritySeverity::Info => "green", + TurboSecuritySeverity::Critical => "bright_red", + TurboSecuritySeverity::High => "red", + TurboSecuritySeverity::Medium => "yellow", + TurboSecuritySeverity::Low => "blue", + TurboSecuritySeverity::Info => "green", }; // Extract relative file path from project root @@ -1427,10 +1407,10 @@ fn handle_security( // Exit with error code if requested and findings exist if fail_on_findings && security_report.total_findings > 0 { let critical_count = security_report.findings_by_severity - .get(&SecuritySeverity::Critical) + .get(&TurboSecuritySeverity::Critical) .unwrap_or(&0); let high_count = security_report.findings_by_severity - 
.get(&SecuritySeverity::High) + .get(&TurboSecuritySeverity::High) .unwrap_or(&0); if *critical_count > 0 {