diff --git a/src/commands/scan/handle-create-new-scan.mts b/src/commands/scan/handle-create-new-scan.mts index 800d37323..5c921ddd5 100644 --- a/src/commands/scan/handle-create-new-scan.mts +++ b/src/commands/scan/handle-create-new-scan.mts @@ -14,6 +14,7 @@ import { outputCreateNewScan } from './output-create-new-scan.mts' import { performReachabilityAnalysis } from './perform-reachability-analysis.mts' import constants from '../../constants.mts' import { checkCommandInput } from '../../utils/check-input.mts' +import { compressSocketFactsForUpload } from '../../utils/coana.mts' import { findSocketYmlSync } from '../../utils/config.mts' import { getPackageFilesForScan } from '../../utils/path-resolve.mts' import { readOrDefaultSocketJson } from '../../utils/socket-json.mts' @@ -259,28 +260,40 @@ export async function handleCreateNewScan({ tier1ReachabilityScanId = reachResult.data?.tier1ReachabilityScanId } - const fullScanCResult = await fetchCreateOrgFullScan( - scanPaths, - orgSlug, - { - commitHash, - commitMessage, - committers, - pullRequest, - repoName, - branchName, - scanType: reach.runReachabilityAnalysis - ? constants.SCAN_TYPE_SOCKET_TIER1 - : constants.SCAN_TYPE_SOCKET, - workspace, - }, - { - cwd, - defaultBranch, - pendingHead, - tmp, - }, - ) + // Brotli-compress any .socket.facts.json paths in scanPaths just before + // upload. depscan's api-v0 multipart boundary streams brotli decode based + // on the .br filename suffix. Coana keeps writing plain .socket.facts.json + // on disk, so the local read paths (extractTier1ReachabilityScanId, + // extractReachabilityErrors) stay correct. The cleanup() in the finally + // block removes the temp dirs whether the upload succeeded or threw. 
+ const compressed = await compressSocketFactsForUpload(scanPaths) + let fullScanCResult: Awaited<ReturnType<typeof fetchCreateOrgFullScan>> + try { + fullScanCResult = await fetchCreateOrgFullScan( + compressed.paths, + orgSlug, + { + commitHash, + commitMessage, + committers, + pullRequest, + repoName, + branchName, + scanType: reach.runReachabilityAnalysis + ? constants.SCAN_TYPE_SOCKET_TIER1 + : constants.SCAN_TYPE_SOCKET, + workspace, + }, + { + cwd, + defaultBranch, + pendingHead, + tmp, + }, + ) + } finally { + await compressed.cleanup() + } const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined diff --git a/src/utils/coana.mts b/src/utils/coana.mts index c80d66fe4..aa8800695 100644 --- a/src/utils/coana.mts +++ b/src/utils/coana.mts @@ -3,6 +3,11 @@ * Manages reachability analysis via Coana tech CLI. * * Key Functions: + * - compressSocketFactsForUpload: Brotli-compress any .socket.facts.json + * entries in scanPaths just before upload, returning swapped paths plus a + * cleanup callback. Coana keeps writing plain JSON; the on-the-wire form + * to depscan is brotli (api-v0 decodes at the multipart boundary).
+ * - extractReachabilityErrors: Extract per-component reachability errors * - extractTier1ReachabilityScanId: Extract scan ID from socket facts file * * Integration: @@ -11,8 +16,84 @@ * - Extracts tier 1 reachability scan identifiers */ +import { createReadStream, createWriteStream, existsSync } from 'node:fs' +import { rm } from 'node:fs/promises' +import path from 'node:path' +import { pipeline } from 'node:stream/promises' +import { createBrotliCompress } from 'node:zlib' + import { readJsonSync } from '@socketsecurity/registry/lib/fs' +import constants from '../constants.mts' + +const { DOT_SOCKET_DOT_FACTS_JSON } = constants + +export type CompressedScanPaths = { + paths: string[] + cleanup: () => Promise<void> +} + +/** + * For each `.socket.facts.json` in `scanPaths`, stream-brotli-compress a + * sibling `.socket.facts.json.br` next to the original file and swap its + * path in. Other paths pass through unchanged. Missing files also pass + * through unchanged (the upload will fail downstream with the same error + * it would have). + * + * Streaming + worker-thread compression keeps the event loop responsive: + * default brotli quality (11) on a 60+MB facts file takes multiple seconds + * of CPU, which would otherwise freeze the spinner / signal handlers / + * any concurrent work. + * + * The `.br` lives next to the source rather than under the OS temp dir + * because depscan's multipart ingest (`addStreamEntry`) rejects entries + * whose names contain `..` traversal segments. The SDK computes the + * multipart entry name via `path.relative(cwd, brPath)`, so an OS-tmpdir + * temp path turns into `../../../var/folders/...` and gets dropped as + * `unmatchedFiles`. Sibling-write keeps the relative path inside cwd, and + * keeps the directory shape symmetric with the plain `.socket.facts.json` + * upload (depscan strips only the `.br` suffix at ingest, so + * `<dir>/.socket.facts.json.br` and `<dir>/.socket.facts.json` resolve to + * the same storage path).
+ * + * Concurrent scans against the same source directory are already racy on + * `.socket.facts.json` itself (coana writes to a single path), so the + * sibling `.br` doesn't introduce a new race. + * + * Caller MUST `await cleanup()` (typically in a `finally` block) once the + * upload completes — successful or not — to remove the sibling files. + */ +export async function compressSocketFactsForUpload( + scanPaths: string[], +): Promise<CompressedScanPaths> { + const brPaths: string[] = [] + const paths = await Promise.all( + scanPaths.map(async p => { + if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) { + return p + } + if (!existsSync(p)) { + return p + } + const brPath = `${p}.br` + await pipeline( + createReadStream(p), + createBrotliCompress(), + createWriteStream(brPath), + ) + brPaths.push(brPath) + return brPath + }), + ) + const cleanup = async () => { + const targets = brPaths.splice(0) + await Promise.all( + targets.map(t => rm(t, { force: true })), + ) + } + return { paths, cleanup } +} + export type ReachabilityError = { componentName: string componentVersion: string diff --git a/src/utils/coana.test.mts b/src/utils/coana.test.mts new file mode 100644 index 000000000..7b2ad30f8 --- /dev/null +++ b/src/utils/coana.test.mts @@ -0,0 +1,276 @@ +/** + * Unit tests for Coana facts-file utilities. + * + * Test Coverage: + * - compressSocketFactsForUpload: swaps .socket.facts.json paths for + * brotli-compressed .br temps, leaves other paths alone, cleans up. + * - extractTier1ReachabilityScanId: plain JSON + edge cases. + * - extractReachabilityErrors: plain JSON + missing + malformed.
+ * + * Related Files: + * - utils/coana.mts (implementation) + */ + +import { + existsSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from 'node:fs' +import { tmpdir } from 'node:os' +import path from 'node:path' +import { brotliDecompressSync } from 'node:zlib' + +import { afterAll, beforeAll, describe, expect, it } from 'vitest' + +import { + compressSocketFactsForUpload, + extractReachabilityErrors, + extractTier1ReachabilityScanId, +} from './coana.mts' + +describe('coana facts-file utils', () => { + let tmpDir: string + + beforeAll(() => { + tmpDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-facts-')) + }) + + afterAll(() => { + rmSync(tmpDir, { recursive: true, force: true }) + }) + + function writePlain(name: string, body: unknown): string { + const filePath = path.join(tmpDir, name) + writeFileSync(filePath, JSON.stringify(body)) + return filePath + } + + describe('compressSocketFactsForUpload', () => { + it('writes brotli .br as a sibling of the source file', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const inputPath = path.join(wrapDir, '.socket.facts.json') + const payload = { tier1ReachabilityScanId: 'compress-test', a: 1, b: 2 } + writeFileSync(inputPath, JSON.stringify(payload)) + + try { + const result = await compressSocketFactsForUpload([inputPath]) + const swappedPath = result.paths[0]! + + expect(result.paths).toHaveLength(1) + expect(swappedPath).toBe(`${inputPath}.br`) + expect(existsSync(swappedPath)).toBe(true) + // The sibling file is real brotli that round-trips to the original + // JSON. + const roundTripped = brotliDecompressSync( + readFileSync(swappedPath), + ).toString('utf8') + expect(JSON.parse(roundTripped)).toEqual(payload) + + // Cleanup removes the sibling .br file but leaves the source intact. 
+ await result.cleanup() + expect(existsSync(swappedPath)).toBe(false) + expect(existsSync(inputPath)).toBe(true) + } finally { + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('leaves non-facts paths unchanged', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const lock = path.join(wrapDir, 'package-lock.json') + const pkg = path.join(wrapDir, 'package.json') + writeFileSync(lock, '{}') + writeFileSync(pkg, '{}') + + const result = await compressSocketFactsForUpload([lock, pkg]) + try { + expect(result.paths).toEqual([lock, pkg]) + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('leaves a missing .socket.facts.json path unchanged', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const missingFacts = path.join(wrapDir, '.socket.facts.json') + // Note: no writeFileSync — file does not exist. + + const result = await compressSocketFactsForUpload([missingFacts]) + try { + expect(result.paths).toEqual([missingFacts]) + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('mixes facts and non-facts entries correctly', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const facts = path.join(wrapDir, '.socket.facts.json') + const lock = path.join(wrapDir, 'package-lock.json') + writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'mix' })) + writeFileSync(lock, '{"name":"x"}') + + const result = await compressSocketFactsForUpload([lock, facts]) + try { + expect(result.paths[0]).toBe(lock) + expect(result.paths[1]).toBe(`${facts}.br`) + const roundTripped = JSON.parse( + brotliDecompressSync(readFileSync(result.paths[1]!)).toString('utf8'), + ) + expect(roundTripped.tier1ReachabilityScanId).toBe('mix') + } finally { + await result.cleanup() + rmSync(wrapDir, { recursive: true, force: true }) + } + }) + + it('cleanup 
is idempotent (safe to call twice)', async () => { + const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) + const facts = path.join(wrapDir, '.socket.facts.json') + writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'idem' })) + + const result = await compressSocketFactsForUpload([facts]) + await result.cleanup() + await expect(result.cleanup()).resolves.not.toThrow() + rmSync(wrapDir, { recursive: true, force: true }) + }) + }) + + describe('extractTier1ReachabilityScanId', () => { + it('reads scan ID from plain JSON file', () => { + const file = writePlain('plain-id.json', { + tier1ReachabilityScanId: 'scan-123', + }) + + expect(extractTier1ReachabilityScanId(file)).toBe('scan-123') + }) + + it('returns undefined for missing file', () => { + expect( + extractTier1ReachabilityScanId(path.join(tmpDir, 'missing.json')), + ).toBeUndefined() + }) + + it('returns undefined when tier1ReachabilityScanId is missing', () => { + const file = writePlain('missing-field.json', { otherField: 'value' }) + + expect(extractTier1ReachabilityScanId(file)).toBeUndefined() + }) + + it('returns undefined for null scan ID', () => { + const file = writePlain('null-id.json', { tier1ReachabilityScanId: null }) + + expect(extractTier1ReachabilityScanId(file)).toBeUndefined() + }) + + it('returns undefined for empty / whitespace scan ID', () => { + const blank = writePlain('blank-id.json', { + tier1ReachabilityScanId: ' ', + }) + const empty = writePlain('empty-id.json', { + tier1ReachabilityScanId: '', + }) + + expect(extractTier1ReachabilityScanId(blank)).toBeUndefined() + expect(extractTier1ReachabilityScanId(empty)).toBeUndefined() + }) + + it('trims whitespace from scan ID', () => { + const file = writePlain('padded-id.json', { + tier1ReachabilityScanId: ' scan-456 ', + }) + + expect(extractTier1ReachabilityScanId(file)).toBe('scan-456') + }) + + it('coerces numeric scan ID to string', () => { + const file = writePlain('numeric-id.json', { + 
tier1ReachabilityScanId: 12345, + }) + + expect(extractTier1ReachabilityScanId(file)).toBe('12345') + }) + }) + + describe('extractReachabilityErrors', () => { + const errorComponentsBody = { + components: [ + { + name: 'lodash', + version: '4.17.21', + reachability: [ + { + ghsa_id: 'GHSA-aaaa-bbbb-cccc', + reachability: [ + { type: 'error', subprojectPath: 'packages/web' }, + { type: 'reachable', subprojectPath: 'packages/api' }, + ], + }, + ], + }, + { + name: 'axios', + version: '1.4.0', + reachability: [ + { + ghsa_id: 'GHSA-xxxx-yyyy-zzzz', + reachability: [{ type: 'error', subprojectPath: 'packages/api' }], + }, + ], + }, + ], + } + + const expectedErrors = [ + { + componentName: 'lodash', + componentVersion: '4.17.21', + ghsaId: 'GHSA-aaaa-bbbb-cccc', + subprojectPath: 'packages/web', + }, + { + componentName: 'axios', + componentVersion: '1.4.0', + ghsaId: 'GHSA-xxxx-yyyy-zzzz', + subprojectPath: 'packages/api', + }, + ] + + it('extracts errors from plain JSON', () => { + const file = writePlain('errors-plain.json', errorComponentsBody) + + expect(extractReachabilityErrors(file)).toEqual(expectedErrors) + }) + + it('returns empty array for missing file', () => { + expect( + extractReachabilityErrors(path.join(tmpDir, 'missing-errors.json')), + ).toEqual([]) + }) + + it('returns empty array when components is missing', () => { + const file = writePlain('errors-no-components.json', { other: true }) + + expect(extractReachabilityErrors(file)).toEqual([]) + }) + + it('skips components with no reachability arrays', () => { + const file = writePlain('errors-skip.json', { + components: [ + { name: 'just-name', version: '1.0.0' }, + { + name: 'no-inner', + version: '1.0.0', + reachability: [{ ghsa_id: 'GHSA-1' }], + }, + ], + }) + + expect(extractReachabilityErrors(file)).toEqual([]) + }) + }) +})