diff --git a/README.md b/README.md index c669be7..79af555 100644 --- a/README.md +++ b/README.md @@ -144,7 +144,7 @@ Once installed and authenticated, try: ```bash decodo scrape https://ip.decodo.com -decodo google-search "top articles hacker news" --limit 5 --parse +decodo google-search "top articles hacker news" --page-count 5 --parse ``` You should see markdown or parsed JSON within seconds. If you see an auth error, double-check your @@ -183,13 +183,15 @@ By default, scrape commands print the first result's `content` (parsed JSON when | Flag | Effect | | --- | --- | | `--full` | Print the full API response envelope | -| `--format ndjson` | One JSON object per result (pipe-friendly) | +| `--format ndjson` | One JSON object per line on stdout, one line per result (pipe-friendly) | | `--pretty` | Indented JSON on stdout | | `-o, --output ` | Write to a file instead of stdout | | `-v, --verbose` | Print debug logs to stderr | **TTY vs pipe:** When stdout is a terminal, human-readable output is used where possible. When piped or redirected, raw bytes or compact JSON is written. Screenshot output must go to `-o` or a redirect — writing binary PNG to a TTY is rejected. +**NDJSON line contract:** With `--format ndjson`, stdout carries one JSON object per line, one line per API result. Without `--full`, each line is that result's `content`. With `--full`, each line is the full result entry (e.g. `content`, `status_code`, `url`). There is no envelope-level `.results[]` array on any line, so write `jq` filters against a single result object (e.g. `jq -c '.url'`) rather than against `.results[]`.
+ ```bash # Parsed JSON from Google Search decodo google-search "query" --parse @@ -198,7 +200,7 @@ decodo google-search "query" --parse decodo google-search "query" --full --pretty # NDJSON stream for jq / agents -decodo google-search "query" --format ndjson --full | jq -c '.results[]' +decodo google-search "query" --format ndjson --full | jq -c '.url' ``` ## Examples @@ -207,10 +209,10 @@ decodo google-search "query" --format ndjson --full | jq -c '.results[]' ```bash # Search and extract titles -decodo google-search "rust web scraping" --limit 3 --parse | jq '.[].title' +decodo google-search "rust web scraping" --page-count 3 --parse | jq '.results.results.organic[].title' # Scrape JSON API endpoint -decodo scrape https://ip.decodo.com/json | jq '.ip' +decodo scrape https://ip.decodo.com/json | jq '.proxy.ip' # Screenshot to file, then open decodo screenshot https://example.com -o shot.png diff --git a/package.json b/package.json index 299d524..4544126 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@decodo/cli", - "version": "0.1.5", + "version": "0.1.6", "description": "Official CLI for the Decodo APIs", "license": "MIT", "type": "module", diff --git a/src/output/services/write-scrape-response.ts b/src/output/services/write-scrape-response.ts index 58a6d58..7f682c4 100644 --- a/src/output/services/write-scrape-response.ts +++ b/src/output/services/write-scrape-response.ts @@ -31,6 +31,12 @@ export function writeScrapeResponse( } const full = options.full === true; + if (!full && response.results.length > 1) { + console.error( + `Warning: showing 1 of ${response.results.length} results; use --format ndjson or --full` + ); + } + const indent = resolvePrettyIndent(options); const payload = extractPayload(response, full); const text = full diff --git a/src/scrape/commands/codegen-target-commands.ts b/src/scrape/commands/codegen-target-commands.ts index 825b065..a3c47eb 100644 --- a/src/scrape/commands/codegen-target-commands.ts +++ 
b/src/scrape/commands/codegen-target-commands.ts @@ -3,15 +3,16 @@ import { Command } from "commander"; import { configureTargetCommand } from "../services/command-builder.js"; import { snakeToKebab } from "../services/naming.js"; import { createTargetAction } from "../services/run-target-scrape.js"; +import { resolveTargetGroup } from "../services/target-group.js"; export function createCodegenTargetCommands(schema: DecodoSchema): Command[] { const commands: Command[] = []; for (const target of schema.listTargets()) { const commandName = snakeToKebab(target); - const meta = schema.getTargetMeta(target); + const group = resolveTargetGroup(schema, target); const command = new Command(commandName).description( - meta?.group ? `${meta.group} scrape target` : `${target} scrape target` + group ? `${group} scrape target` : "Scrape target" ); configureTargetCommand(command, target, schema); diff --git a/src/scrape/commands/list-targets.ts b/src/scrape/commands/list-targets.ts index 0893e0c..e98aaf6 100644 --- a/src/scrape/commands/list-targets.ts +++ b/src/scrape/commands/list-targets.ts @@ -1,6 +1,7 @@ import type { DecodoSchema } from "@decodo/sdk-ts"; import { Command } from "commander"; import { snakeToKebab } from "../services/naming.js"; +import { resolveTargetGroup } from "../services/target-group.js"; export function createListTargetsCommand(schema: DecodoSchema): Command { return new Command("targets") @@ -9,7 +10,7 @@ export function createListTargetsCommand(schema: DecodoSchema): Command { const grouped = new Map(); for (const target of schema.listTargets()) { - const group = schema.getTargetMeta(target)?.group ?? "Other"; + const group = resolveTargetGroup(schema, target) ?? "Other"; const names = grouped.get(group) ?? 
[]; names.push(snakeToKebab(target)); grouped.set(group, names); diff --git a/src/scrape/services/auth-validation.ts b/src/scrape/services/auth-validation.ts index a450f54..9e4a73a 100644 --- a/src/scrape/services/auth-validation.ts +++ b/src/scrape/services/auth-validation.ts @@ -1,11 +1,35 @@ -import { Target as ScrapeTarget } from "@decodo/sdk-ts"; +import { + AuthenticationError, + DecodoError, + RateLimitError, + Target as ScrapeTarget, + TimeoutError, +} from "@decodo/sdk-ts"; import { createDecodoClient } from "./client.js"; +const AUTH_PROBE_URL = "https://does-not-exist.decodo.com"; + export async function validateAuthToken(token: string): Promise { const client = createDecodoClient(token); - await client.webScrapingApi.scrape({ - target: ScrapeTarget.Universal, - url: "https://does-not-exist.decodo.com", - }); + try { + await client.webScrapingApi.scrape({ + target: ScrapeTarget.Universal, + url: AUTH_PROBE_URL, + }); + } catch (err) { + if ( + err instanceof AuthenticationError || + err instanceof RateLimitError || + err instanceof TimeoutError + ) { + throw err; + } + + if (err instanceof DecodoError) { + return; + } + + throw err; + } } diff --git a/src/scrape/services/run-target-scrape.ts b/src/scrape/services/run-target-scrape.ts index 11934d6..683d639 100644 --- a/src/scrape/services/run-target-scrape.ts +++ b/src/scrape/services/run-target-scrape.ts @@ -51,11 +51,22 @@ export function createTargetAction( ((input, options) => buildScrapeBody(target, input, options, config, schema)); - return async ( - input: string | undefined, - options: Record, - command: Command - ): Promise => { + return async (...args: unknown[]): Promise => { + let input: string | undefined; + let options: Record; + let command: Command; + + if (config.primaryField) { + [input, options, command] = args as [ + string | undefined, + Record, + Command, + ]; + } else { + [options, command] = args as [Record, Command]; + input = undefined; + } + const rootOpts = 
getRootOpts(command); const verbose = rootOpts.verbose === true; diff --git a/src/scrape/services/target-group.ts b/src/scrape/services/target-group.ts new file mode 100644 index 0000000..612ec53 --- /dev/null +++ b/src/scrape/services/target-group.ts @@ -0,0 +1,14 @@ +import type { DecodoSchema } from "@decodo/sdk-ts"; + +const NO_GROUP = "None"; + +export function resolveTargetGroup( + schema: DecodoSchema, + target: string +): string | undefined { + const group = schema.getTargetMeta(target)?.group; + if (!group || group === NO_GROUP) { + return; + } + return group; +} diff --git a/tests/output/services/write-scrape-response.test.ts b/tests/output/services/write-scrape-response.test.ts index 59c9d22..ad686b6 100644 --- a/tests/output/services/write-scrape-response.test.ts +++ b/tests/output/services/write-scrape-response.test.ts @@ -110,6 +110,29 @@ describe("writeScrapeResponse", () => { } }); + it("warns on stderr when default output shows one of many results", () => { + const response = { + results: [{ content: { a: 1 } }, { content: { b: 2 } }], + } as SyncResponse; + + writeScrapeResponse(response, { options: {} }); + + expect(written).toBe('{"a":1}\n'); + expect(stderr).toContain( + "Warning: showing 1 of 2 results; use --format ndjson or --full" + ); + }); + + it("does not warn when --full is set", () => { + const response = { + results: [{ content: { a: 1 } }, { content: { b: 2 } }], + } as SyncResponse; + + writeScrapeResponse(response, { options: { full: true } }); + + expect(stderr).toEqual([]); + }); + it("refuses TTY stdout for binary png without -o", () => { Object.defineProperty(process.stdout, "isTTY", { value: true, diff --git a/tests/scrape/commands/codegen-target-commands.test.ts b/tests/scrape/commands/codegen-target-commands.test.ts index 5d1d320..4ffbe73 100644 --- a/tests/scrape/commands/codegen-target-commands.test.ts +++ b/tests/scrape/commands/codegen-target-commands.test.ts @@ -20,4 +20,15 @@ describe("createCodegenTargetCommands", () 
=> { true ); }); + + it("uses a generic description for targets without a real group", () => { + const schema = BundledSchema.shared; + const commands = createCodegenTargetCommands(schema); + + const ungrouped = commands.find((cmd) => cmd.name() === "youtube-video"); + expect(ungrouped?.description()).toBe("Scrape target"); + + const grouped = commands.find((cmd) => cmd.name() === "amazon-product"); + expect(grouped?.description()).toBe("Amazon scrape target"); + }); }); diff --git a/tests/scrape/services/auth-validation.test.ts b/tests/scrape/services/auth-validation.test.ts index 3bd9080..4fd054d 100644 --- a/tests/scrape/services/auth-validation.test.ts +++ b/tests/scrape/services/auth-validation.test.ts @@ -1,35 +1,62 @@ +import { + AuthenticationError, + DecodoError, + RateLimitError, + Target as ScrapeTarget, +} from "@decodo/sdk-ts"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { validateAuthToken } from "../../../src/scrape/services/auth-validation.js"; +import { createDecodoClient } from "../../../src/scrape/services/client.js"; + +vi.mock("../../../src/scrape/services/client.js", () => ({ + createDecodoClient: vi.fn(), +})); describe("validateAuthToken", () => { + const scrape = vi.fn(); + beforeEach(() => { - vi.stubGlobal("fetch", vi.fn()); + scrape.mockReset(); + vi.mocked(createDecodoClient).mockReturnValue({ + webScrapingApi: { scrape }, + } as never); }); afterEach(() => { - vi.unstubAllGlobals(); + vi.clearAllMocks(); }); - it("calls universal scrape against ip.decodo.com", async () => { - const fetchMock = vi.mocked(fetch); - fetchMock.mockResolvedValue({ - ok: true, - status: 200, - json: async () => ({ results: [] }), - } as Response); + it("probes auth with the stats-invisible URL", async () => { + scrape.mockResolvedValue({ results: [] }); await validateAuthToken("test-token"); - expect(fetchMock).toHaveBeenCalledOnce(); - const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit]; - 
expect(url).toBe("https://scraper-api.decodo.com/v2/scrape"); - expect(JSON.parse(init.body as string)).toEqual({ - target: "universal", + expect(createDecodoClient).toHaveBeenCalledWith("test-token"); + expect(scrape).toHaveBeenCalledWith({ + target: ScrapeTarget.Universal, url: "https://does-not-exist.decodo.com", }); - expect(init.headers).toMatchObject({ - Authorization: "Basic test-token", - "x-integration": "cli", - }); + }); + + it("rejects invalid tokens", async () => { + scrape.mockRejectedValue(new AuthenticationError("Username invalid.")); + + await expect(validateAuthToken("bad-token")).rejects.toThrow( + AuthenticationError + ); + }); + + it("accepts valid tokens when the probe scrape fails with DecodoError", async () => { + scrape.mockRejectedValue(new DecodoError("Request processing failed", 422)); + + await expect(validateAuthToken("test-token")).resolves.toBeUndefined(); + }); + + it("rethrows rate limit errors", async () => { + scrape.mockRejectedValue(new RateLimitError("Rate limit exceeded")); + + await expect(validateAuthToken("test-token")).rejects.toThrow( + RateLimitError + ); }); }); diff --git a/tests/scrape/services/run-target-scrape.test.ts b/tests/scrape/services/run-target-scrape.test.ts index 178a921..a45df9f 100644 --- a/tests/scrape/services/run-target-scrape.test.ts +++ b/tests/scrape/services/run-target-scrape.test.ts @@ -189,4 +189,30 @@ describe("createTargetAction", () => { expect(exitCode).toBe(4); }); + + it("handles targets without a primary input argument", async () => { + const scrape = vi.fn().mockResolvedValue({ + results: [{ content: { ok: true } }], + }); + vi.mocked(createDecodoClient).mockReturnValue({ + webScrapingApi: { scrape }, + } as never); + + const program = new Command() + .option("--token ") + .addCommand( + new Command("universal-ecommerce").action( + createTargetAction("universal_ecommerce", BundledSchema.shared) + ) + ); + + await program.parseAsync(["universal-ecommerce", "--token", "test-token"], { + 
from: "user", + }); + + expect(scrape).toHaveBeenCalledWith({ + target: "universal_ecommerce", + }); + expect(stdout).toBe('{"ok":true}\n'); + }); }); diff --git a/tests/scrape/services/target-group.test.ts b/tests/scrape/services/target-group.test.ts new file mode 100644 index 0000000..d613446 --- /dev/null +++ b/tests/scrape/services/target-group.test.ts @@ -0,0 +1,27 @@ +import type { DecodoSchema } from "@decodo/sdk-ts"; +import { describe, expect, it } from "vitest"; +import { resolveTargetGroup } from "../../../src/scrape/services/target-group.js"; + +function schemaWithGroup(group: string | undefined): DecodoSchema { + return { + getTargetMeta: () => (group === undefined ? undefined : { group }), + } as unknown as DecodoSchema; +} + +describe("resolveTargetGroup", () => { + it("returns the group when it is a real value", () => { + expect( + resolveTargetGroup(schemaWithGroup("Amazon"), "amazon_product") + ).toBe("Amazon"); + }); + + it.each([ + "None", + "", + undefined, + ])("returns undefined for the %p sentinel group", (group) => { + expect( + resolveTargetGroup(schemaWithGroup(group), "youtube_video") + ).toBeUndefined(); + }); +});