Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ Once installed and authenticated, try:

```bash
decodo scrape https://ip.decodo.com
decodo google-search "top articles hacker news" --limit 5 --parse
decodo google-search "top articles hacker news" --page-count 5 --parse
```

You should see markdown or parsed JSON within seconds. If you see an auth error, double-check your
Expand Down Expand Up @@ -183,13 +183,15 @@ By default, scrape commands print the first result's `content` (parsed JSON when
| Flag | Effect |
| --- | --- |
| `--full` | Print the full API response envelope |
| `--format ndjson` | One JSON object per result (pipe-friendly) |
| `--format ndjson` | One JSON object per line on stdout, one line per result (pipe-friendly) |
| `--pretty` | Indented JSON on stdout |
| `-o, --output <path>` | Write to a file instead of stdout |
| `-v, --verbose` | Print debug logs to stderr |

**TTY vs pipe:** When stdout is a terminal, human-readable output is used where possible. When piped or redirected, raw bytes or compact JSON is written. Screenshot output must go to `-o` or a redirect — writing binary PNG to a TTY is rejected.

**NDJSON line contract:** With `--format ndjson`, stdout carries one line per API result, and each line is a single JSON object. Without `--full`, each line is that result's `content`. With `--full`, each line is the full result entry (e.g. `content`, `status_code`, `url`). No line contains an envelope-level `.results[]` array, so write `jq` filters against the per-line object (for example `jq -c '.url'`) rather than against `.results[]`.

```bash
# Parsed JSON from Google Search
decodo google-search "query" --parse
Expand All @@ -198,7 +200,7 @@ decodo google-search "query" --parse
decodo google-search "query" --full --pretty

# NDJSON stream for jq / agents
decodo google-search "query" --format ndjson --full | jq -c '.results[]'
decodo google-search "query" --format ndjson --full | jq -c '.url'
```

## Examples
Expand All @@ -207,10 +209,10 @@ decodo google-search "query" --format ndjson --full | jq -c '.results[]'

```bash
# Search and extract titles
decodo google-search "rust web scraping" --limit 3 --parse | jq '.[].title'
decodo google-search "rust web scraping" --page-count 3 --parse | jq '.results.results.organic[].title'

# Scrape JSON API endpoint
decodo scrape https://ip.decodo.com/json | jq '.ip'
decodo scrape https://ip.decodo.com/json | jq '.proxy.ip'

# Screenshot to file, then open
decodo screenshot https://example.com -o shot.png
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@decodo/cli",
"version": "0.1.5",
"version": "0.1.6",
"description": "Official CLI for the Decodo APIs",
"license": "MIT",
"type": "module",
Expand Down
6 changes: 6 additions & 0 deletions src/output/services/write-scrape-response.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ export function writeScrapeResponse(
}

const full = options.full === true;
if (!full && response.results.length > 1) {
console.error(
`Warning: showing 1 of ${response.results.length} results; use --format ndjson or --full`
);
}

const indent = resolvePrettyIndent(options);
const payload = extractPayload(response, full);
const text = full
Expand Down
5 changes: 3 additions & 2 deletions src/scrape/commands/codegen-target-commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@ import { Command } from "commander";
import { configureTargetCommand } from "../services/command-builder.js";
import { snakeToKebab } from "../services/naming.js";
import { createTargetAction } from "../services/run-target-scrape.js";
import { resolveTargetGroup } from "../services/target-group.js";

export function createCodegenTargetCommands(schema: DecodoSchema): Command[] {
const commands: Command[] = [];

for (const target of schema.listTargets()) {
const commandName = snakeToKebab(target);
const meta = schema.getTargetMeta(target);
const group = resolveTargetGroup(schema, target);
const command = new Command(commandName).description(
meta?.group ? `${meta.group} scrape target` : `${target} scrape target`
group ? `${group} scrape target` : "Scrape target"
);

configureTargetCommand(command, target, schema);
Expand Down
3 changes: 2 additions & 1 deletion src/scrape/commands/list-targets.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { DecodoSchema } from "@decodo/sdk-ts";
import { Command } from "commander";
import { snakeToKebab } from "../services/naming.js";
import { resolveTargetGroup } from "../services/target-group.js";

export function createListTargetsCommand(schema: DecodoSchema): Command {
return new Command("targets")
Expand All @@ -9,7 +10,7 @@ export function createListTargetsCommand(schema: DecodoSchema): Command {
const grouped = new Map<string, string[]>();

for (const target of schema.listTargets()) {
const group = schema.getTargetMeta(target)?.group ?? "Other";
const group = resolveTargetGroup(schema, target) ?? "Other";
const names = grouped.get(group) ?? [];
names.push(snakeToKebab(target));
grouped.set(group, names);
Expand Down
34 changes: 29 additions & 5 deletions src/scrape/services/auth-validation.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,35 @@
import { Target as ScrapeTarget } from "@decodo/sdk-ts";
import {
AuthenticationError,
DecodoError,
RateLimitError,
Target as ScrapeTarget,
TimeoutError,
} from "@decodo/sdk-ts";
import { createDecodoClient } from "./client.js";

const AUTH_PROBE_URL = "https://does-not-exist.decodo.com";

/**
 * Checks an auth token by sending one probe scrape with it.
 *
 * The probe targets `AUTH_PROBE_URL` through the universal target. The scrape
 * itself is allowed to fail: only the kind of failure matters.
 *
 * @param token - Token handed to `createDecodoClient` for the probe request.
 * @returns Resolves when the probe succeeds, or fails with a `DecodoError`
 *   that is not an authentication, rate-limit, or timeout error.
 * @throws AuthenticationError | RateLimitError | TimeoutError rethrown as-is.
 * @throws Any error that is not a `DecodoError`, rethrown unchanged.
 */
export async function validateAuthToken(token: string): Promise<void> {
  const client = createDecodoClient(token);

  try {
    // Single guarded probe. It must live inside the try block so that probe
    // failures are classified below instead of escaping to the caller.
    await client.webScrapingApi.scrape({
      target: ScrapeTarget.Universal,
      url: AUTH_PROBE_URL,
    });
  } catch (err) {
    const mustSurface =
      err instanceof AuthenticationError ||
      err instanceof RateLimitError ||
      err instanceof TimeoutError;

    // Any other DecodoError is treated as "token accepted": the request got
    // past authentication and only the scrape of the probe URL failed.
    if (!mustSurface && err instanceof DecodoError) {
      return;
    }

    throw err;
  }
}
21 changes: 16 additions & 5 deletions src/scrape/services/run-target-scrape.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,22 @@ export function createTargetAction(
((input, options) =>
buildScrapeBody(target, input, options, config, schema));

return async (
input: string | undefined,
options: Record<string, unknown>,
command: Command
): Promise<void> => {
return async (...args: unknown[]): Promise<void> => {
let input: string | undefined;
let options: Record<string, unknown>;
let command: Command;

if (config.primaryField) {
[input, options, command] = args as [
string | undefined,
Record<string, unknown>,
Command,
];
} else {
[options, command] = args as [Record<string, unknown>, Command];
input = undefined;
}

const rootOpts = getRootOpts(command);
const verbose = rootOpts.verbose === true;

Expand Down
14 changes: 14 additions & 0 deletions src/scrape/services/target-group.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import type { DecodoSchema } from "@decodo/sdk-ts";

/** Placeholder group name that is treated the same as having no group. */
const NO_GROUP = "None";

/**
 * Looks up the group recorded in the schema metadata for a target.
 *
 * @param schema - Schema queried through `getTargetMeta`.
 * @param target - Target identifier passed to `getTargetMeta`.
 * @returns The group name, or `undefined` when the metadata is missing, the
 *   group is empty, or the group equals the `"None"` placeholder.
 */
export function resolveTargetGroup(
  schema: DecodoSchema,
  target: string
): string | undefined {
  const declared = schema.getTargetMeta(target)?.group;
  return declared && declared !== NO_GROUP ? declared : undefined;
}
23 changes: 23 additions & 0 deletions tests/output/services/write-scrape-response.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,29 @@ describe("writeScrapeResponse", () => {
}
});

it("warns on stderr when default output shows one of many results", () => {
  // Arrange: two results and no --full, so only the first content is printed.
  const twoResults = {
    results: [{ content: { a: 1 } }, { content: { b: 2 } }],
  } as SyncResponse;
  const expectedWarning =
    "Warning: showing 1 of 2 results; use --format ndjson or --full";

  // Act
  writeScrapeResponse(twoResults, { options: {} });

  // Assert: stdout carries the first result only; stderr carries the hint.
  expect(written).toBe('{"a":1}\n');
  expect(stderr).toContain(expectedWarning);
});

it("does not warn when --full is set", () => {
  // Same two-result response as the warning case, but written with --full.
  const twoResults = {
    results: [{ content: { a: 1 } }, { content: { b: 2 } }],
  } as SyncResponse;

  writeScrapeResponse(twoResults, { options: { full: true } });

  // Nothing may have been captured on stderr.
  expect(stderr).toEqual([]);
});

it("refuses TTY stdout for binary png without -o", () => {
Object.defineProperty(process.stdout, "isTTY", {
value: true,
Expand Down
11 changes: 11 additions & 0 deletions tests/scrape/commands/codegen-target-commands.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,15 @@ describe("createCodegenTargetCommands", () => {
true
);
});

it("uses a generic description for targets without a real group", () => {
  const commands = createCodegenTargetCommands(BundledSchema.shared);

  // Description of the generated command with the given name, if any.
  const descriptionOf = (name: string) =>
    commands.find((candidate) => candidate.name() === name)?.description();

  // A target without a usable group falls back to the generic text.
  expect(descriptionOf("youtube-video")).toBe("Scrape target");

  // A grouped target keeps its group-prefixed text.
  expect(descriptionOf("amazon-product")).toBe("Amazon scrape target");
});
});
63 changes: 45 additions & 18 deletions tests/scrape/services/auth-validation.test.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,62 @@
import {
AuthenticationError,
DecodoError,
RateLimitError,
Target as ScrapeTarget,
} from "@decodo/sdk-ts";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { validateAuthToken } from "../../../src/scrape/services/auth-validation.js";
import { createDecodoClient } from "../../../src/scrape/services/client.js";

vi.mock("../../../src/scrape/services/client.js", () => ({
createDecodoClient: vi.fn(),
}));

describe("validateAuthToken", () => {
  // Stand-in for `client.webScrapingApi.scrape`; each test decides how the
  // probe request settles.
  const scrape = vi.fn();

  beforeEach(() => {
    scrape.mockReset();
    // The mocked client factory hands back a client exposing only the probe.
    vi.mocked(createDecodoClient).mockReturnValue({
      webScrapingApi: { scrape },
    } as never);
  });

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("probes auth with the stats-invisible URL", async () => {
    scrape.mockResolvedValue({ results: [] });

    await validateAuthToken("test-token");

    expect(createDecodoClient).toHaveBeenCalledWith("test-token");
    expect(scrape).toHaveBeenCalledWith({
      target: ScrapeTarget.Universal,
      url: "https://does-not-exist.decodo.com",
    });
  });

  it("rejects invalid tokens", async () => {
    scrape.mockRejectedValue(new AuthenticationError("Username invalid."));

    await expect(validateAuthToken("bad-token")).rejects.toThrow(
      AuthenticationError
    );
  });

  it("accepts valid tokens when the probe scrape fails with DecodoError", async () => {
    // A generic DecodoError means the probe scrape failed, not the auth check.
    scrape.mockRejectedValue(new DecodoError("Request processing failed", 422));

    await expect(validateAuthToken("test-token")).resolves.toBeUndefined();
  });

  it("rethrows rate limit errors", async () => {
    scrape.mockRejectedValue(new RateLimitError("Rate limit exceeded"));

    await expect(validateAuthToken("test-token")).rejects.toThrow(
      RateLimitError
    );
  });
});
26 changes: 26 additions & 0 deletions tests/scrape/services/run-target-scrape.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,30 @@ describe("createTargetAction", () => {

expect(exitCode).toBe(4);
});

it("handles targets without a primary input argument", async () => {
  // Arrange: the mocked client resolves the scrape with one result.
  const scrape = vi.fn().mockResolvedValue({
    results: [{ content: { ok: true } }],
  });
  vi.mocked(createDecodoClient).mockReturnValue({
    webScrapingApi: { scrape },
  } as never);

  // The subcommand declares no positional argument, so the action is invoked
  // without a leading input value.
  const action = createTargetAction("universal_ecommerce", BundledSchema.shared);
  const subcommand = new Command("universal-ecommerce").action(action);
  const program = new Command().option("--token <token>");
  program.addCommand(subcommand);

  // Act
  const argv = ["universal-ecommerce", "--token", "test-token"];
  await program.parseAsync(argv, { from: "user" });

  // Assert: the request body carries the target only; content goes to stdout.
  expect(scrape).toHaveBeenCalledWith({
    target: "universal_ecommerce",
  });
  expect(stdout).toBe('{"ok":true}\n');
});
});
27 changes: 27 additions & 0 deletions tests/scrape/services/target-group.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import type { DecodoSchema } from "@decodo/sdk-ts";
import { describe, expect, it } from "vitest";
import { resolveTargetGroup } from "../../../src/scrape/services/target-group.js";

/**
 * Builds a minimal schema stub whose `getTargetMeta` reports the given group
 * for every target, or no metadata at all when `group` is `undefined`.
 */
function schemaWithGroup(group: string | undefined): DecodoSchema {
  const getTargetMeta = () => {
    if (group === undefined) {
      return undefined;
    }
    return { group };
  };

  // Only `getTargetMeta` is stubbed, hence the double cast.
  return { getTargetMeta } as unknown as DecodoSchema;
}

describe("resolveTargetGroup", () => {
  it("returns the group when it is a real value", () => {
    const schema = schemaWithGroup("Amazon");

    expect(resolveTargetGroup(schema, "amazon_product")).toBe("Amazon");
  });

  // Group values that must all be reported as "no group".
  const sentinelGroups = ["None", "", undefined];

  it.each(sentinelGroups)("returns undefined for the %p sentinel group", (sentinel) => {
    const schema = schemaWithGroup(sentinel);

    expect(resolveTargetGroup(schema, "youtube_video")).toBeUndefined();
  });
});
Loading