Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ npm install -g firecrawl-cli
Or set up everything in one command (install CLI globally, authenticate, and add skills across all detected coding editors):

```bash
npx -y firecrawl-cli@1.19.6 init -y --browser
npx -y firecrawl-cli@1.19.7 init -y --browser
```

- `-y` runs setup non-interactively
Expand Down Expand Up @@ -218,6 +218,9 @@ firecrawl https://spa-app.com --wait-for 3000
# Get all links from a page
firecrawl https://example.com --format links

# Discover videos on a page (prints video URLs)
firecrawl https://example.com/product --format video

# Screenshot + markdown
firecrawl https://example.com --format markdown --screenshot

Expand Down Expand Up @@ -675,7 +678,7 @@ firecrawl --status
```

```
🔥 firecrawl cli v1.19.6
🔥 firecrawl cli v1.19.7

● Authenticated via stored credentials
Concurrency: 0/100 jobs (parallel scrape limit)
Expand Down Expand Up @@ -704,15 +707,19 @@ firecrawl https://example.com --format links --pretty

### Format Behavior

- **Single format**: Outputs raw content (markdown text, HTML, etc.)
- **Single format**: Outputs raw content (markdown text, HTML, links, image URLs, video URLs, etc.)
- **Multiple formats**: Outputs JSON with all requested data
- **Video metadata**: Use `--format video --json` to include thumbnails, descriptions, and other `videos` metadata

```bash
# Raw markdown output
firecrawl https://example.com --format markdown

# JSON output with multiple formats
firecrawl https://example.com --format markdown,links,images

# Full video metadata
firecrawl https://example.com/product --format video --json
```

---
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "firecrawl-cli",
"version": "1.19.6",
"version": "1.19.7",
"description": "Command-line interface for Firecrawl. Scrape, crawl, and extract data from any website directly from your terminal.",
"main": "dist/index.js",
"bin": {
Expand Down
83 changes: 76 additions & 7 deletions src/__tests__/commands/scrape.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
* Tests for scrape command
*/

import fs from 'fs';
import os from 'os';
import path from 'path';
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { executeScrape } from '../../commands/scrape';
import { executeScrape, handleAllScrapeCommand } from '../../commands/scrape';
import { getClient } from '../../utils/client';
import { initializeConfig } from '../../utils/config';
import { setupTest, teardownTest } from '../utils/mock-client';
Expand Down Expand Up @@ -447,12 +450,9 @@ describe('executeScrape', () => {

describe('Type safety', () => {
it('should accept valid ScrapeFormat types', async () => {
const formatList: Array<'markdown' | 'html' | 'rawHtml' | 'links'> = [
'markdown',
'html',
'rawHtml',
'links',
];
const formatList: Array<
'markdown' | 'html' | 'rawHtml' | 'links' | 'video'
> = ['markdown', 'html', 'rawHtml', 'links', 'video'];

for (const format of formatList) {
mockClient.scrape.mockResolvedValue({ [format]: 'test' });
Expand Down Expand Up @@ -483,4 +483,73 @@ describe('executeScrape', () => {
});
});
});

// Covers the on-disk output of handleAllScrapeCommand when only the `video`
// format is requested, using a mocked client and a temporary working directory.
describe('download output', () => {
it('should save video URLs and metadata for video format downloads', async () => {
// Fresh temp directory to run in; the current cwd is remembered so the
// `finally` block can switch back to it.
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'firecrawl-cli-'));
const previousCwd = process.cwd();
// Replace process.stderr.write with a no-op for the duration of the test.
const stderrSpy = vi
.spyOn(process.stderr, 'write')
.mockImplementation(() => true);

// Fixture: two video entries for the same page; only the first has a thumbnail.
const videos = [
{
url: 'https://cdn.example.com/video-a.mp4',
sourceURL: 'https://example.com/product',
source: 'script',
kind: 'file',
provider: 'cdn.example.com',
thumbnail: 'https://cdn.example.com/thumb-a.jpg',
},
{
url: 'https://cdn.example.com/video-b.mp4',
sourceURL: 'https://example.com/product',
source: 'script',
kind: 'file',
provider: 'cdn.example.com',
},
];

// `map` resolves to a single link and `scrape` resolves to the fixture above.
mockClient.map = vi.fn().mockResolvedValue({
links: [{ url: 'https://example.com/product' }],
});
mockClient.scrape.mockResolvedValue({ videos });

try {
// Switch cwd first: the assertions below look for output under tmpDir.
process.chdir(tmpDir);
initializeConfig({ apiUrl: 'http://localhost:3002' });

await handleAllScrapeCommand(
'https://example.com/product',
{
url: 'https://example.com/product',
apiUrl: 'http://localhost:3002',
formats: ['video'],
},
{ yes: true, limit: 1 }
);

// Expected output location: <tmpDir>/.firecrawl/example.com/product
const outputDir = path.join(
tmpDir,
'.firecrawl',
'example.com',
'product'
);
const videosTxt = path.join(outputDir, 'videos.txt');
const videosJson = path.join(outputDir, 'videos.json');

// videos.txt holds the URLs joined by a newline, with no trailing newline.
expect(fs.readFileSync(videosTxt, 'utf-8')).toBe(
'https://cdn.example.com/video-a.mp4\nhttps://cdn.example.com/video-b.mp4'
);
// videos.json must parse back to the full fixture objects.
expect(JSON.parse(fs.readFileSync(videosJson, 'utf-8'))).toEqual(
videos
);
// No index.json is expected for a video-only download.
expect(fs.existsSync(path.join(outputDir, 'index.json'))).toBe(false);
} finally {
// Always restore cwd and the stderr spy, then delete the temp directory.
process.chdir(previousCwd);
stderrSpy.mockRestore();
fs.rmSync(tmpDir, { recursive: true, force: true });
}
});
});
});
9 changes: 3 additions & 6 deletions src/__tests__/commands/search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -703,12 +703,9 @@ describe('executeSearch', () => {
});

it('should accept valid scrape format types', async () => {
const formatList: Array<'markdown' | 'html' | 'rawHtml' | 'links'> = [
'markdown',
'html',
'rawHtml',
'links',
];
const formatList: Array<
'markdown' | 'html' | 'rawHtml' | 'links' | 'video'
> = ['markdown', 'html', 'rawHtml', 'links', 'video'];

for (const format of formatList) {
mockHttpPost.mockResolvedValue(
Expand Down
9 changes: 8 additions & 1 deletion src/__tests__/utils/options.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ describe('Option Parsing Utilities', () => {
it('should parse single attributes format', () => {
expect(parseFormats('attributes')).toEqual(['attributes']);
});

it('should parse single video format', () => {
expect(parseFormats('video')).toEqual(['video']);
});
});

describe('Multiple format parsing', () => {
Expand All @@ -67,12 +71,15 @@ describe('Option Parsing Utilities', () => {
});

it('should handle all common formats together', () => {
expect(parseFormats('markdown,html,links,images,screenshot')).toEqual([
expect(
parseFormats('markdown,html,links,images,screenshot,video')
).toEqual([
'markdown',
'html',
'links',
'images',
'screenshot',
'video',
]);
});
});
Expand Down
82 changes: 82 additions & 0 deletions src/__tests__/utils/output.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,53 @@ describe('Output Utilities', () => {
);
});

// Single `video` format with a `videos` array: stdout should receive only the
// URLs, one per line, followed by a trailing newline.
it('should output newline-separated video URLs for single video format', () => {
// Stub fs.existsSync so it reports true for every path.
vi.mocked(fs.existsSync).mockReturnValue(true);

handleScrapeOutput(
{
success: true,
data: {
videos: [
{
url: 'https://cdn.example.com/product.mp4',
sourceURL: 'https://example.com/product',
source: 'script',
},
{
url: 'https://cdn.example.com/demo.mp4',
sourceURL: 'https://example.com/product',
source: 'html',
},
],
},
},
['video']
);

// Only the `url` fields appear, in input order; sourceURL/source are not printed.
expect(stdoutWriteSpy).toHaveBeenCalledWith(
'https://cdn.example.com/product.mp4\nhttps://cdn.example.com/demo.mp4\n'
);
});

// Legacy response shape: `data.video` is a single URL string and there is no
// `videos` array. The URL should still be written to stdout.
it('should output legacy video URL for single video format', () => {
// Stub fs.existsSync so it reports true for every path.
vi.mocked(fs.existsSync).mockReturnValue(true);

handleScrapeOutput(
{
success: true,
data: {
video: 'https://storage.example.com/video.mp4',
},
},
['video']
);

// The single URL is emitted followed by a newline.
expect(stdoutWriteSpy).toHaveBeenCalledWith(
'https://storage.example.com/video.mp4\n'
);
});

it('should output summary for single summary format', () => {
vi.mocked(fs.existsSync).mockReturnValue(true);

Expand Down Expand Up @@ -263,6 +310,41 @@ describe('Output Utilities', () => {
expect(parsed.links).toEqual(['https://example.com']);
});

// Two formats requested (`markdown` + `video`): the output is expected to be
// JSON that carries both the markdown and the full `videos` entries.
it('should include videos in JSON output for multiple formats', () => {
// Stub fs.existsSync so it reports true for every path.
vi.mocked(fs.existsSync).mockReturnValue(true);

handleScrapeOutput(
{
success: true,
data: {
markdown: '# Test',
videos: [
{
url: 'https://cdn.example.com/product.mp4',
sourceURL: 'https://example.com/product',
source: 'script',
thumbnail: 'https://cdn.example.com/poster.jpg',
},
],
metadata: { title: 'Test' },
},
},
['markdown', 'video']
);

// Parse the first argument of the first stdout write as JSON.
const output = stdoutWriteSpy.mock.calls[0][0];
const parsed = JSON.parse(output);
expect(parsed.markdown).toBe('# Test');
// The video object must be preserved in full (including thumbnail), not
// reduced to its URL.
expect(parsed.videos).toEqual([
{
url: 'https://cdn.example.com/product.mp4',
sourceURL: 'https://example.com/product',
source: 'script',
thumbnail: 'https://cdn.example.com/poster.jpg',
},
]);
});

it('should output pretty JSON when pretty flag is true', () => {
vi.mocked(fs.existsSync).mockReturnValue(true);

Expand Down
35 changes: 35 additions & 0 deletions src/commands/scrape.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,24 @@ function urlToNestedPath(url: string, filename: string = 'index.md'): string {
}
}

/**
 * Collect the video URLs carried by a scrape result payload.
 *
 * If `data.videos` is an array it is the only source consulted: the `url` of
 * each entry is kept when it is a non-empty string, and every other entry is
 * dropped. The single-string field `data.video` is used only when `videos` is
 * not an array.
 *
 * @param data - Scrape result payload; may be omitted.
 * @returns The URLs in their original order, or an empty array when none exist.
 */
function getVideoUrls(data?: ScrapeResult['data']): string[] {
  if (!data) {
    return [];
  }

  // Shared check for "usable URL": a string with at least one character.
  const isUsableUrl = (value: unknown): value is string =>
    typeof value === 'string' && value !== '';

  // No `videos` array: fall back to the single `video` string, if usable.
  if (!Array.isArray(data.videos)) {
    return isUsableUrl(data.video) ? [data.video] : [];
  }

  const urls: string[] = [];
  for (const entry of data.videos) {
    // Optional chaining tolerates null/undefined entries in the array.
    const candidate: unknown = entry?.url;
    if (isUsableUrl(candidate)) {
      urls.push(candidate);
    }
  }
  return urls;
}

/**
* Map an entire site and scrape all discovered URLs.
* Organizes results into nested directories based on URL paths.
Expand Down Expand Up @@ -383,6 +401,7 @@ async function runWizard(
{ name: 'html', value: 'html' },
{ name: 'links', value: 'links' },
{ name: 'images', value: 'images' },
{ name: 'video', value: 'video' },
{ name: 'summary', value: 'summary' },
{ name: 'screenshot', value: 'screenshot' },
{ name: 'full page screenshot', value: 'fullPageScreenshot' },
Expand Down Expand Up @@ -656,6 +675,22 @@ export async function handleAllScrapeCommand(
fs.writeFileSync(filepath, result.data.images.join('\n'), 'utf-8');
savedFiles.push(filepath);
}
} else if (fmt === 'video') {
const videoUrls = getVideoUrls(result.data);
if (videoUrls.length > 0) {
const filepath = path.join(dir, 'videos.txt');
fs.writeFileSync(filepath, videoUrls.join('\n'), 'utf-8');
savedFiles.push(filepath);
}
if (Array.isArray(result.data?.videos)) {
const filepath = path.join(dir, 'videos.json');
fs.writeFileSync(
filepath,
JSON.stringify(result.data.videos, null, 2),
'utf-8'
);
savedFiles.push(filepath);
}
} else if (fmt === 'summary') {
if (result.data?.summary) {
const filepath = path.join(dir, 'summary.md');
Expand Down
12 changes: 9 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ function createScrapeCommand(): Command {
.option('-H, --html', 'Output raw HTML (shortcut for --format html)')
.option(
'-f, --format <formats>',
'Output format(s). Multiple formats can be specified with commas (e.g., "markdown,links,images"). Available: markdown, html, rawHtml, links, images, screenshot, summary, changeTracking, json, attributes, branding. Single format outputs raw content; multiple formats output JSON.'
'Output format(s). Multiple formats can be specified with commas (e.g., "markdown,links,images"). Available: markdown, html, rawHtml, links, images, screenshot, summary, changeTracking, json, attributes, branding, video. Single format outputs raw content; multiple formats output JSON.'
)
.option('--only-main-content', 'Include only main content', false)
.option(
Expand Down Expand Up @@ -501,7 +501,7 @@ function createDownloadCommand(): Command {
.option('--allow-subdomains', 'Include subdomains', false)
.option(
'-f, --format <formats>',
'Output format(s), comma-separated (default: markdown). Available: markdown, html, rawHtml, links, images, summary, json'
'Output format(s), comma-separated (default: markdown). Available: markdown, html, rawHtml, links, images, summary, json, video'
)
Comment on lines 502 to 505
.option('-H, --html', 'Download as HTML (shortcut for --format html)')
.option(
Expand Down Expand Up @@ -847,6 +847,12 @@ Max upload size: 50 MB
url: 'file://' + file,
format: format ?? 'markdown',
});
if (scrapeOptions.formats?.includes('video')) {
console.error(
'Error: The video format is not supported for parse. Use scrape with --format video for webpage video discovery.'
);
process.exit(1);
}

await handleParseCommand({
file,
Expand Down Expand Up @@ -913,7 +919,7 @@ function createSearchCommand(): Command {
.option('--scrape', 'Enable scraping of search results', false)
.option(
'--scrape-formats <formats>',
'Comma-separated scrape formats when --scrape is enabled: markdown, html, rawHtml, links, etc. (default: markdown)'
'Comma-separated scrape formats when --scrape is enabled: markdown, html, rawHtml, links, video, etc. (default: markdown)'
)
.option(
'--only-main-content',
Expand Down
Loading
Loading