diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 38b8c2837..b916cad82 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -146,6 +146,35 @@ jobs:
       - run: bun install --frozen-lockfile
       - run: bun run --filter @hyperframes/core test:hyperframe-runtime-ci
 
+  # Native renderer checks: the Rust crate tests, the TypeScript bridge tests,
+  # and a short render-comparison shard against one regression fixture.
+  native-renderer:
+    name: "Test: native renderer"
+    needs: changes
+    if: needs.changes.outputs.code == 'true'
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          lfs: true
+      - uses: oven-sh/setup-bun@v2
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+      - uses: dtolnay/rust-toolchain@stable
+      - name: Install FFmpeg
+        run: sudo apt-get update && sudo apt-get install -y ffmpeg
+      - run: bun install --frozen-lockfile
+      - run: cargo test --manifest-path packages/native-renderer/Cargo.toml -- --test-threads=1
+      - run: bun test packages/cli/src/utils/nativeBackend.test.ts packages/native-renderer/src/scene/extract.test.ts packages/native-renderer/src/scene/support.test.ts packages/native-renderer/src/timeline/bake.test.ts
+      - name: Native renderer comparison shard
+        run: |
+          bun packages/native-renderer/scripts/compare-regression-fixtures.ts \
+            --fixtures gsap-letters-render-compat \
+            --max-duration 0.25 \
+            --artifacts /tmp/native-renderer-comparison-ci
+
   smoke-global-install:
     name: "Smoke: global install"
     needs: [changes, build]
diff --git a/.github/workflows/native-renderer.yml b/.github/workflows/native-renderer.yml
new file mode 100644
index 000000000..7204c621c
--- /dev/null
+++ b/.github/workflows/native-renderer.yml
@@ -0,0 +1,71 @@
+# Builds, tests, and benchmarks packages/native-renderer on a Linux runner.
+# Every cargo command passes --no-default-features (the CPU raster build, per the step names).
+name: Native Renderer
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths:
+      - "packages/native-renderer/**"
+      - ".github/workflows/native-renderer.yml"
+  push:
+    branches: [main]
+    paths:
+      - "packages/native-renderer/**"
+
+# One active run per ref; a newer run cancels the one still in progress.
+concurrency:
+  group: native-renderer-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Tests (Linux x86_64, CPU raster)
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Cargo + Skia
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            packages/native-renderer/target
+          key: native-${{ runner.os }}-${{ hashFiles('packages/native-renderer/Cargo.lock') }}
+          restore-keys: native-${{ runner.os }}-
+
+      - name: Install system deps
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            clang libclang-dev pkg-config \
+            libfontconfig1-dev libfreetype6-dev \
+            ninja-build python3 \
+            ffmpeg fonts-liberation fonts-dejavu-core fontconfig
+          sudo fc-cache -fv
+
+      - name: Build (CPU raster, no GPU)
+        working-directory: packages/native-renderer
+        run: cargo build --release --no-default-features --tests
+
+      - name: Run tests
+        working-directory: packages/native-renderer
+        run: cargo test --release --no-default-features -- --test-threads=1
+
+      - name: Benchmark
+        working-directory: packages/native-renderer
+        run: |
+          # Without pipefail the pipeline's status is tee's, so a failing
+          # `cargo bench` would still leave this step green.
+          set -o pipefail
+          cargo bench --no-default-features 2>&1 | tee /tmp/bench.txt
+          echo "## Native Renderer Benchmark" >> "$GITHUB_STEP_SUMMARY"
+          echo "Linux x86_64, CPU raster, no GPU" >> "$GITHUB_STEP_SUMMARY"
+          echo '```' >> "$GITHUB_STEP_SUMMARY"
+          grep -E 'time:|^[a-z].*time:' /tmp/bench.txt >> "$GITHUB_STEP_SUMMARY" || echo "No benchmark output" >> "$GITHUB_STEP_SUMMARY"
+          echo '```' >> "$GITHUB_STEP_SUMMARY"
diff --git a/docs/native-renderer-roadmap.md b/docs/native-renderer-roadmap.md
new file mode 100644
index 000000000..07cd02fe8
--- /dev/null
+++ b/docs/native-renderer-roadmap.md
@@ -0,0 +1,395 @@
+# HyperFrames Native Renderer — Roadmap to Renderer Supremacy
+
+---
+
+## The Problem
+
+Every video rendering framework today — HyperFrames included, Remotion included — renders video the same way: open a headless Chrome browser, seek to each frame, take a screenshot via the Chrome DevTools Protocol, serialize it as base64, transfer it over a
WebSocket, decode it, and pipe it to FFmpeg. This costs **14-40 milliseconds per frame**. For a 30-second video at 30fps, that's 12-36 seconds just on screenshots — and no amount of configuration tuning can make it faster. The CDP serialization round-trip is a physics wall. + +## What We're Building + +A native video renderer in Rust that uses **Skia** — the same 2D graphics library that Chrome itself uses to paint every pixel you see — to render composition frames directly, bypassing Chrome entirely. Chrome is used once (~200ms) to extract the layout tree, then Skia paints all subsequent frames at **1.8ms per frame on CPU** and **0.22ms per frame on GPU**. That's 8-180x faster than the Chrome screenshot path. + +## Why Rust + +- **Performance**: Rust compiles to native machine code with zero garbage collection pauses. Frame rendering is a tight loop where every millisecond matters — GC pauses from Node.js or Go would show up as frame drops. +- **Skia bindings**: The `skia-safe` crate provides production-grade Rust bindings to Google's Skia library (the same C++ engine inside Chrome, Android, and Flutter). We get Chrome-identical rendering quality from Chrome's own engine. +- **Memory safety**: Video rendering processes gigabytes of pixel data. Rust's ownership model prevents buffer overflows, use-after-free, and data races that would be silent bugs in C/C++. +- **Cross-platform**: One codebase compiles to macOS (Metal GPU), Linux (Vulkan GPU or CPU), and can target Windows. No JVM, no runtime, no container dependency beyond FFmpeg. +- **Ecosystem**: H.264 encoding (openh264, BSD-licensed), MP4 muxing (minimp4), SIMD color conversion (dcv-color-primitives) — all available as Rust crates with permissive licenses. + +## What This Solves + +1. **Rendering speed**: 8-22x faster on CPU, 64-182x faster on GPU. A 30-second video that takes 40 seconds with Chrome renders in 2-5 seconds natively. +2. **Infrastructure cost**: Each Chrome instance uses ~256MB RAM. 
The native renderer uses ~50MB. On cloud GPU instances, one machine renders 10-50x more videos per hour. +3. **Competitive moat**: Remotion cannot copy this (explained below). Our composition format enables native rendering; theirs doesn't. +4. **Path to real-time**: At 0.22ms/frame on GPU, we can preview compositions at 60fps+ in a native desktop app without a browser. + +## Definition of Done + +The native renderer is production-ready when: + +1. **`--backend auto` produces correct output** for all compositions. Unsupported compositions fall back to Chrome automatically. No silent quality loss. +2. **80%+ of regression test fixtures render natively** with PSNR > 30dB against Chrome output (imperceptible visual difference). +3. **Render speed is 10x+ faster** than Chrome CDP on Linux CPU for supported compositions, measured end-to-end (paint + encode + mux). +4. **Every claim is benchmarked**: speed numbers come from CI, not local machines. PSNR scores come from the regression harness, not eyeballing. +5. **Zero regressions** in the existing Chrome pipeline. The native renderer is additive — it doesn't modify any existing code path. + +## Current Status + +**Phases 1-3 complete.** 51 Rust tests + 31 TypeScript tests, all passing on macOS and Linux CI. The paint layer is proven fast. The encoding layer and visual fidelity gap are the remaining work (Phases 4-7, ~8-12 weeks). + +--- + +## 1. Why This Is an Unchallengeable Moat + +### Remotion Cannot Follow + +Remotion's API contract is **React components rendered in Chrome**. Every Remotion composition is a React component that uses hooks (`useCurrentFrame`, `useVideoConfig`), renders JSX to a real DOM, and relies on Chrome to paint it. 
This means: + +- Remotion MUST run Chrome on every frame (React needs a DOM) +- Remotion CANNOT switch to Skia (every user component would break) +- Remotion CANNOT pre-bake timelines (React components can `useEffect`, `fetch`, `useState`) + +HyperFrames compositions are **declarative HTML + GSAP timelines**. The HTML defines a static scene graph. GSAP defines a deterministic function: `time → property values`. Neither requires a browser to evaluate. This means: + +- We can extract the scene graph once and replay it natively +- We can evaluate GSAP in a lightweight V8 isolate (no DOM needed) +- We can paint with Skia (Chrome's own engine) at GPU speed + +The **authoring format** is the moat. HTML+GSAP is thin enough to map to native rendering. React is too thick. + +### The Adapter Architecture + +Because our composition format is a thin declarative layer, ANY rendering backend can consume it: + +| Adapter | Use Case | Status | +|---|---|---| +| Chrome CDP (BeginFrame) | Maximum compatibility, Linux headless | Production | +| Chrome CDP (Screenshot) | Cross-platform, macOS/Windows | Production | +| WebCodecs (browser) | In-browser export, no server | PR #239 | +| **Skia/Rust (native)** | **Maximum speed, production rendering** | **This roadmap** | +| Skia GPU (Metal) | Local dev on Mac | Working (0.22ms/frame) | +| Skia GPU (Vulkan+NVENC) | Cloud GPU instances | Stub ready | + +Remotion is locked to one adapter (Chrome). We can have six, each optimized for a different context, all rendering the same compositions identically. + +--- + +## 2. 
Current Performance Numbers + +### Measured on GitHub Actions (Linux x86_64, CPU only, no GPU) + +| Benchmark | 30 frames at 1080p | Per frame | vs Chrome CDP | +|---|---|---|---| +| Skia CPU paint | 54ms | **1.8ms** | **7.8-22x faster** | +| Paint + BGRA readback | 55ms | 1.83ms | 7.6-22x faster | +| Paint + I420 convert | 95ms | 3.17ms | 4.4-12.6x faster | +| Raw render + FFmpeg batch | 241ms | 8.03ms | 1.7-5x faster | +| Native openh264 in-process | 478ms | 15.9ms | 0.9-2.5x (roughly parity to 2.5x faster) | +| FFmpeg JPEG pipe (baseline) | 1,128ms | 37.6ms | ~1x (same as Chrome) | + +### Measured on macOS Apple Silicon (Metal GPU) + +| Benchmark | Per frame | vs Chrome CDP | +|---|---|---| +| GPU paint (Metal) | **0.22ms** | **64-182x faster** | +| GPU + BGRA readback | 1.17ms | 12-34x faster | +| E2E with VideoToolbox | 11.2ms | 1.3-3.6x faster | + +### Key Insight + +The **paint** is 8-180x faster than Chrome CDP. The **encoding** is the remaining bottleneck. The roadmap addresses both. + +--- + +## 3. Architecture + +### Current Pipeline (Chrome CDP) + +``` +HTML composition + → Producer compiles (resolve sub-compositions, inline scripts) + → Chrome loads compiled HTML + → Per frame: seek(t) → Chrome paint → CDP screenshot → base64 → Node.js → FFmpeg + → FFmpeg encodes H.264 → mux with audio → MP4 + + Cost: 14-40ms per frame (CDP screenshot is the bottleneck) +``` + +### Native Pipeline (Skia/Rust) + +``` +HTML composition + → Producer compiles (same as Chrome path — reused) + → Chrome loads compiled HTML (one-shot, ~200ms) + → Extract scene graph (element positions, sizes, styles → JSON) + → Bake timeline (GSAP seek at every frame → property values → JSON) + → Chrome closes (never used again) + → Rust binary: Skia paints each frame from scene + timeline + → openh264 encodes H.264 → minimp4 muxes → MP4 + + Cost: 1.8-3.2ms per frame (Skia paint + color convert; excludes H.264 encode + mux) +``` + +### Hybrid Pipeline (Phase 4 target) + +``` +HTML composition + → Producer compiles + → Support detector classifies 
each element: + Supported → Skia native paint + Unsupported → Chrome CDP screenshot (per-element or full-frame) + → Composite: native layers + Chrome layers → final frame + → Encode → MP4 + + Cost: 2-10ms per frame depending on native coverage +``` + +--- + +## 4. What's Built (Phases 1-3) + +### Rust Crate: `packages/native-renderer/` + +| Component | Files | Tests | Status | +|---|---|---|---| +| Scene graph types + JSON parser | `scene/mod.rs`, `scene/parse.rs` | 7 | Done | +| Skia raster surface + encoding | `paint/canvas.rs` | 4 | Done | +| Element painter (Tier 1+2 CSS) | `paint/elements.rs` | 4 | Done | +| Visual effects (shadow, blur, gradient) | `paint/effects.rs` | 8 | Done | +| Image compositing (JPEG/PNG/WebP) | `paint/images.rs` | 6 | Done | +| Animated pipeline (baked timeline) | `pipeline.rs` | 3 | Done | +| Raw render + deferred encode | `pipeline.rs` | — | Done | +| Native H.264 + MP4 (openh264+minimp4) | `native_encode.rs` | — | Done | +| Hardware encoder detection | `encode.rs` | 12 | Done | +| Metal GPU surface | `paint/canvas.rs` | — | Done (macOS) | +| Vulkan GPU surface | `paint/canvas.rs` | — | Stub (needs GPU) | +| CLI binary | `bin/render_native.rs` | — | Done | +| Criterion benchmarks | `benches/render_bench.rs` | — | Done | + +### TypeScript Bridge + +| Component | Files | Tests | Status | +|---|---|---|---| +| CDP scene extraction | `scene/extract.ts` | 5 | Done | +| Timeline baking | `timeline/bake.ts` | 5 | Done | +| Support detection | `scene/support.ts` | 21 | Done | + +### Integration + +| Component | Status | +|---|---| +| `hyperframes render --backend native` CLI flag | Done | +| Support detector → auto fallback to Chrome | Done | +| GitHub Actions CI (Linux, CPU raster) | Done | +| Docker test image | Done | +| Cross-platform feature flags (Metal/Vulkan/CPU) | Done | + +### Total: 51 Rust tests + 31 TypeScript tests, all passing on macOS and Linux CI. + +--- + +## 5. 
Visual Fidelity Gap + +### What the Native Renderer Paints Correctly Today + +| CSS Feature | Skia Equivalent | Fidelity | +|---|---|---| +| `background-color` (solid) | `Canvas::draw_rect` | Pixel-perfect | +| `border-radius` | `Canvas::draw_rrect` / `clip_rrect` | Pixel-perfect | +| `overflow: hidden` | `Canvas::clip_rect/rrect` | Pixel-perfect | +| `transform` (translate/rotate/scale) | `Canvas::concat` matrix | Pixel-perfect | +| `opacity` | `save_layer_alpha` | Pixel-perfect | +| `visibility/display` | Skip element | Pixel-perfect | +| `box-shadow` (single) | `MaskFilter::blur` + offset | Close (~28-32dB PSNR) | +| `filter: blur()` | `ImageFilter::blur` | Close | +| `filter: brightness/contrast/saturate` | `ColorFilter::matrix` | Close | +| `background: linear-gradient()` | `gradient_shader::linear` | Close | +| `background: radial-gradient()` | `gradient_shader::radial` | Close | +| `clip-path: circle/ellipse` | `Canvas::clip_path` | Close | +| `mix-blend-mode` | `Paint::set_blend_mode` | Pixel-perfect | +| Images (JPEG/PNG/WebP) | `Canvas::draw_image_rect` | Pixel-perfect | + +### What's NOT Faithful Yet (the Gap) + +| Feature | Why It's Hard | Impact on Apple Presentation | +|---|---|---| +| **Text rendering** | Chrome uses platform-specific font rasterizers (Core Text on Mac, FreeType+HarfBuzz on Linux) with sub-pixel AA. Skia uses its own HarfBuzz path which produces slightly different glyph positions and anti-aliasing. Custom web fonts (Google Fonts) need explicit loading. | High — every slide has text | +| **CSS layout** | Chrome computes flex, grid, absolute/relative positioning. We extract computed positions from Chrome, but if animation changes layout (e.g., text reflow), the extracted positions are stale. | Medium — most animations are transform/opacity only | +| **Video compositing** | Chrome decodes and renders `