Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions skills/publish-to-pages/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,37 @@ Ask the user for a **repo name** if not provided. Default: filename without exte

## 3. Conversion

### Large File Handling

Both conversion scripts automatically detect large files and switch to **external assets mode**:
- **PPTX:** Files >20MB or with >50 images → images saved as separate files in `assets/`
- **PDF:** Files >20MB or with >50 pages → page PNGs saved in `assets/`
- Files >150MB print a warning (PPTX suggests PDF path instead)

This keeps individual files well under GitHub's 100MB limit. Small files still produce a single self-contained HTML.

You can force the behavior with `--external-assets` or `--no-external-assets`.

### HTML
No conversion needed. Use the file directly as `index.html`.

### PPTX
Run the conversion script:
```bash
python3 SKILL_DIR/scripts/convert-pptx.py INPUT_FILE /tmp/output.html
# For large files, force external assets:
python3 SKILL_DIR/scripts/convert-pptx.py INPUT_FILE /tmp/output.html --external-assets
```
If `python-pptx` is missing, tell the user: `pip install python-pptx`

### PDF
Convert with the included script (requires `poppler-utils` for `pdftoppm`):
```bash
python3 SKILL_DIR/scripts/convert-pdf.py INPUT_FILE /tmp/output.html
# For large files, force external assets:
python3 SKILL_DIR/scripts/convert-pdf.py INPUT_FILE /tmp/output.html --external-assets
```
Each page is rendered as a PNG and base64-embedded into a self-contained HTML with slide navigation.
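Each page is rendered as a PNG and shown in an HTML presentation with slide navigation. By default the PNGs are base64-embedded into a single self-contained HTML file; in external assets mode they are saved in `assets/` and referenced from the HTML instead.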
If `pdftoppm` is missing, tell the user: `apt install poppler-utils` (or `brew install poppler` on macOS).

### Google Slides
Expand All @@ -72,7 +87,9 @@ bash SKILL_DIR/scripts/publish.sh /path/to/index.html REPO_NAME public "Descript

Pass `private` instead of `public` if the user requests it.

The script creates the repo, pushes `index.html`, and enables GitHub Pages.
The script creates the repo, pushes `index.html` (plus `assets/` if present), and enables GitHub Pages.

**Note:** When external assets mode is used, the output HTML references files in `assets/`. The publish script automatically detects and copies the `assets/` directory alongside the HTML file. Make sure the `assets/` directory sits next to the HTML file, i.e. both are in the same directory.

## 5. Output

Expand Down
83 changes: 69 additions & 14 deletions skills/publish-to-pages/scripts/convert-pdf.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/usr/bin/env python3
"""Convert a PDF to a self-contained HTML presentation.
"""Convert a PDF to an HTML presentation.

Each page is rendered as a PNG image (via pdftoppm) and base64-embedded
into a single HTML file with slide navigation (arrows, swipe, click).
Each page is rendered as a PNG image (via pdftoppm). Supports external assets
mode for large files to avoid huge single-file HTML.

Requirements: poppler-utils (pdftoppm)
Usage: python3 convert-pdf.py input.pdf [output.html] [--external-assets | --no-external-assets] [--dpi DPI]
"""

import argparse
import base64
import glob
import os
Expand All @@ -17,19 +17,50 @@
from pathlib import Path


def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150):
def get_page_count(pdf_path):
"""Get page count using pdfinfo if available."""
try:
result = subprocess.run(["pdfinfo", pdf_path], capture_output=True, text=True)
for line in result.stdout.splitlines():
if line.startswith("Pages:"):
return int(line.split(":")[1].strip())
except:
pass
return None


def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150, external_assets=None):
pdf_path = str(Path(pdf_path).resolve())
if not Path(pdf_path).exists():
print(f"Error: {pdf_path} not found")
sys.exit(1)

# Check for pdftoppm
if subprocess.run(["which", "pdftoppm"], capture_output=True).returncode != 0:
print("Error: pdftoppm not found. Install poppler-utils:")
print(" apt install poppler-utils # Debian/Ubuntu")
print(" brew install poppler # macOS")
sys.exit(1)

file_size_mb = os.path.getsize(pdf_path) / (1024 * 1024)

if file_size_mb > 150:
print(f"WARNING: PDF is {file_size_mb:.0f}MB — conversion may be slow and memory-intensive.")

page_count = get_page_count(pdf_path)

# Auto-detect external assets mode
if external_assets is None:
external_assets = file_size_mb > 20 or (page_count is not None and page_count > 50)
if external_assets:
print(f"Auto-enabling external assets mode (file: {file_size_mb:.1f}MB, pages: {page_count or 'unknown'})")

output = output_path or str(Path(pdf_path).with_suffix('.html'))
output_dir = Path(output).parent

if external_assets:
assets_dir = output_dir / "assets"
assets_dir.mkdir(parents=True, exist_ok=True)

with tempfile.TemporaryDirectory() as tmpdir:
prefix = os.path.join(tmpdir, "page")
result = subprocess.run(
Expand All @@ -48,15 +79,23 @@ def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150):
slides_html = []
for i, page_path in enumerate(pages, 1):
with open(page_path, "rb") as f:
b64 = base64.b64encode(f.read()).decode()
page_bytes = f.read()

if external_assets:
img_name = f"img-{i:03d}.png"
(assets_dir / img_name).write_bytes(page_bytes)
src = f"assets/{img_name}"
else:
b64 = base64.b64encode(page_bytes).decode()
src = f"data:image/png;base64,{b64}"

slides_html.append(
f'<section class="slide">'
f'<div class="slide-inner">'
f'<img src="data:image/png;base64,{b64}" alt="Page {i}">'
f'<img src="{src}" alt="Page {i}">'
f'</div></section>'
)

# Try to extract title from filename
title = Path(pdf_path).stem.replace("-", " ").replace("_", " ")

html = f'''<!DOCTYPE html>
Expand Down Expand Up @@ -108,14 +147,30 @@ def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150):
</script>
</body></html>'''

output = output_path or str(Path(pdf_path).with_suffix('.html'))
Path(output).write_text(html, encoding='utf-8')
output_size = os.path.getsize(output)

print(f"Converted to: {output}")
print(f"Pages: {len(slides_html)}")
print(f"Output size: {output_size / (1024*1024):.1f}MB")
print(f"External assets: {'yes' if external_assets else 'no'}")


if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: convert-pdf.py <file.pdf> [output.html]")
sys.exit(1)
convert(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
# Command-line interface: positional input/output paths plus rendering options.
parser = argparse.ArgumentParser(description="Convert PDF to HTML presentation")
parser.add_argument("input", help="Path to .pdf file")
parser.add_argument("output", nargs="?", help="Output HTML path (default: same name with .html)")

# The two asset flags contradict each other, so they live in a mutually
# exclusive group: passing both is an argparse error instead of one flag
# silently winning. Both write to the same destination, which stays None
# when neither flag is given.
assets_mode = parser.add_mutually_exclusive_group()
assets_mode.add_argument("--external-assets", dest="external_assets",
                         action="store_true", default=None,
                         help="Save page images as separate files in assets/ directory (auto-detected for large files)")
assets_mode.add_argument("--no-external-assets", dest="external_assets",
                         action="store_false", default=None,
                         help="Force inline base64 even for large files")
parser.add_argument("--dpi", type=int, default=150, help="Render DPI (default: 150)")
args = parser.parse_args()

# Tri-state: True / False when forced on the command line, None otherwise.
ext_assets = args.external_assets
Comment on lines +160 to +174
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

--external-assets and --no-external-assets can both be provided at the same time; currently the script will silently prefer --external-assets. Make these flags mutually exclusive (and produce a clear argparse error when both are set) so users can’t accidentally get the opposite behavior than intended.

Copilot uses AI. Check for mistakes.

convert(args.input, args.output, dpi=args.dpi, external_assets=ext_assets)
Loading
Loading