Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions benchmarks/pandas/bench_read_html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Benchmark: pd.read_html — parse HTML tables into DataFrames.
Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import math

def _pip_install(package: str) -> None:
    """Install ``package`` with pip for the interpreter running this script.

    Raises:
        subprocess.CalledProcessError: if the pip invocation exits non-zero.
    """
    import importlib
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
    # The import system caches directory listings; without this, a package
    # installed while the process is running may not be found on re-import.
    importlib.invalidate_caches()


try:
    import pandas as pd
except ImportError:
    _pip_install("pandas")
    import pandas as pd

try:
    # Imported only to check availability; nothing in this file references it
    # directly. NOTE(review): presumably the parser backend used by
    # pd.read_html -- confirm against the pandas docs.
    import lxml  # noqa: F401
except ImportError:
    _pip_install("lxml")

# Number of data rows in the generated HTML table.
ROWS = 1_000
# Untimed pd.read_html calls made before measurement starts.
WARMUP = 3
# Timed pd.read_html calls; the reported mean is the total time divided by this.
ITERATIONS = 20


def build_html(rows: int) -> str:
    """Return an HTML string holding one table with ``rows`` body rows.

    The table has a header row (id, name, value, score). Body row ``i``
    contains ``i``, ``item_{i % 100}``, ``i * 1.5`` formatted to two
    decimals, and ``sin(i * 0.01)`` formatted to six decimals.
    """
    head = "<tr><th>id</th><th>name</th><th>value</th><th>score</th></tr>"

    def render_row(idx: int) -> str:
        # One <tr> with the four data cells for row ``idx``.
        value = idx * 1.5
        score = math.sin(idx * 0.01)
        return (
            f"<tr><td>{idx}</td><td>item_{idx % 100}</td>"
            f"<td>{value:.2f}</td><td>{score:.6f}</td></tr>"
        )

    tbody = "".join(map(render_row, range(rows)))
    return f"<table><thead>{head}</thead><tbody>{tbody}</tbody></table>"


# Passing a literal HTML string to pd.read_html is deprecated since
# pandas 2.1; the supported form is a file-like object, so every call
# below receives an io.StringIO wrapper around the same markup.
from io import StringIO

html = build_html(ROWS)

# Warm-up: untimed calls before measurement starts.
for _ in range(WARMUP):
    pd.read_html(StringIO(html))

# A StringIO is consumed when read, so each timed call needs its own buffer.
# They are built up front to keep buffer construction out of the timed region.
buffers = [StringIO(html) for _ in range(ITERATIONS)]

start = time.perf_counter()
for buffer in buffers:
    pd.read_html(buffer)
# perf_counter() returns seconds; convert the elapsed time to milliseconds.
total_ms = (time.perf_counter() - start) * 1000

# Emit a single JSON object on stdout, as described in the module docstring.
print(json.dumps({
    "function": "read_html",
    "mean_ms": total_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total_ms,
}))
43 changes: 43 additions & 0 deletions benchmarks/tsb/bench_read_html.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* Benchmark: readHtml — parse HTML tables into DataFrames.
* Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
*/
import { readHtml } from "../../src/index.js";

// Number of data rows in the generated HTML table.
const ROWS = 1_000;
// Untimed readHtml calls made before measurement starts.
const WARMUP = 3;
// Timed readHtml calls; the reported mean is the total time divided by this.
const ITERATIONS = 20;

/**
 * Build an HTML string holding one table with `rows` body rows.
 *
 * The table has a header row (id, name, value, score). Body row `i` contains
 * `i`, `item_${i % 100}`, `i * 1.5` with two decimals, and
 * `Math.sin(i * 0.01)` with six decimals.
 */
function buildHtml(rows: number): string {
  const headerRow = "<tr><th>id</th><th>name</th><th>value</th><th>score</th></tr>";
  const cell = (content: string | number): string => `<td>${content}</td>`;

  let tbody = "";
  for (let idx = 0; idx < rows; idx += 1) {
    const cells = [
      idx,
      `item_${idx % 100}`,
      (idx * 1.5).toFixed(2),
      Math.sin(idx * 0.01).toFixed(6),
    ];
    tbody += `<tr>${cells.map((content) => cell(content)).join("")}</tr>`;
  }

  return `<table><thead>${headerRow}</thead><tbody>${tbody}</tbody></table>`;
}

const html = buildHtml(ROWS);

// Warm-up: untimed calls before measurement starts.
for (let pass = 0; pass < WARMUP; pass += 1) {
  readHtml(html);
}

// Timed region: ITERATIONS back-to-back calls on the same input string.
const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass += 1) {
  readHtml(html);
}
const elapsedMs = performance.now() - startedAt;

// Emit a single JSON object on stdout, as described in the file header.
const report = {
  function: "read_html",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));
Loading