75 changes: 75 additions & 0 deletions .github/workflows/metal_workflow.yml
@@ -0,0 +1,75 @@
name: Metal MLX Job
on:
workflow_dispatch:
inputs:
run_id:
description: 'Unique identifier for this run'
required: true
type: string
payload:
description: 'Content of the user submission, as json string'
required: true
type: string
runner:
description: 'Metal runner to run workflow on'
required: true
default: "arc-metal-runner-set"
type: string
requirements:
description: 'Contents for a requirements.txt file'
required: false
type: string

run-name: 'Metal Job - ${{ github.event.inputs.run_id }}'

jobs:
run:
runs-on: ${{ github.event.inputs.runner }}
timeout-minutes: 20
steps:
- uses: actions/checkout@v3

- name: Create input files
shell: bash
run: |
# Extract the payload content without printing it
PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)

# Apply mask to the extracted content
echo "::add-mask::$PAYLOAD"

# Now write to file (won't be logged since it's masked)
echo "$PAYLOAD" > payload.json

- name: Setup Virtual Environment and Install Dependencies
shell: bash
run: |
pip install --break-system-packages --upgrade pip
pip install --break-system-packages -e .

- name: Install requirements
if: ${{ github.event.inputs.requirements != '' }}
shell: bash
run: |
echo "${{ github.event.inputs.requirements }}" > requirements.txt
pip install --break-system-packages -r requirements.txt

- name: Run script
shell: bash
run: |
python3 src/runners/github-runner.py

- name: Upload training artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: run-result
path: result.json

- name: Upload profiling artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: profile-data
path: profile_data/*
retention-days: 1
9 changes: 9 additions & 0 deletions examples/mlx.yaml
@@ -0,0 +1,9 @@
name: MLX Problem Set
deadline: "2026-05-01 03:59"
description: "Test MLX"
problems:
- directory: mlx/example
name: example_mlx
deadline: "2026-05-01 03:59"
gpus:
- M4_Max
133 changes: 133 additions & 0 deletions examples/mlx/example/eval.py
@@ -0,0 +1,133 @@
import math
import os
import re
import sys
import time
from pathlib import Path

import mlx.core as mx

from reference import check_implementation, generate_input
from submission import custom_kernel

WARMUP_ITERS = 10
BENCH_ITERS = 100


class PopcornOutput:
def __init__(self, fd: int):
self.file = os.fdopen(fd, "w")

def __enter__(self):
return self

def __exit__(self, exc_type, exc_val, exc_tb):
self.file.close()

def log(self, key, value):
print(f"{key}: {value}", file=self.file, flush=True)


def get_test_cases(file_name):
content = Path(file_name).read_text()
tests = []
pattern = r"\s*([a-zA-Z_]+):\s*([a-zA-Z_]+|[+-]?[0-9]+)\s*"
for line in content.splitlines():
if not line.strip():
continue
case = {}
for part in line.split(";"):
m = re.fullmatch(pattern, part)
if not m:
print(f"invalid test case: '{line}'", file=sys.stderr)
sys.exit(113)
key, val = m[1], m[2]
try:
val = int(val)
except ValueError:
pass
case[key] = val
tests.append(case)
return tests


def run_testing(logger, tests):
passed = True
logger.log("test-count", len(tests))
for idx, test in enumerate(tests):
logger.log(f"test.{idx}.spec", test)
data = generate_input(**test)
output = custom_kernel(data)
mx.eval(output)
error = check_implementation(data, output)
if error:
logger.log(f"test.{idx}.status", "fail")
logger.log(f"test.{idx}.error", error)
passed = False
else:
logger.log(f"test.{idx}.status", "pass")
logger.log("check", "pass" if passed else "fail")
return 0 if passed else 112


def run_benchmarking(logger, tests):
# warmup
data = generate_input(**tests[0])
for _ in range(WARMUP_ITERS):
mx.eval(custom_kernel(data))

passed = True
logger.log("benchmark-count", len(tests))
for idx, test in enumerate(tests):
logger.log(f"benchmark.{idx}.spec", test)
data = generate_input(**test)
mx.eval(data)

output = custom_kernel(data)
mx.eval(output)
error = check_implementation(data, output)
if error:
logger.log(f"benchmark.{idx}.status", "fail")
logger.log(f"benchmark.{idx}.error", error)
passed = False
continue

durations = []
for i in range(BENCH_ITERS):
start = time.perf_counter_ns()
mx.eval(custom_kernel(data))
durations.append(time.perf_counter_ns() - start)
if i > 1:
avg = sum(durations) / len(durations)
std = math.sqrt(sum((d - avg) ** 2 for d in durations) / (len(durations) - 1))
if std / math.sqrt(len(durations)) / avg < 0.01:
break

avg = sum(durations) / len(durations)
logger.log(f"benchmark.{idx}.runs", len(durations))
logger.log(f"benchmark.{idx}.mean", avg)

logger.log("check", "pass" if passed else "fail")
return 0 if passed else 112


def main():
fd = os.getenv("POPCORN_FD")
if not fd:
return 111
if len(sys.argv) < 3:
return 2

mode = sys.argv[1]
tests = get_test_cases(sys.argv[2])

with PopcornOutput(int(fd)) as logger:
if mode == "test":
return run_testing(logger, tests)
if mode in ("benchmark", "leaderboard"):
return run_benchmarking(logger, tests)
return 2


if __name__ == "__main__":
raise SystemExit(main())
29 changes: 29 additions & 0 deletions examples/mlx/example/reference.py
@@ -0,0 +1,29 @@
import mlx.core as mx


ATOL = 1e-3
RTOL = 1e-3


def generate_input(size, seed=42):
mx.random.seed(seed)
A = mx.random.normal(shape=(size, size)).astype(mx.float16)
B = mx.random.normal(shape=(size, size)).astype(mx.float16)
mx.eval(A, B)
return A, B


def reference_kernel(data):
A, B = data
return A + B


def check_implementation(data, output):
expected = reference_kernel(data)
mx.eval(expected)
if output.shape != expected.shape:
return f"shape mismatch: expected {expected.shape}, got {output.shape}"
if not mx.allclose(output, expected, atol=ATOL, rtol=RTOL).item():
max_diff = mx.max(mx.abs(output - expected)).item()
return f"mismatch found! max diff: {max_diff}"
return ""
6 changes: 6 additions & 0 deletions examples/mlx/example/submission.py
@@ -0,0 +1,6 @@
import mlx.core as mx


def custom_kernel(data):
A, B = data
return A + B
32 changes: 32 additions & 0 deletions examples/mlx/example/task.yml
@@ -0,0 +1,32 @@
files:
- {"name": "submission.py", "source": "@SUBMISSION@"}
- {"name": "reference.py", "source": "reference.py"}
- {"name": "eval.py", "source": "eval.py"}

lang: "py"

description: |
Implement a float16 vector addition kernel using MLX.

Input: tuple(mx.array, mx.array) with arrays of shape (N, N) and type mx.float16.
Output: mx.array of shape (N, N) and type mx.float16

config:
main: "eval.py"

tests:
- {"size": 128, "seed": 5236}
- {"size": 256, "seed": 5531}
- {"size": 512, "seed": 9173}

benchmarks:
- {"size": 1024, "seed": 31232}
- {"size": 4096, "seed": 2146}
- {"size": 16384, "seed": 54352}

test_timeout: 180
benchmark_timeout: 180
ranked_timeout: 180

gpus:
- M4_Max
73 changes: 73 additions & 0 deletions instructions.txt
@@ -0,0 +1,73 @@
## Changes Summary

### New files
- src/libkernelbot/launchers/local.py — LocalLauncher that runs submissions directly on the host machine via run_config(). Blocks CUDA submissions.
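The launcher itself is not shown in this diff view; the following is a minimal sketch of the shape it might take, assuming a `run_submission`-style entry point and that `run_config()` executes the prepared eval harness. The actual base class, method names, import path, and config fields in libkernelbot may differ.

```python
# Hypothetical sketch only — the real LocalLauncher lives in
# src/libkernelbot/launchers/local.py and its interface may differ.
from libkernelbot.run_eval import run_config  # assumed import path


class LocalLauncher:
    """Runs submissions directly on the host machine instead of
    dispatching them to a remote runner."""

    name = "Local"

    def run_submission(self, config: dict):
        # CUDA is not available on an Apple-silicon host, so reject it early.
        # The exact language tag ("cu") is an assumption for illustration.
        if config.get("lang") == "cu":
            raise ValueError("CUDA submissions are not supported by the local launcher")
        # Everything else is handed straight to the shared eval harness.
        return run_config(config)
```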

### Modified files — Adding Metal/MLX support

1. src/libkernelbot/consts.py (a sketch of these additions follows this list)
- Added MetalGPU enum (M4_Max)
- Registered it in _GPU_LOOKUP under "Local" runner
- Added M4_Max: None to GPU_TO_SM

2. src/libkernelbot/launchers/__init__.py — Exports LocalLauncher

3. src/kernelbot/main.py — Registers LocalLauncher() in create_backend()

4. src/kernelbot/cogs/admin_cog.py — Added MetalGPU to Discord GPU dropdowns
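As a rough illustration of the consts.py wiring from item 1, here is a sketch assuming MetalGPU is a plain Enum and that _GPU_LOOKUP maps runner names to GPU lists; the existing entries and exact types come from libkernelbot and may differ.

```python
# Sketch of the consts.py additions; existing CUDA/ROCm entries are elided.
from enum import Enum


class MetalGPU(Enum):
    M4_Max = "M4_Max"


_GPU_LOOKUP = {
    # ... existing runner -> GPU mappings ...
    "Local": list(MetalGPU),
}

GPU_TO_SM = {
    # ... existing CUDA entries ...
    MetalGPU.M4_Max: None,  # Metal GPUs have no CUDA SM version
}
```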

### Modified files — Bug fixes for macOS compatibility

5. src/libkernelbot/run_eval.py — Three fixes in make_system_info() (sketched after this list):
- Added MPS/Metal detection via torch.backends.mps
- Catch FileNotFoundError for nvidia-smi/rocm-smi (don't exist on macOS)
- Catch FileNotFoundError for /proc/cpuinfo (doesn't exist on macOS)

6. src/kernelbot/api/main.py — Replace / with _ in auto-derived dev leaderboard names so nested directories don't break API routing
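The run_eval.py fixes from item 5 boil down to three small guards; a hedged sketch is below. The real make_system_info() gathers more fields, and the dictionary keys here are placeholders. The api/main.py fix in item 6 is presumably a simple replace of "/" with "_" on the auto-derived leaderboard name.

```python
# Sketch of the macOS-compatibility guards in make_system_info();
# only the three guards mirror the actual fixes, field names are illustrative.
import subprocess

import torch


def make_system_info() -> dict:
    info = {}

    # Fix 1: detect Apple-silicon GPUs through the MPS backend.
    if torch.backends.mps.is_available():
        info["gpu"] = "Apple Metal (MPS)"

    # Fix 2: nvidia-smi / rocm-smi do not exist on macOS.
    try:
        info["nvidia_smi"] = subprocess.run(
            ["nvidia-smi"], capture_output=True, text=True
        ).stdout
    except FileNotFoundError:
        pass

    # Fix 3: /proc/cpuinfo does not exist on macOS.
    try:
        with open("/proc/cpuinfo") as f:
            info["cpu"] = f.read()
    except FileNotFoundError:
        pass

    return info
```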

---

## Manual Test Steps

# 1. Start Postgres (if not already running)
brew services start postgresql@14

# 2. Create DB and run migrations
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
createdb kernelbot # skip if already exists
cd /path/to/kernelbot
uv run yoyo apply --database "$DATABASE_URL" src/migrations/

# 3. Create test user
psql "$DATABASE_URL" -c "INSERT INTO leaderboard.user_info (id, user_name, cli_id, cli_valid)
VALUES ('999999', 'testuser', 'test-cli-id-123', true)
ON CONFLICT (id) DO UPDATE SET cli_id = 'test-cli-id-123', cli_valid = true;"

# 4. Install mlx
uv pip install mlx

# 5. Start the API server
cd src/kernelbot
export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot"
export ADMIN_TOKEN="your-admin-token"
export PROBLEM_DEV_DIR="/path/to/kernelbot/examples"
export GITHUB_TOKEN="dummy"
export GITHUB_REPO="dummy/dummy"
export DISABLE_SSL=1
uv run python main.py --api-only

# 6. (In another terminal) Create the dev leaderboard
curl -X POST "http://localhost:8000/admin/leaderboards" \
-H "Authorization: Bearer your-admin-token" \
-H "Content-Type: application/json" \
-d '{"directory": "mlx/example"}'

# 7. Submit a test
curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/test" \
-H "X-Popcorn-Cli-Id: test-cli-id-123" \
-F "file=@examples/mlx/example/submission.py"

# 8. Submit a benchmark
curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/benchmark" \
-H "X-Popcorn-Cli-Id: test-cli-id-123" \
-F "file=@examples/mlx/example/submission.py"