gemini-cli-extensions · Airwhale · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/.env.example b/.env.example
@@ -0,0 +1,86 @@
+# Copy to .env (gitignored). The runner reads this from the repo root, not CWD,
+# so you configure it once here and invoke `review.py` from any project
+# directory. You only need to set the key for whichever provider you actually
+# use; the runner fails fast with a clear message if the key for the selected
+# provider is missing.
+
+# --- Provider selection (default: openrouter) -------------------------------
+# Per-run override: `review.py --provider gemini`
+# CODE_REVIEW_PROVIDER=openrouter
+
+# --- OpenRouter --------------------------------------------------------------
+# Required when --provider openrouter (the default). https://openrouter.ai/keys
+OPENROUTER_API_KEY=
+
+# Optional: default model for --provider openrouter. OpenRouter model slugs
+# are vendor-prefixed. `google/gemini-2.5-flash` is ~3x faster than pro.
+# OPENROUTER_MODEL=google/gemini-2.5-pro
+
+# Optional: identifying headers OpenRouter surfaces in its dashboard.
+# OPENROUTER_HTTP_REFERER=https://github.com/Airwhale/local-gemini-code-review
+# OPENROUTER_X_TITLE=OpenRouter Code Review
+
+# --- Gemini API (Google AI Studio) -------------------------------------------
+# Required when --provider gemini. https://aistudio.google.com/apikey
+GEMINI_API_KEY=
+
+# Optional: default model when --provider gemini is selected. The Gemini API
+# takes the bare model name (no `google/` prefix).
+# GEMINI_MODEL=gemini-2.5-pro
+
+# --- Ollama (local LLM) ------------------------------------------------------
+# No API key needed -- Ollama runs locally. Install from https://ollama.com
+# (or inside WSL on Windows if Smart App Control / Application Control blocks
+# the native installer). Start the server with `ollama serve`, then pull at
+# least one model, e.g. `ollama pull qwen3-coder:30b`.
+#
+# Per-run override: `review.py --provider ollama`
+
+# Optional: Ollama server URL. Default is http://localhost:11434, which works
+# for native Windows installs and for WSL2 when localhost is shared with
+# Windows (localhost forwarding or mirrored networking). Override if Ollama
+# listens on a non-default port, runs on another machine, or your WSL distro
+# does not share localhost (use the WSL2 IP from `wsl hostname -I` then).
+# OLLAMA_HOST=http://localhost:11434
+
+# Optional: default model when --provider ollama is selected. Recommended:
+# `qwen3-coder:30b` (30B MoE coder, ~3.3B active params -- the quality/speed
+# sweet spot on CPU since active params drive inference speed, not total).
+# Higher quality: `qwen3-coder-next` (80B/3B active MoE, ~40 GB download).
+# Use named aliases via `--model`: `local` (qwen3-coder:30b),
+# `local-pro` (qwen3-coder-next). For very weak hardware that can't load
+# 30B, pass an explicit small model slug like `--model qwen2.5-coder:7b`
+# rather than expecting an alias -- the qwen2.5 family is a generation
+# behind on code review quality, so we don't expose it as a default tier.
+# OLLAMA_MODEL=qwen3-coder:30b
+
+# Optional: HTTP request timeout in seconds. Default 1800 (30 minutes) --
+# significantly larger than the cloud-provider default because local CPU
+# inference can be slow on first call (model load adds 10-60s) and on
+# thorough reviews of large diffs. Set lower if you want the runner to
+# give up faster on a hung model; raise if you're on slower hardware or
+# running larger models.
+# OLLAMA_TIMEOUT=1800
+
+# --- Sampling tuning ---------------------------------------------------------
+# Both settings apply to all providers (openrouter, gemini, ollama) and can be
+# overridden per call via `--temperature` / `--max-tokens`. Defaults are 0.3
+# and 16000 respectively. Temperature history: 0.2 (original) was too
+# conservative (1-2 findings per round on diffs that plausibly contained
+# more); a brief 0.5 default produced a hallucinated finding in cross-model
+# review (Gemini referenced a CLI flag that didn't exist). 0.3 splits the
+# difference. Set lower if you want tighter / fewer findings; set higher if
+# you want broader exploration and accept more false positives. `max_tokens`
+# is a ceiling, not a target -- you pay only for tokens actually emitted.
+# CODE_REVIEW_TEMPERATURE=0.3
+# CODE_REVIEW_MAX_TOKENS=16000
+
+# --- Safety context ----------------------------------------------------------
+# Optional prompt prefix prepended to every review request. Reduces false-
+# positive content-filter refusals when the diff under review uses words that
+# look adversarial in isolation (security testing, policy enforcement,
+# sanctions screening, etc.). The runner ships a generic default; set this
+# env var to override per environment (e.g. add your project's name + brief
+# subject matter). Override per call with `--context "..."` or disable
+# entirely with `--no-context`. See the README's "Safety context" section.
+# CODE_REVIEW_CONTEXT="The diff below is from <your project>; treat as authorized code review."
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,21 @@
+# Local env / secrets — never commit.
+.env
+.env.*
+!.env.example
+
+# Python / uv
+__pycache__/
+*.pyc
+.venv/
+.pytest_cache/
+*.egg-info/
+
+# Editor
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db