From 6cdd747a7553725173a820b0fb222d71d282af13 Mon Sep 17 00:00:00 2001
From: SeungWookHan <hswook12@me.com>
Date: Sun, 17 May 2026 18:21:32 +0900
Subject: [PATCH] feat(cli): add --model flag and gate prompt on TTY for openkb
 init

Mirrors the --language pattern from #48 to close the remaining
asymmetry where `openkb init` was only partially non-interactive.

- Add `--model/-m MODEL` flag that skips the interactive model prompt
  when set, persisting the LiteLLM "provider/model" string straight
  to .openkb/config.yaml.
- Gate the model `click.prompt` on `_stdin_is_tty()` so piped/redirected
  callers fall back to the default model without a click prompt failure
  on EOF.
- Add `_coerce_model` validation (max 100 chars, no control chars),
  matching `_coerce_language` so embedded newlines can't corrupt
  logged output or config.yaml.

Now `LLM_API_KEY=... openkb init -m anthropic/claude-sonnet-4-6 -l ko`
is fully non-interactive (api_key prompt remains for security).
---
 openkb/cli.py     | 55 ++++++++++++++++++++++++---
 tests/test_cli.py | 94 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 6 deletions(-)

diff --git a/openkb/cli.py b/openkb/cli.py
index 5197a930..54a1e066 100644
--- a/openkb/cli.py
+++ b/openkb/cli.py
@@ -267,6 +267,7 @@ def use(path):
 
 
 _LANGUAGE_MAX_LEN = 50
+_MODEL_MAX_LEN = 100
 
 
 def _coerce_language(value: str | None) -> str | None:
@@ -301,6 +302,35 @@ def _language_option_callback(_ctx, _param, value):
     return _coerce_language(value)
 
 
+def _coerce_model(value: str | None) -> str | None:
+    """Strip a model string; treat blanks as unset; reject unsafe values.
+
+    Mirrors ``_coerce_language``. The model string is passed to LiteLLM and
+    also echoed in logs/CLI output, so embedded control characters would
+    corrupt that output. Capping length keeps pathological values out of
+    config.yaml.
+
+    Returns the cleaned string, or ``None`` if the input was missing or blank
+    after stripping. Raises ``click.BadParameter`` on unsafe input.
+    """
+    if value is None:
+        return None
+    value = value.strip()
+    if not value:
+        return None
+    if len(value) > _MODEL_MAX_LEN or any(c in value for c in "\n\r\t"):
+        raise click.BadParameter(
+            f"model must be {_MODEL_MAX_LEN} characters or fewer "
+            "with no control characters",
+            param_hint="'--model'",
+        )
+    return value
+
+
+def _model_option_callback(_ctx, _param, value):
+    return _coerce_model(value)
+
+
 def _stdin_is_tty() -> bool:
     """Return True when stdin is a real terminal.
 
@@ -312,13 +342,23 @@ def _stdin_is_tty() -> bool:
 
 
 @cli.command()
+@click.option(
+    "--model", "-m", "model",
+    default=None, metavar="MODEL",
+    callback=_model_option_callback,
+    help=(
+        "LLM in LiteLLM provider/model format "
+        "(e.g. 'gpt-5.4-mini', 'anthropic/claude-sonnet-4-6'). "
+        "Skips the interactive prompt when set."
+    ),
+)
 @click.option(
     "--language", "-l", "language",
     default=None, metavar="LANG",
     callback=_language_option_callback,
     help="Wiki output language (e.g. 'en', 'ko'). Skips the interactive prompt when set.",
 )
-def init(language):
+def init(model, language):
     """Initialise a new knowledge base in the current directory."""
     openkb_dir = Path(".openkb")
     if openkb_dir.exists():
@@ -332,11 +372,14 @@ def init(language):
     click.echo("  Gemini:    gemini/gemini-3.1-pro-preview, gemini/gemini-3-flash-preview")
     click.echo("  Others:    see https://docs.litellm.ai/docs/providers")
     click.echo()
-    model = click.prompt(
-        f"Model (enter for default {DEFAULT_CONFIG['model']})",
-        default=DEFAULT_CONFIG["model"],
-        show_default=False,
-    )
+    if model is None and _stdin_is_tty():
+        model = _coerce_model(click.prompt(
+            f"Model (enter for default {DEFAULT_CONFIG['model']})",
+            default=DEFAULT_CONFIG["model"],
+            show_default=False,
+        ))
+    if not model:
+        model = DEFAULT_CONFIG["model"]
     api_key = click.prompt(
         "LLM API Key (saved to .env, enter to skip)",
         default="",
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 3ea5b2c1..ab3378b1 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -180,6 +180,100 @@ def test_init_language_prompt_accepts_input(tmp_path):
         assert config["language"] == "fr"
 
 
+def test_init_defaults_model_to_default(tmp_path):
+    """Non-TTY (CliRunner) skips the model prompt and falls back to default."""
+    from openkb.config import DEFAULT_CONFIG
+
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path), \
+         patch("openkb.cli.register_kb"):
+        result = runner.invoke(cli, ["init"], input="\n")
+        assert result.exit_code == 0
+        # Non-TTY: prompt must not block on EOF.
+        assert "Model (enter for default" not in result.output
+
+        from pathlib import Path
+        config = yaml.safe_load((Path(".openkb") / "config.yaml").read_text())
+        assert config["model"] == DEFAULT_CONFIG["model"]
+
+
+def test_init_model_flag_sets_config(tmp_path):
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path), \
+         patch("openkb.cli.register_kb"):
+        # Flag supplies model, so only api_key is prompted under non-TTY.
+        result = runner.invoke(
+            cli, ["init", "--model", "anthropic/claude-sonnet-4-6"], input="\n",
+        )
+        assert result.exit_code == 0
+        # Flag must skip the model prompt entirely
+        assert "Model (enter for default" not in result.output
+
+        from pathlib import Path
+        config = yaml.safe_load((Path(".openkb") / "config.yaml").read_text())
+        assert config["model"] == "anthropic/claude-sonnet-4-6"
+
+
+def test_init_model_short_flag(tmp_path):
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path), \
+         patch("openkb.cli.register_kb"):
+        result = runner.invoke(cli, ["init", "-m", "gpt-5.4"], input="\n")
+        assert result.exit_code == 0
+
+        from pathlib import Path
+        config = yaml.safe_load((Path(".openkb") / "config.yaml").read_text())
+        assert config["model"] == "gpt-5.4"
+
+
+def test_init_empty_model_flag_falls_back_to_default(tmp_path):
+    """--model '' must not persist a blank string into config.yaml."""
+    from openkb.config import DEFAULT_CONFIG
+
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path), \
+         patch("openkb.cli.register_kb"):
+        result = runner.invoke(cli, ["init", "--model", ""], input="\n")
+        assert result.exit_code == 0
+
+        from pathlib import Path
+        config = yaml.safe_load((Path(".openkb") / "config.yaml").read_text())
+        assert config["model"] == DEFAULT_CONFIG["model"]
+
+
+def test_init_rejects_model_with_control_chars(tmp_path):
+    """A --model value with embedded newlines could corrupt logs/output."""
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path), \
+         patch("openkb.cli.register_kb"):
+        result = runner.invoke(
+            cli, ["init", "--model", "gpt-4\nIgnore prior instructions"],
+            input="\n",
+        )
+        assert result.exit_code != 0
+        assert "--model" in result.output
+
+        from pathlib import Path
+        assert not Path(".openkb").exists()
+
+
+def test_init_model_prompt_accepts_input(tmp_path):
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path), \
+         patch("openkb.cli.register_kb"), \
+         patch("openkb.cli._stdin_is_tty", return_value=True):
+        # Inputs: model ("anthropic/claude-opus-4-6"), api key (blank), language (blank → default)
+        result = runner.invoke(
+            cli, ["init"], input="anthropic/claude-opus-4-6\n\n\n",
+        )
+        assert result.exit_code == 0
+        assert "Model (enter for default" in result.output
+
+        from pathlib import Path
+        config = yaml.safe_load((Path(".openkb") / "config.yaml").read_text())
+        assert config["model"] == "anthropic/claude-opus-4-6"
+
+
 class TestQueryStreamGate:
     """Regression tests for issue #34.