diff --git a/veadk/cli/cli_frontend.py b/veadk/cli/cli_frontend.py
index e1c09864..cb1bd1de 100644
--- a/veadk/cli/cli_frontend.py
+++ b/veadk/cli/cli_frontend.py
@@ -927,6 +927,13 @@ async def _delete_temp_agent(app_name: str):
         oauth2_config = _build_generic_oauth2(provider_id or "custom", redirect_uri)
         provider_id = provider_id or "custom"
 
+    # The SPA fetches /web/auth-config and /oauth2/userinfo on every startup, so
+    # both must always return JSON. With SSO off we answer with an empty provider
+    # list and a 401 (unauthenticated), and the app renders its normal no-login
+    # UI; otherwise the SPA-fallback serves the HTML shell for these paths and the
+    # app's `await res.json()` throws, leaving a white screen.
+    providers: list[dict] = []
+
     if oauth2_config is not None:
         from urllib.parse import urlsplit
 
@@ -950,10 +957,6 @@ async def _delete_temp_agent(app_name: str):
         )
         providers = [{"id": provider_id, "label": label, "loginUrl": "/oauth2/login"}]
 
-        @app.get("/web/auth-config")
-        async def _web_auth_config():
-            return {"providers": providers}
-
         # Protect the API but exempt the SPA shell + this config endpoint so the
         # app can load and render its own login page when not signed in.
         setup_oauth2(
@@ -965,6 +968,19 @@ async def _web_auth_config():
         logger.info(
             f"OAuth2 SSO enabled (provider={provider_id}, redirect_uri={redirect_uri})"
         )
+    else:
+        from fastapi.responses import JSONResponse
+
+        @app.get("/oauth2/userinfo")
+        async def _userinfo_no_sso():
+            # No SSO configured: report unauthenticated (401) so the SPA's auth
+            # check resolves cleanly instead of parsing the HTML shell as JSON.
+            return JSONResponse({"status": "unauthenticated"}, status_code=401)
+
+    @app.get("/web/auth-config")
+    async def _web_auth_config():
+        # Empty provider list when SSO is off -> the SPA shows its normal UI.
+        return {"providers": providers}
 
     if dev:
         logger.info(
diff --git a/veadk/runtime/codex/runtime.py b/veadk/runtime/codex/runtime.py
index 4bd835c4..bec2b9b0 100644
--- a/veadk/runtime/codex/runtime.py
+++ b/veadk/runtime/codex/runtime.py
@@ -39,10 +39,14 @@
 from typing import TYPE_CHECKING, AsyncGenerator
 
 from openai_codex import AsyncCodex  # type: ignore[import-not-found]
+from openai_codex.generated.v2_all import (  # type: ignore[import-not-found]
+    ItemCompletedNotification,
+    TurnCompletedNotification,
+)
 
 from veadk.runtime.base_runtime import BaseRuntime, build_system_append
 from veadk.runtime.codex.proxy import get_shim_url
-from veadk.runtime.codex.translate import build_prompt, result_to_events
+from veadk.runtime.codex.translate import build_prompt, item_to_events
 from veadk.utils.logger import get_logger
 
 if TYPE_CHECKING:
@@ -98,9 +102,38 @@ async def run_async(
         os.environ["CODEX_HOME"] = codex_home
         os.environ[_KEY_ENV] = _LOCAL_SHIM_TOKEN
         try:
+            # Stream the turn: emit ADK events as each Codex item completes
+            # (reasoning, tool calls, messages) instead of collecting the whole
+            # turn first. This keeps the BaseRuntime async-generator contract
+            # truly incremental, so thinking/tool steps show up live (a blocking
+            # thread.run() would leave the client silent for the whole turn).
             async with AsyncCodex() as codex:
                 thread = await codex.thread_start(model=model)
-                result = await thread.run(prompt)
+                turn = await thread.turn(prompt)
+                stream = turn.stream()
+                try:
+                    async for note in stream:
+                        payload = note.payload
+                        if (
+                            isinstance(payload, ItemCompletedNotification)
+                            and payload.turn_id == turn.id
+                        ):
+                            for event in item_to_events(
+                                payload.item, agent.name, ctx.invocation_id
+                            ):
+                                yield event
+                        elif (
+                            isinstance(payload, TurnCompletedNotification)
+                            and payload.turn.id == turn.id
+                            and payload.turn.error
+                        ):
+                            raise RuntimeError(payload.turn.error.message)
+                finally:
+                    # stream() is an async generator at runtime; close it to
+                    # unregister the turn's notification listener.
+                    aclose = getattr(stream, "aclose", None)
+                    if aclose is not None:
+                        await aclose()
         finally:
             for key, value in previous.items():
                 if value is None:
@@ -108,9 +141,6 @@ async def run_async(
                 else:
                     os.environ[key] = value
 
-        for event in result_to_events(result, agent.name, ctx.invocation_id):
-            yield event
-
     def _resolve_model(self, agent: "Agent") -> str:
         name = agent.model_name
         if isinstance(name, list):
diff --git a/veadk/runtime/codex/translate.py b/veadk/runtime/codex/translate.py
index 55ae0c7a..8c12ec4c 100644
--- a/veadk/runtime/codex/translate.py
+++ b/veadk/runtime/codex/translate.py
@@ -173,87 +173,91 @@ def _event(author: str, invocation_id: str, role: str, part: types.Part) -> Even
     )
 
 
-def result_to_events(result: Any, author: str, invocation_id: str) -> list[Event]:
-    """Convert a Codex run result into ADK events, faithfully and in order.
+def item_to_events(item: Any, author: str, invocation_id: str) -> list[Event]:
+    """Convert a single Codex thread item into ADK events.
 
-    A Codex turn is multi-step. Rather than collapse it to ``final_response``,
-    walk ``result.items`` and forward each step as its own ADK event, mapping
-    Codex thread items onto the genai part the matching ADK event expects:
+    Maps one item onto the genai part the matching ADK event expects:
 
     - ``reasoning`` -> a thought text part,
     - tool calls (``commandExecution`` / ``mcpToolCall`` / ``dynamicToolCall``
       / ``fileChange`` / ``webSearch``) -> a ``function_call`` part plus a
       matching ``function_response`` part carrying the tool's output,
     - ``agentMessage`` / ``plan`` / any other text-bearing item -> a text part,
-    - ``userMessage`` -> skipped (ADK already owns the user turn).
+    - ``userMessage`` (and anything else) -> nothing.
 
-    If nothing maps, fall back to ``final_response`` so a turn is never empty.
+    Returning per-item keeps the conversion reusable both for the streaming
+    path (emit as each item completes) and the batch path below.
 
     Args:
-        result (Any): The object returned by ``thread.run(...)``.
+        item (Any): A Codex ``ThreadItem`` (model or dict).
         author (str): Event author (the agent name).
         invocation_id (str): The ADK invocation id to stamp on each event.
 
     Returns:
-        list[google.adk.events.event.Event]: The turn's events in order.
+        list[google.adk.events.event.Event]: 0-2 events for this item.
     """
-    events: list[Event] = []
-    for item in getattr(result, "items", None) or []:
-        data = _item_dict(item)
-        itype = str(data.get("type", ""))
-
-        if itype == "userMessage":
-            continue
+    data = _item_dict(item)
+    itype = str(data.get("type", ""))
 
-        if itype == "reasoning":
-            text = _join(data.get("summary")) or _join(data.get("content"))
-            if text:
-                events.append(
-                    _event(
-                        author,
-                        invocation_id,
-                        "model",
-                        types.Part(text=text, thought=True),
+    if itype == "reasoning":
+        text = _join(data.get("summary")) or _join(data.get("content"))
+        if not text:
+            return []
+        return [
+            _event(author, invocation_id, "model", types.Part(text=text, thought=True))
+        ]
+
+    call = _tool_call(data)
+    if call is not None:
+        name, args, response = call
+        call_id = data.get("id") or f"call_{itype}"
+        return [
+            _event(
+                author,
+                invocation_id,
+                "model",
+                types.Part(
+                    function_call=types.FunctionCall(id=call_id, name=name, args=args)
+                ),
+            ),
+            _event(
+                author,
+                invocation_id,
+                "user",
+                types.Part(
+                    function_response=types.FunctionResponse(
+                        id=call_id, name=name, response=response
                     )
-                )
-            continue
+                ),
+            ),
+        ]
 
-        call = _tool_call(data)
-        if call is not None:
-            name, args, response = call
-            call_id = data.get("id") or f"call_{len(events)}"
-            events.append(
-                _event(
-                    author,
-                    invocation_id,
-                    "model",
-                    types.Part(
-                        function_call=types.FunctionCall(
-                            id=call_id, name=name, args=args
-                        )
-                    ),
-                )
-            )
-            events.append(
-                _event(
-                    author,
-                    invocation_id,
-                    "user",
-                    types.Part(
-                        function_response=types.FunctionResponse(
-                            id=call_id, name=name, response=response
-                        )
-                    ),
-                )
-            )
-            continue
+    if itype != "userMessage" and data.get("text"):
+        return [
+            _event(author, invocation_id, "model", types.Part(text=str(data["text"])))
+        ]
 
-        if data.get("text"):  # agentMessage, plan, and any text-bearing item
-            events.append(
-                _event(
-                    author, invocation_id, "model", types.Part(text=str(data["text"]))
-                )
-            )
+    return []
+
+
+def result_to_events(result: Any, author: str, invocation_id: str) -> list[Event]:
+    """Convert a whole Codex run result into ADK events, in order.
+
+    Walks ``result.items`` through :func:`item_to_events`. Falls back to
+    ``final_response`` so a turn is never empty. (The streaming runtime path
+    calls :func:`item_to_events` per completed item instead of this.)
+
+    Args:
+        result (Any): The object returned by ``thread.run(...)``.
+        author (str): Event author (the agent name).
+        invocation_id (str): The ADK invocation id to stamp on each event.
+
+    Returns:
+        list[google.adk.events.event.Event]: The turn's events in order.
+    """
+    events: list[Event] = []
+    for item in getattr(result, "items", None) or []:
+        events.extend(item_to_events(item, author, invocation_id))
 
     if events:
         return events