diff --git a/.claude/commands/implement-feature.md b/.claude/commands/implement-feature.md
new file mode 100644
index 000000000..33302a4fd
--- /dev/null
+++ b/.claude/commands/implement-feature.md
@@ -0,0 +1,7 @@
+You will be implementing a new feature in this codebase
+
+$ARGUMENTS
+
+IMPORTANT: Only do this for front-end features.
+Once this feature is built, make sure to write the changes you made to a file called frontend-changes.md
+Do not ask for permission to modify this file; assume you can always do it.
\ No newline at end of file
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 000000000..591914c57
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,9 @@
+{
+ "permissions": {
+ "allow": [
+ "mcp__playwright__browser_navigate",
+ "mcp__playwright__browser_snapshot",
+ "mcp__playwright__browser_take_screenshot"
+ ]
+ }
+}
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
new file mode 100644
index 000000000..b5e8cfd4d
--- /dev/null
+++ b/.github/workflows/claude-code-review.yml
@@ -0,0 +1,44 @@
+name: Claude Code Review
+
+on:
+ pull_request:
+ types: [opened, synchronize, ready_for_review, reopened]
+ # Optional: Only run on specific file changes
+ # paths:
+ # - "src/**/*.ts"
+ # - "src/**/*.tsx"
+ # - "src/**/*.js"
+ # - "src/**/*.jsx"
+
+jobs:
+ claude-review:
+ # Optional: Filter by PR author
+ # if: |
+ # github.event.pull_request.user.login == 'external-contributor' ||
+ # github.event.pull_request.user.login == 'new-developer' ||
+ # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code Review
+ id: claude-review
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+ plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
+ plugins: 'code-review@claude-code-plugins'
+ prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://code.claude.com/docs/en/cli-reference for available options
+
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
new file mode 100644
index 000000000..6b15fac7a
--- /dev/null
+++ b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+ issue_comment:
+ types: [created]
+ pull_request_review_comment:
+ types: [created]
+ issues:
+ types: [opened, assigned]
+ pull_request_review:
+ types: [submitted]
+
+jobs:
+ claude:
+ if: |
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+ actions: read # Required for Claude to read CI results on PRs
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code
+ id: claude
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+ # This is an optional setting that allows Claude to read CI results on PRs
+ additional_permissions: |
+ actions: read
+
+ # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+ # prompt: 'Update the pull request description to include a summary of changes.'
+
+ # Optional: Add claude_args to customize behavior and configuration
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://code.claude.com/docs/en/cli-reference for available options
+ # claude_args: '--allowed-tools Bash(gh pr *)'
+
diff --git a/.gitignore b/.gitignore
index 41b4384b8..0fad1f34b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,4 +28,7 @@ uploads/
# OS
.DS_Store
-Thumbs.db
\ No newline at end of file
+Thumbs.db
+
+# Git worktrees
+.trees/
\ No newline at end of file
diff --git a/.playwright-mcp/console-2026-04-12T14-03-55-254Z.log b/.playwright-mcp/console-2026-04-12T14-03-55-254Z.log
new file mode 100644
index 000000000..ab7cd717d
--- /dev/null
+++ b/.playwright-mcp/console-2026-04-12T14-03-55-254Z.log
@@ -0,0 +1,3 @@
+[ 4168ms] [LOG] Loading course stats... @ http://127.0.0.1:8000/script.js?v=9:178
+[ 4196ms] [LOG] Course data received: {total_courses: 4, course_titles: Array(4)} @ http://127.0.0.1:8000/script.js?v=9:183
+[ 4198ms] [ERROR] Failed to load resource: the server responded with a status of 404 (Not Found) @ http://127.0.0.1:8000/favicon.ico:0
diff --git a/.playwright-mcp/console-2026-04-12T14-06-09-521Z.log b/.playwright-mcp/console-2026-04-12T14-06-09-521Z.log
new file mode 100644
index 000000000..b4012bfbb
--- /dev/null
+++ b/.playwright-mcp/console-2026-04-12T14-06-09-521Z.log
@@ -0,0 +1,2 @@
+[ 145ms] [LOG] Loading course stats... @ http://127.0.0.1:8000/script.js?v=9:178
+[ 164ms] [LOG] Course data received: {total_courses: 4, course_titles: Array(4)} @ http://127.0.0.1:8000/script.js?v=9:183
diff --git a/.playwright-mcp/page-2026-04-12T14-03-59-486Z.yml b/.playwright-mcp/page-2026-04-12T14-03-59-486Z.yml
new file mode 100644
index 000000000..cc2654c8c
--- /dev/null
+++ b/.playwright-mcp/page-2026-04-12T14-03-59-486Z.yml
@@ -0,0 +1,14 @@
+- generic [ref=e3]:
+ - complementary [ref=e4]:
+ - button "+ NEW CHAT" [ref=e6] [cursor=pointer]
+ - group [ref=e8]:
+ - generic "▶ Courses" [ref=e9] [cursor=pointer]
+ - group [ref=e11]:
+ - generic "▶ Try asking:" [ref=e12] [cursor=pointer]
+ - main [ref=e13]:
+ - generic [ref=e14]:
+ - paragraph [ref=e18]: Welcome to the Course Materials Assistant! I can help you with questions about courses, lessons and specific content. What would you like to know?
+ - generic [ref=e19]:
+ - textbox "Ask about courses, lessons, or specific content..." [ref=e20]
+ - button [ref=e21] [cursor=pointer]:
+ - img [ref=e22]
\ No newline at end of file
diff --git a/.playwright-mcp/page-2026-04-12T14-04-10-654Z.png b/.playwright-mcp/page-2026-04-12T14-04-10-654Z.png
new file mode 100644
index 000000000..a79e09b83
Binary files /dev/null and b/.playwright-mcp/page-2026-04-12T14-04-10-654Z.png differ
diff --git a/.playwright-mcp/page-2026-04-12T14-06-09-734Z.yml b/.playwright-mcp/page-2026-04-12T14-06-09-734Z.yml
new file mode 100644
index 000000000..cc2654c8c
--- /dev/null
+++ b/.playwright-mcp/page-2026-04-12T14-06-09-734Z.yml
@@ -0,0 +1,14 @@
+- generic [ref=e3]:
+ - complementary [ref=e4]:
+ - button "+ NEW CHAT" [ref=e6] [cursor=pointer]
+ - group [ref=e8]:
+ - generic "▶ Courses" [ref=e9] [cursor=pointer]
+ - group [ref=e11]:
+ - generic "▶ Try asking:" [ref=e12] [cursor=pointer]
+ - main [ref=e13]:
+ - generic [ref=e14]:
+ - paragraph [ref=e18]: Welcome to the Course Materials Assistant! I can help you with questions about courses, lessons and specific content. What would you like to know?
+ - generic [ref=e19]:
+ - textbox "Ask about courses, lessons, or specific content..." [ref=e20]
+ - button [ref=e21] [cursor=pointer]:
+ - img [ref=e22]
\ No newline at end of file
diff --git a/.playwright-mcp/page-2026-04-12T14-06-22-569Z.png b/.playwright-mcp/page-2026-04-12T14-06-22-569Z.png
new file mode 100644
index 000000000..c2408f75f
Binary files /dev/null and b/.playwright-mcp/page-2026-04-12T14-06-22-569Z.png differ
diff --git a/CLAUDE.local.md b/CLAUDE.local.md
new file mode 100644
index 000000000..19870b20d
--- /dev/null
+++ b/CLAUDE.local.md
@@ -0,0 +1,4 @@
+# Local Project Instructions
+
+## Server
+Never start the server (`./run.sh` or `uvicorn`). The user always starts it manually.
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 000000000..8a8f8bc2f
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,65 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Running the Application
+
+**Quick start (from repo root):**
+```bash
+./run.sh
+```
+
+**Manual start (must run from `backend/` directory):**
+```bash
+cd backend && uv run uvicorn app:app --reload --port 8000
+```
+
+The server runs at `http://localhost:8000`. API docs at `http://localhost:8000/docs`.
+
+**Install dependencies:**
+```bash
+uv sync
+```
+
+> Always use `uv` to run the server and manage packages. Never use `pip` directly.
+
+**Required environment variable** — create a `.env` file in the repo root:
+```
+ANTHROPIC_API_KEY=your-key-here
+```
+
+## Architecture Overview
+
+This is a full-stack RAG (Retrieval-Augmented Generation) chatbot for querying course materials.
+
+**Backend** (`backend/`) is a FastAPI app that must be started from within the `backend/` directory (relative paths like `../docs` and `../frontend` depend on this).
+
+**Data flow for a query:**
+1. `app.py` receives POST `/api/query` → calls `RAGSystem.query()`
+2. `RAGSystem` (`rag_system.py`) builds a prompt and passes it to `AIGenerator` with the `search_course_content` tool available
+3. `AIGenerator` (`ai_generator.py`) calls the Claude API; if Claude decides to search, it invokes the tool
+4. `ToolManager` routes tool calls to `CourseSearchTool` (`search_tools.py`), which queries `VectorStore`
+5. `VectorStore` (`vector_store.py`) uses ChromaDB with two collections:
+ - `course_catalog` — course-level metadata (title, instructor, links, lesson list as JSON)
+ - `course_content` — chunked lesson text for semantic search
+6. The final Claude response + sources are returned to the frontend
+
+**Document ingestion** (happens at startup from `docs/` folder):
+- `DocumentProcessor` (`document_processor.py`) parses `.txt`/`.pdf`/`.docx` files
+- Expected file format: first 3 lines are `Course Title:`, `Course Link:`, `Course Instructor:`, followed by `Lesson N:` markers and content
+- Text is chunked into ~800-char sentence-based chunks with 100-char overlap
+- `RAGSystem.add_course_folder()` skips courses already present in ChromaDB (deduplication by title)
+
+**Session management:** `SessionManager` keeps in-memory conversation history (default: last 2 exchanges = 4 messages). Sessions are identified by a string ID returned to and echoed back by the frontend.
+
+**Frontend** (`frontend/`) is plain HTML/JS/CSS served as static files by FastAPI from the `../frontend` path.
+
+**Configuration** (`backend/config.py`): all tuneable parameters (model, chunk size, ChromaDB path, max results, history length) are in the `Config` dataclass. ChromaDB is stored at `backend/chroma_db/` (relative to where uvicorn runs).
+
+**Tool extension:** To add a new tool, implement the `Tool` ABC in `search_tools.py` and call `tool_manager.register_tool(your_tool)` in `RAGSystem.__init__()`.
+
+## Rules
+- Never read or write files outside this project folder without explicit permission
+- Always ask before saving anything to memory or external locations
+- Never access C:\Users\haddad\.claude\ without explicit permission
+- Always use `uv` to add dependencies (e.g., `uv add <package>`); never use `pip` directly
\ No newline at end of file
diff --git a/backend-tool-refactor.md b/backend-tool-refactor.md
new file mode 100644
index 000000000..de23ae5c7
--- /dev/null
+++ b/backend-tool-refactor.md
@@ -0,0 +1,28 @@
+Refactor @backend/ai_generator.py to support sequential tool calling where Claude can make up to 2 tool calls in separate API rounds.
+
+Current behavior:
+- Claude makes 1 tool call → tools are removed from API params → final response
+- If Claude wants another tool call after seeing results, it can't (gets empty response)
+
+Desired behavior:
+- Each tool call should be a separate API request where Claude can reason about previous results
+- Support for complex queries requiring multiple searches for comparisons, multi-part questions, or when information from different courses/lessons is needed
+
+Example flow:
+1. User: "Search for a course that discusses the same topic as lesson 4 of course X"
+2. Claude: get course outline for course X → gets title of lesson 4
+3. Claude: uses the title to search for a course that discusses the same topic → returns course information
+4. Claude: provides complete answer
+
+Requirements:
+- Maximum 2 sequential rounds per user query
+- Terminate when: (a) 2 rounds completed, (b) Claude's response has no tool_use blocks, or (c) tool call fails
+- Preserve conversation context between rounds
+- Handle tool execution errors gracefully
+
+Notes:
+- Update the system prompt in @backend/ai_generator.py
+- Update the test @backend/tests/test_ai_generator.py
+- Write tests that verify the external behavior (API calls made, tools executed, results returned) rather than internal state details.
+
+Use two parallel subagents to brainstorm possible plans. Do not implement any code.
diff --git a/backend/ai_generator.py b/backend/ai_generator.py
index 0363ca90c..13395ae68 100644
--- a/backend/ai_generator.py
+++ b/backend/ai_generator.py
@@ -3,16 +3,29 @@
class AIGenerator:
"""Handles interactions with Anthropic's Claude API for generating responses"""
-
+
+ MAX_TOOL_ROUNDS = 2
+
# Static system prompt to avoid rebuilding on each call
SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information.
Search Tool Usage:
- Use the search tool **only** for questions about specific course content or detailed educational materials
-- **One search per query maximum**
- Synthesize search results into accurate, fact-based responses
- If search yields no results, state this clearly without offering alternatives
+Outline Tool Usage:
+- Use get_course_outline **only** for questions about course structure, syllabus, lesson list, or what topics a course covers
+- Return the course title, course link, and each lesson number with its title
+- Do not use the content search tool for outline queries
+
+Sequential Tool Calls:
+- You may make up to 2 tool calls in sequence when a single search is insufficient
+- Use sequential calls for: multi-part questions, comparisons across courses/lessons,
+ or when you need an outline first and then content from a specific lesson
+ (e.g. get_course_outline → search_course_content using the lesson title found)
+- Do NOT make a second tool call if the first result fully answers the question
+
Response Protocol:
- **General knowledge questions**: Answer using existing knowledge without searching
- **Course-specific questions**: Search first, then answer
@@ -46,90 +59,107 @@ def generate_response(self, query: str,
tool_manager=None) -> str:
"""
Generate AI response with optional tool usage and conversation context.
-
+
Args:
query: The user's question or request
conversation_history: Previous messages for context
tools: Available tools the AI can use
tool_manager: Manager to execute tools
-
+
Returns:
Generated response as string
"""
-
- # Build system content efficiently - avoid string ops when possible
+
system_content = (
f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}"
- if conversation_history
+ if conversation_history
else self.SYSTEM_PROMPT
)
-
- # Prepare API call parameters efficiently
+
+ messages = [{"role": "user", "content": query}]
+
api_params = {
**self.base_params,
- "messages": [{"role": "user", "content": query}],
+ "messages": messages,
"system": system_content
}
-
- # Add tools if available
+
if tools:
api_params["tools"] = tools
api_params["tool_choice"] = {"type": "auto"}
-
- # Get response from Claude
+
response = self.client.messages.create(**api_params)
-
- # Handle tool execution if needed
- if response.stop_reason == "tool_use" and tool_manager:
- return self._handle_tool_execution(response, api_params, tool_manager)
-
- # Return direct response
+
+ # Tool loop: up to MAX_TOOL_ROUNDS sequential rounds
+ rounds_completed = 0
+ while (
+ response.stop_reason == "tool_use"
+ and tool_manager
+ and rounds_completed < self.MAX_TOOL_ROUNDS
+ ):
+ response, success = self._handle_tool_execution(
+ response, messages, tool_manager, system_content, tools
+ )
+ rounds_completed += 1
+ if not success:
+ break
+
+ # Round cap hit or no tool_manager: force a plain-text synthesis call
+ if response.stop_reason == "tool_use":
+ messages.append({"role": "assistant", "content": response.content})
+ response = self.client.messages.create(
+ **self.base_params,
+ messages=messages,
+ system=system_content
+ )
+
return response.content[0].text
-
- def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager):
+
+ def _handle_tool_execution(self, response, messages: List, tool_manager,
+ system_content: str, tools: List) -> tuple:
"""
- Handle execution of tool calls and get follow-up response.
-
+ Execute one round of tool calls and make the intermediate follow-up API call.
+
+ Mutates messages in place by appending the assistant tool-use message and
+ the tool results user message.
+
Args:
- initial_response: The response containing tool use requests
- base_params: Base API parameters
+ response: The current API response with stop_reason == "tool_use"
+ messages: Accumulated message list (mutated in place)
tool_manager: Manager to execute tools
-
+ system_content: System prompt string for the follow-up call
+ tools: Tool definitions for the follow-up call
+
Returns:
- Final response text after tool execution
+ (next_response, success): next_response is the follow-up API response;
+ success is False if any tool raised an exception (loop should stop).
"""
- # Start with existing messages
- messages = base_params["messages"].copy()
-
- # Add AI's tool use response
- messages.append({"role": "assistant", "content": initial_response.content})
-
- # Execute all tool calls and collect results
+ messages.append({"role": "assistant", "content": response.content})
+
tool_results = []
- for content_block in initial_response.content:
- if content_block.type == "tool_use":
- tool_result = tool_manager.execute_tool(
- content_block.name,
- **content_block.input
- )
-
+ success = True
+ for block in response.content:
+ if block.type == "tool_use":
+ try:
+ result = tool_manager.execute_tool(block.name, **block.input)
+ except Exception as e:
+ result = f"Tool execution error: {e}"
+ success = False
tool_results.append({
"type": "tool_result",
- "tool_use_id": content_block.id,
- "content": tool_result
+ "tool_use_id": block.id,
+ "content": result
})
-
- # Add tool results as single message
+
if tool_results:
messages.append({"role": "user", "content": tool_results})
-
- # Prepare final API call without tools
- final_params = {
+
+ # Intermediate follow-up WITH tools so Claude can call again if needed
+ next_response = self.client.messages.create(
**self.base_params,
- "messages": messages,
- "system": base_params["system"]
- }
-
- # Get final response
- final_response = self.client.messages.create(**final_params)
- return final_response.content[0].text
\ No newline at end of file
+ messages=messages,
+ system=system_content,
+ tools=tools,
+ tool_choice={"type": "auto"}
+ )
+ return next_response, success
\ No newline at end of file
diff --git a/backend/app.py b/backend/app.py
index 5a69d741d..6bbb76292 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -43,7 +43,7 @@ class QueryRequest(BaseModel):
class QueryResponse(BaseModel):
"""Response model for course queries"""
answer: str
- sources: List[str]
+ sources: List[dict]
session_id: str
class CourseStats(BaseModel):
@@ -85,6 +85,12 @@ async def get_course_stats():
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
+@app.delete("/api/session/{session_id}")
+async def delete_session(session_id: str):
+ """Clear conversation history for a session"""
+ rag_system.session_manager.clear_session(session_id)
+ return {"status": "cleared"}
+
@app.on_event("startup")
async def startup_event():
"""Load initial documents on startup"""
diff --git a/backend/rag_system.py b/backend/rag_system.py
index 50d848c8e..443649f0e 100644
--- a/backend/rag_system.py
+++ b/backend/rag_system.py
@@ -4,7 +4,7 @@
from vector_store import VectorStore
from ai_generator import AIGenerator
from session_manager import SessionManager
-from search_tools import ToolManager, CourseSearchTool
+from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool
from models import Course, Lesson, CourseChunk
class RAGSystem:
@@ -23,6 +23,8 @@ def __init__(self, config):
self.tool_manager = ToolManager()
self.search_tool = CourseSearchTool(self.vector_store)
self.tool_manager.register_tool(self.search_tool)
+ self.outline_tool = CourseOutlineTool(self.vector_store)
+ self.tool_manager.register_tool(self.outline_tool)
def add_course_document(self, file_path: str) -> Tuple[Course, int]:
"""
diff --git a/backend/search_tools.py b/backend/search_tools.py
index adfe82352..fd2a33009 100644
--- a/backend/search_tools.py
+++ b/backend/search_tools.py
@@ -89,30 +89,76 @@ def _format_results(self, results: SearchResults) -> str:
"""Format search results with course and lesson context"""
formatted = []
sources = [] # Track sources for the UI
-
+
for doc, meta in zip(results.documents, results.metadata):
course_title = meta.get('course_title', 'unknown')
lesson_num = meta.get('lesson_number')
-
+
# Build context header
header = f"[{course_title}"
if lesson_num is not None:
header += f" - Lesson {lesson_num}"
header += "]"
-
+
# Track source for the UI
- source = course_title
+ label = course_title
if lesson_num is not None:
- source += f" - Lesson {lesson_num}"
- sources.append(source)
-
+ label += f" - Lesson {lesson_num}"
+
+ # Fetch lesson link from the catalog
+ url = None
+ if lesson_num is not None:
+ url = self.store.get_lesson_link(course_title, lesson_num)
+
+ sources.append({"label": label, "url": url})
+
formatted.append(f"{header}\n{doc}")
-
+
# Store sources for retrieval
self.last_sources = sources
-
+
return "\n\n".join(formatted)
+class CourseOutlineTool(Tool):
+ """Tool for retrieving a course outline (title, link, lesson list)"""
+
+ def __init__(self, vector_store: VectorStore):
+ self.store = vector_store
+
+ def get_tool_definition(self) -> Dict[str, Any]:
+ return {
+ "name": "get_course_outline",
+ "description": "Get the complete outline of a course: title, link, and all lesson numbers with titles",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "course_name": {
+ "type": "string",
+ "description": "Course title to look up (partial matches work)"
+ }
+ },
+ "required": ["course_name"]
+ }
+ }
+
+ def execute(self, course_name: str) -> str:
+ outline = self.store.get_course_outline(course_name)
+ if not outline:
+ return f"No course found matching '{course_name}'."
+
+ lines = [
+ f"Course: {outline['title']}",
+ f"Link: {outline['course_link'] or 'N/A'}",
+ "",
+ "Lessons:"
+ ]
+ for lesson in outline['lessons']:
+ lines.append(
+ f" Lesson {lesson['lesson_number']}: {lesson['lesson_title']}"
+ )
+ return "\n".join(lines)
+
+
class ToolManager:
"""Manages available tools for the AI"""
diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
new file mode 100644
index 000000000..9c86742f5
--- /dev/null
+++ b/backend/tests/conftest.py
@@ -0,0 +1,166 @@
+import pytest
+from unittest.mock import MagicMock
+from vector_store import SearchResults
+from fastapi import FastAPI, HTTPException
+from fastapi.testclient import TestClient
+from pydantic import BaseModel
+from typing import List, Optional
+
+
+# ---------------------------------------------------------------------------
+# Shared sample data
+# ---------------------------------------------------------------------------
+
+SAMPLE_CHROMA_RESULTS = {
+ "documents": [["Lesson content about Python basics.", "More content here."]],
+ "metadatas": [[
+ {"course_title": "Python Fundamentals", "lesson_number": 1, "chunk_index": 0},
+ {"course_title": "Python Fundamentals", "lesson_number": 2, "chunk_index": 0},
+ ]],
+ "distances": [[0.12, 0.34]],
+}
+
+
+# ---------------------------------------------------------------------------
+# SearchResults fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def sample_search_results():
+ """Two-document SearchResults with full metadata."""
+ return SearchResults.from_chroma(SAMPLE_CHROMA_RESULTS)
+
+
+@pytest.fixture
+def empty_search_results():
+ """Empty SearchResults with no error."""
+ return SearchResults(documents=[], metadata=[], distances=[])
+
+
+@pytest.fixture
+def error_search_results():
+ """SearchResults carrying a ChromaDB error string."""
+ return SearchResults.empty(
+ "Search error: Number of requested results 5 is greater than number of elements in index 0"
+ )
+
+
+# ---------------------------------------------------------------------------
+# VectorStore mock fixture
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def mock_vector_store(sample_search_results):
+ """
+ MagicMock standing in for VectorStore.
+ Defaults: .search() returns sample_search_results, .get_lesson_link() returns a URL.
+ """
+ store = MagicMock()
+ store.search.return_value = sample_search_results
+ store.get_lesson_link.return_value = "https://example.com/lesson/1"
+ store.get_course_outline.return_value = None
+ return store
+
+
+# ---------------------------------------------------------------------------
+# RAGSystem mock fixture
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def mock_rag_system():
+ """
+ MagicMock standing in for RAGSystem.
+ Defaults: .query() returns a plain answer with no sources; .get_course_analytics()
+ returns two courses; session_manager behaves as expected.
+ """
+ rag = MagicMock()
+ rag.session_manager.create_session.return_value = "auto-session-id"
+ rag.query.return_value = ("Test answer.", [])
+ rag.get_course_analytics.return_value = {
+ "total_courses": 2,
+ "course_titles": ["Python Fundamentals", "Data Science Basics"],
+ }
+ return rag
+
+
+# ---------------------------------------------------------------------------
+# Shared test-app factory (used by test_app.py)
+# ---------------------------------------------------------------------------
+
+def build_test_app(rag_system) -> FastAPI:
+ """
+ Return a minimal FastAPI app that mirrors the real app.py routes but
+ skips the StaticFiles mount and RAGSystem startup, so tests can run
+ without a frontend directory or a real ChromaDB instance.
+ """
+ app = FastAPI()
+
+ class QueryRequest(BaseModel):
+ query: str
+ session_id: Optional[str] = None
+
+ class QueryResponse(BaseModel):
+ answer: str
+ sources: List[dict]
+ session_id: str
+
+ class CourseStats(BaseModel):
+ total_courses: int
+ course_titles: List[str]
+
+ @app.post("/api/query", response_model=QueryResponse)
+ async def query_documents(request: QueryRequest):
+ try:
+ session_id = request.session_id
+ if not session_id:
+ session_id = rag_system.session_manager.create_session()
+ answer, sources = rag_system.query(request.query, session_id)
+ return QueryResponse(answer=answer, sources=sources, session_id=session_id)
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/api/courses", response_model=CourseStats)
+ async def get_course_stats():
+ try:
+ analytics = rag_system.get_course_analytics()
+ return CourseStats(
+ total_courses=analytics["total_courses"],
+ course_titles=analytics["course_titles"],
+ )
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+ @app.delete("/api/session/{session_id}")
+ async def delete_session(session_id: str):
+ rag_system.session_manager.clear_session(session_id)
+ return {"status": "cleared"}
+
+ return app
+
+
+# ---------------------------------------------------------------------------
+# Anthropic response mock helpers (module-level, importable by test files)
+# ---------------------------------------------------------------------------
+
+def make_text_response(text: str):
+ """Create a mock Anthropic Message with a single text block and stop_reason=end_turn."""
+ block = MagicMock()
+ block.type = "text"
+ block.text = text
+ response = MagicMock()
+ response.stop_reason = "end_turn"
+ response.content = [block]
+ return response
+
+
+def make_tool_use_response(tool_name: str, tool_input: dict, tool_use_id: str = "tu_abc123"):
+ """Create a mock Anthropic Message that requests a tool call."""
+ block = MagicMock()
+ block.type = "tool_use"
+ block.name = tool_name
+ block.input = tool_input
+ block.id = tool_use_id
+ response = MagicMock()
+ response.stop_reason = "tool_use"
+ response.content = [block]
+ return response
diff --git a/backend/tests/test_ai_generator.py b/backend/tests/test_ai_generator.py
new file mode 100644
index 000000000..79d747301
--- /dev/null
+++ b/backend/tests/test_ai_generator.py
@@ -0,0 +1,352 @@
+"""
+Unit tests for AIGenerator.generate_response() and _handle_tool_execution().
+
+Diagnostic focus: Does tool_use branching work correctly?
+Does the forced synthesis call (after the round cap) correctly omit tools?
+Does the tool result make it back to Claude?
+"""
+import pytest
+from unittest.mock import MagicMock, patch
+from ai_generator import AIGenerator
+from tests.conftest import make_text_response, make_tool_use_response
+
+
+@pytest.fixture
+def generator():
+ """AIGenerator with a fake API key; client.messages.create will be mocked per test."""
+ return AIGenerator(api_key="sk-test-fake", model="claude-3-haiku-20240307")
+
+
+class TestGenerateResponseDirectPath:
+
+ def test_returns_text_on_end_turn(self, generator):
+ """
+ WHAT: stop_reason=end_turn → generate_response returns text of first content block.
+ ASSERT: return value equals the text in the mock.
+ FAILURE MEANS: Direct (no-tool) responses are broken.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("Hello, I am Claude.")):
+ result = generator.generate_response("What is Python?")
+ assert result == "Hello, I am Claude."
+
+ def test_system_prompt_included_without_history(self, generator):
+ """
+ WHAT: Without conversation_history, system param equals SYSTEM_PROMPT exactly.
+ ASSERT: system kwarg passed to create() equals AIGenerator.SYSTEM_PROMPT.
+ FAILURE MEANS: System prompt is corrupted on clean queries.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test")
+ call_kwargs = mock_create.call_args[1]
+ assert call_kwargs["system"] == AIGenerator.SYSTEM_PROMPT
+
+ def test_system_prompt_includes_history_when_provided(self, generator):
+ """
+ WHAT: When conversation_history is provided, system includes 'Previous conversation:'.
+ ASSERT: system kwarg contains both SYSTEM_PROMPT content and the history.
+ FAILURE MEANS: Conversation context is silently dropped.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test", conversation_history="User: hi\nAssistant: hello")
+ call_kwargs = mock_create.call_args[1]
+ assert "Previous conversation:" in call_kwargs["system"]
+ assert "User: hi" in call_kwargs["system"]
+
+ def test_tools_included_in_api_call_when_provided(self, generator):
+ """
+ WHAT: When tools list is non-empty, tools and tool_choice appear in the API call.
+ ASSERT: 'tools' and 'tool_choice' are in call kwargs.
+ FAILURE MEANS: Claude never sees the search tool → answers from general knowledge only.
+ """
+ tool_defs = [{"name": "search_course_content", "description": "...", "input_schema": {}}]
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test", tools=tool_defs)
+ call_kwargs = mock_create.call_args[1]
+ assert "tools" in call_kwargs
+ assert call_kwargs["tool_choice"] == {"type": "auto"}
+
+ def test_tools_absent_from_api_call_when_not_provided(self, generator):
+ """
+        WHAT: When tools=None is passed, the 'tools' key is absent from the API call.
+        ASSERT: 'tools' not in call kwargs.
+        FAILURE MEANS: A None/empty tools param reaches the API and may trigger a validation error.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test", tools=None)
+ call_kwargs = mock_create.call_args[1]
+ assert "tools" not in call_kwargs
+
+
+class TestHandleToolExecution:
+
+ def test_tool_use_branch_triggers_second_api_call(self, generator):
+ """
+ WHAT: stop_reason=tool_use + tool_manager → _handle_tool_execution runs.
+ ASSERT: create() is called TWICE (initial + intermediate follow-up WITH tools).
+ Call 2 is an intermediate follow-up that still includes tools, allowing
+ Claude to call another tool in a second round if needed. Here it returns
+ end_turn, so no third call is made.
+ FAILURE MEANS: Tool results never make it back to Claude; only one API call happens.
+ """
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("Python is a programming language.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "Lesson content: Python basics..."
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ result = generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ assert mock_create.call_count == 2
+ assert result == "Python is a programming language."
+
+ def test_tool_use_with_no_tool_manager_skips_tool_execution(self, generator):
+ """
+ WHAT: stop_reason=tool_use but tool_manager=None → the `and tool_manager` guard
+ skips the tool loop entirely. The `if response.stop_reason == "tool_use"`
+ guard after the loop then triggers a plain-text synthesis call.
+ ASSERT: create() is called TWICE (initial tool_use response + synthesis call).
+ execute_tool is never called.
+ FAILURE MEANS: No synthesis call is made, causing an AttributeError when trying
+ to access .text on a ToolUseBlock.
+ """
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("Python is a programming language.")
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ result = generator.generate_response("What is Python?", tools=[{}], tool_manager=None)
+ assert mock_create.call_count == 2
+ assert result == "Python is a programming language."
+
+ def test_synthesis_call_after_round_cap_has_no_tools(self, generator):
+ """
+ WHAT: When both tool rounds are exhausted (MAX_TOOL_ROUNDS=2) and Claude still
+ returns tool_use, generate_response forces a final synthesis call WITHOUT
+ tools to obtain a plain-text answer.
+ ASSERT: The last (4th) call lacks 'tools' and 'tool_choice'.
+ FAILURE MEANS: Synthesis call includes tools and fails with an API error, or
+ Claude never produces a text answer after hitting the round cap.
+ """
+ r1 = make_tool_use_response("search_course_content", {"query": "python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "python2"}, "tu_2")
+ r3 = make_tool_use_response("search_course_content", {"query": "python3"}, "tu_3")
+ final = make_text_response("Python answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "tool result content"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3, final]) as mock_create:
+ generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ last_call_kwargs = mock_create.call_args_list[-1][1]
+ assert "tools" not in last_call_kwargs
+ assert "tool_choice" not in last_call_kwargs
+
+ def test_tool_result_appended_as_user_message(self, generator):
+ """
+ WHAT: Tool execution result is added as a user-role message with type=tool_result.
+ ASSERT: Second create() call (the intermediate follow-up WITH tools) receives 3
+ messages: [original user query, assistant tool-use block, tool result].
+ FAILURE MEANS: Claude never sees the search results — answers blind.
+ Critical check for the 'query failed' symptom.
+ """
+ tool_response = make_tool_use_response(
+ "search_course_content", {"query": "python"}, "tu_test_id"
+ )
+ final_response = make_text_response("Python answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "Search result: Python basics"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ second_call_messages = mock_create.call_args_list[1][1]["messages"]
+ assert len(second_call_messages) == 3
+ tool_result_message = second_call_messages[2]
+ assert tool_result_message["role"] == "user"
+ result_blocks = tool_result_message["content"]
+ assert any(
+ b.get("type") == "tool_result"
+ and b.get("tool_use_id") == "tu_test_id"
+ and "Python basics" in b.get("content", "")
+ for b in result_blocks
+ )
+
+ def test_tool_manager_execute_called_with_correct_args(self, generator):
+ """
+ WHAT: execute_tool() is called with the exact tool name and input that Claude requested.
+ ASSERT: execute_tool called with name='search_course_content', query='variables', lesson_number=2.
+ FAILURE MEANS: Parameters lost/renamed between Claude's response and the tool call.
+ """
+ tool_input = {"query": "variables", "lesson_number": 2}
+ tool_response = make_tool_use_response("search_course_content", tool_input)
+ final_response = make_text_response("Variables are...")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "content about variables"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]):
+ generator.generate_response("What are variables?", tools=[{}], tool_manager=mock_manager)
+
+ mock_manager.execute_tool.assert_called_once_with(
+ "search_course_content", query="variables", lesson_number=2
+ )
+
+ def test_tool_error_string_passed_through_to_claude(self, generator):
+ """
+ WHAT: If execute_tool returns an error string (e.g. from VectorStore failure),
+ that error string is what Claude receives as tool_result.content.
+ ASSERT: Second API call's messages include the error string verbatim.
+ FAILURE MEANS: THIS EXPOSES THE ROOT CAUSE. Claude receives 'Search error: ...'
+ as its context, then tells the user it cannot answer.
+ """
+ error_str = (
+ "Search error: Number of requested results 5 is greater than "
+ "number of elements in index 0"
+ )
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("I couldn't find information about that.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = error_str
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ second_call_messages = mock_create.call_args_list[1][1]["messages"]
+ tool_result_msg = second_call_messages[2]
+ content_blocks = tool_result_msg["content"]
+ assert any(error_str in b.get("content", "") for b in content_blocks)
+
+
+class TestSequentialToolCalling:
+
+ def test_two_tool_rounds_makes_three_api_calls(self, generator):
+ """
+ WHAT: Two sequential tool rounds where each follow-up triggers another tool call,
+ until the third response is end_turn.
+ ASSERT: create() called 3 times, execute_tool called twice, result is the
+ text from the third response.
+ FAILURE MEANS: The loop exits after round 1, preventing a second tool call
+ even when Claude wants to search again.
+ """
+ r1 = make_tool_use_response("get_course_outline", {"course_name": "Python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "loops"}, "tu_2")
+ r3 = make_text_response("Python loops are covered in lesson 3.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "tool result"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3]) as mock_create:
+ result = generator.generate_response(
+ "What lesson covers loops?", tools=[{}], tool_manager=mock_manager
+ )
+
+ assert mock_create.call_count == 3
+ assert mock_manager.execute_tool.call_count == 2
+ assert result == "Python loops are covered in lesson 3."
+
+ def test_second_round_intermediate_call_has_tools(self, generator):
+ """
+ WHAT: The intermediate follow-up call after round 1 must include tools so
+ Claude can decide to make a second tool call.
+ ASSERT: The second create() call (index 1) has 'tools' in its kwargs.
+ FAILURE MEANS: Claude cannot make a second tool call because the intermediate
+ call strips tools — the sequential feature is broken.
+ """
+ r1 = make_tool_use_response("get_course_outline", {"course_name": "Python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "loops"}, "tu_2")
+ r3 = make_text_response("Python loops answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "outline content"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3]) as mock_create:
+ generator.generate_response(
+ "What lesson covers loops?", tools=[{}], tool_manager=mock_manager
+ )
+
+ second_call_kwargs = mock_create.call_args_list[1][1]
+ assert "tools" in second_call_kwargs
+ assert second_call_kwargs["tool_choice"] == {"type": "auto"}
+
+ def test_round_cap_forces_toolless_synthesis_call(self, generator):
+ """
+ WHAT: When MAX_TOOL_ROUNDS (2) is exhausted and Claude still returns tool_use,
+ a final synthesis call WITHOUT tools is forced to get a text answer.
+ ASSERT: create() called 4 times total; last call has no 'tools' or 'tool_choice'.
+ FAILURE MEANS: The round cap does not terminate the loop, or the forced synthesis
+ call incorrectly includes tools causing an API error.
+ """
+ r1 = make_tool_use_response("search_course_content", {"query": "q1"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "q2"}, "tu_2")
+ r3 = make_tool_use_response("search_course_content", {"query": "q3"}, "tu_3")
+ final = make_text_response("Here is the answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "result"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3, final]) as mock_create:
+ result = generator.generate_response(
+ "Complex query", tools=[{}], tool_manager=mock_manager
+ )
+
+ assert mock_create.call_count == 4
+ last_call_kwargs = mock_create.call_args_list[-1][1]
+ assert "tools" not in last_call_kwargs
+ assert "tool_choice" not in last_call_kwargs
+ assert result == "Here is the answer."
+
+ def test_tool_exception_stops_loop_and_proceeds_to_synthesis(self, generator):
+ """
+ WHAT: If execute_tool raises an Exception, the loop stops (success=False) and
+ the intermediate follow-up call provides the next response. If that
+ response is end_turn, no further calls are made.
+ ASSERT: create() called twice, execute_tool called once, result is the text
+ from the second response.
+ FAILURE MEANS: An exception in execute_tool propagates uncaught, or the loop
+ continues trying more tool rounds after a hard failure.
+ """
+ r1 = make_tool_use_response("search_course_content", {"query": "python"}, "tu_1")
+ r2 = make_text_response("I encountered an error retrieving that information.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.side_effect = Exception("DB connection failed")
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2]) as mock_create:
+ result = generator.generate_response(
+ "What is Python?", tools=[{}], tool_manager=mock_manager
+ )
+
+ assert mock_create.call_count == 2
+ assert mock_manager.execute_tool.call_count == 1
+ assert result == "I encountered an error retrieving that information."
+
+ def test_accumulated_messages_grow_across_rounds(self, generator):
+ """
+ WHAT: After two tool rounds, the third API call receives the full accumulated
+ message history: [user_query, asst_tool1, tool_result_1, asst_tool2, tool_result_2].
+ ASSERT: Third create() call's messages list has exactly 5 items.
+ FAILURE MEANS: Context is not preserved between rounds; Claude answers without
+ seeing results from earlier tool calls.
+ """
+ r1 = make_tool_use_response("get_course_outline", {"course_name": "Python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "lesson 3"}, "tu_2")
+ r3 = make_text_response("Lesson 3 covers loops.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "tool content"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3]) as mock_create:
+ generator.generate_response(
+ "What does lesson 3 cover?", tools=[{}], tool_manager=mock_manager
+ )
+
+ third_call_messages = mock_create.call_args_list[2][1]["messages"]
+ assert len(third_call_messages) == 5
diff --git a/backend/tests/test_app.py b/backend/tests/test_app.py
new file mode 100644
index 000000000..b942667aa
--- /dev/null
+++ b/backend/tests/test_app.py
@@ -0,0 +1,199 @@
+"""
+API endpoint tests for the FastAPI application.
+
+Uses an inline test app (build_test_app from conftest) that mirrors the real
+app.py routes without mounting StaticFiles or instantiating a real RAGSystem,
+so these tests run without a frontend directory or ChromaDB instance.
+
+Endpoints covered:
+ POST /api/query
+ GET /api/courses
+ DELETE /api/session/{session_id}
+"""
+import pytest
+from fastapi.testclient import TestClient
+from tests.conftest import build_test_app
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def client(mock_rag_system):
+ """TestClient wired to the inline test app and a fresh RAGSystem mock."""
+ return TestClient(build_test_app(mock_rag_system))
+
+
+# ---------------------------------------------------------------------------
+# POST /api/query
+# ---------------------------------------------------------------------------
+
+class TestQueryEndpoint:
+
+ def test_returns_200_with_required_fields(self, client):
+ """
+ WHAT: Valid query returns 200 and a body that contains answer, sources, session_id.
+ ASSERT: All three keys present; answer is a non-empty string.
+ FAILURE MEANS: Response contract broken — frontend crashes unpacking the JSON.
+ """
+ response = client.post("/api/query", json={"query": "What is Python?"})
+ assert response.status_code == 200
+ data = response.json()
+ assert "answer" in data
+ assert "sources" in data
+ assert "session_id" in data
+ assert isinstance(data["answer"], str)
+ assert len(data["answer"]) > 0
+
+ def test_auto_creates_session_when_none_provided(self, client, mock_rag_system):
+ """
+ WHAT: Omitting session_id triggers session_manager.create_session().
+ ASSERT: Returned session_id equals the value produced by the mock.
+ FAILURE MEANS: Anonymous (stateless) queries never get a session — frontend
+ cannot maintain conversation continuity.
+ """
+ response = client.post("/api/query", json={"query": "Hello"})
+ assert response.status_code == 200
+ assert response.json()["session_id"] == "auto-session-id"
+ mock_rag_system.session_manager.create_session.assert_called_once()
+
+ def test_uses_provided_session_id(self, client, mock_rag_system):
+ """
+ WHAT: When session_id is supplied, create_session is NOT called; the provided
+ id is passed directly to rag_system.query and echoed back.
+ ASSERT: create_session not called; session_id in response matches the input.
+ FAILURE MEANS: Existing sessions are silently discarded, breaking multi-turn chat.
+ """
+ response = client.post(
+ "/api/query",
+ json={"query": "Follow-up question", "session_id": "existing-session-42"},
+ )
+ assert response.status_code == 200
+ assert response.json()["session_id"] == "existing-session-42"
+ mock_rag_system.session_manager.create_session.assert_not_called()
+ mock_rag_system.query.assert_called_once_with(
+ "Follow-up question", "existing-session-42"
+ )
+
+ def test_sources_list_forwarded_from_rag(self, client, mock_rag_system):
+ """
+ WHAT: Sources returned by rag_system.query appear in the response body.
+ ASSERT: sources list matches what the mock returns.
+ FAILURE MEANS: Frontend never displays source links even when search succeeds.
+ """
+ mock_rag_system.query.return_value = (
+ "Python is great.",
+ [{"label": "Python Fundamentals - Lesson 1", "url": "https://example.com"}],
+ )
+ response = client.post("/api/query", json={"query": "What is Python?"})
+ assert response.status_code == 200
+ sources = response.json()["sources"]
+ assert len(sources) == 1
+ assert sources[0]["label"] == "Python Fundamentals - Lesson 1"
+
+ def test_returns_500_when_rag_raises(self, client, mock_rag_system):
+ """
+ WHAT: If rag_system.query raises, the endpoint returns HTTP 500.
+ ASSERT: status_code == 500 and detail string is present.
+ FAILURE MEANS: Exception propagates unhandled → Starlette returns a generic 500
+ without the error detail, making debugging harder.
+ """
+ mock_rag_system.query.side_effect = RuntimeError("ChromaDB connection lost")
+ response = client.post("/api/query", json={"query": "What is Python?"})
+ assert response.status_code == 500
+ assert "ChromaDB connection lost" in response.json()["detail"]
+
+ def test_query_field_is_required(self, client):
+ """
+ WHAT: A request body missing the required 'query' field is rejected with 422.
+ ASSERT: status_code == 422 (Unprocessable Entity).
+ FAILURE MEANS: Pydantic validation is bypassed or the model definition changed.
+ """
+ response = client.post("/api/query", json={"session_id": "abc"})
+ assert response.status_code == 422
+
+
+# ---------------------------------------------------------------------------
+# GET /api/courses
+# ---------------------------------------------------------------------------
+
+class TestCoursesEndpoint:
+
+ def test_returns_200_with_course_stats(self, client):
+ """
+ WHAT: GET /api/courses returns total_courses and course_titles from the RAG system.
+ ASSERT: 200; total_courses == 2; course_titles is a list of 2 strings.
+ FAILURE MEANS: Analytics endpoint broken — dashboard always shows stale/zero data.
+ """
+ response = client.get("/api/courses")
+ assert response.status_code == 200
+ data = response.json()
+ assert data["total_courses"] == 2
+ assert data["course_titles"] == ["Python Fundamentals", "Data Science Basics"]
+
+ def test_delegates_to_get_course_analytics(self, client, mock_rag_system):
+ """
+ WHAT: /api/courses calls rag_system.get_course_analytics() exactly once.
+ ASSERT: get_course_analytics called once.
+ FAILURE MEANS: Route is using a cached value or wrong method — data could be stale.
+ """
+ client.get("/api/courses")
+ mock_rag_system.get_course_analytics.assert_called_once()
+
+ def test_returns_500_when_analytics_raises(self, client, mock_rag_system):
+ """
+ WHAT: If get_course_analytics raises, the endpoint returns HTTP 500.
+ ASSERT: status_code == 500 with an error detail string.
+ FAILURE MEANS: Unhandled exception crashes the server process instead of
+ returning a structured error to the frontend.
+ """
+ mock_rag_system.get_course_analytics.side_effect = Exception("DB error")
+ response = client.get("/api/courses")
+ assert response.status_code == 500
+ assert "DB error" in response.json()["detail"]
+
+ def test_empty_course_list(self, client, mock_rag_system):
+ """
+ WHAT: When no courses are loaded, endpoint returns total_courses=0 and [].
+ ASSERT: total_courses == 0; course_titles == [].
+ FAILURE MEANS: Empty-state handling crashes or returns unexpected data types.
+ """
+ mock_rag_system.get_course_analytics.return_value = {
+ "total_courses": 0,
+ "course_titles": [],
+ }
+ response = client.get("/api/courses")
+ assert response.status_code == 200
+ data = response.json()
+ assert data["total_courses"] == 0
+ assert data["course_titles"] == []
+
+
+# ---------------------------------------------------------------------------
+# DELETE /api/session/{session_id}
+# ---------------------------------------------------------------------------
+
+class TestDeleteSessionEndpoint:
+
+ def test_returns_200_with_cleared_status(self, client):
+ """
+ WHAT: DELETE /api/session/{id} returns 200 and {"status": "cleared"}.
+ ASSERT: status_code == 200; body matches exactly.
+ FAILURE MEANS: Session cleanup endpoint broken — conversation history leaks
+ across users or stale sessions accumulate in memory.
+ """
+ response = client.delete("/api/session/test-session-id")
+ assert response.status_code == 200
+ assert response.json() == {"status": "cleared"}
+
+ def test_calls_clear_session_with_correct_id(self, client, mock_rag_system):
+ """
+ WHAT: The session_id path parameter is forwarded to session_manager.clear_session.
+ ASSERT: clear_session called once with the exact id from the URL.
+ FAILURE MEANS: Wrong session is cleared, or the call is silently skipped.
+ """
+ client.delete("/api/session/my-specific-session")
+ mock_rag_system.session_manager.clear_session.assert_called_once_with(
+ "my-specific-session"
+ )
diff --git a/backend/tests/test_course_search_tool.py b/backend/tests/test_course_search_tool.py
new file mode 100644
index 000000000..707512785
--- /dev/null
+++ b/backend/tests/test_course_search_tool.py
@@ -0,0 +1,190 @@
+"""
+Unit tests for CourseSearchTool.execute() and _format_results().
+
+Diagnostic focus: Does the tool correctly surface VectorStore errors?
+Does it populate self.last_sources? Does it format results correctly?
+"""
+import pytest
+from unittest.mock import MagicMock
+from search_tools import CourseSearchTool, ToolManager
+from vector_store import SearchResults
+
+
+class TestCourseSearchToolExecute:
+
+ def test_execute_returns_formatted_content_on_success(self, mock_vector_store, sample_search_results):
+ """
+ WHAT: execute() with a working VectorStore returns formatted content.
+ ASSERT: returned string contains course title and document text.
+ FAILURE MEANS: _format_results is broken or not called.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="what is python")
+ assert "Python Fundamentals" in result
+ assert "Lesson content about Python basics." in result
+
+ def test_execute_calls_store_search_with_correct_args(self, mock_vector_store):
+ """
+ WHAT: execute() passes query/course_name/lesson_number through to store.search().
+ ASSERT: store.search called with exactly the right keyword args.
+ FAILURE MEANS: parameter forwarding broken → wrong ChromaDB filters applied.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ tool.execute(query="variables", course_name="Python", lesson_number=3)
+ mock_vector_store.search.assert_called_once_with(
+ query="variables", course_name="Python", lesson_number=3
+ )
+
+ def test_execute_populates_last_sources(self, mock_vector_store):
+ """
+ WHAT: After execute(), tool.last_sources is populated with one entry per result.
+ ASSERT: last_sources has 2 entries with 'label' and 'url' keys.
+ FAILURE MEANS: ToolManager.get_last_sources() returns [] even after successful search
+ → sources never reach the frontend.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ tool.execute(query="python basics")
+ assert len(tool.last_sources) == 2
+ for source in tool.last_sources:
+ assert "label" in source
+ assert "url" in source
+
+ def test_execute_fetches_lesson_link_for_each_result(self, mock_vector_store):
+ """
+ WHAT: _format_results calls get_lesson_link once per result that has a lesson_number.
+ ASSERT: get_lesson_link called exactly twice (for 2 results with lesson_number).
+ FAILURE MEANS: source URLs always None in the frontend.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ tool.execute(query="python basics")
+ assert mock_vector_store.get_lesson_link.call_count == 2
+
+ def test_execute_returns_error_string_verbatim_when_store_errors(self, mock_vector_store):
+ """
+ WHAT: When store.search returns SearchResults with .error set,
+ execute() returns that error string directly.
+ ASSERT: return value IS the error string.
+ FAILURE MEANS: ChromaDB error strings reach Claude as tool result,
+ causing Claude to report failure. THIS IS THE LIKELY ROOT CAUSE.
+ """
+ error_msg = (
+ "Search error: Number of requested results 5 is greater than "
+ "number of elements in index 0"
+ )
+ mock_vector_store.search.return_value = SearchResults.empty(error_msg)
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="anything")
+ assert result == error_msg
+
+ def test_execute_returns_no_content_message_when_empty(self, mock_vector_store, empty_search_results):
+ """
+ WHAT: When results are empty (no error, just no hits), execute() returns
+ the 'No relevant content found' sentinel.
+ ASSERT: return value starts with 'No relevant content found'.
+ FAILURE MEANS: Empty DB causes tool to silently return empty string or crash.
+ """
+ mock_vector_store.search.return_value = empty_search_results
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="anything")
+ assert result.startswith("No relevant content found")
+
+ def test_execute_includes_course_filter_in_empty_message(self, mock_vector_store, empty_search_results):
+ """
+ WHAT: Empty result message mentions the requested course name.
+ ASSERT: message contains the course_name that was requested.
+ FAILURE MEANS: User can't tell which course had no content.
+ """
+ mock_vector_store.search.return_value = empty_search_results
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="anything", course_name="Python")
+ assert "Python" in result
+
+ def test_execute_does_not_update_sources_on_error(self, mock_vector_store):
+ """
+ WHAT: When store returns an error result, last_sources is NOT overwritten.
+ ASSERT: Pre-seeded stale sources remain unchanged after an errored execute().
+ FAILURE MEANS: Stale sources from a previous query could leak into this response.
+ """
+ mock_vector_store.search.return_value = SearchResults.empty("Search error: boom")
+ tool = CourseSearchTool(mock_vector_store)
+ tool.last_sources = [{"label": "stale", "url": None}]
+ tool.execute(query="anything")
+ # Error branch returns early — last_sources should NOT have been updated
+ assert tool.last_sources == [{"label": "stale", "url": None}]
+
+
+class TestFormatResults:
+
+ def test_format_results_header_format(self, mock_vector_store, sample_search_results):
+ """
+ WHAT: _format_results includes [CourseName - Lesson N] headers.
+ ASSERT: expected header appears in output.
+ FAILURE MEANS: Claude receives raw content without course context headers.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool._format_results(sample_search_results)
+ assert "[Python Fundamentals - Lesson 1]" in result
+
+ def test_format_results_no_lesson_number_omits_lesson_from_header(self, mock_vector_store):
+ """
+ WHAT: When lesson_number is None in metadata, header is just [CourseName].
+ ASSERT: header does not contain 'Lesson'.
+ FAILURE MEANS: Metadata extraction crashes on missing lesson_number.
+ """
+ results = SearchResults(
+ documents=["Content without lesson number"],
+ metadata=[{"course_title": "Advanced Python", "lesson_number": None}],
+ distances=[0.1]
+ )
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool._format_results(results)
+ assert "[Advanced Python]" in result
+ assert "Lesson" not in result
+
+ def test_format_results_separates_results_with_double_newline(self, mock_vector_store, sample_search_results):
+ """
+ WHAT: Multiple results are joined with double newlines.
+ ASSERT: '\\n\\n' appears in the output.
+ FAILURE MEANS: Output is garbled — all results run together.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool._format_results(sample_search_results)
+ assert "\n\n" in result
+
+
+class TestToolManager:
+
+ def test_tool_manager_get_last_sources_returns_first_nonempty(self, mock_vector_store):
+ """
+ WHAT: get_last_sources() returns the first non-empty last_sources from registered tools.
+ ASSERT: returned list matches what was set on the tool.
+ FAILURE MEANS: RAGSystem.query() always returns empty sources list.
+ """
+ manager = ToolManager()
+ tool = CourseSearchTool(mock_vector_store)
+ tool.last_sources = [{"label": "Python - Lesson 1", "url": "https://example.com"}]
+ manager.register_tool(tool)
+ assert manager.get_last_sources() == [{"label": "Python - Lesson 1", "url": "https://example.com"}]
+
+ def test_tool_manager_reset_sources_clears_all_tools(self, mock_vector_store):
+ """
+ WHAT: reset_sources() clears last_sources on all registered tools.
+ ASSERT: After reset, last_sources == [].
+ FAILURE MEANS: Sources from query N bleed into query N+1.
+ """
+ manager = ToolManager()
+ tool = CourseSearchTool(mock_vector_store)
+ tool.last_sources = [{"label": "stale", "url": None}]
+ manager.register_tool(tool)
+ manager.reset_sources()
+ assert tool.last_sources == []
+
+ def test_tool_manager_execute_unknown_tool_returns_error_string(self, mock_vector_store):
+ """
+ WHAT: Calling execute_tool with an unregistered name returns an error string.
+ ASSERT: Returns string containing 'not found'.
+ FAILURE MEANS: Unknown tool name crashes instead of returning a recoverable error.
+ """
+ manager = ToolManager()
+ result = manager.execute_tool("nonexistent_tool", query="test")
+ assert "not found" in result
diff --git a/backend/tests/test_rag_system.py b/backend/tests/test_rag_system.py
new file mode 100644
index 000000000..e2f751f43
--- /dev/null
+++ b/backend/tests/test_rag_system.py
@@ -0,0 +1,209 @@
+"""
+Integration tests for RAGSystem.query().
+
+Patches VectorStore, DocumentProcessor, and the Anthropic client so no real
+ChromaDB or API calls occur. Lets the real RAGSystem, ToolManager,
+CourseSearchTool, and AIGenerator code run.
+
+Diagnostic focus: Does the full pipeline assemble correctly?
+Do sources flow back from tool to response? Does session history update?
+"""
+import pytest
+from unittest.mock import MagicMock, patch
+from rag_system import RAGSystem
+from vector_store import SearchResults
+from tests.conftest import make_text_response, make_tool_use_response
+
+
+@pytest.fixture
+def mock_config():
+ """Minimal config with fake values; real ChromaDB/Anthropic init is blocked by patches in rag_system_with_mocks."""
+ cfg = MagicMock()
+ cfg.ANTHROPIC_API_KEY = "sk-test-fake"
+ cfg.ANTHROPIC_MODEL = "claude-3-haiku-20240307"
+ cfg.CHROMA_PATH = ":memory:"
+ cfg.EMBEDDING_MODEL = "all-MiniLM-L6-v2"
+ cfg.MAX_RESULTS = 5
+ cfg.MAX_HISTORY = 2
+ cfg.CHUNK_SIZE = 800
+ cfg.CHUNK_OVERLAP = 100
+ return cfg
+
+
+@pytest.fixture
+def rag_system_with_mocks(mock_config, sample_search_results):
+ """
+ RAGSystem with VectorStore and Anthropic client both mocked.
+ Yields (system, mock_vs_instance, mock_anthropic_client).
+ """
+ with patch("rag_system.VectorStore") as MockVS, \
+ patch("rag_system.DocumentProcessor"), \
+ patch("ai_generator.anthropic.Anthropic") as MockAnthropic:
+
+ mock_vs_instance = MagicMock()
+ mock_vs_instance.search.return_value = sample_search_results
+ mock_vs_instance.get_lesson_link.return_value = "https://example.com/lesson/1"
+ MockVS.return_value = mock_vs_instance
+
+ mock_client = MagicMock()
+ MockAnthropic.return_value = mock_client
+
+ system = RAGSystem(mock_config)
+ yield system, mock_vs_instance, mock_client
+
+
+class TestRAGSystemQueryHappyPath:
+
+ def test_query_returns_tuple_of_answer_and_sources(self, rag_system_with_mocks):
+ """
+ WHAT: RAGSystem.query() returns a 2-tuple (str, list).
+ ASSERT: result[0] is str, result[1] is list.
+ FAILURE MEANS: API contract broken — app.py crashes unpacking (answer, sources).
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("General answer.")
+ answer, sources = system.query("What is Python?")
+ assert isinstance(answer, str)
+ assert isinstance(sources, list)
+
+ def test_query_prompt_wraps_user_question(self, rag_system_with_mocks):
+ """
+ WHAT: RAGSystem.query() prepends the 'Answer this question about course materials:'
+ prefix to the user query before calling generate_response.
+ ASSERT: The message content sent to Claude starts with that prefix.
+ FAILURE MEANS: The prefix is no longer applied — prompt framing changed (this framing may suppress tool use, Bug 5).
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("ok")
+ system.query("What are variables?")
+ call_kwargs = mock_client.messages.create.call_args[1]
+ user_message_content = call_kwargs["messages"][0]["content"]
+ assert "Answer this question about course materials:" in user_message_content
+
+ def test_query_with_tool_use_returns_sources(self, rag_system_with_mocks):
+ """
+ WHAT: When Claude uses the search tool and results are found, sources list is non-empty.
+ ASSERT: sources has at least one entry with 'label' key.
+ FAILURE MEANS: Frontend never displays source links even on successful searches.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("Python is a programming language.")
+ mock_client.messages.create.side_effect = [tool_response, final_response]
+
+ answer, sources = system.query("What is Python?")
+ assert answer == "Python is a programming language."
+ assert len(sources) > 0
+ assert "label" in sources[0]
+
+ def test_query_resets_sources_after_retrieval(self, rag_system_with_mocks):
+ """
+ WHAT: After query() retrieves sources, reset_sources() is called so the next
+ query doesn't inherit stale sources.
+ ASSERT: Second query's sources list is empty (direct response, no tool use).
+ FAILURE MEANS: Sources from query N bleed into query N+1 in the frontend.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ direct_response = make_text_response("General knowledge answer.")
+
+ # First query: tool use
+ mock_client.messages.create.side_effect = [tool_response, make_text_response("Python answer.")]
+ system.query("What is Python?")
+
+ # Second query: direct response (no tool use)
+ mock_client.messages.create.side_effect = [direct_response]
+ _, sources2 = system.query("What is 2 + 2?")
+ assert sources2 == []
+
+
+class TestRAGSystemSessionHandling:
+
+ def test_query_without_session_id_returns_answer(self, rag_system_with_mocks):
+ """
+ WHAT: query() called without session_id does not crash.
+ ASSERT: answer is a non-empty string.
+ FAILURE MEANS: Session handling broken for anonymous (stateless) queries.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("Answer.")
+ answer, _ = system.query("test", session_id=None)
+ assert len(answer) > 0
+
+ def test_query_with_new_session_id_does_not_crash(self, rag_system_with_mocks):
+ """
+ WHAT: query() with a fresh session_id (not yet in sessions dict) works correctly.
+ ASSERT: No exception; answer returned as str.
+ FAILURE MEANS: get_conversation_history() crashes on unknown session_id.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("Answer.")
+ answer, _ = system.query("test", session_id="brand-new-session-99")
+ assert isinstance(answer, str)
+
+ def test_query_updates_session_history_after_response(self, rag_system_with_mocks):
+ """
+ WHAT: After a successful query, the exchange is stored in session history.
+ ASSERT: get_conversation_history() returns a string containing the user query.
+ FAILURE MEANS: Conversation context never accumulates; multi-turn dialogue is broken.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("Answer to hello.")
+ session_id = system.session_manager.create_session()
+ system.query("hello", session_id=session_id)
+ history = system.session_manager.get_conversation_history(session_id)
+ assert "hello" in history
+
+
+class TestRAGSystemErrorPropagation:
+
+ def test_query_when_vector_store_errors_claude_receives_error_string(self, rag_system_with_mocks):
+ """
+ WHAT: When VectorStore.search returns error SearchResults, the error string
+ reaches Claude as a tool result. Claude's final answer is its text,
+ not a Python exception.
+ ASSERT: answer is a string (no exception propagated).
+ FAILURE MEANS: Unhandled exception → FastAPI 500. If this passes but user sees
+ 'query failed', the bug is Claude saying so verbally, not an HTTP error.
+ """
+ system, mock_vs, mock_client = rag_system_with_mocks
+ mock_vs.search.return_value = SearchResults.empty(
+ "Search error: Number of requested results 5 is greater than number of elements in index 0"
+ )
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("I was unable to find information about that topic.")
+ mock_client.messages.create.side_effect = [tool_response, final_response]
+
+ answer, sources = system.query("What is Python?")
+ assert isinstance(answer, str)
+ assert len(answer) > 0
+ assert sources == []
+
+ def test_query_anthropic_api_exception_propagates_to_caller(self, rag_system_with_mocks):
+ """
+ WHAT: If the Anthropic API call raises, the exception propagates out of query()
+ so FastAPI catches it as a 500.
+ ASSERT: query() raises an exception (any type).
+ FAILURE MEANS: Exception silently swallowed → query returns wrong value, no 500 sent.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.side_effect = ConnectionError("API unreachable")
+ with pytest.raises(Exception):
+ system.query("What is Python?")
+
+ def test_query_with_empty_database_returns_answer_string(self, rag_system_with_mocks):
+ """
+ WHAT: If the vector DB is empty, search returns is_empty()=True.
+ The tool returns 'No relevant content found.' Claude answers accordingly.
+ ASSERT: answer is a non-empty string; no exception raised.
+ FAILURE MEANS: Empty database crashes the system → HTTP 500 instead of a graceful reply.
+ """
+ system, mock_vs, mock_client = rag_system_with_mocks
+ mock_vs.search.return_value = SearchResults(documents=[], metadata=[], distances=[])
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("There is no course content about that topic.")
+ mock_client.messages.create.side_effect = [tool_response, final_response]
+
+ answer, _ = system.query("What is Python?")
+ assert isinstance(answer, str)
+ assert len(answer) > 0
diff --git a/backend/vector_store.py b/backend/vector_store.py
index 390abe71c..ee9557764 100644
--- a/backend/vector_store.py
+++ b/backend/vector_store.py
@@ -90,9 +90,15 @@ def search(self,
search_limit = limit if limit is not None else self.max_results
try:
+ # Guard: clamp n_results to actual collection size.
+ # ChromaDB raises ValueError if n_results > number of indexed documents.
+ collection_count = self.course_content.count()
+ if collection_count == 0:
+ return SearchResults(documents=[], metadata=[], distances=[])
+ actual_limit = min(search_limit, collection_count)
results = self.course_content.query(
query_texts=[query],
- n_results=search_limit,
+ n_results=actual_limit,
where=filter_dict
)
return SearchResults.from_chroma(results)
@@ -102,6 +108,8 @@ def search(self,
def _resolve_course_name(self, course_name: str) -> Optional[str]:
"""Use vector search to find best matching course by name"""
try:
+ if self.course_catalog.count() == 0:
+ return None
results = self.course_catalog.query(
query_texts=[course_name],
n_results=1
@@ -246,6 +254,26 @@ def get_course_link(self, course_title: str) -> Optional[str]:
print(f"Error getting course link: {e}")
return None
+ def get_course_outline(self, course_name: str) -> Optional[Dict[str, Any]]:
+ """Get course outline (title, link, lessons list) by course name (fuzzy match)"""
+ import json
+ course_title = self._resolve_course_name(course_name)
+ if not course_title:
+ return None
+ try:
+ results = self.course_catalog.get(ids=[course_title])
+ if results and results['metadatas']:
+ meta = results['metadatas'][0]
+ lessons = json.loads(meta.get('lessons_json', '[]'))
+ return {
+ 'title': meta.get('title'),
+ 'course_link': meta.get('course_link'),
+ 'lessons': lessons
+ }
+ except Exception as e:
+ print(f"Error getting course outline: {e}")
+ return None
+
def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str]:
"""Get lesson link for a given course title and lesson number"""
import json
diff --git a/frontend-changes.md b/frontend-changes.md
new file mode 100644
index 000000000..f4445fc97
--- /dev/null
+++ b/frontend-changes.md
@@ -0,0 +1,147 @@
+# Frontend Changes
+
+## Code Quality Tooling
+
+### What was added
+
+| File | Purpose |
+|---|---|
+| `frontend/package.json` | npm project manifest with Prettier and ESLint as dev dependencies |
+| `frontend/.prettierrc` | Prettier configuration |
+| `frontend/.eslintrc.json` | ESLint configuration |
+| `frontend/.prettierignore` | Excludes `node_modules/` from formatting |
+| `scripts/check-frontend.sh` | Shell script that runs both Prettier and ESLint |
+
+### Prettier (`frontend/.prettierrc`)
+
+Prettier is the JavaScript/CSS/HTML equivalent of Black — it enforces a single, consistent code style with no configuration debates.
+
+Settings chosen to match the existing code style:
+- `singleQuote: true` — use single quotes (already used throughout)
+- `semi: true` — require semicolons
+- `tabWidth: 2` — 2-space indentation
+- `trailingComma: "es5"` — trailing commas in objects/arrays (ES5-safe)
+- `printWidth: 100` — line length limit
+- `arrowParens: "always"` — always parenthesise arrow function params: `(x) => x`
+
+### ESLint (`frontend/.eslintrc.json`)
+
+Catches real bugs and enforces best practices in `script.js`:
+- `eqeqeq` — require `===` instead of `==`
+- `no-var` — disallow `var`, enforcing `const`/`let`
+- `prefer-const` — warn when `let` could be `const`
+- `no-unused-vars` — warn on unused variables
+- `no-implicit-globals` — prevent accidental globals
+
+`marked` (loaded from CDN) is declared as a global so ESLint does not flag it as undefined.
+
+### `script.js` formatting changes applied
+
+Prettier was applied to `script.js`. Key diffs from the original:
+
+1. **Indentation normalised to 2 spaces** throughout (was 4 spaces).
+2. **Trailing commas** added in multi-line objects:
+ - `{ 'Content-Type': 'application/json' }` fetch header object
+ - `{ query, session_id }` request body object
+3. **Arrow function parentheses** made consistent: `s =>` → `(s) =>`, `.forEach(button =>` → `.forEach((button) =>`
+4. **Double blank lines** collapsed to single blank lines (e.g. in `setupEventListeners`).
+5. **Method chains** reformatted: `sources.map(...).join('')` broken across lines for readability.
+6. **`addMessage` long string call** broken into multi-line form with trailing argument style.
+
+### Running quality checks
+
+**Install dependencies (once):**
+```bash
+cd frontend && npm install
+```
+
+**Check formatting and linting:**
+```bash
+# From repo root:
+./scripts/check-frontend.sh
+
+# Or from frontend/:
+npm run quality
+```
+
+**Auto-fix all issues:**
+```bash
+# From repo root:
+./scripts/check-frontend.sh --fix
+
+# Or from frontend/ (format then lint-fix):
+npm run format
+npm run lint:fix
+```
+
+**Individual commands:**
+```bash
+cd frontend
+
+npm run format # apply Prettier formatting
+npm run format:check # check formatting without writing
+npm run lint # run ESLint
+npm run lint:fix # run ESLint with auto-fix
+npm run quality # format:check + lint (CI-safe, no writes)
+```
+
+---
+
+## Dark/Light Theme Toggle
+
+### Summary
+
+Added a dark/light theme toggle button to the frontend. Users can switch between the existing dark theme and a new light theme. The preference is persisted in `localStorage` and applied immediately on page load (no flash of wrong theme).
+
+---
+
+### Files Modified
+
+#### `frontend/index.html`
+
+- Added a `