diff --git a/.claude/commands/implement-feature.md b/.claude/commands/implement-feature.md
new file mode 100644
index 000000000..33302a4fd
--- /dev/null
+++ b/.claude/commands/implement-feature.md
@@ -0,0 +1,7 @@
+You will be implementing a new feature in this codebase
+
+$ARGUMENTS
+
+IMPORTANT: Only do this for front-end features.
+Once this feature is built, make sure to write the changes you made to a file called frontend-changes.md
+Do not ask for permission to modify this file; assume you can always do it.
\ No newline at end of file
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 000000000..591914c57
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,9 @@
+{
+ "permissions": {
+ "allow": [
+ "mcp__playwright__browser_navigate",
+ "mcp__playwright__browser_snapshot",
+ "mcp__playwright__browser_take_screenshot"
+ ]
+ }
+}
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
new file mode 100644
index 000000000..b5e8cfd4d
--- /dev/null
+++ b/.github/workflows/claude-code-review.yml
@@ -0,0 +1,44 @@
+name: Claude Code Review
+
+on:
+ pull_request:
+ types: [opened, synchronize, ready_for_review, reopened]
+ # Optional: Only run on specific file changes
+ # paths:
+ # - "src/**/*.ts"
+ # - "src/**/*.tsx"
+ # - "src/**/*.js"
+ # - "src/**/*.jsx"
+
+jobs:
+ claude-review:
+ # Optional: Filter by PR author
+ # if: |
+ # github.event.pull_request.user.login == 'external-contributor' ||
+ # github.event.pull_request.user.login == 'new-developer' ||
+ # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code Review
+ id: claude-review
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+ plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
+ plugins: 'code-review@claude-code-plugins'
+ prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://code.claude.com/docs/en/cli-reference for available options
+
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
new file mode 100644
index 000000000..6b15fac7a
--- /dev/null
+++ b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+ issue_comment:
+ types: [created]
+ pull_request_review_comment:
+ types: [created]
+ issues:
+ types: [opened, assigned]
+ pull_request_review:
+ types: [submitted]
+
+jobs:
+ claude:
+ if: |
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+ actions: read # Required for Claude to read CI results on PRs
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code
+ id: claude
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+ # This is an optional setting that allows Claude to read CI results on PRs
+ additional_permissions: |
+ actions: read
+
+ # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+ # prompt: 'Update the pull request description to include a summary of changes.'
+
+ # Optional: Add claude_args to customize behavior and configuration
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://code.claude.com/docs/en/cli-reference for available options
+ # claude_args: '--allowed-tools Bash(gh pr *)'
+
diff --git a/.gitignore b/.gitignore
index 41b4384b8..0fad1f34b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,4 +28,7 @@ uploads/
# OS
.DS_Store
-Thumbs.db
\ No newline at end of file
+Thumbs.db
+
+# Git worktrees
+.trees/
\ No newline at end of file
diff --git a/.playwright-mcp/console-2026-04-12T14-03-55-254Z.log b/.playwright-mcp/console-2026-04-12T14-03-55-254Z.log
new file mode 100644
index 000000000..ab7cd717d
--- /dev/null
+++ b/.playwright-mcp/console-2026-04-12T14-03-55-254Z.log
@@ -0,0 +1,3 @@
+[ 4168ms] [LOG] Loading course stats... @ http://127.0.0.1:8000/script.js?v=9:178
+[ 4196ms] [LOG] Course data received: {total_courses: 4, course_titles: Array(4)} @ http://127.0.0.1:8000/script.js?v=9:183
+[ 4198ms] [ERROR] Failed to load resource: the server responded with a status of 404 (Not Found) @ http://127.0.0.1:8000/favicon.ico:0
diff --git a/.playwright-mcp/console-2026-04-12T14-06-09-521Z.log b/.playwright-mcp/console-2026-04-12T14-06-09-521Z.log
new file mode 100644
index 000000000..b4012bfbb
--- /dev/null
+++ b/.playwright-mcp/console-2026-04-12T14-06-09-521Z.log
@@ -0,0 +1,2 @@
+[ 145ms] [LOG] Loading course stats... @ http://127.0.0.1:8000/script.js?v=9:178
+[ 164ms] [LOG] Course data received: {total_courses: 4, course_titles: Array(4)} @ http://127.0.0.1:8000/script.js?v=9:183
diff --git a/.playwright-mcp/page-2026-04-12T14-03-59-486Z.yml b/.playwright-mcp/page-2026-04-12T14-03-59-486Z.yml
new file mode 100644
index 000000000..cc2654c8c
--- /dev/null
+++ b/.playwright-mcp/page-2026-04-12T14-03-59-486Z.yml
@@ -0,0 +1,14 @@
+- generic [ref=e3]:
+ - complementary [ref=e4]:
+ - button "+ NEW CHAT" [ref=e6] [cursor=pointer]
+ - group [ref=e8]:
+ - generic "▶ Courses" [ref=e9] [cursor=pointer]
+ - group [ref=e11]:
+ - generic "▶ Try asking:" [ref=e12] [cursor=pointer]
+ - main [ref=e13]:
+ - generic [ref=e14]:
+ - paragraph [ref=e18]: Welcome to the Course Materials Assistant! I can help you with questions about courses, lessons and specific content. What would you like to know?
+ - generic [ref=e19]:
+ - textbox "Ask about courses, lessons, or specific content..." [ref=e20]
+ - button [ref=e21] [cursor=pointer]:
+ - img [ref=e22]
\ No newline at end of file
diff --git a/.playwright-mcp/page-2026-04-12T14-04-10-654Z.png b/.playwright-mcp/page-2026-04-12T14-04-10-654Z.png
new file mode 100644
index 000000000..a79e09b83
Binary files /dev/null and b/.playwright-mcp/page-2026-04-12T14-04-10-654Z.png differ
diff --git a/.playwright-mcp/page-2026-04-12T14-06-09-734Z.yml b/.playwright-mcp/page-2026-04-12T14-06-09-734Z.yml
new file mode 100644
index 000000000..cc2654c8c
--- /dev/null
+++ b/.playwright-mcp/page-2026-04-12T14-06-09-734Z.yml
@@ -0,0 +1,14 @@
+- generic [ref=e3]:
+ - complementary [ref=e4]:
+ - button "+ NEW CHAT" [ref=e6] [cursor=pointer]
+ - group [ref=e8]:
+ - generic "▶ Courses" [ref=e9] [cursor=pointer]
+ - group [ref=e11]:
+ - generic "▶ Try asking:" [ref=e12] [cursor=pointer]
+ - main [ref=e13]:
+ - generic [ref=e14]:
+ - paragraph [ref=e18]: Welcome to the Course Materials Assistant! I can help you with questions about courses, lessons and specific content. What would you like to know?
+ - generic [ref=e19]:
+ - textbox "Ask about courses, lessons, or specific content..." [ref=e20]
+ - button [ref=e21] [cursor=pointer]:
+ - img [ref=e22]
\ No newline at end of file
diff --git a/.playwright-mcp/page-2026-04-12T14-06-22-569Z.png b/.playwright-mcp/page-2026-04-12T14-06-22-569Z.png
new file mode 100644
index 000000000..c2408f75f
Binary files /dev/null and b/.playwright-mcp/page-2026-04-12T14-06-22-569Z.png differ
diff --git a/CLAUDE.local.md b/CLAUDE.local.md
new file mode 100644
index 000000000..19870b20d
--- /dev/null
+++ b/CLAUDE.local.md
@@ -0,0 +1,4 @@
+# Local Project Instructions
+
+## Server
+Never start the server (`./run.sh` or `uvicorn`). The user always starts it manually.
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 000000000..8a8f8bc2f
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,65 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Running the Application
+
+**Quick start (from repo root):**
+```bash
+./run.sh
+```
+
+**Manual start (must run from `backend/` directory):**
+```bash
+cd backend && uv run uvicorn app:app --reload --port 8000
+```
+
+The server runs at `http://localhost:8000`. API docs at `http://localhost:8000/docs`.
+
+**Install dependencies:**
+```bash
+uv sync
+```
+
+> Always use `uv` to run the server and manage packages. Never use `pip` directly.
+
+**Required environment variable** — create a `.env` file in the repo root:
+```
+ANTHROPIC_API_KEY=your-key-here
+```
+
+## Architecture Overview
+
+This is a full-stack RAG (Retrieval-Augmented Generation) chatbot for querying course materials.
+
+**Backend** (`backend/`) is a FastAPI app that must be started from within the `backend/` directory (relative paths like `../docs` and `../frontend` depend on this).
+
+**Data flow for a query:**
+1. `app.py` receives POST `/api/query` → calls `RAGSystem.query()`
+2. `RAGSystem` (`rag_system.py`) builds a prompt and passes it to `AIGenerator` with the `search_course_content` tool available
+3. `AIGenerator` (`ai_generator.py`) calls the Claude API; if Claude decides to search, it invokes the tool
+4. `ToolManager` routes tool calls to `CourseSearchTool` (`search_tools.py`), which queries `VectorStore`
+5. `VectorStore` (`vector_store.py`) uses ChromaDB with two collections:
+ - `course_catalog` — course-level metadata (title, instructor, links, lesson list as JSON)
+ - `course_content` — chunked lesson text for semantic search
+6. The final Claude response + sources are returned to the frontend
+
+**Document ingestion** (happens at startup from `docs/` folder):
+- `DocumentProcessor` (`document_processor.py`) parses `.txt`/`.pdf`/`.docx` files
+- Expected file format: first 3 lines are `Course Title:`, `Course Link:`, `Course Instructor:`, followed by `Lesson N:` markers and content
+- Text is chunked into ~800-char sentence-based chunks with 100-char overlap
+- `RAGSystem.add_course_folder()` skips courses already present in ChromaDB (deduplication by title)
+
+**Session management:** `SessionManager` keeps in-memory conversation history (default: last 2 exchanges = 4 messages). Sessions are identified by a string ID returned to and echoed back by the frontend.
+
+**Frontend** (`frontend/`) is plain HTML/JS/CSS served as static files by FastAPI from the `../frontend` path.
+
+**Configuration** (`backend/config.py`): all tuneable parameters (model, chunk size, ChromaDB path, max results, history length) are in the `Config` dataclass. ChromaDB is stored at `backend/chroma_db/` (relative to where uvicorn runs).
+
+**Tool extension:** To add a new tool, implement the `Tool` ABC in `search_tools.py` and call `tool_manager.register_tool(your_tool)` in `RAGSystem.__init__()`.
+
+## Rules
+- Never read or write files outside this project folder without explicit permission
+- Always ask before saving anything to memory or external locations
+- Never access C:\Users\haddad\.claude\ without explicit permission
+- Always use `uv` to add dependencies (e.g., `uv add <package>`); never use `pip` directly
\ No newline at end of file
diff --git a/backend-tool-refactor.md b/backend-tool-refactor.md
new file mode 100644
index 000000000..de23ae5c7
--- /dev/null
+++ b/backend-tool-refactor.md
@@ -0,0 +1,28 @@
+Refactor @backend/ai_generator.py to support sequential tool calling where Claude can make up to 2 tool calls in separate API rounds.
+
+Current behavior:
+- Claude makes 1 tool call → tools are removed from API params → final response
+- If Claude wants another tool call after seeing results, it can't (gets empty response)
+
+Desired behavior:
+- Each tool call should be a separate API request where Claude can reason about previous results
+- Support for complex queries requiring multiple searches for comparisons, multi-part questions, or when information from different courses/lessons is needed
+
+Example flow:
+1. User: "Search for a course that discusses the same topic as lesson 4 of course X"
+2. Claude: get course outline for course X → gets title of lesson 4
+3. Claude: uses the title to search for a course that discusses the same topic → returns course information
+4. Claude: provides complete answer
+
+Requirements:
+- Maximum 2 sequential rounds per user query
+- Terminate when: (a) 2 rounds completed, (b) Claude's response has no tool_use blocks, or (c) tool call fails
+- Preserve conversation context between rounds
+- Handle tool execution errors gracefully
+
+Notes:
+- Update the system prompt in @backend/ai_generator.py
+- Update the test @backend/tests/test_ai_generator.py
+- Write tests that verify the external behavior (API calls made, tools executed, results returned) rather than internal state details.
+
+Use two parallel subagents to brainstorm possible plans. Do not implement any code.
diff --git a/backend/ai_generator.py b/backend/ai_generator.py
index 0363ca90c..13395ae68 100644
--- a/backend/ai_generator.py
+++ b/backend/ai_generator.py
@@ -3,16 +3,29 @@
class AIGenerator:
"""Handles interactions with Anthropic's Claude API for generating responses"""
-
+
+ MAX_TOOL_ROUNDS = 2
+
# Static system prompt to avoid rebuilding on each call
SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information.
Search Tool Usage:
- Use the search tool **only** for questions about specific course content or detailed educational materials
-- **One search per query maximum**
- Synthesize search results into accurate, fact-based responses
- If search yields no results, state this clearly without offering alternatives
+Outline Tool Usage:
+- Use get_course_outline **only** for questions about course structure, syllabus, lesson list, or what topics a course covers
+- Return the course title, course link, and each lesson number with its title
+- Do not use the content search tool for outline queries
+
+Sequential Tool Calls:
+- You may make up to 2 tool calls in sequence when a single search is insufficient
+- Use sequential calls for: multi-part questions, comparisons across courses/lessons,
+ or when you need an outline first and then content from a specific lesson
+ (e.g. get_course_outline → search_course_content using the lesson title found)
+- Do NOT make a second tool call if the first result fully answers the question
+
Response Protocol:
- **General knowledge questions**: Answer using existing knowledge without searching
- **Course-specific questions**: Search first, then answer
@@ -46,90 +59,107 @@ def generate_response(self, query: str,
tool_manager=None) -> str:
"""
Generate AI response with optional tool usage and conversation context.
-
+
Args:
query: The user's question or request
conversation_history: Previous messages for context
tools: Available tools the AI can use
tool_manager: Manager to execute tools
-
+
Returns:
Generated response as string
"""
-
- # Build system content efficiently - avoid string ops when possible
+
system_content = (
f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}"
- if conversation_history
+ if conversation_history
else self.SYSTEM_PROMPT
)
-
- # Prepare API call parameters efficiently
+
+ messages = [{"role": "user", "content": query}]
+
api_params = {
**self.base_params,
- "messages": [{"role": "user", "content": query}],
+ "messages": messages,
"system": system_content
}
-
- # Add tools if available
+
if tools:
api_params["tools"] = tools
api_params["tool_choice"] = {"type": "auto"}
-
- # Get response from Claude
+
response = self.client.messages.create(**api_params)
-
- # Handle tool execution if needed
- if response.stop_reason == "tool_use" and tool_manager:
- return self._handle_tool_execution(response, api_params, tool_manager)
-
- # Return direct response
+
+ # Tool loop: up to MAX_TOOL_ROUNDS sequential rounds
+ rounds_completed = 0
+ while (
+ response.stop_reason == "tool_use"
+ and tool_manager
+ and rounds_completed < self.MAX_TOOL_ROUNDS
+ ):
+ response, success = self._handle_tool_execution(
+ response, messages, tool_manager, system_content, tools
+ )
+ rounds_completed += 1
+ if not success:
+ break
+
+ # Round cap hit or no tool_manager: force a plain-text synthesis call
+ if response.stop_reason == "tool_use":
+ messages.append({"role": "assistant", "content": response.content})
+ response = self.client.messages.create(
+ **self.base_params,
+ messages=messages,
+ system=system_content
+ )
+
return response.content[0].text
-
- def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager):
+
+ def _handle_tool_execution(self, response, messages: List, tool_manager,
+ system_content: str, tools: List) -> tuple:
"""
- Handle execution of tool calls and get follow-up response.
-
+ Execute one round of tool calls and make the intermediate follow-up API call.
+
+ Mutates messages in place by appending the assistant tool-use message and
+ the tool results user message.
+
Args:
- initial_response: The response containing tool use requests
- base_params: Base API parameters
+ response: The current API response with stop_reason == "tool_use"
+ messages: Accumulated message list (mutated in place)
tool_manager: Manager to execute tools
-
+ system_content: System prompt string for the follow-up call
+ tools: Tool definitions for the follow-up call
+
Returns:
- Final response text after tool execution
+ (next_response, success): next_response is the follow-up API response;
+ success is False if any tool raised an exception (loop should stop).
"""
- # Start with existing messages
- messages = base_params["messages"].copy()
-
- # Add AI's tool use response
- messages.append({"role": "assistant", "content": initial_response.content})
-
- # Execute all tool calls and collect results
+ messages.append({"role": "assistant", "content": response.content})
+
tool_results = []
- for content_block in initial_response.content:
- if content_block.type == "tool_use":
- tool_result = tool_manager.execute_tool(
- content_block.name,
- **content_block.input
- )
-
+ success = True
+ for block in response.content:
+ if block.type == "tool_use":
+ try:
+ result = tool_manager.execute_tool(block.name, **block.input)
+ except Exception as e:
+ result = f"Tool execution error: {e}"
+ success = False
tool_results.append({
"type": "tool_result",
- "tool_use_id": content_block.id,
- "content": tool_result
+ "tool_use_id": block.id,
+ "content": result
})
-
- # Add tool results as single message
+
if tool_results:
messages.append({"role": "user", "content": tool_results})
-
- # Prepare final API call without tools
- final_params = {
+
+ # Intermediate follow-up WITH tools so Claude can call again if needed
+ next_response = self.client.messages.create(
**self.base_params,
- "messages": messages,
- "system": base_params["system"]
- }
-
- # Get final response
- final_response = self.client.messages.create(**final_params)
- return final_response.content[0].text
\ No newline at end of file
+ messages=messages,
+ system=system_content,
+ tools=tools,
+ tool_choice={"type": "auto"}
+ )
+ return next_response, success
\ No newline at end of file
diff --git a/backend/app.py b/backend/app.py
index 5a69d741d..6bbb76292 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -43,7 +43,7 @@ class QueryRequest(BaseModel):
class QueryResponse(BaseModel):
"""Response model for course queries"""
answer: str
- sources: List[str]
+ sources: List[dict]
session_id: str
class CourseStats(BaseModel):
@@ -85,6 +85,12 @@ async def get_course_stats():
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
+@app.delete("/api/session/{session_id}")
+async def delete_session(session_id: str):
+ """Clear conversation history for a session"""
+ rag_system.session_manager.clear_session(session_id)
+ return {"status": "cleared"}
+
@app.on_event("startup")
async def startup_event():
"""Load initial documents on startup"""
diff --git a/backend/rag_system.py b/backend/rag_system.py
index 50d848c8e..443649f0e 100644
--- a/backend/rag_system.py
+++ b/backend/rag_system.py
@@ -4,7 +4,7 @@
from vector_store import VectorStore
from ai_generator import AIGenerator
from session_manager import SessionManager
-from search_tools import ToolManager, CourseSearchTool
+from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool
from models import Course, Lesson, CourseChunk
class RAGSystem:
@@ -23,6 +23,8 @@ def __init__(self, config):
self.tool_manager = ToolManager()
self.search_tool = CourseSearchTool(self.vector_store)
self.tool_manager.register_tool(self.search_tool)
+ self.outline_tool = CourseOutlineTool(self.vector_store)
+ self.tool_manager.register_tool(self.outline_tool)
def add_course_document(self, file_path: str) -> Tuple[Course, int]:
"""
diff --git a/backend/search_tools.py b/backend/search_tools.py
index adfe82352..fd2a33009 100644
--- a/backend/search_tools.py
+++ b/backend/search_tools.py
@@ -89,30 +89,76 @@ def _format_results(self, results: SearchResults) -> str:
"""Format search results with course and lesson context"""
formatted = []
sources = [] # Track sources for the UI
-
+
for doc, meta in zip(results.documents, results.metadata):
course_title = meta.get('course_title', 'unknown')
lesson_num = meta.get('lesson_number')
-
+
# Build context header
header = f"[{course_title}"
if lesson_num is not None:
header += f" - Lesson {lesson_num}"
header += "]"
-
+
# Track source for the UI
- source = course_title
+ label = course_title
if lesson_num is not None:
- source += f" - Lesson {lesson_num}"
- sources.append(source)
-
+ label += f" - Lesson {lesson_num}"
+
+ # Fetch lesson link from the catalog
+ url = None
+ if lesson_num is not None:
+ url = self.store.get_lesson_link(course_title, lesson_num)
+
+ sources.append({"label": label, "url": url})
+
formatted.append(f"{header}\n{doc}")
-
+
# Store sources for retrieval
self.last_sources = sources
-
+
return "\n\n".join(formatted)
+class CourseOutlineTool(Tool):
+ """Tool for retrieving a course outline (title, link, lesson list)"""
+
+ def __init__(self, vector_store: VectorStore):
+ self.store = vector_store
+
+ def get_tool_definition(self) -> Dict[str, Any]:
+ return {
+ "name": "get_course_outline",
+ "description": "Get the complete outline of a course: title, link, and all lesson numbers with titles",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "course_name": {
+ "type": "string",
+ "description": "Course title to look up (partial matches work)"
+ }
+ },
+ "required": ["course_name"]
+ }
+ }
+
+ def execute(self, course_name: str) -> str:
+ outline = self.store.get_course_outline(course_name)
+ if not outline:
+ return f"No course found matching '{course_name}'."
+
+ lines = [
+ f"Course: {outline['title']}",
+ f"Link: {outline['course_link'] or 'N/A'}",
+ "",
+ "Lessons:"
+ ]
+ for lesson in outline['lessons']:
+ lines.append(
+ f" Lesson {lesson['lesson_number']}: {lesson['lesson_title']}"
+ )
+ return "\n".join(lines)
+
+
class ToolManager:
"""Manages available tools for the AI"""
diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
new file mode 100644
index 000000000..9c86742f5
--- /dev/null
+++ b/backend/tests/conftest.py
@@ -0,0 +1,166 @@
+import pytest
+from unittest.mock import MagicMock
+from vector_store import SearchResults
+from fastapi import FastAPI, HTTPException
+from fastapi.testclient import TestClient
+from pydantic import BaseModel
+from typing import List, Optional
+
+
+# ---------------------------------------------------------------------------
+# Shared sample data
+# ---------------------------------------------------------------------------
+
+SAMPLE_CHROMA_RESULTS = {
+ "documents": [["Lesson content about Python basics.", "More content here."]],
+ "metadatas": [[
+ {"course_title": "Python Fundamentals", "lesson_number": 1, "chunk_index": 0},
+ {"course_title": "Python Fundamentals", "lesson_number": 2, "chunk_index": 0},
+ ]],
+ "distances": [[0.12, 0.34]],
+}
+
+
+# ---------------------------------------------------------------------------
+# SearchResults fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def sample_search_results():
+ """Two-document SearchResults with full metadata."""
+ return SearchResults.from_chroma(SAMPLE_CHROMA_RESULTS)
+
+
+@pytest.fixture
+def empty_search_results():
+ """Empty SearchResults with no error."""
+ return SearchResults(documents=[], metadata=[], distances=[])
+
+
+@pytest.fixture
+def error_search_results():
+ """SearchResults carrying a ChromaDB error string."""
+ return SearchResults.empty(
+ "Search error: Number of requested results 5 is greater than number of elements in index 0"
+ )
+
+
+# ---------------------------------------------------------------------------
+# VectorStore mock fixture
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def mock_vector_store(sample_search_results):
+ """
+ MagicMock standing in for VectorStore.
+ Defaults: .search() returns sample_search_results, .get_lesson_link() returns a URL.
+ """
+ store = MagicMock()
+ store.search.return_value = sample_search_results
+ store.get_lesson_link.return_value = "https://example.com/lesson/1"
+ store.get_course_outline.return_value = None
+ return store
+
+
+# ---------------------------------------------------------------------------
+# RAGSystem mock fixture
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def mock_rag_system():
+ """
+ MagicMock standing in for RAGSystem.
+ Defaults: .query() returns a plain answer with no sources; .get_course_analytics()
+ returns two courses; session_manager behaves as expected.
+ """
+ rag = MagicMock()
+ rag.session_manager.create_session.return_value = "auto-session-id"
+ rag.query.return_value = ("Test answer.", [])
+ rag.get_course_analytics.return_value = {
+ "total_courses": 2,
+ "course_titles": ["Python Fundamentals", "Data Science Basics"],
+ }
+ return rag
+
+
+# ---------------------------------------------------------------------------
+# Shared test-app factory (used by test_app.py)
+# ---------------------------------------------------------------------------
+
+def build_test_app(rag_system) -> FastAPI:
+ """
+ Return a minimal FastAPI app that mirrors the real app.py routes but
+ skips the StaticFiles mount and RAGSystem startup, so tests can run
+ without a frontend directory or a real ChromaDB instance.
+ """
+ app = FastAPI()
+
+ class QueryRequest(BaseModel):
+ query: str
+ session_id: Optional[str] = None
+
+ class QueryResponse(BaseModel):
+ answer: str
+ sources: List[dict]
+ session_id: str
+
+ class CourseStats(BaseModel):
+ total_courses: int
+ course_titles: List[str]
+
+ @app.post("/api/query", response_model=QueryResponse)
+ async def query_documents(request: QueryRequest):
+ try:
+ session_id = request.session_id
+ if not session_id:
+ session_id = rag_system.session_manager.create_session()
+ answer, sources = rag_system.query(request.query, session_id)
+ return QueryResponse(answer=answer, sources=sources, session_id=session_id)
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/api/courses", response_model=CourseStats)
+ async def get_course_stats():
+ try:
+ analytics = rag_system.get_course_analytics()
+ return CourseStats(
+ total_courses=analytics["total_courses"],
+ course_titles=analytics["course_titles"],
+ )
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+ @app.delete("/api/session/{session_id}")
+ async def delete_session(session_id: str):
+ rag_system.session_manager.clear_session(session_id)
+ return {"status": "cleared"}
+
+ return app
+
+
+# ---------------------------------------------------------------------------
+# Anthropic response mock helpers (module-level, importable by test files)
+# ---------------------------------------------------------------------------
+
+def make_text_response(text: str):
+ """Create a mock Anthropic Message with a single text block and stop_reason=end_turn."""
+ block = MagicMock()
+ block.type = "text"
+ block.text = text
+ response = MagicMock()
+ response.stop_reason = "end_turn"
+ response.content = [block]
+ return response
+
+
+def make_tool_use_response(tool_name: str, tool_input: dict, tool_use_id: str = "tu_abc123"):
+ """Create a mock Anthropic Message that requests a tool call."""
+ block = MagicMock()
+ block.type = "tool_use"
+ block.name = tool_name
+ block.input = tool_input
+ block.id = tool_use_id
+ response = MagicMock()
+ response.stop_reason = "tool_use"
+ response.content = [block]
+ return response
diff --git a/backend/tests/test_ai_generator.py b/backend/tests/test_ai_generator.py
new file mode 100644
index 000000000..79d747301
--- /dev/null
+++ b/backend/tests/test_ai_generator.py
@@ -0,0 +1,352 @@
+"""
+Unit tests for AIGenerator.generate_response() and _handle_tool_execution().
+
+Diagnostic focus: Does tool_use branching work correctly?
+Does the forced synthesis call (after the round cap) correctly omit tools?
+Does the tool result make it back to Claude?
+"""
+import pytest
+from unittest.mock import MagicMock, patch
+from ai_generator import AIGenerator
+from tests.conftest import make_text_response, make_tool_use_response
+
+
+@pytest.fixture
+def generator():
+ """AIGenerator with a fake API key; client.messages.create will be mocked per test."""
+ return AIGenerator(api_key="sk-test-fake", model="claude-3-haiku-20240307")
+
+
+class TestGenerateResponseDirectPath:
+
+ def test_returns_text_on_end_turn(self, generator):
+ """
+ WHAT: stop_reason=end_turn → generate_response returns text of first content block.
+ ASSERT: return value equals the text in the mock.
+ FAILURE MEANS: Direct (no-tool) responses are broken.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("Hello, I am Claude.")):
+ result = generator.generate_response("What is Python?")
+ assert result == "Hello, I am Claude."
+
+ def test_system_prompt_included_without_history(self, generator):
+ """
+ WHAT: Without conversation_history, system param equals SYSTEM_PROMPT exactly.
+ ASSERT: system kwarg passed to create() equals AIGenerator.SYSTEM_PROMPT.
+ FAILURE MEANS: System prompt is corrupted on clean queries.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test")
+ call_kwargs = mock_create.call_args[1]
+ assert call_kwargs["system"] == AIGenerator.SYSTEM_PROMPT
+
+ def test_system_prompt_includes_history_when_provided(self, generator):
+ """
+ WHAT: When conversation_history is provided, system includes 'Previous conversation:'.
+ ASSERT: system kwarg contains both SYSTEM_PROMPT content and the history.
+ FAILURE MEANS: Conversation context is silently dropped.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test", conversation_history="User: hi\nAssistant: hello")
+ call_kwargs = mock_create.call_args[1]
+ assert "Previous conversation:" in call_kwargs["system"]
+ assert "User: hi" in call_kwargs["system"]
+
+ def test_tools_included_in_api_call_when_provided(self, generator):
+ """
+ WHAT: When tools list is non-empty, tools and tool_choice appear in the API call.
+ ASSERT: 'tools' and 'tool_choice' are in call kwargs.
+ FAILURE MEANS: Claude never sees the search tool → answers from general knowledge only.
+ """
+ tool_defs = [{"name": "search_course_content", "description": "...", "input_schema": {}}]
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test", tools=tool_defs)
+ call_kwargs = mock_create.call_args[1]
+ assert "tools" in call_kwargs
+ assert call_kwargs["tool_choice"] == {"type": "auto"}
+
+ def test_tools_absent_from_api_call_when_not_provided(self, generator):
+ """
+        WHAT: When tools=None is passed, the 'tools' key is absent from the API call.
+        ASSERT: 'tools' not in call kwargs.
+        FAILURE MEANS: A None/empty tools param reaches the API and may trigger a validation error.
+ """
+ with patch.object(generator.client.messages, 'create',
+ return_value=make_text_response("ok")) as mock_create:
+ generator.generate_response("test", tools=None)
+ call_kwargs = mock_create.call_args[1]
+ assert "tools" not in call_kwargs
+
+
+class TestHandleToolExecution:
+
+ def test_tool_use_branch_triggers_second_api_call(self, generator):
+ """
+ WHAT: stop_reason=tool_use + tool_manager → _handle_tool_execution runs.
+ ASSERT: create() is called TWICE (initial + intermediate follow-up WITH tools).
+ Call 2 is an intermediate follow-up that still includes tools, allowing
+ Claude to call another tool in a second round if needed. Here it returns
+ end_turn, so no third call is made.
+ FAILURE MEANS: Tool results never make it back to Claude; only one API call happens.
+ """
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("Python is a programming language.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "Lesson content: Python basics..."
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ result = generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ assert mock_create.call_count == 2
+ assert result == "Python is a programming language."
+
+ def test_tool_use_with_no_tool_manager_skips_tool_execution(self, generator):
+ """
+ WHAT: stop_reason=tool_use but tool_manager=None → the `and tool_manager` guard
+ skips the tool loop entirely. The `if response.stop_reason == "tool_use"`
+ guard after the loop then triggers a plain-text synthesis call.
+ ASSERT: create() is called TWICE (initial tool_use response + synthesis call).
+ execute_tool is never called.
+ FAILURE MEANS: No synthesis call is made, causing an AttributeError when trying
+ to access .text on a ToolUseBlock.
+ """
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("Python is a programming language.")
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ result = generator.generate_response("What is Python?", tools=[{}], tool_manager=None)
+ assert mock_create.call_count == 2
+ assert result == "Python is a programming language."
+
+ def test_synthesis_call_after_round_cap_has_no_tools(self, generator):
+ """
+ WHAT: When both tool rounds are exhausted (MAX_TOOL_ROUNDS=2) and Claude still
+ returns tool_use, generate_response forces a final synthesis call WITHOUT
+ tools to obtain a plain-text answer.
+ ASSERT: The last (4th) call lacks 'tools' and 'tool_choice'.
+ FAILURE MEANS: Synthesis call includes tools and fails with an API error, or
+ Claude never produces a text answer after hitting the round cap.
+ """
+ r1 = make_tool_use_response("search_course_content", {"query": "python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "python2"}, "tu_2")
+ r3 = make_tool_use_response("search_course_content", {"query": "python3"}, "tu_3")
+ final = make_text_response("Python answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "tool result content"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3, final]) as mock_create:
+ generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ last_call_kwargs = mock_create.call_args_list[-1][1]
+ assert "tools" not in last_call_kwargs
+ assert "tool_choice" not in last_call_kwargs
+
+ def test_tool_result_appended_as_user_message(self, generator):
+ """
+ WHAT: Tool execution result is added as a user-role message with type=tool_result.
+ ASSERT: Second create() call (the intermediate follow-up WITH tools) receives 3
+ messages: [original user query, assistant tool-use block, tool result].
+ FAILURE MEANS: Claude never sees the search results — answers blind.
+ Critical check for the 'query failed' symptom.
+ """
+ tool_response = make_tool_use_response(
+ "search_course_content", {"query": "python"}, "tu_test_id"
+ )
+ final_response = make_text_response("Python answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "Search result: Python basics"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ second_call_messages = mock_create.call_args_list[1][1]["messages"]
+ assert len(second_call_messages) == 3
+ tool_result_message = second_call_messages[2]
+ assert tool_result_message["role"] == "user"
+ result_blocks = tool_result_message["content"]
+ assert any(
+ b.get("type") == "tool_result"
+ and b.get("tool_use_id") == "tu_test_id"
+ and "Python basics" in b.get("content", "")
+ for b in result_blocks
+ )
+
+ def test_tool_manager_execute_called_with_correct_args(self, generator):
+ """
+ WHAT: execute_tool() is called with the exact tool name and input that Claude requested.
+ ASSERT: execute_tool called with name='search_course_content', query='variables', lesson_number=2.
+ FAILURE MEANS: Parameters lost/renamed between Claude's response and the tool call.
+ """
+ tool_input = {"query": "variables", "lesson_number": 2}
+ tool_response = make_tool_use_response("search_course_content", tool_input)
+ final_response = make_text_response("Variables are...")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "content about variables"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]):
+ generator.generate_response("What are variables?", tools=[{}], tool_manager=mock_manager)
+
+ mock_manager.execute_tool.assert_called_once_with(
+ "search_course_content", query="variables", lesson_number=2
+ )
+
+ def test_tool_error_string_passed_through_to_claude(self, generator):
+ """
+ WHAT: If execute_tool returns an error string (e.g. from VectorStore failure),
+ that error string is what Claude receives as tool_result.content.
+ ASSERT: Second API call's messages include the error string verbatim.
+ FAILURE MEANS: THIS EXPOSES THE ROOT CAUSE. Claude receives 'Search error: ...'
+ as its context, then tells the user it cannot answer.
+ """
+ error_str = (
+ "Search error: Number of requested results 5 is greater than "
+ "number of elements in index 0"
+ )
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("I couldn't find information about that.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = error_str
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[tool_response, final_response]) as mock_create:
+ generator.generate_response("What is Python?", tools=[{}], tool_manager=mock_manager)
+
+ second_call_messages = mock_create.call_args_list[1][1]["messages"]
+ tool_result_msg = second_call_messages[2]
+ content_blocks = tool_result_msg["content"]
+ assert any(error_str in b.get("content", "") for b in content_blocks)
+
+
+class TestSequentialToolCalling:
+
+ def test_two_tool_rounds_makes_three_api_calls(self, generator):
+ """
+ WHAT: Two sequential tool rounds where each follow-up triggers another tool call,
+ until the third response is end_turn.
+ ASSERT: create() called 3 times, execute_tool called twice, result is the
+ text from the third response.
+ FAILURE MEANS: The loop exits after round 1, preventing a second tool call
+ even when Claude wants to search again.
+ """
+ r1 = make_tool_use_response("get_course_outline", {"course_name": "Python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "loops"}, "tu_2")
+ r3 = make_text_response("Python loops are covered in lesson 3.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "tool result"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3]) as mock_create:
+ result = generator.generate_response(
+ "What lesson covers loops?", tools=[{}], tool_manager=mock_manager
+ )
+
+ assert mock_create.call_count == 3
+ assert mock_manager.execute_tool.call_count == 2
+ assert result == "Python loops are covered in lesson 3."
+
+ def test_second_round_intermediate_call_has_tools(self, generator):
+ """
+ WHAT: The intermediate follow-up call after round 1 must include tools so
+ Claude can decide to make a second tool call.
+ ASSERT: The second create() call (index 1) has 'tools' in its kwargs.
+ FAILURE MEANS: Claude cannot make a second tool call because the intermediate
+ call strips tools — the sequential feature is broken.
+ """
+ r1 = make_tool_use_response("get_course_outline", {"course_name": "Python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "loops"}, "tu_2")
+ r3 = make_text_response("Python loops answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "outline content"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3]) as mock_create:
+ generator.generate_response(
+ "What lesson covers loops?", tools=[{}], tool_manager=mock_manager
+ )
+
+ second_call_kwargs = mock_create.call_args_list[1][1]
+ assert "tools" in second_call_kwargs
+ assert second_call_kwargs["tool_choice"] == {"type": "auto"}
+
+ def test_round_cap_forces_toolless_synthesis_call(self, generator):
+ """
+ WHAT: When MAX_TOOL_ROUNDS (2) is exhausted and Claude still returns tool_use,
+ a final synthesis call WITHOUT tools is forced to get a text answer.
+ ASSERT: create() called 4 times total; last call has no 'tools' or 'tool_choice'.
+ FAILURE MEANS: The round cap does not terminate the loop, or the forced synthesis
+ call incorrectly includes tools causing an API error.
+ """
+ r1 = make_tool_use_response("search_course_content", {"query": "q1"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "q2"}, "tu_2")
+ r3 = make_tool_use_response("search_course_content", {"query": "q3"}, "tu_3")
+ final = make_text_response("Here is the answer.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "result"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3, final]) as mock_create:
+ result = generator.generate_response(
+ "Complex query", tools=[{}], tool_manager=mock_manager
+ )
+
+ assert mock_create.call_count == 4
+ last_call_kwargs = mock_create.call_args_list[-1][1]
+ assert "tools" not in last_call_kwargs
+ assert "tool_choice" not in last_call_kwargs
+ assert result == "Here is the answer."
+
+ def test_tool_exception_stops_loop_and_proceeds_to_synthesis(self, generator):
+ """
+ WHAT: If execute_tool raises an Exception, the loop stops (success=False) and
+ the intermediate follow-up call provides the next response. If that
+ response is end_turn, no further calls are made.
+ ASSERT: create() called twice, execute_tool called once, result is the text
+ from the second response.
+ FAILURE MEANS: An exception in execute_tool propagates uncaught, or the loop
+ continues trying more tool rounds after a hard failure.
+ """
+ r1 = make_tool_use_response("search_course_content", {"query": "python"}, "tu_1")
+ r2 = make_text_response("I encountered an error retrieving that information.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.side_effect = Exception("DB connection failed")
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2]) as mock_create:
+ result = generator.generate_response(
+ "What is Python?", tools=[{}], tool_manager=mock_manager
+ )
+
+ assert mock_create.call_count == 2
+ assert mock_manager.execute_tool.call_count == 1
+ assert result == "I encountered an error retrieving that information."
+
+ def test_accumulated_messages_grow_across_rounds(self, generator):
+ """
+ WHAT: After two tool rounds, the third API call receives the full accumulated
+ message history: [user_query, asst_tool1, tool_result_1, asst_tool2, tool_result_2].
+ ASSERT: Third create() call's messages list has exactly 5 items.
+ FAILURE MEANS: Context is not preserved between rounds; Claude answers without
+ seeing results from earlier tool calls.
+ """
+ r1 = make_tool_use_response("get_course_outline", {"course_name": "Python"}, "tu_1")
+ r2 = make_tool_use_response("search_course_content", {"query": "lesson 3"}, "tu_2")
+ r3 = make_text_response("Lesson 3 covers loops.")
+ mock_manager = MagicMock()
+ mock_manager.execute_tool.return_value = "tool content"
+
+ with patch.object(generator.client.messages, 'create',
+ side_effect=[r1, r2, r3]) as mock_create:
+ generator.generate_response(
+ "What does lesson 3 cover?", tools=[{}], tool_manager=mock_manager
+ )
+
+ third_call_messages = mock_create.call_args_list[2][1]["messages"]
+ assert len(third_call_messages) == 5
diff --git a/backend/tests/test_app.py b/backend/tests/test_app.py
new file mode 100644
index 000000000..b942667aa
--- /dev/null
+++ b/backend/tests/test_app.py
@@ -0,0 +1,199 @@
+"""
+API endpoint tests for the FastAPI application.
+
+Uses an inline test app (build_test_app from conftest) that mirrors the real
+app.py routes without mounting StaticFiles or instantiating a real RAGSystem,
+so these tests run without a frontend directory or ChromaDB instance.
+
+Endpoints covered:
+ POST /api/query
+ GET /api/courses
+ DELETE /api/session/{session_id}
+"""
+import pytest
+from fastapi.testclient import TestClient
+from tests.conftest import build_test_app
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def client(mock_rag_system):
+ """TestClient wired to the inline test app and a fresh RAGSystem mock."""
+ return TestClient(build_test_app(mock_rag_system))
+
+
+# ---------------------------------------------------------------------------
+# POST /api/query
+# ---------------------------------------------------------------------------
+
+class TestQueryEndpoint:
+
+ def test_returns_200_with_required_fields(self, client):
+ """
+ WHAT: Valid query returns 200 and a body that contains answer, sources, session_id.
+ ASSERT: All three keys present; answer is a non-empty string.
+ FAILURE MEANS: Response contract broken — frontend crashes unpacking the JSON.
+ """
+ response = client.post("/api/query", json={"query": "What is Python?"})
+ assert response.status_code == 200
+ data = response.json()
+ assert "answer" in data
+ assert "sources" in data
+ assert "session_id" in data
+ assert isinstance(data["answer"], str)
+ assert len(data["answer"]) > 0
+
+ def test_auto_creates_session_when_none_provided(self, client, mock_rag_system):
+ """
+ WHAT: Omitting session_id triggers session_manager.create_session().
+ ASSERT: Returned session_id equals the value produced by the mock.
+ FAILURE MEANS: Anonymous (stateless) queries never get a session — frontend
+ cannot maintain conversation continuity.
+ """
+ response = client.post("/api/query", json={"query": "Hello"})
+ assert response.status_code == 200
+ assert response.json()["session_id"] == "auto-session-id"
+ mock_rag_system.session_manager.create_session.assert_called_once()
+
+ def test_uses_provided_session_id(self, client, mock_rag_system):
+ """
+ WHAT: When session_id is supplied, create_session is NOT called; the provided
+ id is passed directly to rag_system.query and echoed back.
+ ASSERT: create_session not called; session_id in response matches the input.
+ FAILURE MEANS: Existing sessions are silently discarded, breaking multi-turn chat.
+ """
+ response = client.post(
+ "/api/query",
+ json={"query": "Follow-up question", "session_id": "existing-session-42"},
+ )
+ assert response.status_code == 200
+ assert response.json()["session_id"] == "existing-session-42"
+ mock_rag_system.session_manager.create_session.assert_not_called()
+ mock_rag_system.query.assert_called_once_with(
+ "Follow-up question", "existing-session-42"
+ )
+
+ def test_sources_list_forwarded_from_rag(self, client, mock_rag_system):
+ """
+ WHAT: Sources returned by rag_system.query appear in the response body.
+ ASSERT: sources list matches what the mock returns.
+ FAILURE MEANS: Frontend never displays source links even when search succeeds.
+ """
+ mock_rag_system.query.return_value = (
+ "Python is great.",
+ [{"label": "Python Fundamentals - Lesson 1", "url": "https://example.com"}],
+ )
+ response = client.post("/api/query", json={"query": "What is Python?"})
+ assert response.status_code == 200
+ sources = response.json()["sources"]
+ assert len(sources) == 1
+ assert sources[0]["label"] == "Python Fundamentals - Lesson 1"
+
+ def test_returns_500_when_rag_raises(self, client, mock_rag_system):
+ """
+ WHAT: If rag_system.query raises, the endpoint returns HTTP 500.
+ ASSERT: status_code == 500 and detail string is present.
+ FAILURE MEANS: Exception propagates unhandled → Starlette returns a generic 500
+ without the error detail, making debugging harder.
+ """
+ mock_rag_system.query.side_effect = RuntimeError("ChromaDB connection lost")
+ response = client.post("/api/query", json={"query": "What is Python?"})
+ assert response.status_code == 500
+ assert "ChromaDB connection lost" in response.json()["detail"]
+
+ def test_query_field_is_required(self, client):
+ """
+ WHAT: A request body missing the required 'query' field is rejected with 422.
+ ASSERT: status_code == 422 (Unprocessable Entity).
+ FAILURE MEANS: Pydantic validation is bypassed or the model definition changed.
+ """
+ response = client.post("/api/query", json={"session_id": "abc"})
+ assert response.status_code == 422
+
+
+# ---------------------------------------------------------------------------
+# GET /api/courses
+# ---------------------------------------------------------------------------
+
+class TestCoursesEndpoint:
+
+ def test_returns_200_with_course_stats(self, client):
+ """
+ WHAT: GET /api/courses returns total_courses and course_titles from the RAG system.
+ ASSERT: 200; total_courses == 2; course_titles is a list of 2 strings.
+ FAILURE MEANS: Analytics endpoint broken — dashboard always shows stale/zero data.
+ """
+ response = client.get("/api/courses")
+ assert response.status_code == 200
+ data = response.json()
+ assert data["total_courses"] == 2
+ assert data["course_titles"] == ["Python Fundamentals", "Data Science Basics"]
+
+ def test_delegates_to_get_course_analytics(self, client, mock_rag_system):
+ """
+ WHAT: /api/courses calls rag_system.get_course_analytics() exactly once.
+ ASSERT: get_course_analytics called once.
+ FAILURE MEANS: Route is using a cached value or wrong method — data could be stale.
+ """
+ client.get("/api/courses")
+ mock_rag_system.get_course_analytics.assert_called_once()
+
+ def test_returns_500_when_analytics_raises(self, client, mock_rag_system):
+ """
+ WHAT: If get_course_analytics raises, the endpoint returns HTTP 500.
+ ASSERT: status_code == 500 with an error detail string.
+ FAILURE MEANS: Unhandled exception crashes the server process instead of
+ returning a structured error to the frontend.
+ """
+ mock_rag_system.get_course_analytics.side_effect = Exception("DB error")
+ response = client.get("/api/courses")
+ assert response.status_code == 500
+ assert "DB error" in response.json()["detail"]
+
+ def test_empty_course_list(self, client, mock_rag_system):
+ """
+ WHAT: When no courses are loaded, endpoint returns total_courses=0 and [].
+ ASSERT: total_courses == 0; course_titles == [].
+ FAILURE MEANS: Empty-state handling crashes or returns unexpected data types.
+ """
+ mock_rag_system.get_course_analytics.return_value = {
+ "total_courses": 0,
+ "course_titles": [],
+ }
+ response = client.get("/api/courses")
+ assert response.status_code == 200
+ data = response.json()
+ assert data["total_courses"] == 0
+ assert data["course_titles"] == []
+
+
+# ---------------------------------------------------------------------------
+# DELETE /api/session/{session_id}
+# ---------------------------------------------------------------------------
+
+class TestDeleteSessionEndpoint:
+
+ def test_returns_200_with_cleared_status(self, client):
+ """
+ WHAT: DELETE /api/session/{id} returns 200 and {"status": "cleared"}.
+ ASSERT: status_code == 200; body matches exactly.
+ FAILURE MEANS: Session cleanup endpoint broken — conversation history leaks
+ across users or stale sessions accumulate in memory.
+ """
+ response = client.delete("/api/session/test-session-id")
+ assert response.status_code == 200
+ assert response.json() == {"status": "cleared"}
+
+ def test_calls_clear_session_with_correct_id(self, client, mock_rag_system):
+ """
+ WHAT: The session_id path parameter is forwarded to session_manager.clear_session.
+ ASSERT: clear_session called once with the exact id from the URL.
+ FAILURE MEANS: Wrong session is cleared, or the call is silently skipped.
+ """
+ client.delete("/api/session/my-specific-session")
+ mock_rag_system.session_manager.clear_session.assert_called_once_with(
+ "my-specific-session"
+ )
diff --git a/backend/tests/test_course_search_tool.py b/backend/tests/test_course_search_tool.py
new file mode 100644
index 000000000..707512785
--- /dev/null
+++ b/backend/tests/test_course_search_tool.py
@@ -0,0 +1,190 @@
+"""
+Unit tests for CourseSearchTool.execute() and _format_results().
+
+Diagnostic focus: Does the tool correctly surface VectorStore errors?
+Does it populate self.last_sources? Does it format results correctly?
+"""
+import pytest
+from unittest.mock import MagicMock
+from search_tools import CourseSearchTool, ToolManager
+from vector_store import SearchResults
+
+
+class TestCourseSearchToolExecute:
+
+ def test_execute_returns_formatted_content_on_success(self, mock_vector_store, sample_search_results):
+ """
+ WHAT: execute() with a working VectorStore returns formatted content.
+ ASSERT: returned string contains course title and document text.
+ FAILURE MEANS: _format_results is broken or not called.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="what is python")
+ assert "Python Fundamentals" in result
+ assert "Lesson content about Python basics." in result
+
+ def test_execute_calls_store_search_with_correct_args(self, mock_vector_store):
+ """
+ WHAT: execute() passes query/course_name/lesson_number through to store.search().
+ ASSERT: store.search called with exactly the right keyword args.
+ FAILURE MEANS: parameter forwarding broken → wrong ChromaDB filters applied.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ tool.execute(query="variables", course_name="Python", lesson_number=3)
+ mock_vector_store.search.assert_called_once_with(
+ query="variables", course_name="Python", lesson_number=3
+ )
+
+ def test_execute_populates_last_sources(self, mock_vector_store):
+ """
+ WHAT: After execute(), tool.last_sources is populated with one entry per result.
+ ASSERT: last_sources has 2 entries with 'label' and 'url' keys.
+ FAILURE MEANS: ToolManager.get_last_sources() returns [] even after successful search
+ → sources never reach the frontend.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ tool.execute(query="python basics")
+ assert len(tool.last_sources) == 2
+ for source in tool.last_sources:
+ assert "label" in source
+ assert "url" in source
+
+ def test_execute_fetches_lesson_link_for_each_result(self, mock_vector_store):
+ """
+ WHAT: _format_results calls get_lesson_link once per result that has a lesson_number.
+ ASSERT: get_lesson_link called exactly twice (for 2 results with lesson_number).
+ FAILURE MEANS: source URLs always None in the frontend.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ tool.execute(query="python basics")
+ assert mock_vector_store.get_lesson_link.call_count == 2
+
+ def test_execute_returns_error_string_verbatim_when_store_errors(self, mock_vector_store):
+ """
+ WHAT: When store.search returns SearchResults with .error set,
+ execute() returns that error string directly.
+ ASSERT: return value IS the error string.
+ FAILURE MEANS: ChromaDB error strings reach Claude as tool result,
+ causing Claude to report failure. THIS IS THE LIKELY ROOT CAUSE.
+ """
+ error_msg = (
+ "Search error: Number of requested results 5 is greater than "
+ "number of elements in index 0"
+ )
+ mock_vector_store.search.return_value = SearchResults.empty(error_msg)
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="anything")
+ assert result == error_msg
+
+ def test_execute_returns_no_content_message_when_empty(self, mock_vector_store, empty_search_results):
+ """
+ WHAT: When results are empty (no error, just no hits), execute() returns
+ the 'No relevant content found' sentinel.
+ ASSERT: return value starts with 'No relevant content found'.
+ FAILURE MEANS: Empty DB causes tool to silently return empty string or crash.
+ """
+ mock_vector_store.search.return_value = empty_search_results
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="anything")
+ assert result.startswith("No relevant content found")
+
+ def test_execute_includes_course_filter_in_empty_message(self, mock_vector_store, empty_search_results):
+ """
+ WHAT: Empty result message mentions the requested course name.
+ ASSERT: message contains the course_name that was requested.
+ FAILURE MEANS: User can't tell which course had no content.
+ """
+ mock_vector_store.search.return_value = empty_search_results
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool.execute(query="anything", course_name="Python")
+ assert "Python" in result
+
+ def test_execute_does_not_update_sources_on_error(self, mock_vector_store):
+ """
+ WHAT: When store returns an error result, last_sources is NOT overwritten.
+ ASSERT: Pre-seeded stale sources remain unchanged after an errored execute().
+ FAILURE MEANS: Stale sources from a previous query could leak into this response.
+ """
+ mock_vector_store.search.return_value = SearchResults.empty("Search error: boom")
+ tool = CourseSearchTool(mock_vector_store)
+ tool.last_sources = [{"label": "stale", "url": None}]
+ tool.execute(query="anything")
+ # Error branch returns early — last_sources should NOT have been updated
+ assert tool.last_sources == [{"label": "stale", "url": None}]
+
+
+class TestFormatResults:
+
+ def test_format_results_header_format(self, mock_vector_store, sample_search_results):
+ """
+ WHAT: _format_results includes [CourseName - Lesson N] headers.
+ ASSERT: expected header appears in output.
+ FAILURE MEANS: Claude receives raw content without course context headers.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool._format_results(sample_search_results)
+ assert "[Python Fundamentals - Lesson 1]" in result
+
+ def test_format_results_no_lesson_number_omits_lesson_from_header(self, mock_vector_store):
+ """
+ WHAT: When lesson_number is None in metadata, header is just [CourseName].
+ ASSERT: header does not contain 'Lesson'.
+ FAILURE MEANS: Metadata extraction crashes on missing lesson_number.
+ """
+ results = SearchResults(
+ documents=["Content without lesson number"],
+ metadata=[{"course_title": "Advanced Python", "lesson_number": None}],
+ distances=[0.1]
+ )
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool._format_results(results)
+ assert "[Advanced Python]" in result
+ assert "Lesson" not in result
+
+ def test_format_results_separates_results_with_double_newline(self, mock_vector_store, sample_search_results):
+ """
+ WHAT: Multiple results are joined with double newlines.
+ ASSERT: '\\n\\n' appears in the output.
+ FAILURE MEANS: Output is garbled — all results run together.
+ """
+ tool = CourseSearchTool(mock_vector_store)
+ result = tool._format_results(sample_search_results)
+ assert "\n\n" in result
+
+
+class TestToolManager:
+
+ def test_tool_manager_get_last_sources_returns_first_nonempty(self, mock_vector_store):
+ """
+ WHAT: get_last_sources() returns the first non-empty last_sources from registered tools.
+ ASSERT: returned list matches what was set on the tool.
+ FAILURE MEANS: RAGSystem.query() always returns empty sources list.
+ """
+ manager = ToolManager()
+ tool = CourseSearchTool(mock_vector_store)
+ tool.last_sources = [{"label": "Python - Lesson 1", "url": "https://example.com"}]
+ manager.register_tool(tool)
+ assert manager.get_last_sources() == [{"label": "Python - Lesson 1", "url": "https://example.com"}]
+
+ def test_tool_manager_reset_sources_clears_all_tools(self, mock_vector_store):
+ """
+ WHAT: reset_sources() clears last_sources on all registered tools.
+ ASSERT: After reset, last_sources == [].
+ FAILURE MEANS: Sources from query N bleed into query N+1.
+ """
+ manager = ToolManager()
+ tool = CourseSearchTool(mock_vector_store)
+ tool.last_sources = [{"label": "stale", "url": None}]
+ manager.register_tool(tool)
+ manager.reset_sources()
+ assert tool.last_sources == []
+
+ def test_tool_manager_execute_unknown_tool_returns_error_string(self, mock_vector_store):
+ """
+ WHAT: Calling execute_tool with an unregistered name returns an error string.
+ ASSERT: Returns string containing 'not found'.
+ FAILURE MEANS: Unknown tool name crashes instead of returning a recoverable error.
+ """
+ manager = ToolManager()
+ result = manager.execute_tool("nonexistent_tool", query="test")
+ assert "not found" in result
diff --git a/backend/tests/test_rag_system.py b/backend/tests/test_rag_system.py
new file mode 100644
index 000000000..e2f751f43
--- /dev/null
+++ b/backend/tests/test_rag_system.py
@@ -0,0 +1,209 @@
+"""
+Integration tests for RAGSystem.query().
+
+Patches VectorStore, DocumentProcessor, and the Anthropic client so no real
+ChromaDB or API calls occur. Lets the real RAGSystem, ToolManager,
+CourseSearchTool, and AIGenerator code run.
+
+Diagnostic focus: Does the full pipeline assemble correctly?
+Do sources flow back from tool to response? Does session history update?
+"""
+import pytest
+from unittest.mock import MagicMock, patch
+from rag_system import RAGSystem
+from vector_store import SearchResults
+from tests.conftest import make_text_response, make_tool_use_response
+
+
+@pytest.fixture
+def mock_config():
+ """Minimal config with fake values; real ChromaDB/Anthropic init is blocked by patches in rag_system_with_mocks."""
+ cfg = MagicMock()
+ cfg.ANTHROPIC_API_KEY = "sk-test-fake"
+ cfg.ANTHROPIC_MODEL = "claude-3-haiku-20240307"
+ cfg.CHROMA_PATH = ":memory:"
+ cfg.EMBEDDING_MODEL = "all-MiniLM-L6-v2"
+ cfg.MAX_RESULTS = 5
+ cfg.MAX_HISTORY = 2
+ cfg.CHUNK_SIZE = 800
+ cfg.CHUNK_OVERLAP = 100
+ return cfg
+
+
+@pytest.fixture
+def rag_system_with_mocks(mock_config, sample_search_results):
+ """
+ RAGSystem with VectorStore and Anthropic client both mocked.
+ Yields (system, mock_vs_instance, mock_anthropic_client).
+ """
+ with patch("rag_system.VectorStore") as MockVS, \
+ patch("rag_system.DocumentProcessor"), \
+ patch("ai_generator.anthropic.Anthropic") as MockAnthropic:
+
+ mock_vs_instance = MagicMock()
+ mock_vs_instance.search.return_value = sample_search_results
+ mock_vs_instance.get_lesson_link.return_value = "https://example.com/lesson/1"
+ MockVS.return_value = mock_vs_instance
+
+ mock_client = MagicMock()
+ MockAnthropic.return_value = mock_client
+
+ system = RAGSystem(mock_config)
+ yield system, mock_vs_instance, mock_client
+
+
+class TestRAGSystemQueryHappyPath:
+
+ def test_query_returns_tuple_of_answer_and_sources(self, rag_system_with_mocks):
+ """
+ WHAT: RAGSystem.query() returns a 2-tuple (str, list).
+ ASSERT: result[0] is str, result[1] is list.
+ FAILURE MEANS: API contract broken — app.py crashes unpacking (answer, sources).
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("General answer.")
+ answer, sources = system.query("What is Python?")
+ assert isinstance(answer, str)
+ assert isinstance(sources, list)
+
+ def test_query_prompt_wraps_user_question(self, rag_system_with_mocks):
+ """
+ WHAT: RAGSystem.query() prepends the 'Answer this question about course materials:'
+ prefix to the user query before calling generate_response.
+ ASSERT: The message content sent to Claude starts with that prefix.
+ FAILURE MEANS: The prefix is no longer applied — prompt framing changed (this framing may suppress tool use, Bug 5).
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("ok")
+ system.query("What are variables?")
+ call_kwargs = mock_client.messages.create.call_args[1]
+ user_message_content = call_kwargs["messages"][0]["content"]
+ assert "Answer this question about course materials:" in user_message_content
+
+ def test_query_with_tool_use_returns_sources(self, rag_system_with_mocks):
+ """
+ WHAT: When Claude uses the search tool and results are found, sources list is non-empty.
+ ASSERT: sources has at least one entry with 'label' key.
+ FAILURE MEANS: Frontend never displays source links even on successful searches.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("Python is a programming language.")
+ mock_client.messages.create.side_effect = [tool_response, final_response]
+
+ answer, sources = system.query("What is Python?")
+ assert answer == "Python is a programming language."
+ assert len(sources) > 0
+ assert "label" in sources[0]
+
+ def test_query_resets_sources_after_retrieval(self, rag_system_with_mocks):
+ """
+ WHAT: After query() retrieves sources, reset_sources() is called so the next
+ query doesn't inherit stale sources.
+ ASSERT: Second query's sources list is empty (direct response, no tool use).
+ FAILURE MEANS: Sources from query N bleed into query N+1 in the frontend.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ direct_response = make_text_response("General knowledge answer.")
+
+ # First query: tool use
+ mock_client.messages.create.side_effect = [tool_response, make_text_response("Python answer.")]
+ system.query("What is Python?")
+
+ # Second query: direct response (no tool use)
+ mock_client.messages.create.side_effect = [direct_response]
+ _, sources2 = system.query("What is 2 + 2?")
+ assert sources2 == []
+
+
+class TestRAGSystemSessionHandling:
+
+ def test_query_without_session_id_returns_answer(self, rag_system_with_mocks):
+ """
+ WHAT: query() called without session_id does not crash.
+ ASSERT: answer is a non-empty string.
+ FAILURE MEANS: Session handling broken for anonymous (stateless) queries.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("Answer.")
+ answer, _ = system.query("test", session_id=None)
+ assert len(answer) > 0
+
+ def test_query_with_new_session_id_does_not_crash(self, rag_system_with_mocks):
+ """
+ WHAT: query() with a fresh session_id (not yet in sessions dict) works correctly.
+ ASSERT: No exception; answer returned as str.
+ FAILURE MEANS: get_conversation_history() crashes on unknown session_id.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("Answer.")
+ answer, _ = system.query("test", session_id="brand-new-session-99")
+ assert isinstance(answer, str)
+
+ def test_query_updates_session_history_after_response(self, rag_system_with_mocks):
+ """
+ WHAT: After a successful query, the exchange is stored in session history.
+ ASSERT: get_conversation_history() returns a string containing the user query.
+ FAILURE MEANS: Conversation context never accumulates; multi-turn dialogue is broken.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.return_value = make_text_response("Answer to hello.")
+ session_id = system.session_manager.create_session()
+ system.query("hello", session_id=session_id)
+ history = system.session_manager.get_conversation_history(session_id)
+ assert "hello" in history
+
+
+class TestRAGSystemErrorPropagation:
+
+ def test_query_when_vector_store_errors_claude_receives_error_string(self, rag_system_with_mocks):
+ """
+ WHAT: When VectorStore.search returns error SearchResults, the error string
+ reaches Claude as a tool result. Claude's final answer is its text,
+ not a Python exception.
+ ASSERT: answer is a string (no exception propagated).
+ FAILURE MEANS: Unhandled exception → FastAPI 500. If this passes but user sees
+ 'query failed', the bug is Claude saying so verbally, not an HTTP error.
+ """
+ system, mock_vs, mock_client = rag_system_with_mocks
+ mock_vs.search.return_value = SearchResults.empty(
+ "Search error: Number of requested results 5 is greater than number of elements in index 0"
+ )
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("I was unable to find information about that topic.")
+ mock_client.messages.create.side_effect = [tool_response, final_response]
+
+ answer, sources = system.query("What is Python?")
+ assert isinstance(answer, str)
+ assert len(answer) > 0
+ assert sources == []
+
+ def test_query_anthropic_api_exception_propagates_to_caller(self, rag_system_with_mocks):
+ """
+ WHAT: If the Anthropic API call raises, the exception propagates out of query()
+ so FastAPI catches it as a 500.
+ ASSERT: query() raises an exception (any type).
+ FAILURE MEANS: Exception silently swallowed → query returns wrong value, no 500 sent.
+ """
+ system, _, mock_client = rag_system_with_mocks
+ mock_client.messages.create.side_effect = ConnectionError("API unreachable")
+ with pytest.raises(Exception):
+ system.query("What is Python?")
+
+ def test_query_with_empty_database_returns_answer_string(self, rag_system_with_mocks):
+ """
+ WHAT: If the vector DB is empty, search returns is_empty()=True.
+ The tool returns 'No relevant content found.' Claude answers accordingly.
+ ASSERT: answer is a non-empty string; no exception raised.
+ FAILURE MEANS: Empty database crashes the system → HTTP 500 instead of a graceful reply.
+ """
+ system, mock_vs, mock_client = rag_system_with_mocks
+ mock_vs.search.return_value = SearchResults(documents=[], metadata=[], distances=[])
+ tool_response = make_tool_use_response("search_course_content", {"query": "python"})
+ final_response = make_text_response("There is no course content about that topic.")
+ mock_client.messages.create.side_effect = [tool_response, final_response]
+
+ answer, _ = system.query("What is Python?")
+ assert isinstance(answer, str)
+ assert len(answer) > 0
diff --git a/backend/vector_store.py b/backend/vector_store.py
index 390abe71c..ee9557764 100644
--- a/backend/vector_store.py
+++ b/backend/vector_store.py
@@ -90,9 +90,15 @@ def search(self,
search_limit = limit if limit is not None else self.max_results
try:
+ # Guard: clamp n_results to actual collection size.
+ # ChromaDB raises ValueError if n_results > number of indexed documents.
+ collection_count = self.course_content.count()
+ if collection_count == 0:
+ return SearchResults(documents=[], metadata=[], distances=[])
+ actual_limit = min(search_limit, collection_count)
results = self.course_content.query(
query_texts=[query],
- n_results=search_limit,
+ n_results=actual_limit,
where=filter_dict
)
return SearchResults.from_chroma(results)
@@ -102,6 +108,8 @@ def search(self,
def _resolve_course_name(self, course_name: str) -> Optional[str]:
"""Use vector search to find best matching course by name"""
try:
+ if self.course_catalog.count() == 0:
+ return None
results = self.course_catalog.query(
query_texts=[course_name],
n_results=1
@@ -246,6 +254,26 @@ def get_course_link(self, course_title: str) -> Optional[str]:
print(f"Error getting course link: {e}")
return None
+ def get_course_outline(self, course_name: str) -> Optional[Dict[str, Any]]:
+ """Get course outline (title, link, lessons list) by course name (fuzzy match)"""
+ import json
+ course_title = self._resolve_course_name(course_name)
+ if not course_title:
+ return None
+ try:
+ results = self.course_catalog.get(ids=[course_title])
+ if results and results['metadatas']:
+ meta = results['metadatas'][0]
+ lessons = json.loads(meta.get('lessons_json', '[]'))
+ return {
+ 'title': meta.get('title'),
+ 'course_link': meta.get('course_link'),
+ 'lessons': lessons
+ }
+ except Exception as e:
+ print(f"Error getting course outline: {e}")
+ return None
+
def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str]:
"""Get lesson link for a given course title and lesson number"""
import json
diff --git a/frontend-changes.md b/frontend-changes.md
new file mode 100644
index 000000000..f4445fc97
--- /dev/null
+++ b/frontend-changes.md
@@ -0,0 +1,147 @@
+# Frontend Changes
+
+## Code Quality Tooling
+
+### What was added
+
+| File | Purpose |
+|---|---|
+| `frontend/package.json` | npm project manifest with Prettier and ESLint as dev dependencies |
+| `frontend/.prettierrc` | Prettier configuration |
+| `frontend/.eslintrc.json` | ESLint configuration |
+| `frontend/.prettierignore` | Excludes `node_modules/` from formatting |
+| `scripts/check-frontend.sh` | Shell script that runs both Prettier and ESLint |
+
+### Prettier (`frontend/.prettierrc`)
+
+Prettier is the JavaScript/CSS/HTML equivalent of Black — it enforces a single, consistent code style with no configuration debates.
+
+Settings chosen to match the existing code style:
+- `singleQuote: true` — use single quotes (already used throughout)
+- `semi: true` — require semicolons
+- `tabWidth: 2` — 2-space indentation
+- `trailingComma: "es5"` — trailing commas in objects/arrays (ES5-safe)
+- `printWidth: 100` — line length limit
+- `arrowParens: "always"` — always parenthesise arrow function params: `(x) => x`
+
+### ESLint (`frontend/.eslintrc.json`)
+
+Catches real bugs and enforces best practices in `script.js`:
+- `eqeqeq` — require `===` instead of `==`
+- `no-var` — disallow `var`, enforcing `const`/`let`
+- `prefer-const` — warn when `let` could be `const`
+- `no-unused-vars` — warn on unused variables
+- `no-implicit-globals` — prevent accidental globals
+
+`marked` (loaded from CDN) is declared as a global so ESLint does not flag it as undefined.
+
+### `script.js` formatting changes applied
+
+Prettier was applied to `script.js`. Key diffs from the original:
+
+1. **Indentation normalised to 2 spaces** throughout (was 4 spaces).
+2. **Trailing commas** added in multi-line objects:
+ - `{ 'Content-Type': 'application/json' }` fetch header object
+ - `{ query, session_id }` request body object
+3. **Arrow function parentheses** made consistent: `s =>` → `(s) =>`, `.forEach(button =>` → `.forEach((button) =>`
+4. **Double blank lines** collapsed to single blank lines (e.g. in `setupEventListeners`).
+5. **Method chains** reformatted: `sources.map(...).join('')` broken across lines for readability.
+6. **`addMessage` long string call** broken into multi-line form with trailing argument style.
+
+### Running quality checks
+
+**Install dependencies (once):**
+```bash
+cd frontend && npm install
+```
+
+**Check formatting and linting:**
+```bash
+# From repo root:
+./scripts/check-frontend.sh
+
+# Or from frontend/:
+npm run quality
+```
+
+**Auto-fix all issues:**
+```bash
+# From repo root:
+./scripts/check-frontend.sh --fix
+
+# Or from frontend/ (format then lint-fix):
+npm run format
+npm run lint:fix
+```
+
+**Individual commands:**
+```bash
+cd frontend
+
+npm run format # apply Prettier formatting
+npm run format:check # check formatting without writing
+npm run lint # run ESLint
+npm run lint:fix # run ESLint with auto-fix
+npm run quality # format:check + lint (CI-safe, no writes)
+```
+
+---
+
+## Dark/Light Theme Toggle
+
+### Summary
+
+Added a dark/light theme toggle button to the frontend. Users can switch between the existing dark theme and a new light theme. The preference is persisted in `localStorage` and applied immediately on page load (no flash of wrong theme).
+
+---
+
+### Files Modified
+
+#### `frontend/index.html`
+
+- Added a `