7 changes: 7 additions & 0 deletions .claude/commands/implement-feature.md
@@ -0,0 +1,7 @@
You will be implementing a new feature in this codebase.

$ARGUMENTS

IMPORTANT: Only do this for front-end features.
Once the feature is built, write a summary of the changes you made to a file called frontend-changes.md.
Do not ask for permission to modify this file; assume you can always do so.
75 changes: 75 additions & 0 deletions CLAUDE.md
@@ -0,0 +1,75 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Commands

All commands use `uv` as the package manager. Dependencies are declared in `pyproject.toml`. **Never use `pip` directly — always use `uv run` or `uv sync`.**

```bash
# Install dependencies
uv sync

# Run the server (from repo root)
./run.sh

# Or manually from the backend directory
cd backend && uv run uvicorn app:app --reload --port 8000
```

The app runs at `http://localhost:8000`. API docs at `http://localhost:8000/docs`.

**Environment:** Create a `.env` file in the repo root with `ANTHROPIC_API_KEY=...` before running.

## Architecture

This is a RAG (Retrieval-Augmented Generation) system using **Claude's tool-use feature** — rather than injecting retrieved context directly into a prompt, Claude is given a search tool and autonomously decides when and what to search.

### Request Flow

```
POST /api/query
→ RAGSystem.query()
→ AIGenerator.generate_response() [first Claude call]
→ Claude decides to call search_course_content tool
→ CourseSearchTool.execute()
→ VectorStore.search() [ChromaDB semantic search]
→ AIGenerator._handle_tool_execution() [second Claude call with results]
→ SessionManager.add_exchange() [store to history]
→ return (answer, sources)
```

### Key Components (`backend/`)

- **`rag_system.py`** — Top-level orchestrator. Owns all components and exposes `query()` and `add_course_folder()`.
- **`ai_generator.py`** — Wraps the Anthropic SDK. Handles the two-turn tool-use loop: initial call → tool execution → final response.
- **`vector_store.py`** — ChromaDB wrapper with two collections:
- `course_catalog`: course-level metadata for fuzzy course name resolution
- `course_content`: chunked lesson text for semantic similarity search
- **`document_processor.py`** — Parses structured `.txt` course files into `Course`/`Lesson`/`CourseChunk` objects, then splits content into overlapping chunks.
- **`search_tools.py`** — Defines the `search_course_content` tool in Anthropic's tool-calling schema. `ToolManager` registers tools and routes execution.
- **`session_manager.py`** — In-memory conversation history, keyed by session ID. History is appended to the system prompt as plain text.
- **`config.py`** — Single `Config` dataclass. Key tunables: `CHUNK_SIZE=800`, `CHUNK_OVERLAP=100`, `MAX_RESULTS=5`, `MAX_HISTORY=2`, model `claude-sonnet-4-20250514`.
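
The overlapping-chunk behavior described for `document_processor.py` can be sketched with a simplified character-based splitter (a hypothetical illustration — the real processor works on parsed lesson text and may respect sentence boundaries; only the `CHUNK_SIZE`/`CHUNK_OVERLAP` defaults come from `config.py`):

```python
def chunk_text(text: str, chunk_size: int = 800, overlap: int = 100) -> list[str]:
    """Split text into overlapping chunks: each chunk starts
    chunk_size - overlap characters after the previous one."""
    step = chunk_size - overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

# Consecutive chunks share `overlap` characters, so content cut at a
# chunk boundary still appears intact in at least one chunk.
chunks = chunk_text("A" * 2000)
```

The overlap trades a little index redundancy for better recall: a sentence that straddles a boundary is still retrievable as a whole from the neighboring chunk.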

### Course Document Format

Files in `docs/` must follow this structure for `DocumentProcessor` to parse them correctly:

```
Course Title: <title>
Course Link: <url>
Course Instructor: <name>

Lesson 1: <lesson title>
Lesson Link: <url>
<lesson content...>

Lesson 2: <lesson title>
...
```
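
A minimal sketch of how such a file might be parsed (the real `DocumentProcessor` is more involved; the field names follow the format above, but the regexes and the `parse_course_header` helper are assumptions for illustration):

```python
import re

def parse_course_header(text: str) -> dict:
    """Extract course metadata and lesson headings from the structured format."""
    return {
        "title": re.search(r"^Course Title: (.+)$", text, re.M).group(1),
        "instructor": re.search(r"^Course Instructor: (.+)$", text, re.M).group(1),
        # "Lesson Link:" lines don't match because the pattern requires digits
        "lessons": re.findall(r"^Lesson (\d+): (.+)$", text, re.M),
    }

sample = """Course Title: Intro to RAG
Course Link: https://example.com/rag
Course Instructor: Jane Doe

Lesson 1: Overview
Lesson Link: https://example.com/rag/1
Embeddings and retrieval basics.
"""
info = parse_course_header(sample)
# info["title"] == "Intro to RAG"; info["lessons"] == [("1", "Overview")]
```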

The course title doubles as the unique ID in ChromaDB. On server startup, existing courses are skipped (deduplication by title).

### Frontend

A plain HTML/CSS/JS chat UI served as static files by FastAPI from `../frontend`. No build step required.
140 changes: 68 additions & 72 deletions backend/ai_generator.py
@@ -3,15 +3,20 @@

class AIGenerator:
"""Handles interactions with Anthropic's Claude API for generating responses"""


MAX_TOOL_ROUNDS = 2

# Static system prompt to avoid rebuilding on each call
SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information.

Search Tool Usage:
- Use the search tool **only** for questions about specific course content or detailed educational materials
- You may make **up to 2 sequential tool calls** per query when needed (e.g. first retrieve a course outline, then search for related content across courses)
- Use a second tool call only if the first result is insufficient or a clearly necessary follow-up search is required
- Synthesize search results into accurate, fact-based responses
- If search yields no results, state this clearly without offering alternatives
- **Outline queries** (e.g. "what lessons are in X?", "give me the outline of X"):
Use `get_course_outline`. Return the course title, course link (if present), and every lesson as "Lesson <number>: <title>".

Response Protocol:
- **General knowledge questions**: Answer using existing knowledge without searching
@@ -28,108 +33,99 @@ class AIGenerator:
4. **Example-supported** - Include relevant examples when they aid understanding
Provide only the direct answer to what was asked.
"""

def __init__(self, api_key: str, model: str):
self.client = anthropic.Anthropic(api_key=api_key)
self.model = model

# Pre-build base API parameters
self.base_params = {
"model": self.model,
"temperature": 0,
"max_tokens": 800
}

def generate_response(self, query: str,
conversation_history: Optional[str] = None,
tools: Optional[List] = None,
tool_manager=None) -> str:
"""
Generate AI response with optional tool usage and conversation context.

Supports up to MAX_TOOL_ROUNDS sequential tool-call rounds.

Args:
query: The user's question or request
conversation_history: Previous messages for context
tools: Available tools the AI can use
tool_manager: Manager to execute tools

Returns:
Generated response as string
"""

# Build system content efficiently - avoid string ops when possible
system_content = (
f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}"
if conversation_history
else self.SYSTEM_PROMPT
)

# Prepare API call parameters efficiently

api_params = {
**self.base_params,
"messages": [{"role": "user", "content": query}],
"system": system_content
}

# Add tools if available

if tools:
api_params["tools"] = tools
api_params["tool_choice"] = {"type": "auto"}

# Get response from Claude
response = self.client.messages.create(**api_params)

# Handle tool execution if needed
if response.stop_reason == "tool_use" and tool_manager:
return self._handle_tool_execution(response, api_params, tool_manager)

# Return direct response
return response.content[0].text

def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager):
"""
Handle execution of tool calls and get follow-up response.

Args:
initial_response: The response containing tool use requests
base_params: Base API parameters
tool_manager: Manager to execute tools

Returns:
Final response text after tool execution
"""
# Start with existing messages
messages = base_params["messages"].copy()

# Add AI's tool use response
messages.append({"role": "assistant", "content": initial_response.content})

# Execute all tool calls and collect results
tool_results = []
for content_block in initial_response.content:
if content_block.type == "tool_use":
tool_result = tool_manager.execute_tool(
content_block.name,
**content_block.input
)

tool_results.append({
"type": "tool_result",
"tool_use_id": content_block.id,
"content": tool_result
})

# Add tool results as single message
if tool_results:
messages.append({"role": "user", "content": tool_results})

# Prepare final API call without tools
final_params = {
**self.base_params,
"messages": messages,
"system": base_params["system"]
}

# Get final response
final_response = self.client.messages.create(**final_params)
return final_response.content[0].text

round_count = 0

while True:
response = self.client.messages.create(**api_params)

# No tool use requested or no manager to handle it — return text directly
if response.stop_reason != "tool_use" or not tool_manager:
return self._extract_text(response)

round_count += 1

# Append assistant turn and execute all tool calls
new_messages = list(api_params["messages"])
new_messages.append({"role": "assistant", "content": response.content})

tool_results = []
error_occurred = False
for block in response.content:
if block.type == "tool_use":
try:
result = tool_manager.execute_tool(block.name, **block.input)
except Exception as e:
result = f"Error executing tool: {e}"
error_occurred = True
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": result
})

if tool_results:
new_messages.append({"role": "user", "content": tool_results})

# Cap reached or tool error — make one final call without tools and return
if error_occurred or round_count >= self.MAX_TOOL_ROUNDS:
final_params = {
**self.base_params,
"messages": new_messages,
"system": system_content
}
return self._extract_text(self.client.messages.create(**final_params))

# Round not yet capped — keep tools available and continue
api_params["messages"] = new_messages

def _extract_text(self, response) -> str:
"""Safely extract text from any response, regardless of block ordering."""
for block in response.content:
if hasattr(block, "text"):
return block.text
return ""
6 changes: 6 additions & 0 deletions backend/app.py
@@ -85,6 +85,12 @@ async def get_course_stats():
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))

@app.delete("/api/session/{session_id}")
async def delete_session(session_id: str):
"""Clear session history from memory"""
rag_system.session_manager.clear_session(session_id)
return {"status": "cleared"}

@app.on_event("startup")
async def startup_event():
"""Load initial documents on startup"""
4 changes: 2 additions & 2 deletions backend/config.py
@@ -10,10 +10,10 @@ class Config:
"""Configuration settings for the RAG system"""
# Anthropic API settings
ANTHROPIC_API_KEY: str = os.getenv("ANTHROPIC_API_KEY", "")
ANTHROPIC_MODEL: str = "claude-sonnet-4-20250514"
ANTHROPIC_MODEL: str = "claude-haiku-4-5-20251001"

# Embedding model settings
EMBEDDING_MODEL: str = "all-MiniLM-L6-v2"
EMBEDDING_MODEL: str = "/Users/kimhoanpham/.cache/huggingface/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/main"

# Document processing settings
CHUNK_SIZE: int = 800 # Size of text chunks for vector storage
4 changes: 3 additions & 1 deletion backend/rag_system.py
@@ -4,7 +4,7 @@
from vector_store import VectorStore
from ai_generator import AIGenerator
from session_manager import SessionManager
from search_tools import ToolManager, CourseSearchTool
from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool
from models import Course, Lesson, CourseChunk

class RAGSystem:
@@ -23,6 +23,8 @@ def __init__(self, config):
self.tool_manager = ToolManager()
self.search_tool = CourseSearchTool(self.vector_store)
self.tool_manager.register_tool(self.search_tool)
self.outline_tool = CourseOutlineTool(self.vector_store)
self.tool_manager.register_tool(self.outline_tool)

def add_course_document(self, file_path: str) -> Tuple[Course, int]:
"""
48 changes: 47 additions & 1 deletion backend/search_tools.py
@@ -104,7 +104,17 @@ def _format_results(self, results: SearchResults) -> str:
source = course_title
if lesson_num is not None:
source += f" - Lesson {lesson_num}"
sources.append(source)

# Fetch lesson link from course catalog
lesson_link = None
if lesson_num is not None:
lesson_link = self.store.get_lesson_link(course_title, lesson_num)

# Encode as "label|url" when a link exists, plain label otherwise
if lesson_link:
sources.append(f"{source}|{lesson_link}")
else:
sources.append(source)

formatted.append(f"{header}\n{doc}")

@@ -113,6 +123,42 @@ def _format_results(self, results: SearchResults) -> str:

return "\n\n".join(formatted)

class CourseOutlineTool(Tool):
"""Tool for retrieving a course outline (title, link, and lesson list)"""

def __init__(self, vector_store: VectorStore):
self.store = vector_store

def get_tool_definition(self) -> Dict[str, Any]:
return {
"name": "get_course_outline",
"description": "Get the full outline of a course: title, link, and numbered lesson list",
"input_schema": {
"type": "object",
"properties": {
"course_title": {
"type": "string",
"description": "Course title to look up (partial matches work)"
}
},
"required": ["course_title"]
}
}

def execute(self, course_title: str) -> str:
outline = self.store.get_course_outline(course_title)
if not outline:
return f"No course found matching '{course_title}'"

lines = [f"Course: {outline['title']}"]
if outline.get('course_link'):
lines.append(f"Link: {outline['course_link']}")
lines.append(f"\nLessons ({len(outline['lessons'])} total):")
for lesson in outline['lessons']:
lines.append(f" Lesson {lesson['lesson_number']}: {lesson['lesson_title']}")
return "\n".join(lines)


class ToolManager:
"""Manages available tools for the AI"""

Empty file added backend/tests/__init__.py
Empty file.