diff --git a/.env.development b/.env.development index 02e961bab..b86e25e8a 100644 --- a/.env.development +++ b/.env.development @@ -76,3 +76,5 @@ SOURCEBOT_TELEMETRY_DISABLED=true # Disables telemetry collection # CONFIG_MAX_REPOS_NO_TOKEN= NODE_ENV=development # SOURCEBOT_TENANCY_MODE=single + +DEBUG_WRITE_CHAT_MESSAGES_TO_FILE=true \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c879790b..f66a58da6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added `find_symbol_definitions` and `find_symbol_references` tools to the MCP server. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) +- Added `list_tree` tool to the ask agent. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) +- Added input & output token breakdown in ask details card. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) +- Added `path` parameter to the `/api/commits` API to allow filtering commits by path. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) + +### Fixed +- Fixed issue where ask responses would sometimes appear in the details panel while generating. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) + ### Changed - Changed language detection to resolve file extensions with multiple language resolutions (e.g., .md) to the most common resolution. [#1026](https://github.com/sourcebot-dev/sourcebot/pull/1026) +- Changed the `webUrl` property of the `/api/repos` API to return a URL rather than just a path. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) ## [4.15.11] - 2026-03-20 diff --git a/CLAUDE.md b/CLAUDE.md index 2fff041c8..3755abd4c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -38,6 +38,25 @@ Exceptions: - Special files like `README.md`, `CHANGELOG.md`, `LICENSE` - Next.js conventions: `page.tsx`, `layout.tsx`, `loading.tsx`, etc. 
+## Code Style + +Always use curly braces for `if` statements, with the body on a new line — even for single-line bodies: + +```ts +// Correct +if (!value) { + return; +} +if (condition) { + doSomething(); +} + +// Incorrect +if (!value) return; +if (!value) { return; } +if (condition) doSomething(); +``` + ## Tailwind CSS Use Tailwind color classes directly instead of CSS variable syntax: diff --git a/docs/docs/features/mcp-server.mdx b/docs/docs/features/mcp-server.mdx index 248e4f2d3..b1ce9c275 100644 --- a/docs/docs/features/mcp-server.mdx +++ b/docs/docs/features/mcp-server.mdx @@ -304,22 +304,19 @@ Pass the key as an `Authorization: Bearer ` header when connecting to the M ## Available Tools -### `search_code` +### `grep` -Searches for code that matches the provided search query as a substring by default, or as a regular expression if `useRegex` is true. +Searches for code matching a regular expression pattern across repositories, similar to `grep`/`ripgrep`. Always case-sensitive. Results are grouped by file and include line numbers. Parameters: | Name | Required | Description | -|:----------------------|:---------|:---------------------------------------------------------------------------------------------------------------------| -| `query` | yes | The search pattern to match against code contents. Do not escape quotes in your query. | -| `useRegex` | no | Whether to use regular expression matching. When false, substring matching is used (default: false). | -| `filterByRepos` | no | Scope the search to specific repositories. | -| `filterByLanguages` | no | Scope the search to specific languages. | -| `filterByFilepaths` | no | Scope the search to specific filepaths. | -| `caseSensitive` | no | Whether the search should be case sensitive (default: false). | -| `includeCodeSnippets` | no | Whether to include code snippets in the response (default: false). 
| +|:----------|:---------|:--------------------------------------------------------------------------------------------------------------| +| `pattern` | yes | The regex pattern to search for in file contents. | +| `path` | no | Directory path to scope the search to. Defaults to the repository root. | +| `include` | no | File glob pattern to include in the search (e.g. `*.ts`, `*.{ts,tsx}`). | +| `repo` | no | Repository name to search in. If not provided, searches all repositories. Use the full name including host (e.g. `github.com/org/repo`). | | `ref` | no | Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch. | -| `maxTokens` | no | The maximum number of tokens to return (default: 10000). | +| `limit` | no | Maximum number of matching files to return (default: 100). | ### `list_repos` @@ -336,18 +333,20 @@ Parameters: ### `read_file` -Reads the source code for a given file. +Reads the source code for a given file, with optional line range control for large files. Parameters: | Name | Required | Description | -|:-------|:---------|:-------------------------------------------------------------------------------------------------------| +|:---------|:---------|:-------------------------------------------------------------------------------------------------------| | `repo` | yes | The repository name. | | `path` | yes | The path to the file. | | `ref` | no | Commit SHA, branch or tag name to fetch the source code for. If not provided, uses the default branch. | +| `offset` | no | Line number to start reading from (1-indexed). Omit to start from the beginning. | +| `limit` | no | Maximum number of lines to read (max: 500). Omit to read up to 500 lines. | ### `list_tree` -Lists files and directories from a repository path. Can be used as a directory listing tool (`depth: 1`) or a repo-tree tool (`depth > 1`). +Lists files and directories from a repository path. Directories are shown before files at each level. 
Parameters: | Name | Required | Description | diff --git a/packages/shared/src/logger.ts b/packages/shared/src/logger.ts index a3f89e2cc..b142cb07c 100644 --- a/packages/shared/src/logger.ts +++ b/packages/shared/src/logger.ts @@ -32,12 +32,11 @@ const datadogFormat = format((info) => { return info; }); -const humanReadableFormat = printf(({ level, message, timestamp, stack, label: _label }) => { +const humanReadableFormat = printf(({ level, message, timestamp, stack, label: _label, ...rest }) => { const label = `[${_label}] `; - if (stack) { - return `${timestamp} ${level}: ${label}${message}\n${stack}`; - } - return `${timestamp} ${level}: ${label}${message}`; + const extras = Object.keys(rest).length > 0 ? ` ${JSON.stringify(rest)}` : ''; + const base = `${timestamp} ${level}: ${label}${message}${extras}`; + return stack ? `${base}\n${stack}` : base; }); const createLogger = (label: string) => { diff --git a/packages/web/next.config.mjs b/packages/web/next.config.mjs index de1b8b1ee..6211fcfe2 100644 --- a/packages/web/next.config.mjs +++ b/packages/web/next.config.mjs @@ -63,7 +63,14 @@ const nextConfig = { ] }, - turbopack: {}, + turbopack: { + rules: { + '*.txt': { + loaders: ['raw-loader'], + as: '*.js', + }, + }, + }, // @see: https://github.com/vercel/next.js/issues/58019#issuecomment-1910531929 ...(process.env.NODE_ENV === 'development' ? 
{ diff --git a/packages/web/package.json b/packages/web/package.json index 040577ce6..82a7a0ddb 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -144,6 +144,7 @@ "escape-string-regexp": "^5.0.0", "fast-deep-equal": "^3.1.3", "fuse.js": "^7.0.0", + "glob-to-regexp": "^0.4.1", "google-auth-library": "^10.1.0", "graphql": "^16.9.0", "http-status-codes": "^2.3.0", @@ -189,6 +190,7 @@ "stripe": "^17.6.0", "tailwind-merge": "^2.5.2", "tailwindcss-animate": "^1.0.7", + "use-stick-to-bottom": "^1.1.3", "usehooks-ts": "^3.1.0", "vscode-icons-js": "^11.6.1", "zod": "^3.25.74", @@ -202,6 +204,7 @@ "@tanstack/eslint-plugin-query": "^5.74.7", "@testing-library/dom": "^10.4.1", "@testing-library/react": "^16.3.0", + "@types/glob-to-regexp": "^0.4.4", "@types/micromatch": "^4.0.9", "@types/node": "^20", "@types/nodemailer": "^6.4.17", @@ -218,6 +221,7 @@ "jsdom": "^25.0.1", "npm-run-all": "^4.1.5", "postcss": "^8", + "raw-loader": "^4.0.2", "react-email": "^5.1.0", "react-grab": "^0.1.23", "tailwindcss": "^3.4.1", diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts index 18b4afb93..fb3f9cbe1 100644 --- a/packages/web/src/app/api/(server)/commits/route.ts +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -13,6 +13,7 @@ const listCommitsQueryParamsSchema = z.object({ until: z.string().optional(), author: z.string().optional(), ref: z.string().optional(), + path: z.string().optional(), page: z.coerce.number().int().positive().default(1), perPage: z.coerce.number().int().positive().max(100).default(50), }); @@ -57,6 +58,7 @@ export const GET = apiHandler(async (request: NextRequest): Promise => ...(searchParams.until ? { until: searchParams.until } : {}), ...(searchParams.author ? { author: searchParams.author } : {}), ...(searchParams.ref ? { ref: searchParams.ref } : {}), + ...(searchParams.path ? 
{ path: searchParams.path } : {}), }, }); if (linkHeader) headers.set('Link', linkHeader); diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index 11b124207..3645bb42e 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -14,10 +14,15 @@ import { } from "ai"; import { randomUUID } from "crypto"; import _dedent from "dedent"; -import { ANSWER_TAG, FILE_REFERENCE_PREFIX, toolNames } from "./constants"; -import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, listCommitsTool, listReposTool, readFilesTool } from "./tools"; +import { ANSWER_TAG, FILE_REFERENCE_PREFIX } from "./constants"; +import { findSymbolReferencesDefinition } from "@/features/tools/findSymbolReferences"; +import { findSymbolDefinitionsDefinition } from "@/features/tools/findSymbolDefinitions"; +import { readFileDefinition } from "@/features/tools/readFile"; +import { grepDefinition } from "@/features/tools/grep"; import { Source } from "./types"; import { addLineNumbers, fileReferenceToString } from "./utils"; +import { createTools } from "./tools"; +import { listTreeDefinition } from "../tools"; const dedent = _dedent.withOptions({ alignValues: true }); @@ -198,14 +203,7 @@ const createAgentStream = async ({ providerOptions, messages: inputMessages, system: systemPrompt, - tools: { - [toolNames.searchCode]: createCodeSearchTool(selectedRepos), - [toolNames.readFiles]: readFilesTool, - [toolNames.findSymbolReferences]: findSymbolReferencesTool, - [toolNames.findSymbolDefinitions]: findSymbolDefinitionsTool, - [toolNames.listRepos]: listReposTool, - [toolNames.listCommits]: listCommitsTool, - }, + tools: createTools({ source: 'sourcebot-ask-agent', selectedRepos }), temperature: env.SOURCEBOT_CHAT_MODEL_TEMPERATURE, stopWhen: [ stepCountIsGTE(env.SOURCEBOT_CHAT_MAX_STEP_COUNT), @@ -223,39 +221,46 @@ const createAgentStream = async ({ return; } - if (toolName === 
toolNames.readFiles) { - output.forEach((file) => { + if (toolName === readFileDefinition.name) { + onWriteSource({ + type: 'file', + repo: output.metadata.repo, + path: output.metadata.path, + revision: output.metadata.revision, + name: output.metadata.path.split('/').pop() ?? output.metadata.path, + }); + } else if (toolName === grepDefinition.name) { + output.metadata.files.forEach((file) => { onWriteSource({ type: 'file', - language: file.language, - repo: file.repository, + repo: file.repo, path: file.path, revision: file.revision, name: file.path.split('/').pop() ?? file.path, }); }); - } else if (toolName === toolNames.searchCode) { - output.files.forEach((file) => { + } else if (toolName === findSymbolDefinitionsDefinition.name || toolName === findSymbolReferencesDefinition.name) { + output.metadata.files.forEach((file) => { onWriteSource({ type: 'file', - language: file.language, - repo: file.repository, + repo: file.repo, path: file.fileName, revision: file.revision, name: file.fileName.split('/').pop() ?? file.fileName, }); }); - } else if (toolName === toolNames.findSymbolDefinitions || toolName === toolNames.findSymbolReferences) { - output.forEach((file) => { - onWriteSource({ - type: 'file', - language: file.language, - repo: file.repository, - path: file.fileName, - revision: file.revision, - name: file.fileName.split('/').pop() ?? file.fileName, + } else if (toolName === listTreeDefinition.name) { + output.metadata.entries + .filter((entry) => entry.type === 'blob') + .forEach((entry) => { + onWriteSource({ + type: 'file', + repo: output.metadata.repo, + path: entry.path, + revision: output.metadata.ref, + name: entry.name, + }); }); - }); } }); }, @@ -312,6 +317,10 @@ const createPrompt = ({ The user has explicitly selected the following repositories for analysis: ${repos.map(repo => `- ${repo}`).join('\n')} + + When calling tools that accept a \`repo\` parameter (e.g. 
\`read_file\`, \`list_commits\`, \`list_tree\`, \`grep\`), use these repository names exactly as listed above, including the full host prefix (e.g. \`github.com/org/repo\`). + + When using \`grep\` to search across ALL selected repositories (e.g. "which repos have X?"), omit the \`repo\` parameter entirely — the tool will automatically search across all selected repositories in a single call. Do NOT call \`grep\` once per repository when a single broad search would suffice. ` : ''} diff --git a/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx b/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx index 1734a04d5..b9c9a65b1 100644 --- a/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx +++ b/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx @@ -13,8 +13,8 @@ import { AnswerCard } from './answerCard'; import { DetailsCard } from './detailsCard'; import { MarkdownRenderer, REFERENCE_PAYLOAD_ATTRIBUTE } from './markdownRenderer'; import { ReferencedSourcesListView } from './referencedSourcesListView'; -import { uiVisiblePartTypes } from '../../constants'; import isEqual from "fast-deep-equal/react"; +import { ANSWER_TAG } from '../../constants'; interface ChatThreadListItemProps { userMessage: SBChatMessage; @@ -95,19 +95,24 @@ const ChatThreadListItemComponent = forwardRef step // First, filter out any parts that are not text .filter((part) => { - if (part.type !== 'text') { - return true; + if (part.type === 'text') { + return !part.text.includes(ANSWER_TAG); } - return part.text !== answerPart?.text; + return true; }) .filter((part) => { - return uiVisiblePartTypes.includes(part.type); + // Only include text, reasoning, and tool parts + return ( + part.type === 'text' || + part.type === 'reasoning' || + part.type.startsWith('tool-') + ) }) ) // Then, filter out any steps that are empty .filter(step => step.length > 0); - }, [answerPart, assistantMessage?.parts]); + 
}, [assistantMessage?.parts]); // "thinking" is when the agent is generating output that is not the answer. const isThinking = useMemo(() => { diff --git a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx index ff155ea00..2eb1804f9 100644 --- a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx @@ -5,20 +5,24 @@ import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/component import { Separator } from '@/components/ui/separator'; import { Skeleton } from '@/components/ui/skeleton'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; -import { cn } from '@/lib/utils'; -import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, List, ScanSearchIcon, Zap } from 'lucide-react'; -import { memo, useCallback } from 'react'; import useCaptureEvent from '@/hooks/useCaptureEvent'; +import { cn, getShortenedNumberDisplayString } from '@/lib/utils'; +import isEqual from "fast-deep-equal/react"; +import { useStickToBottom } from 'use-stick-to-bottom'; +import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, ScanSearchIcon, Zap } from 'lucide-react'; +import { memo, useCallback, useEffect, useState } from 'react'; +import { usePrevious } from '@uidotdev/usehooks'; +import { SBChatMessageMetadata, SBChatMessagePart } from '../../types'; +import { SearchScopeIcon } from '../searchScopeIcon'; import { MarkdownRenderer } from './markdownRenderer'; import { FindSymbolDefinitionsToolComponent } from './tools/findSymbolDefinitionsToolComponent'; import { FindSymbolReferencesToolComponent } from './tools/findSymbolReferencesToolComponent'; -import { ReadFilesToolComponent } from './tools/readFilesToolComponent'; -import { SearchCodeToolComponent } from './tools/searchCodeToolComponent'; -import { ListReposToolComponent } from 
'./tools/listReposToolComponent'; +import { GrepToolComponent } from './tools/grepToolComponent'; import { ListCommitsToolComponent } from './tools/listCommitsToolComponent'; -import { SBChatMessageMetadata, SBChatMessagePart } from '../../types'; -import { SearchScopeIcon } from '../searchScopeIcon'; -import isEqual from "fast-deep-equal/react"; +import { ListReposToolComponent } from './tools/listReposToolComponent'; +import { ListTreeToolComponent } from './tools/listTreeToolComponent'; +import { ReadFileToolComponent } from './tools/readFileToolComponent'; +import { ToolOutputGuard } from './tools/toolOutputGuard'; interface DetailsCardProps { @@ -105,21 +109,37 @@ const DetailsCardComponent = ({ )} {metadata?.totalTokens && ( -
- - {metadata?.totalTokens} tokens -
+ + +
+ + {getShortenedNumberDisplayString(metadata.totalTokens, 0)} tokens +
+
+ +
+
+ Input + {metadata.totalInputTokens?.toLocaleString() ?? '—'} +
+
+ Output + {metadata.totalOutputTokens?.toLocaleString() ?? '—'} +
+
+ Total + {metadata.totalTokens.toLocaleString()} +
+
+
+
)} {metadata?.totalResponseTimeMs && (
- {metadata?.totalResponseTimeMs / 1000} seconds + {Math.round(metadata.totalResponseTimeMs / 1000)} seconds
)} -
- - {`${thinkingSteps.length} step${thinkingSteps.length === 1 ? '' : 's'}`} -
)} @@ -133,88 +153,12 @@ const DetailsCardComponent = ({ - - {thinkingSteps.length === 0 ? ( - isStreaming ? ( - - ) : ( -

No thinking steps

- ) - ) : thinkingSteps.map((step, index) => { - return ( -
-
- - {index + 1} - -
- {step.map((part, index) => { - switch (part.type) { - case 'reasoning': - case 'text': - return ( - - ) - case 'tool-readFiles': - return ( - - ) - case 'tool-searchCode': - return ( - - ) - case 'tool-findSymbolDefinitions': - return ( - - ) - case 'tool-findSymbolReferences': - return ( - - ) - case 'tool-listRepos': - return ( - - ) - case 'tool-listCommits': - return ( - - ) - default: - return null; - } - })} -
- ) - })} + +
@@ -222,4 +166,130 @@ const DetailsCardComponent = ({ ) } -export const DetailsCard = memo(DetailsCardComponent, isEqual); \ No newline at end of file +export const DetailsCard = memo(DetailsCardComponent, isEqual); + + +const ThinkingSteps = ({ thinkingSteps, isStreaming, isThinking }: { thinkingSteps: SBChatMessagePart[][], isStreaming: boolean, isThinking: boolean }) => { + const { scrollRef, contentRef, scrollToBottom } = useStickToBottom(); + const [shouldStick, setShouldStick] = useState(isThinking); + const prevIsThinking = usePrevious(isThinking); + + useEffect(() => { + if (prevIsThinking && !isThinking) { + scrollToBottom(); + setShouldStick(false); + } else if (!prevIsThinking && isThinking) { + setShouldStick(true); + } + }, [isThinking, prevIsThinking, scrollToBottom]); + + return ( +
+
+ {thinkingSteps.length === 0 ? ( + isStreaming ? ( + + ) : ( +

No thinking steps

+ ) + ) : thinkingSteps.map((step, index) => ( +
+ {step.map((part, index) => ( +
+ +
+ ))} +
+ ))} +
+
+ ); +} + + +export const StepPartRenderer = ({ part }: { part: SBChatMessagePart }) => { + switch (part.type) { + case 'reasoning': + case 'text': + return ( + + ) + case 'tool-read_file': + return ( + + {(output) => } + + ) + case 'tool-grep': + return ( + + {(output) => } + + ) + case 'tool-find_symbol_definitions': + return ( + + {(output) => } + + ) + case 'tool-find_symbol_references': + return ( + + {(output) => } + + ) + case 'tool-list_repos': + return ( + + {(output) => } + + ) + case 'tool-list_commits': + return ( + + {(output) => } + + ) + case 'tool-list_tree': + return ( + + {(output) => } + + ) + case 'data-source': + case 'dynamic-tool': + case 'file': + case 'source-document': + case 'source-url': + case 'step-start': + return null; + default: + // Guarantees this switch-case to be exhaustive + part satisfies never; + return null; + } +} \ No newline at end of file diff --git a/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx index 792efd434..ae327d640 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx @@ -1,68 +1,22 @@ 'use client'; -import { FindSymbolDefinitionsToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { useMemo, useState } from "react"; -import { FileListItem, ToolHeader, TreeList } from "./shared"; -import { CodeSnippet } from "@/app/components/codeSnippet"; +import { FindSymbolDefinitionsMetadata, ToolResult } from "@/features/tools"; import { Separator } from "@/components/ui/separator"; -import { BookOpenIcon } from "lucide-react"; +import { VscSymbolMisc } from "react-icons/vsc"; +import { RepoBadge } from "./repoBadge"; - -export const FindSymbolDefinitionsToolComponent = ({ 
part }: { part: FindSymbolDefinitionsToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Resolving definition...'; - case 'input-available': - return Resolving definition for {part.input.symbol}; - case 'output-error': - return '"Find symbol definitions" tool call failed'; - case 'output-available': - return Resolved definition for {part.input.symbol}; - } - }, [part]); +export const FindSymbolDefinitionsToolComponent = ({ metadata }: ToolResult) => { + const label = `${metadata.matchCount} ${metadata.matchCount === 1 ? 'definition' : 'definitions'}`; return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - {isServiceError(part.output) ? ( - - Failed with the following error: {part.output.message} - - ) : ( - <> - {part.output.length === 0 ? ( - No matches found - ) : ( - - {part.output.map((file) => { - return ( - - ) - })} - - )} - - )} - - - )} +
+ Resolved + {metadata.symbol} + in + + + {label} +
- ) -} \ No newline at end of file + ); +}; diff --git a/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx index 44dcf763b..d423fe976 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx @@ -1,68 +1,22 @@ 'use client'; -import { FindSymbolReferencesToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { useMemo, useState } from "react"; -import { FileListItem, ToolHeader, TreeList } from "./shared"; -import { CodeSnippet } from "@/app/components/codeSnippet"; +import { FindSymbolReferencesMetadata, ToolResult } from "@/features/tools"; import { Separator } from "@/components/ui/separator"; -import { BookOpenIcon } from "lucide-react"; +import { VscSymbolMisc } from "react-icons/vsc"; +import { RepoBadge } from "./repoBadge"; - -export const FindSymbolReferencesToolComponent = ({ part }: { part: FindSymbolReferencesToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Resolving references...'; - case 'input-available': - return Resolving references for {part.input.symbol}; - case 'output-error': - return '"Find symbol references" tool call failed'; - case 'output-available': - return Resolved references for {part.input.symbol}; - } - }, [part]); +export const FindSymbolReferencesToolComponent = ({ metadata }: ToolResult) => { + const label = `${metadata.matchCount} ${metadata.matchCount === 1 ? 'reference' : 'references'}`; return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - {isServiceError(part.output) ? ( - - Failed with the following error: {part.output.message} - - ) : ( - <> - {part.output.length === 0 ? ( - No matches found - ) : ( - - {part.output.map((file) => { - return ( - - ) - })} - - )} - - )} - - - )} +
+ Resolved + {metadata.symbol} + in + + + {label} +
- ) -} \ No newline at end of file + ); +}; diff --git a/packages/web/src/features/chat/components/chatThread/tools/grepToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/grepToolComponent.tsx new file mode 100644 index 000000000..fe655e482 --- /dev/null +++ b/packages/web/src/features/chat/components/chatThread/tools/grepToolComponent.tsx @@ -0,0 +1,162 @@ +'use client'; + +import { GrepFile, GrepMetadata, GrepRepoInfo, ToolResult } from "@/features/tools"; +import { useMemo } from "react"; +import { RepoBadge } from "./repoBadge"; +import { HoverCard, HoverCardContent, HoverCardTrigger } from "@/components/ui/hover-card"; +import { VscodeFileIcon } from "@/app/components/vscodeFileIcon"; +import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; +import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; +import { cn, getCodeHostIcon } from "@/lib/utils"; +import Image from "next/image"; +import { Separator } from "@/components/ui/separator"; +import Link from "next/link"; + +export const GrepToolComponent = (output: ToolResult) => { + const stats = useMemo(() => { + const { matchCount, repoCount } = output.metadata; + const matchLabel = `${matchCount} ${matchCount === 1 ? 'match' : 'matches'}`; + if (matchCount === 0 || repoCount === 1) { + return matchLabel; + } + const repoLabel = `${repoCount} ${repoCount === 1 ? 'repo' : 'repos'}`; + return `${matchLabel} · ${repoLabel}`; + }, [output]); + + const filesByRepo = useMemo(() => { + const groups = new Map(); + for (const file of output.metadata.files) { + if (!groups.has(file.repo)) { + groups.set(file.repo, []); + } + groups.get(file.repo)!.push(file); + } + return groups; + }, [output.metadata.files]); + + const singleRepo = output.metadata.repoCount === 1 + ? output.metadata.repoInfoMap[output.metadata.files[0]?.repo] + : undefined; + + return ( + +
+
+ + + Searched + {output.metadata.pattern} + {singleRepo && <>in} + + +
+ {stats} + +
+ {output.metadata.files.length > 0 && ( + +
+ {output.metadata.groupByRepo ? ( + Array.from(filesByRepo.keys()).map((repo) => ( + + )) + ) : ( + Array.from(filesByRepo.entries()).map(([repo, files]) => ( +
+ + {files.map((file) => ( + + ))} +
+ )) + )} +
+
+ )} +
+ ); +} + +const RepoHeader = ({ repo, repoName, isPrimary }: { repo: GrepRepoInfo | undefined; repoName: string; isPrimary: boolean }) => { + const displayName = repo?.displayName ?? repoName.split('/').slice(1).join('/'); + const icon = repo ? getCodeHostIcon(repo.codeHostType) : null; + + const href = getBrowsePath({ + repoName: repoName, + path: '', + pathType: 'tree', + domain: SINGLE_TENANT_ORG_DOMAIN, + }); + + const className = cn("top-0 flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium bg-popover border-b border-border", + { + 'sticky text-muted-foreground': !isPrimary, + 'text-foreground cursor-pointer hover:bg-accent transition-colors': isPrimary, + } + ) + + const Content = ( + <> + {icon && ( + {repo!.codeHostType} + )} + {displayName} + + ) + + if (isPrimary) { + return ( + + {Content} + + ) + } else { + return ( +
+ {Content} +
+ ) + } +} + +const FileRow = ({ file }: { file: GrepFile }) => { + const dir = file.path.includes('/') + ? file.path.split('/').slice(0, -1).join('/') + : ''; + + const href = getBrowsePath({ + repoName: file.repo, + revisionName: file.revision, + path: file.path, + pathType: 'blob', + domain: SINGLE_TENANT_ORG_DOMAIN, + }); + + return ( + + + {file.name} + {dir && ( + <> + · + {dir} + + )} + + ); +} diff --git a/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx index f1cc6890e..3e0d2651d 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx @@ -1,87 +1,20 @@ 'use client'; -import { ListCommitsToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { useMemo, useState } from "react"; -import { ToolHeader, TreeList } from "./shared"; -import { CodeSnippet } from "@/app/components/codeSnippet"; +import { ListCommitsMetadata, ToolResult } from "@/features/tools"; +import { RepoBadge } from "./repoBadge"; import { Separator } from "@/components/ui/separator"; -import { GitCommitVerticalIcon } from "lucide-react"; -export const ListCommitsToolComponent = ({ part }: { part: ListCommitsToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Listing commits...'; - case 'output-error': - return '"List commits" tool call failed'; - case 'input-available': - case 'output-available': - return 'Listed commits'; - } - }, [part]); +export const ListCommitsToolComponent = ({ metadata }: ToolResult) => { + const count = metadata.commits.length; + const label = `${count} ${count === 1 ? 'commit' : 'commits'}`; return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - {isServiceError(part.output) ? ( - - Failed with the following error: {part.output.message} - - ) : ( - <> - {part.output.commits.length === 0 ? ( - No commits found - ) : ( - -
- Found {part.output.commits.length} of {part.output.totalCount} total commits: -
- {part.output.commits.map((commit) => ( -
-
- -
-
- - {commit.hash.substring(0, 7)} - - {commit.refs && ( - - {commit.refs} - - )} -
-
- {commit.message} -
-
- {commit.author} - - {new Date(commit.date).toLocaleString()} -
-
-
-
- ))} -
- )} - - )} - - - )} +
+ Listed commits in + + + {label} +
- ) -} + ); +}; diff --git a/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx index 3639b598e..ba7c8d008 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx @@ -1,66 +1,18 @@ 'use client'; -import { ListReposToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { useMemo, useState } from "react"; -import { ToolHeader, TreeList } from "./shared"; -import { CodeSnippet } from "@/app/components/codeSnippet"; +import { ListReposMetadata, ToolResult } from "@/features/tools"; import { Separator } from "@/components/ui/separator"; -import { FolderOpenIcon } from "lucide-react"; -export const ListReposToolComponent = ({ part }: { part: ListReposToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Listing repositories...'; - case 'output-error': - return '"List repositories" tool call failed'; - case 'input-available': - case 'output-available': - return 'Listed repositories'; - } - }, [part]); +export const ListReposToolComponent = ({ metadata }: ToolResult) => { + const count = metadata.repos.length; + const label = `${count}${metadata.totalCount > count ? ` of ${metadata.totalCount}` : ''} ${count === 1 ? 'repo' : 'repos'}`; return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - {isServiceError(part.output) ? ( - - Failed with the following error: {part.output.message} - - ) : ( - <> - {part.output.length === 0 ? ( - No repositories found - ) : ( - -
- Found {part.output.length} repositories: -
- {part.output.map((repoName, index) => ( -
- - {repoName} -
- ))} -
- )} - - )} - - - )} +
+ Listed repositories + + {label} +
- ) -} \ No newline at end of file + ); +}; diff --git a/packages/web/src/features/chat/components/chatThread/tools/listTreeToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/listTreeToolComponent.tsx new file mode 100644 index 000000000..bc49c56ef --- /dev/null +++ b/packages/web/src/features/chat/components/chatThread/tools/listTreeToolComponent.tsx @@ -0,0 +1,37 @@ +'use client'; + +import { ListTreeMetadata, ToolResult } from "@/features/tools"; +import { RepoBadge } from "./repoBadge"; +import { Separator } from "@/components/ui/separator"; +import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; +import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; +import { FolderIcon } from "lucide-react"; +import Link from "next/link"; + +export const ListTreeToolComponent = ({ metadata }: ToolResult) => { + return ( +
+ Listed + e.stopPropagation()} + className="inline-flex items-center gap-1 text-xs bg-muted hover:bg-accent px-1.5 py-0.5 rounded truncate text-foreground font-medium transition-colors min-w-0" + > + + {metadata.path || '/'} + + in + + + {metadata.totalReturned} {metadata.totalReturned === 1 ? 'entry' : 'entries'}{metadata.truncated ? ' (truncated)' : ''} + + +
+ ); +}; diff --git a/packages/web/src/features/chat/components/chatThread/tools/readFileToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/readFileToolComponent.tsx new file mode 100644 index 000000000..44e207683 --- /dev/null +++ b/packages/web/src/features/chat/components/chatThread/tools/readFileToolComponent.tsx @@ -0,0 +1,48 @@ +'use client'; + +import { ReadFileMetadata, ToolResult } from "@/features/tools"; +import { VscodeFileIcon } from "@/app/components/vscodeFileIcon"; +import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; +import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; +import { Separator } from "@/components/ui/separator"; +import Link from "next/link"; +import { RepoBadge } from "./repoBadge"; + +export const ReadFileToolComponent = ({ metadata }: ToolResult) => { + const fileName = metadata.path.split('/').pop() ?? metadata.path; + const href = getBrowsePath({ + repoName: metadata.repo, + revisionName: metadata.revision, + path: metadata.path, + pathType: 'blob', + domain: SINGLE_TENANT_ORG_DOMAIN, + highlightRange: (metadata.isTruncated || metadata.startLine > 1) ? { + start: { lineNumber: metadata.startLine }, + end: { lineNumber: metadata.endLine }, + } : undefined, + }); + + const linesRead = metadata.endLine - metadata.startLine + 1; + + return ( +
+ Read + e.stopPropagation()} + > + + {fileName} + {(metadata.isTruncated || metadata.startLine > 1) && ( + L{metadata.startLine}-{metadata.endLine} + )} + + in + + + {linesRead} {linesRead === 1 ? 'line' : 'lines'} + +
+ ); +} diff --git a/packages/web/src/features/chat/components/chatThread/tools/readFilesToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/readFilesToolComponent.tsx deleted file mode 100644 index a31ae75b4..000000000 --- a/packages/web/src/features/chat/components/chatThread/tools/readFilesToolComponent.tsx +++ /dev/null @@ -1,60 +0,0 @@ -'use client'; - -import { CodeSnippet } from "@/app/components/codeSnippet"; -import { Separator } from "@/components/ui/separator"; -import { ReadFilesToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { EyeIcon } from "lucide-react"; -import { useMemo, useState } from "react"; -import { FileListItem, ToolHeader, TreeList } from "./shared"; - -export const ReadFilesToolComponent = ({ part }: { part: ReadFilesToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Reading...'; - case 'input-available': - return `Reading ${part.input.paths.length} file${part.input.paths.length === 1 ? '' : 's'}...`; - case 'output-error': - return 'Tool call failed'; - case 'output-available': - if (isServiceError(part.output)) { - return 'Failed to read files'; - } - return `Read ${part.output.length} file${part.output.length === 1 ? '' : 's'}`; - } - }, [part]); - - return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - - {isServiceError(part.output) ? ( - Failed with the following error: {part.output.message} - ) : part.output.map((file) => { - return ( - - ) - })} - - - - )} -
- ) -} diff --git a/packages/web/src/features/chat/components/chatThread/tools/repoBadge.tsx b/packages/web/src/features/chat/components/chatThread/tools/repoBadge.tsx new file mode 100644 index 000000000..68b69bfef --- /dev/null +++ b/packages/web/src/features/chat/components/chatThread/tools/repoBadge.tsx @@ -0,0 +1,29 @@ +'use client'; + +import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; +import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; +import { getCodeHostIcon } from "@/lib/utils"; +import { CodeHostType } from "@sourcebot/db"; +import Image from "next/image"; +import Link from "next/link"; + +export const RepoBadge = ({ repo }: { repo: { name: string; displayName: string; codeHostType: CodeHostType } }) => { + const icon = getCodeHostIcon(repo.codeHostType); + const href = getBrowsePath({ + repoName: repo.name, + path: '', + pathType: 'tree', + domain: SINGLE_TENANT_ORG_DOMAIN, + }); + + return ( + e.stopPropagation()} + className="inline-flex items-center gap-1 px-1.5 py-0.5 rounded bg-muted hover:bg-accent text-xs font-medium transition-colors text-foreground max-w-[300px] overflow-hidden" + > + {repo.codeHostType} + {repo.displayName.split('/').pop()} + + ); +} diff --git a/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx deleted file mode 100644 index ca266c599..000000000 --- a/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx +++ /dev/null @@ -1,74 +0,0 @@ -'use client'; - -import { SearchCodeToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { useMemo, useState } from "react"; -import { FileListItem, ToolHeader, TreeList } from "./shared"; -import { CodeSnippet } from "@/app/components/codeSnippet"; -import { Separator } from "@/components/ui/separator"; -import { SearchIcon } from "lucide-react"; - -export const 
SearchCodeToolComponent = ({ part }: { part: SearchCodeToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const displayQuery = useMemo(() => { - if (part.state !== 'input-available' && part.state !== 'output-available') { - return ''; - } - - return part.input.query; - }, [part]); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Searching...'; - case 'output-error': - return '"Search code" tool call failed'; - case 'input-available': - case 'output-available': - return Searched for {displayQuery}; - } - }, [part, displayQuery]); - - return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - {isServiceError(part.output) ? ( - - Failed with the following error: {part.output.message} - - ) : ( - <> - {part.output.files.length === 0 ? ( - No matches found - ) : ( - - {part.output.files.map((file) => { - return ( - - ) - })} - - )} - - )} - - - )} -
- ) -} \ No newline at end of file diff --git a/packages/web/src/features/chat/components/chatThread/tools/shared.tsx b/packages/web/src/features/chat/components/chatThread/tools/shared.tsx deleted file mode 100644 index 92c2bf3fa..000000000 --- a/packages/web/src/features/chat/components/chatThread/tools/shared.tsx +++ /dev/null @@ -1,135 +0,0 @@ -'use client'; - -import { VscodeFileIcon } from '@/app/components/vscodeFileIcon'; -import { ScrollArea } from '@/components/ui/scroll-area'; -import { cn } from '@/lib/utils'; -import { ChevronDown, ChevronRight, Loader2 } from 'lucide-react'; -import Link from 'next/link'; -import React from 'react'; -import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; -import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; - - -export const FileListItem = ({ - path, - repoName, -}: { - path: string, - repoName: string, -}) => { - return ( -
- - - {path} - -
- ) -} - -export const TreeList = ({ children }: { children: React.ReactNode }) => { - const childrenArray = React.Children.toArray(children); - - return ( - - {/* vertical line */} -
0 ? `${100 / childrenArray.length * 0.6}%` : '0' - }} - /> - - {childrenArray.map((child, index) => { - const isLast = index === childrenArray.length - 1; - - return ( -
- {!isLast && ( -
- )} - {isLast && ( -
- )} - -
{child}
-
- ) - })} - - ); -}; - -interface ToolHeaderProps { - isLoading: boolean; - isError: boolean; - isExpanded: boolean; - label: React.ReactNode; - Icon: React.ElementType; - onExpand: (isExpanded: boolean) => void; - className?: string; -} - -export const ToolHeader = ({ isLoading, isError, isExpanded, label, Icon, onExpand, className }: ToolHeaderProps) => { - return ( -
{ - onExpand(!isExpanded) - }} - onKeyDown={(e) => { - if (e.key !== "Enter") { - return; - } - onExpand(!isExpanded); - }} - > - {isLoading ? ( - - ) : ( - - )} - - {label} - - {!isLoading && ( -
- {isExpanded ? ( - - ) : ( - - )} -
- )} -
- ) -} \ No newline at end of file diff --git a/packages/web/src/features/chat/components/chatThread/tools/toolOutputGuard.tsx b/packages/web/src/features/chat/components/chatThread/tools/toolOutputGuard.tsx new file mode 100644 index 000000000..c682cfcad --- /dev/null +++ b/packages/web/src/features/chat/components/chatThread/tools/toolOutputGuard.tsx @@ -0,0 +1,110 @@ +'use client'; + +import { SBChatMessageToolTypes } from "@/features/chat/types"; +import { CopyIconButton } from "@/app/[domain]/components/copyIconButton"; +import { ToolUIPart } from "ai"; +import { ChevronDown } from "lucide-react"; +import { cn } from "@/lib/utils"; +import { useCallback, useState } from "react"; + +export const ToolOutputGuard = >({ + part, + loadingText, + children, +}: { + part: T, + loadingText: string, + children: (output: Extract['output']) => React.ReactNode, +}) => { + const [isExpanded, setIsExpanded] = useState(false); + + const onToggle = useCallback(() => setIsExpanded(v => !v), []); + + const hasInput = part.state !== 'input-streaming'; + + const requestText = hasInput ? JSON.stringify(part.input, null, 2) : ''; + const responseText = part.state === 'output-available' + ? (() => { + const raw = (part.output as { output: string }).output; + try { + return JSON.stringify(JSON.parse(raw), null, 2); + } catch { + return raw; + } + })() + : part.state === 'output-error' + ? (part.errorText ?? '') + : undefined; + + const onCopyRequest = useCallback(() => { + navigator.clipboard.writeText(requestText); + return true; + }, [requestText]); + + const onCopyResponse = useCallback(() => { + if (!responseText) { + return false; + } + navigator.clipboard.writeText(responseText); + return true; + }, [responseText]); + + return ( +
+
+
+ {part.state === 'output-error' ? ( + + {part.title!} failed with error: {part.errorText} + + ) : part.state !== 'output-available' ? ( + + {loadingText} + + ) : ( + children(part.output as Extract['output']) + )} +
+ {hasInput && } +
+ {hasInput && isExpanded && ( +
+ +
+                            {requestText}
+                        
+
+ {responseText !== undefined && ( + <> +
+ +
+                                    {responseText}
+                                
+
+ + )} +
+ )} +
+ ); +} + +const ExpandButton = ({ isExpanded, onToggle }: { isExpanded: boolean; onToggle: () => void }) => ( + +); + +const ResultSection = ({ label, onCopy, children }: { label: string; onCopy: () => boolean; children: React.ReactNode }) => ( +
+
+ {label} + +
+
+ {children} +
+
+); diff --git a/packages/web/src/features/chat/constants.ts b/packages/web/src/features/chat/constants.ts index aca101a3c..b2e5de742 100644 --- a/packages/web/src/features/chat/constants.ts +++ b/packages/web/src/features/chat/constants.ts @@ -1,5 +1,3 @@ -import { SBChatMessagePart } from "./types"; - export const FILE_REFERENCE_PREFIX = '@file:'; export const FILE_REFERENCE_REGEX = new RegExp( // @file:{repoName::fileName:startLine-endLine} @@ -8,24 +6,3 @@ export const FILE_REFERENCE_REGEX = new RegExp( ); export const ANSWER_TAG = ''; - -export const toolNames = { - searchCode: 'searchCode', - readFiles: 'readFiles', - findSymbolReferences: 'findSymbolReferences', - findSymbolDefinitions: 'findSymbolDefinitions', - listRepos: 'listRepos', - listCommits: 'listCommits', -} as const; - -// These part types are visible in the UI. -export const uiVisiblePartTypes: SBChatMessagePart['type'][] = [ - 'reasoning', - 'text', - 'tool-searchCode', - 'tool-readFiles', - 'tool-findSymbolDefinitions', - 'tool-findSymbolReferences', - 'tool-listRepos', - 'tool-listCommits', -] as const; \ No newline at end of file diff --git a/packages/web/src/features/chat/logger.ts b/packages/web/src/features/chat/logger.ts new file mode 100644 index 000000000..bbd1b7001 --- /dev/null +++ b/packages/web/src/features/chat/logger.ts @@ -0,0 +1,3 @@ +import { createLogger } from "@sourcebot/shared"; + +export const logger = createLogger('ask-agent'); \ No newline at end of file diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index 713f31722..d3673a7b8 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -1,312 +1,31 @@ -import { z } from "zod" -import { search } from "@/features/search" -import { InferToolInput, InferToolOutput, InferUITool, tool, ToolUIPart } from "ai"; -import { isServiceError } from "@/lib/utils"; -import { FileSourceResponse, getFileSource, listCommits } from '@/features/git'; 
-import { findSearchBasedSymbolDefinitions, findSearchBasedSymbolReferences } from "../codeNav/api"; -import { addLineNumbers } from "./utils"; -import { toolNames } from "./constants"; -import { listReposQueryParamsSchema } from "@/lib/schemas"; -import { ListReposQueryParams } from "@/lib/types"; -import { listRepos } from "@/app/api/(server)/repos/listReposApi"; -import escapeStringRegexp from "escape-string-regexp"; - -// @NOTE: When adding a new tool, follow these steps: -// 1. Add the tool to the `toolNames` constant in `constants.ts`. -// 2. Add the tool to the `SBChatMessageToolTypes` type in `types.ts`. -// 3. Add the tool to the `tools` prop in `agent.ts`. -// 4. If the tool is meant to be rendered in the UI: -// - Add the tool to the `uiVisiblePartTypes` constant in `constants.ts`. -// - Add the tool's component to the `DetailsCard` switch statement in `detailsCard.tsx`. -// -// - bk, 2025-07-25 - - -export const findSymbolReferencesTool = tool({ - description: `Finds references to a symbol in the codebase.`, - inputSchema: z.object({ - symbol: z.string().describe("The symbol to find references to"), - language: z.string().describe("The programming language of the symbol"), - repository: z.string().describe("The repository to scope the search to").optional(), - }), - execute: async ({ symbol, language, repository }) => { - // @todo: make revision configurable. 
- const revision = "HEAD"; - - const response = await findSearchBasedSymbolReferences({ - symbolName: symbol, - language, - revisionName: "HEAD", - repoName: repository, - }); - - if (isServiceError(response)) { - return response; - } - - return response.files.map((file) => ({ - fileName: file.fileName, - repository: file.repository, - language: file.language, - matches: file.matches.map(({ lineContent, range }) => { - return addLineNumbers(lineContent, range.start.lineNumber); - }), - revision, - })); - }, -}); - -export type FindSymbolReferencesTool = InferUITool; -export type FindSymbolReferencesToolInput = InferToolInput; -export type FindSymbolReferencesToolOutput = InferToolOutput; -export type FindSymbolReferencesToolUIPart = ToolUIPart<{ [toolNames.findSymbolReferences]: FindSymbolReferencesTool }> - -export const findSymbolDefinitionsTool = tool({ - description: `Finds definitions of a symbol in the codebase.`, - inputSchema: z.object({ - symbol: z.string().describe("The symbol to find definitions of"), - language: z.string().describe("The programming language of the symbol"), - repository: z.string().describe("The repository to scope the search to").optional(), - }), - execute: async ({ symbol, language, repository }) => { - // @todo: make revision configurable. 
- const revision = "HEAD"; - - const response = await findSearchBasedSymbolDefinitions({ - symbolName: symbol, - language, - revisionName: revision, - repoName: repository, - }); - - if (isServiceError(response)) { - return response; - } - - return response.files.map((file) => ({ - fileName: file.fileName, - repository: file.repository, - language: file.language, - matches: file.matches.map(({ lineContent, range }) => { - return addLineNumbers(lineContent, range.start.lineNumber); - }), - revision, - })); - } -}); - -export type FindSymbolDefinitionsTool = InferUITool; -export type FindSymbolDefinitionsToolInput = InferToolInput; -export type FindSymbolDefinitionsToolOutput = InferToolOutput; -export type FindSymbolDefinitionsToolUIPart = ToolUIPart<{ [toolNames.findSymbolDefinitions]: FindSymbolDefinitionsTool }> - -export const readFilesTool = tool({ - description: `Reads the contents of multiple files at the given paths.`, - inputSchema: z.object({ - paths: z.array(z.string()).describe("The paths to the files to read"), - repository: z.string().describe("The repository to read the files from"), - }), - execute: async ({ paths, repository }) => { - // @todo: make revision configurable. 
- const revision = "HEAD"; - - const responses = await Promise.all(paths.map(async (path) => { - return getFileSource({ - path, - repo: repository, - ref: revision, - }, { source: 'sourcebot-ask-agent' }); - })); - - if (responses.some(isServiceError)) { - const firstError = responses.find(isServiceError); - return firstError!; - } - - return (responses as FileSourceResponse[]).map((response) => ({ - path: response.path, - repository: response.repo, - language: response.language, - source: addLineNumbers(response.source), - revision, - })); - } -}); - -export type ReadFilesTool = InferUITool; -export type ReadFilesToolInput = InferToolInput; -export type ReadFilesToolOutput = InferToolOutput; -export type ReadFilesToolUIPart = ToolUIPart<{ [toolNames.readFiles]: ReadFilesTool }> - -const DEFAULT_SEARCH_LIMIT = 100; - -export const createCodeSearchTool = (selectedRepos: string[]) => tool({ - description: `Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by searching for exact symbols, functions, variables, or specific code patterns. To determine if a repository is indexed, use the \`listRepos\` tool. By default, searches are global and will search the default branch of all repositories. Searches can be scoped to specific repositories, languages, and branches.`, - inputSchema: z.object({ - query: z - .string() - .describe(`The search pattern to match against code contents. Do not escape quotes in your query.`) - // Escape backslashes first, then quotes, and wrap in double quotes - // so the query is treated as a literal phrase (like grep). - .transform((val) => { - const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); - return `"${escaped}"`; - }), - useRegex: z - .boolean() - .describe(`Whether to use regular expression matching to match the search query against code contents. When false, substring matching is used. 
(default: false)`) - .optional(), - filterByRepos: z - .array(z.string()) - .describe(`Scope the search to the provided repositories.`) - .optional(), - filterByLanguages: z - .array(z.string()) - .describe(`Scope the search to the provided languages.`) - .optional(), - filterByFilepaths: z - .array(z.string()) - .describe(`Scope the search to the provided filepaths. Each filepath is a regular expression matched against the full file path.`) - .optional(), - caseSensitive: z - .boolean() - .describe(`Whether the search should be case sensitive (default: false).`) - .optional(), - ref: z - .string() - .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`) - .optional(), - limit: z - .number() - .default(DEFAULT_SEARCH_LIMIT) - .describe(`Maximum number of matches to return (default: ${DEFAULT_SEARCH_LIMIT})`) - .optional(), - }), - execute: async ({ - query, - useRegex = false, - filterByRepos: repos = [], - filterByLanguages: languages = [], - filterByFilepaths: filepaths = [], - caseSensitive = false, - ref, - limit = DEFAULT_SEARCH_LIMIT, - }) => { - - if (selectedRepos.length > 0) { - query += ` reposet:${selectedRepos.join(',')}`; - } - - if (repos.length > 0) { - query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; - } - - if (languages.length > 0) { - query += ` (lang:${languages.join(' or lang:')})`; - } - - if (filepaths.length > 0) { - query += ` (file:${filepaths.join(' or file:')})`; - } - - if (ref) { - query += ` (rev:${ref})`; - } - - const response = await search({ - queryType: 'string', - query, - options: { - matches: limit, - contextLines: 3, - isCaseSensitivityEnabled: caseSensitive, - isRegexEnabled: useRegex, - }, - source: 'sourcebot-ask-agent', - }); - - if (isServiceError(response)) { - return response; - } - - return { - files: response.files.map((file) => ({ - fileName: file.fileName.text, - repository: file.repository, - language: 
file.language, - matches: file.chunks.map(({ content, contentStart }) => { - return addLineNumbers(content, contentStart.lineNumber); - }), - // @todo: make revision configurable. - revision: 'HEAD', - })), - query, - } - }, -}); - -export type SearchCodeTool = InferUITool>; -export type SearchCodeToolInput = InferToolInput>; -export type SearchCodeToolOutput = InferToolOutput>; -export type SearchCodeToolUIPart = ToolUIPart<{ [toolNames.searchCode]: SearchCodeTool }>; - -export const listReposTool = tool({ - description: 'Lists repositories in the organization with optional filtering and pagination.', - inputSchema: listReposQueryParamsSchema, - execute: async (request: ListReposQueryParams) => { - const reposResponse = await listRepos({ ...request, source: 'sourcebot-ask-agent' }); - - if (isServiceError(reposResponse)) { - return reposResponse; - } - - return reposResponse.data.map((repo) => repo.repoName); - } -}); - -export type ListReposTool = InferUITool; -export type ListReposToolInput = InferToolInput; -export type ListReposToolOutput = InferToolOutput; -export type ListReposToolUIPart = ToolUIPart<{ [toolNames.listRepos]: ListReposTool }>; - -export const listCommitsTool = tool({ - description: 'Lists commits in a repository with optional filtering by date range, author, and commit message.', - inputSchema: z.object({ - repository: z.string().describe("The repository to list commits from"), - query: z.string().describe("Search query to filter commits by message (case-insensitive)").optional(), - since: z.string().describe("Start date for commit range (e.g., '30 days ago', '2024-01-01', 'last week')").optional(), - until: z.string().describe("End date for commit range (e.g., 'yesterday', '2024-12-31', 'today')").optional(), - author: z.string().describe("Filter commits by author name or email (case-insensitive)").optional(), - maxCount: z.number().describe("Maximum number of commits to return (default: 50)").optional(), - }), - execute: async ({ 
repository, query, since, until, author, maxCount }) => { - const response = await listCommits({ - repo: repository, - query, - since, - until, - author, - maxCount, - }); - - if (isServiceError(response)) { - return response; - } - - return { - commits: response.commits.map((commit) => ({ - hash: commit.hash, - date: commit.date, - message: commit.message, - author: `${commit.author_name} <${commit.author_email}>`, - refs: commit.refs, - })), - totalCount: response.totalCount, - }; - } +import { + toVercelAITool, + readFileDefinition, + listCommitsDefinition, + listReposDefinition, + grepDefinition, + findSymbolReferencesDefinition, + findSymbolDefinitionsDefinition, + listTreeDefinition, +} from "@/features/tools"; +import { ToolContext } from "@/features/tools/types"; +import { ToolUIPart } from "ai"; +import { SBChatMessageToolTypes } from "./types"; + +export const createTools = (context: ToolContext) => ({ + [readFileDefinition.name]: toVercelAITool(readFileDefinition, context), + [listCommitsDefinition.name]: toVercelAITool(listCommitsDefinition, context), + [listReposDefinition.name]: toVercelAITool(listReposDefinition, context), + [grepDefinition.name]: toVercelAITool(grepDefinition, context), + [findSymbolReferencesDefinition.name]: toVercelAITool(findSymbolReferencesDefinition, context), + [findSymbolDefinitionsDefinition.name]: toVercelAITool(findSymbolDefinitionsDefinition, context), + [listTreeDefinition.name]: toVercelAITool(listTreeDefinition, context), }); -export type ListCommitsTool = InferUITool; -export type ListCommitsToolInput = InferToolInput; -export type ListCommitsToolOutput = InferToolOutput; -export type ListCommitsToolUIPart = ToolUIPart<{ [toolNames.listCommits]: ListCommitsTool }>; +export type ReadFileToolUIPart = ToolUIPart<{ read_file: SBChatMessageToolTypes['read_file'] }>; +export type ListCommitsToolUIPart = ToolUIPart<{ list_commits: SBChatMessageToolTypes['list_commits'] }>; +export type ListReposToolUIPart = ToolUIPart<{ 
list_repos: SBChatMessageToolTypes['list_repos'] }>; +export type GrepToolUIPart = ToolUIPart<{ grep: SBChatMessageToolTypes['grep'] }>; +export type FindSymbolReferencesToolUIPart = ToolUIPart<{ find_symbol_references: SBChatMessageToolTypes['find_symbol_references'] }>; +export type FindSymbolDefinitionsToolUIPart = ToolUIPart<{ find_symbol_definitions: SBChatMessageToolTypes['find_symbol_definitions'] }>; +export type ListTreeToolUIPart = ToolUIPart<{ list_tree: SBChatMessageToolTypes['list_tree'] }>; diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 9411f850f..11fa7f360 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -1,18 +1,16 @@ -import { CreateUIMessage, UIMessage, UIMessagePart } from "ai"; +import { CreateUIMessage, InferUITool, UIMessage, UIMessagePart } from "ai"; import { BaseEditor, Descendant } from "slate"; import { HistoryEditor } from "slate-history"; import { ReactEditor, RenderElementProps } from "slate-react"; import { z } from "zod"; -import { FindSymbolDefinitionsTool, FindSymbolReferencesTool, ReadFilesTool, SearchCodeTool, ListReposTool, ListCommitsTool } from "./tools"; -import { toolNames } from "./constants"; import { LanguageModel } from "@sourcebot/schemas/v3/index.type"; +import { createTools } from "./tools"; const fileSourceSchema = z.object({ type: z.literal('file'), repo: z.string(), path: z.string(), name: z.string(), - language: z.string(), revision: z.string(), }); export type FileSource = z.infer; @@ -79,13 +77,8 @@ export const sbChatMessageMetadataSchema = z.object({ export type SBChatMessageMetadata = z.infer; export type SBChatMessageToolTypes = { - [toolNames.searchCode]: SearchCodeTool, - [toolNames.readFiles]: ReadFilesTool, - [toolNames.findSymbolReferences]: FindSymbolReferencesTool, - [toolNames.findSymbolDefinitions]: FindSymbolDefinitionsTool, - [toolNames.listRepos]: ListReposTool, - [toolNames.listCommits]: 
ListCommitsTool, -} + [K in keyof ReturnType]: InferUITool[K]>; +}; export type SBChatMessageDataParts = { // The `source` data type allows us to know what sources the LLM saw diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index f77325c4d..a1c9fd9f4 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -174,7 +174,7 @@ export const resetEditor = (editor: CustomEditor) => { } export const addLineNumbers = (source: string, lineOffset = 1) => { - return source.split('\n').map((line, index) => `${index + lineOffset}:${line}`).join('\n'); + return source.split('\n').map((line, index) => `${index + lineOffset}: ${line}`).join('\n'); } export const createUIMessage = (text: string, mentions: MentionData[], selectedSearchScopes: SearchScope[]): CreateUIMessage => { @@ -187,7 +187,6 @@ export const createUIMessage = (text: string, mentions: MentionData[], selectedS path: mention.path, repo: mention.repo, name: mention.name, - language: mention.language, revision: mention.revision, } return fileSource; @@ -338,8 +337,13 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStre const lastTextPart = message.parts .findLast((part) => part.type === 'text') - if (lastTextPart?.text.startsWith(ANSWER_TAG)) { - return lastTextPart; + if (lastTextPart?.text.includes(ANSWER_TAG)) { + const answerIndex = lastTextPart.text.indexOf(ANSWER_TAG); + const answer = lastTextPart.text.substring(answerIndex + ANSWER_TAG.length); + return { + ...lastTextPart, + text: answer + }; } // If the agent did not include the answer tag, then fallback to using the last text part. 
diff --git a/packages/web/src/features/codeNav/api.ts b/packages/web/src/features/codeNav/api.ts index 2d0e92364..fe7a44e54 100644 --- a/packages/web/src/features/codeNav/api.ts +++ b/packages/web/src/features/codeNav/api.ts @@ -22,8 +22,6 @@ export const findSearchBasedSymbolReferences = async (props: FindRelatedSymbolsR repoName, } = props; - const languageFilter = getExpandedLanguageFilter(language); - const query: QueryIR = { and: { children: [ @@ -41,7 +39,7 @@ export const findSearchBasedSymbolReferences = async (props: FindRelatedSymbolsR exact: true, } }, - languageFilter, + ...(language ? [getExpandedLanguageFilter(language)] : []), ...(repoName ? [{ repo: { regexp: `^${escapeStringRegexp(repoName)}$`, @@ -78,8 +76,6 @@ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbols repoName } = props; - const languageFilter = getExpandedLanguageFilter(language); - const query: QueryIR = { and: { children: [ @@ -101,7 +97,7 @@ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbols exact: true, } }, - languageFilter, + ...(language ? [getExpandedLanguageFilter(language)] : []), ...(repoName ? [{ repo: { regexp: `^${escapeStringRegexp(repoName)}$`, diff --git a/packages/web/src/features/codeNav/types.ts b/packages/web/src/features/codeNav/types.ts index d3b789471..59fd7b22f 100644 --- a/packages/web/src/features/codeNav/types.ts +++ b/packages/web/src/features/codeNav/types.ts @@ -3,7 +3,7 @@ import { rangeSchema, repositoryInfoSchema } from "../search/types"; export const findRelatedSymbolsRequestSchema = z.object({ symbolName: z.string(), - language: z.string(), + language: z.string().optional(), /** * Optional revision name to scope search to. * If not provided, the search will be scoped to HEAD. 
diff --git a/packages/web/src/features/git/getFileSourceApi.ts b/packages/web/src/features/git/getFileSourceApi.ts index 03ecbaef2..401461981 100644 --- a/packages/web/src/features/git/getFileSourceApi.ts +++ b/packages/web/src/features/git/getFileSourceApi.ts @@ -6,7 +6,7 @@ import { detectLanguageFromFilename } from '@/lib/languageDetection'; import { ServiceError, notFound, fileNotFound, invalidGitRef, unexpectedError } from '@/lib/serviceError'; import { getCodeHostBrowseFileAtBranchUrl } from '@/lib/utils'; import { withOptionalAuthV2 } from '@/withAuthV2'; -import { getRepoPath } from '@sourcebot/shared'; +import { env, getRepoPath } from '@sourcebot/shared'; import { headers } from 'next/headers'; import simpleGit from 'simple-git'; import type z from 'zod'; @@ -66,13 +66,6 @@ export const getFileSource = async ({ path: filePath, repo: repoName, ref }: Fil } const language = detectLanguageFromFilename(filePath); - const webUrl = getBrowsePath({ - repoName: repo.name, - revisionName: ref, - path: filePath, - pathType: 'blob', - domain: SINGLE_TENANT_ORG_DOMAIN, - }); const externalWebUrl = getCodeHostBrowseFileAtBranchUrl({ webUrl: repo.webUrl, codeHostType: repo.external_codeHostType, @@ -80,6 +73,15 @@ export const getFileSource = async ({ path: filePath, repo: repoName, ref }: Fil filePath, }); + const baseUrl = env.AUTH_URL; + const webUrl = `${baseUrl}${getBrowsePath({ + repoName: repo.name, + revisionName: ref, + path: filePath, + pathType: 'blob', + domain: SINGLE_TENANT_ORG_DOMAIN, + })}`; + return { source: fileContent, language, diff --git a/packages/web/src/features/git/listCommitsApi.ts b/packages/web/src/features/git/listCommitsApi.ts index 27baf096e..405dcaf9a 100644 --- a/packages/web/src/features/git/listCommitsApi.ts +++ b/packages/web/src/features/git/listCommitsApi.ts @@ -28,6 +28,7 @@ type ListCommitsRequest = { until?: string; author?: string; ref?: string; + path?: string; maxCount?: number; skip?: number; } @@ -46,6 +47,7 @@ export 
const listCommits = async ({ until, author, ref = 'HEAD', + path, maxCount = 50, skip = 0, }: ListCommitsRequest): Promise => sew(() => @@ -93,19 +95,31 @@ export const listCommits = async ({ } : {}), }; + // Build args array directly to ensure correct ordering: + // git log [flags] [-- ] + const logArgs: string[] = [`--max-count=${maxCount}`]; + if (skip > 0) { + logArgs.push(`--skip=${skip}`); + } + for (const [key, value] of Object.entries(sharedOptions)) { + logArgs.push(value !== null ? `${key}=${value}` : key); + } + logArgs.push(ref); + if (path) { + logArgs.push('--', path); + } + // First, get the commits - const log = await git.log({ - [ref]: null, - maxCount, - ...(skip > 0 ? { '--skip': skip } : {}), - ...sharedOptions, - }); + const log = await git.log(logArgs); // Then, use rev-list to get the total count of commits const countArgs = ['rev-list', '--count', ref]; for (const [key, value] of Object.entries(sharedOptions)) { countArgs.push(value !== null ? `${key}=${value}` : key); } + if (path) { + countArgs.push('--', path); + } const totalCount = parseInt((await git.raw(countArgs)).trim(), 10); diff --git a/packages/web/src/features/mcp/server.ts b/packages/web/src/features/mcp/server.ts index 4353de0f4..390334ca2 100644 --- a/packages/web/src/features/mcp/server.ts +++ b/packages/web/src/features/mcp/server.ts @@ -1,494 +1,57 @@ -import { listRepos } from '@/app/api/(server)/repos/listReposApi'; -import { getConfiguredLanguageModels, getConfiguredLanguageModelsInfo } from "../chat/utils.server"; -import { askCodebase } from '@/features/mcp/askCodebase'; import { languageModelInfoSchema, } from '@/features/chat/types'; -import { getFileSource, getTree, listCommits } from '@/features/git'; -import { search } from '@/features/search/searchApi'; +import { askCodebase } from '@/features/mcp/askCodebase'; import { isServiceError } from '@/lib/utils'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ChatVisibility } from 
'@sourcebot/db'; import { SOURCEBOT_VERSION } from '@sourcebot/shared'; import _dedent from 'dedent'; -import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; +import { getConfiguredLanguageModelsInfo } from "../chat/utils.server"; import { - ListTreeEntry, - TextContent, -} from './types'; -import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } from './utils'; + findSymbolDefinitionsDefinition, + findSymbolReferencesDefinition, + listCommitsDefinition, + listReposDefinition, + listTreeDefinition, + readFileDefinition, + registerMcpTool, + grepDefinition, + ToolContext, +} from '../tools'; const dedent = _dedent.withOptions({ alignValues: true }); -const DEFAULT_MINIMUM_TOKENS = 10000; -const DEFAULT_MATCHES = 10000; -const DEFAULT_CONTEXT_LINES = 5; - -const DEFAULT_TREE_DEPTH = 1; -const MAX_TREE_DEPTH = 10; -const DEFAULT_MAX_TREE_ENTRIES = 1000; -const MAX_MAX_TREE_ENTRIES = 10000; - -const TOOL_DESCRIPTIONS = { - search_code: dedent` - Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by - searching for exact symbols, functions, variables, or specific code patterns. - - To determine if a repository is indexed, use the \`list_repos\` tool. By default, searches are global and will search the default branch of all repositories. Searches can be - scoped to specific repositories, languages, and branches. - - When referencing code outputted by this tool, always include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out. 
- `, - list_commits: dedent`Get a list of commits for a given repository.`, - list_repos: dedent`Lists repositories in the organization with optional filtering and pagination.`, - read_file: dedent`Reads the source code for a given file.`, - list_tree: dedent` - Lists files and directories from a repository path. This can be used as a repo tree tool or directory listing tool. - Returns a flat list of entries with path metadata and depth relative to the requested path. - `, - list_language_models: dedent`Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling ask_codebase.`, - ask_codebase: dedent` - DO NOT USE THIS TOOL UNLESS EXPLICITLY ASKED TO. THE PROMPT MUST SPECIFICALLY ASK TO USE THE ask_codebase TOOL. - - Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. - - This is a blocking operation that may take 60+ seconds to research the codebase, so only invoke it if the user has explicitly asked you to by specifying the ask_codebase tool call in the prompt. - - The agent will: - - Analyze your question and determine what context it needs - - Search the codebase using multiple strategies (code search, symbol lookup, file reading) - - Synthesize findings into a comprehensive answer with code references - - Returns a detailed answer in markdown format with code references, plus a link to view the full research session (including all tool calls and reasoning) in the Sourcebot web UI. - - When using this in shared environments (e.g., Slack), you can set the visibility parameter to 'PUBLIC' to ensure everyone can access the chat link. 
- `, -}; - export async function createMcpServer(): Promise { const server = new McpServer({ name: 'sourcebot-mcp-server', version: SOURCEBOT_VERSION, }); - const configuredModels = await getConfiguredLanguageModels(); - const hasLanguageModels = configuredModels.length > 0; - - server.registerTool( - "search_code", - { - description: TOOL_DESCRIPTIONS.search_code, - annotations: { readOnlyHint: true }, - inputSchema: { - query: z - .string() - .describe(`The search pattern to match against code contents. Do not escape quotes in your query.`) - .transform((val) => { - const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); - return `"${escaped}"`; - }), - useRegex: z - .boolean() - .describe(`Whether to use regular expression matching. When false, substring matching is used. (default: false)`) - .optional(), - filterByRepos: z - .array(z.string()) - .describe(`Scope the search to the provided repositories.`) - .optional(), - filterByLanguages: z - .array(z.string()) - .describe(`Scope the search to the provided languages.`) - .optional(), - filterByFilepaths: z - .array(z.string()) - .describe(`Scope the search to the provided filepaths. Each filepath is a regular expression matched against the full file path.`) - .optional(), - caseSensitive: z - .boolean() - .describe(`Whether the search should be case sensitive (default: false).`) - .optional(), - includeCodeSnippets: z - .boolean() - .describe(`Whether to include code snippets in the response. If false, only the file's URL, repository, and language will be returned. (default: false)`) - .optional(), - ref: z - .string() - .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch.`) - .optional(), - maxTokens: z - .number() - .describe(`The maximum number of tokens to return (default: ${DEFAULT_MINIMUM_TOKENS}).`) - .transform((val) => (val < DEFAULT_MINIMUM_TOKENS ? 
DEFAULT_MINIMUM_TOKENS : val)) - .optional(), - }, - }, - async ({ - query, - filterByRepos: repos = [], - filterByLanguages: languages = [], - filterByFilepaths: filepaths = [], - maxTokens = DEFAULT_MINIMUM_TOKENS, - includeCodeSnippets = false, - caseSensitive = false, - ref, - useRegex = false, - }: { - query: string; - useRegex?: boolean; - filterByRepos?: string[]; - filterByLanguages?: string[]; - filterByFilepaths?: string[]; - caseSensitive?: boolean; - includeCodeSnippets?: boolean; - ref?: string; - maxTokens?: number; - }) => { - if (repos.length > 0) { - query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; - } - if (languages.length > 0) { - query += ` (lang:${languages.join(' or lang:')})`; - } - if (filepaths.length > 0) { - query += ` (file:${filepaths.join(' or file:')})`; - } - if (ref) { - query += ` ( rev:${ref} )`; - } - - const response = await search({ - queryType: 'string', - query, - options: { - matches: DEFAULT_MATCHES, - contextLines: DEFAULT_CONTEXT_LINES, - isRegexEnabled: useRegex, - isCaseSensitivityEnabled: caseSensitive, - }, - source: 'mcp', - }); - - if (isServiceError(response)) { - return { - content: [{ type: "text", text: `Search failed: ${response.message}` }], - }; - } - - if (response.files.length === 0) { - return { - content: [{ type: "text", text: `No results found for the query: ${query}` }], - }; - } - - const content: TextContent[] = []; - let totalTokens = 0; - let isResponseTruncated = false; + const configuredLanguageModels = await getConfiguredLanguageModelsInfo(); + const hasLanguageModels = configuredLanguageModels.length > 0; - for (const file of response.files) { - const numMatches = file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0); - let text = dedent` - file: ${file.webUrl} - num_matches: ${numMatches} - repo: ${file.repository} - language: ${file.language} - `; - - if (includeCodeSnippets) { - const snippets = file.chunks.map(chunk => 
`\`\`\`\n${chunk.content}\n\`\`\``).join('\n'); - text += `\n\n${snippets}`; - } - - const tokens = text.length / 4; - - if ((totalTokens + tokens) > maxTokens) { - const remainingTokens = maxTokens - totalTokens; - if (remainingTokens > 100) { - const maxLength = Math.floor(remainingTokens * 4); - content.push({ - type: "text", - text: text.substring(0, maxLength) + "\n\n...[content truncated due to token limit]", - }); - totalTokens += remainingTokens; - } - isResponseTruncated = true; - break; - } - - totalTokens += tokens; - content.push({ type: "text", text }); - } - - if (isResponseTruncated) { - content.push({ - type: "text", - text: `The response was truncated because the number of tokens exceeded the maximum limit of ${maxTokens}.`, - }); - } - - return { content }; - } - ); - - server.registerTool( - "list_commits", - { - description: TOOL_DESCRIPTIONS.list_commits, - annotations: { readOnlyHint: true }, - inputSchema: z.object({ - repo: z.string().describe("The name of the repository to list commits for."), - query: z.string().describe("Search query to filter commits by message content (case-insensitive).").optional(), - since: z.string().describe("Show commits more recent than this date. Supports ISO 8601 or relative formats (e.g., '30 days ago').").optional(), - until: z.string().describe("Show commits older than this date. Supports ISO 8601 or relative formats (e.g., 'yesterday').").optional(), - author: z.string().describe("Filter commits by author name or email (case-insensitive).").optional(), - ref: z.string().describe("Commit SHA, branch or tag name to list commits of. If not provided, uses the default branch.").optional(), - page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), - perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). 
Default: 50").optional().default(50), - }), - }, - async ({ repo, query, since, until, author, ref, page, perPage }) => { - const skip = (page - 1) * perPage; - const result = await listCommits({ - repo, - query, - since, - until, - author, - ref, - maxCount: perPage, - skip, - }); - - if (isServiceError(result)) { - return { - content: [{ type: "text", text: `Failed to list commits: ${result.message}` }], - }; - } - - return { content: [{ type: "text", text: JSON.stringify(result) }] }; - } - ); - - server.registerTool( - "list_repos", - { - description: TOOL_DESCRIPTIONS.list_repos, - annotations: { readOnlyHint: true }, - inputSchema: z.object({ - query: z.string().describe("Filter repositories by name (case-insensitive)").optional(), - page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), - perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 30").optional().default(30), - sort: z.enum(['name', 'pushed']).describe("Sort repositories by 'name' or 'pushed' (most recent commit). Default: 'name'").optional().default('name'), - direction: z.enum(['asc', 'desc']).describe("Sort direction: 'asc' or 'desc'. 
Default: 'asc'").optional().default('asc'), - }) - }, - async ({ query, page, perPage, sort, direction }) => { - const result = await listRepos({ query, page, perPage, sort, direction, source: 'mcp' }); - - if (isServiceError(result)) { - return { - content: [{ type: "text", text: `Failed to list repositories: ${result.message}` }], - }; - } - - return { - content: [{ - type: "text", - text: JSON.stringify({ - repos: result.data.map((repo) => ({ - name: repo.repoName, - url: repo.webUrl, - pushedAt: repo.pushedAt, - defaultBranch: repo.defaultBranch, - isFork: repo.isFork, - isArchived: repo.isArchived, - })), - totalCount: result.totalCount, - }), - }], - }; - } - ); - - server.registerTool( - "read_file", - { - description: TOOL_DESCRIPTIONS.read_file, - annotations: { readOnlyHint: true }, - inputSchema: { - repo: z.string().describe("The repository name."), - path: z.string().describe("The path to the file."), - ref: z.string().optional().describe("Commit SHA, branch or tag name to fetch the source code for. If not provided, uses the default branch of the repository."), - }, - }, - async ({ repo, path, ref }) => { - const response = await getFileSource({ repo, path, ref }, { source: 'mcp' }); - - if (isServiceError(response)) { - return { - content: [{ type: "text", text: `Failed to read file: ${response.message}` }], - }; - } - - return { - content: [{ - type: "text", - text: JSON.stringify({ - source: response.source, - language: response.language, - path: response.path, - url: response.webUrl, - }), - }], - }; - } - ); - - server.registerTool( - "list_tree", - { - description: TOOL_DESCRIPTIONS.list_tree, - annotations: { readOnlyHint: true }, - inputSchema: { - repo: z.string().describe("The name of the repository to list files from."), - path: z.string().describe("Directory path (relative to repo root). If omitted, the repo root is used.").optional().default(''), - ref: z.string().describe("Commit SHA, branch or tag name to list files from. 
If not provided, uses the default branch.").optional().default('HEAD'), - depth: z.number().int().positive().max(MAX_TREE_DEPTH).describe(`How many directory levels to traverse below \`path\` (min 1, max ${MAX_TREE_DEPTH}, default ${DEFAULT_TREE_DEPTH}).`).optional().default(DEFAULT_TREE_DEPTH), - includeFiles: z.boolean().describe("Whether to include files in the output (default: true).").optional().default(true), - includeDirectories: z.boolean().describe("Whether to include directories in the output (default: true).").optional().default(true), - maxEntries: z.number().int().positive().max(MAX_MAX_TREE_ENTRIES).describe(`Maximum number of entries to return (min 1, max ${MAX_MAX_TREE_ENTRIES}, default ${DEFAULT_MAX_TREE_ENTRIES}).`).optional().default(DEFAULT_MAX_TREE_ENTRIES), - }, - }, - async ({ - repo, - path = '', - ref = 'HEAD', - depth = DEFAULT_TREE_DEPTH, - includeFiles = true, - includeDirectories = true, - maxEntries = DEFAULT_MAX_TREE_ENTRIES, - }: { - repo: string; - path?: string; - ref?: string; - depth?: number; - includeFiles?: boolean; - includeDirectories?: boolean; - maxEntries?: number; - }) => { - const normalizedPath = normalizeTreePath(path); - const normalizedDepth = Math.min(depth, MAX_TREE_DEPTH); - const normalizedMaxEntries = Math.min(maxEntries, MAX_MAX_TREE_ENTRIES); - - if (!includeFiles && !includeDirectories) { - return { - content: [{ - type: "text", - text: JSON.stringify({ - repo, ref, path: normalizedPath, - entries: [] as ListTreeEntry[], - totalReturned: 0, - truncated: false, - }), - }], - }; - } - - const queue: Array<{ path: string; depth: number }> = [{ path: normalizedPath, depth: 0 }]; - const queuedPaths = new Set([normalizedPath]); - const seenEntries = new Set(); - const entries: ListTreeEntry[] = []; - let truncated = false; - let treeError: string | null = null; - - while (queue.length > 0 && !truncated) { - const currentDepth = queue[0]!.depth; - const currentLevelPaths: string[] = []; - - while (queue.length > 0 
&& queue[0]!.depth === currentDepth) { - currentLevelPaths.push(queue.shift()!.path); - } - - const treeResult = await getTree({ - repoName: repo, - revisionName: ref, - paths: currentLevelPaths.filter(Boolean), - }, { source: 'mcp' }); - - if (isServiceError(treeResult)) { - treeError = treeResult.message; - break; - } - - const treeNodeIndex = buildTreeNodeIndex(treeResult.tree); - - for (const currentPath of currentLevelPaths) { - const currentNode = currentPath === '' ? treeResult.tree : treeNodeIndex.get(currentPath); - if (!currentNode || currentNode.type !== 'tree') continue; - - for (const child of currentNode.children) { - if (child.type !== 'tree' && child.type !== 'blob') continue; - - const childPath = joinTreePath(currentPath, child.name); - const childDepth = currentDepth + 1; - - if (child.type === 'tree' && childDepth < normalizedDepth && !queuedPaths.has(childPath)) { - queue.push({ path: childPath, depth: childDepth }); - queuedPaths.add(childPath); - } - - if ((child.type === 'blob' && !includeFiles) || (child.type === 'tree' && !includeDirectories)) { - continue; - } - - const key = `${child.type}:${childPath}`; - if (seenEntries.has(key)) continue; - seenEntries.add(key); - - if (entries.length >= normalizedMaxEntries) { - truncated = true; - break; - } - - entries.push({ - type: child.type as 'tree' | 'blob', - path: childPath, - name: child.name, - parentPath: currentPath, - depth: childDepth, - }); - } - - if (truncated) break; - } - } + const toolContext: ToolContext = { + source: 'sourcebot-mcp-server', + } - if (treeError) { - return { - content: [{ type: "text", text: `Failed to list tree: ${treeError}` }], - }; - } - - const sortedEntries = sortTreeEntries(entries); - return { - content: [{ - type: "text", - text: JSON.stringify({ - repo, ref, path: normalizedPath, - entries: sortedEntries, - totalReturned: sortedEntries.length, - truncated, - }), - }], - }; - } - ); + registerMcpTool(server, grepDefinition, toolContext); + 
registerMcpTool(server, listCommitsDefinition, toolContext); + registerMcpTool(server, listReposDefinition, toolContext); + registerMcpTool(server, readFileDefinition, toolContext); + registerMcpTool(server, listTreeDefinition, toolContext); + registerMcpTool(server, findSymbolDefinitionsDefinition, toolContext); + registerMcpTool(server, findSymbolReferencesDefinition, toolContext); server.registerTool( "list_language_models", { - description: TOOL_DESCRIPTIONS.list_language_models, - annotations: { readOnlyHint: true }, + description: dedent`Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling ask_codebase.`, + annotations: { + readOnlyHint: true, + idempotentHint: true, + } }, async () => { const models = await getConfiguredLanguageModelsInfo(); @@ -500,14 +63,31 @@ export async function createMcpServer(): Promise { server.registerTool( "ask_codebase", { - description: TOOL_DESCRIPTIONS.ask_codebase, - annotations: { readOnlyHint: true }, + description: dedent` + DO NOT USE THIS TOOL UNLESS EXPLICITLY ASKED TO. THE PROMPT MUST SPECIFICALLY ASK TO USE THE ask_codebase TOOL. + + Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. + + This is a blocking operation that may take 60+ seconds to research the codebase, so only invoke it if the user has explicitly asked you to by specifying the ask_codebase tool call in the prompt. + + The agent will: + - Analyze your question and determine what context it needs + - Search the codebase using multiple strategies (code search, symbol lookup, file reading) + - Synthesize findings into a comprehensive answer with code references + + Returns a detailed answer in markdown format with code references, plus a link to view the full research session (including all tool calls and reasoning) in the Sourcebot web UI. 
+ + When using this in shared environments (e.g., Slack), you can set the visibility parameter to 'PUBLIC' to ensure everyone can access the chat link. + `, inputSchema: z.object({ query: z.string().describe("The query to ask about the codebase."), repos: z.array(z.string()).optional().describe("The repositories accessible to the agent. If not provided, all repositories are accessible."), languageModel: languageModelInfoSchema.optional().describe("The language model to use. If not provided, defaults to the first model in the config."), visibility: z.enum(['PRIVATE', 'PUBLIC']).optional().describe("The visibility of the chat session. Defaults to PRIVATE for authenticated users."), }), + annotations: { + readOnlyHint: true, + } }, async (request) => { const result = await askCodebase({ diff --git a/packages/web/src/features/mcp/types.ts b/packages/web/src/features/mcp/types.ts index af60fd648..b3ff5d903 100644 --- a/packages/web/src/features/mcp/types.ts +++ b/packages/web/src/features/mcp/types.ts @@ -1,13 +1,7 @@ export type TextContent = { type: "text", text: string }; -export type ListTreeEntry = { - type: 'tree' | 'blob'; - path: string; - name: string; - parentPath: string; - depth: number; -}; +export type { ListTreeEntry } from "@/features/tools/listTree"; export type ListTreeApiNode = { type: 'tree' | 'blob'; diff --git a/packages/web/src/features/mcp/utils.ts b/packages/web/src/features/mcp/utils.ts index 96ef5d568..b6de4c71a 100644 --- a/packages/web/src/features/mcp/utils.ts +++ b/packages/web/src/features/mcp/utils.ts @@ -1,6 +1,6 @@ import { FileTreeNode } from "../git"; import { ServiceError } from "@/lib/serviceError"; -import { ListTreeEntry } from "./types"; +import { ListTreeEntry } from "@/features/tools/listTree"; export const isServiceError = (data: unknown): data is ServiceError => { return typeof data === 'object' && diff --git a/packages/web/src/features/tools/adapters.ts b/packages/web/src/features/tools/adapters.ts new file mode 100644 
index 000000000..5b2ac3809 --- /dev/null +++ b/packages/web/src/features/tools/adapters.ts @@ -0,0 +1,51 @@ +import { tool } from "ai"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { z } from "zod"; +import { ToolContext, ToolDefinition } from "./types"; + +export function toVercelAITool( + def: ToolDefinition, + context: ToolContext, +) { + return tool({ + description: def.description, + inputSchema: def.inputSchema, + title: def.title, + execute: (input) => def.execute(input, context), + toModelOutput: ({ output }) => ({ + type: "content", + value: [{ type: "text", text: output.output }], + }), + }); +} + +export function registerMcpTool( + server: McpServer, + def: ToolDefinition, + context: ToolContext, +) { + // Widening .shape to z.ZodRawShape (its base constraint) gives TypeScript a + // concrete InputArgs so it can fully resolve BaseToolCallback's conditional + // type. def.inputSchema.parse() recovers the correctly typed value inside. + server.registerTool( + def.name, + { + description: def.description, + inputSchema: def.inputSchema.shape as z.ZodRawShape, + annotations: { + readOnlyHint: def.isReadOnly, + idempotentHint: def.isIdempotent, + }, + }, + async (input) => { + try { + const parsed = def.inputSchema.parse(input); + const result = await def.execute(parsed, context); + return { content: [{ type: "text" as const, text: result.output }] }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { content: [{ type: "text" as const, text: `Tool "${def.name}" failed: ${message}` }], isError: true }; + } + }, + ); +} diff --git a/packages/web/src/features/tools/findSymbolDefinitions.ts b/packages/web/src/features/tools/findSymbolDefinitions.ts new file mode 100644 index 000000000..b52813a1e --- /dev/null +++ b/packages/web/src/features/tools/findSymbolDefinitions.ts @@ -0,0 +1,107 @@ +import { getRepoInfoByName } from "@/actions"; +import { findSearchBasedSymbolDefinitions } from "@/features/codeNav/api"; +import { isServiceError } from "@/lib/utils"; +import { z } from "zod"; +import description from "./findSymbolDefinitions.txt"; +import { FindSymbolFile, FindSymbolRepoInfo } from "./findSymbolReferences"; +import { logger } from "./logger"; +import { ToolDefinition } from "./types"; + + +const MAX_LINE_LENGTH = 2000; +const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`; + +const findSymbolDefinitionsShape = { + symbol: z.string().describe("The symbol to find definitions of"), + repo: z.string().describe("The repository to scope the search to"), +}; + + +export type FindSymbolDefinitionsMetadata = { + symbol: string; + matchCount: number; + fileCount: number; + repoInfo: FindSymbolRepoInfo; + files: FindSymbolFile[]; +}; + +export const findSymbolDefinitionsDefinition: ToolDefinition< + 'find_symbol_definitions', + typeof findSymbolDefinitionsShape, + FindSymbolDefinitionsMetadata +> = { + name: 'find_symbol_definitions', + title: 'Find symbol definitions', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(findSymbolDefinitionsShape), + execute: async ({ symbol, repo }, _context) => { + logger.debug('find_symbol_definitions', { symbol, repo }); + const revision = "HEAD"; + + const response = await findSearchBasedSymbolDefinitions({ + symbolName: symbol, + revisionName: revision, + repoName: repo, + }); + + if (isServiceError(response)) { + throw new 
Error(response.message); + } + + const matchCount = response.stats.matchCount; + const fileCount = response.files.length; + + const repoInfoResult = await getRepoInfoByName(repo); + if (isServiceError(repoInfoResult) || !repoInfoResult) { + throw new Error(`Repository "${repo}" not found.`); + } + const repoInfo: FindSymbolRepoInfo = { + name: repoInfoResult.name, + displayName: repoInfoResult.displayName ?? repoInfoResult.name, + codeHostType: repoInfoResult.codeHostType, + }; + + const metadata: FindSymbolDefinitionsMetadata = { + symbol, + matchCount, + fileCount, + repoInfo, + files: response.files.map((file) => ({ + fileName: file.fileName, + repo: file.repository, + revision, + })), + }; + + if (fileCount === 0) { + return { + output: 'No definitions found', + metadata, + }; + } + + const outputLines: string[] = [ + `Found ${matchCount} ${matchCount === 1 ? 'definition' : 'definitions'} in ${fileCount} ${fileCount === 1 ? 'file' : 'files'}`, + ]; + + for (const file of response.files) { + outputLines.push(''); + outputLines.push(`[${file.repository}] ${file.fileName}:`); + for (const { lineContent, range } of file.matches) { + const lineNum = range.start.lineNumber; + const trimmed = lineContent.trimEnd(); + const line = trimmed.length > MAX_LINE_LENGTH + ? trimmed.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX + : trimmed; + outputLines.push(` ${lineNum}: ${line}`); + } + } + + return { + output: outputLines.join('\n'), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/findSymbolDefinitions.txt b/packages/web/src/features/tools/findSymbolDefinitions.txt new file mode 100644 index 000000000..0ba87ff08 --- /dev/null +++ b/packages/web/src/features/tools/findSymbolDefinitions.txt @@ -0,0 +1 @@ +Finds definitions of a symbol in the codebase. 
diff --git a/packages/web/src/features/tools/findSymbolReferences.ts b/packages/web/src/features/tools/findSymbolReferences.ts new file mode 100644 index 000000000..3013bd34d --- /dev/null +++ b/packages/web/src/features/tools/findSymbolReferences.ts @@ -0,0 +1,117 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { findSearchBasedSymbolReferences } from "@/features/codeNav/api"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./findSymbolReferences.txt"; +import { getRepoInfoByName } from "@/actions"; +import { CodeHostType } from "@sourcebot/db"; + +const MAX_LINE_LENGTH = 2000; +const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`; + +const findSymbolReferencesShape = { + symbol: z.string().describe("The symbol to find references to"), + repo: z.string().describe("The repository to scope the search to"), +}; + +export type FindSymbolRepoInfo = { + name: string; + displayName: string; + codeHostType: CodeHostType; +}; + +export type FindSymbolFile = { + fileName: string; + repo: string; + revision: string; +}; + +export type FindSymbolReferencesMetadata = { + symbol: string; + matchCount: number; + fileCount: number; + repoInfo: FindSymbolRepoInfo; + files: FindSymbolFile[]; +}; + +export const findSymbolReferencesDefinition: ToolDefinition< + 'find_symbol_references', + typeof findSymbolReferencesShape, + FindSymbolReferencesMetadata +> = { + name: 'find_symbol_references', + title: 'Find symbol references', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(findSymbolReferencesShape), + execute: async ({ symbol, repo }, _context) => { + logger.debug('find_symbol_references', { symbol, repo }); + const revision = "HEAD"; + + const response = await findSearchBasedSymbolReferences({ + symbolName: symbol, + revisionName: revision, + repoName: repo, + }); + + if (isServiceError(response)) { + throw new 
Error(response.message); + } + + const matchCount = response.stats.matchCount; + const fileCount = response.files.length; + + const repoInfoResult = await getRepoInfoByName(repo); + if (isServiceError(repoInfoResult) || !repoInfoResult) { + throw new Error(`Repository "${repo}" not found.`); + } + const repoInfo: FindSymbolRepoInfo = { + name: repoInfoResult.name, + displayName: repoInfoResult.displayName ?? repoInfoResult.name, + codeHostType: repoInfoResult.codeHostType, + }; + + const metadata: FindSymbolReferencesMetadata = { + symbol, + matchCount, + fileCount, + repoInfo, + files: response.files.map((file) => ({ + fileName: file.fileName, + repo: file.repository, + revision, + })), + }; + + if (fileCount === 0) { + return { + output: 'No references found', + metadata, + }; + } + + const outputLines: string[] = [ + `Found ${matchCount} ${matchCount === 1 ? 'reference' : 'references'} in ${fileCount} ${fileCount === 1 ? 'file' : 'files'}`, + ]; + + for (const file of response.files) { + outputLines.push(''); + outputLines.push(`[${file.repository}] ${file.fileName}:`); + for (const { lineContent, range } of file.matches) { + const lineNum = range.start.lineNumber; + const trimmed = lineContent.trimEnd(); + const line = trimmed.length > MAX_LINE_LENGTH + ? trimmed.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX + : trimmed; + outputLines.push(` ${lineNum}: ${line}`); + } + } + + return { + output: outputLines.join('\n'), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/findSymbolReferences.txt b/packages/web/src/features/tools/findSymbolReferences.txt new file mode 100644 index 000000000..e35a2c87b --- /dev/null +++ b/packages/web/src/features/tools/findSymbolReferences.txt @@ -0,0 +1 @@ +Finds references to a symbol in the codebase. 
diff --git a/packages/web/src/features/tools/grep.ts b/packages/web/src/features/tools/grep.ts
new file mode 100644
index 000000000..1f200d961
--- /dev/null
+++ b/packages/web/src/features/tools/grep.ts
@@ -0,0 +1,238 @@
+import { z } from "zod";
+import globToRegexp from "glob-to-regexp";
+import { isServiceError } from "@/lib/utils";
+import { search } from "@/features/search";
+import escapeStringRegexp from "escape-string-regexp";
+import { ToolDefinition } from "./types";
+import { logger } from "./logger";
+import description from "./grep.txt";
+import { CodeHostType } from "@sourcebot/db";
+
+const DEFAULT_LIMIT = 100;
+const DEFAULT_GROUP_BY_REPO_LIMIT = 10_000;
+const MAX_LINE_LENGTH = 2000;
+const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`;
+const TRUNCATION_MESSAGE = `(Results truncated. Consider using a more specific path or pattern, specifying a repo, or increasing the limit.)`;
+
+/**
+ * Converts a file glob (e.g. "*.{ts,tsx}") into a regex source string for a `file:` filter.
+ * The leading `^` anchor is stripped so the pattern is not pinned to the start of the path.
+ */
+function globToFileRegexp(glob: string): string {
+    const re = globToRegexp(glob, { extended: true, globstar: true });
+    return re.source.replace(/^\^/, '');
+}
+
+const grepShape = {
+    pattern: z
+        .string()
+        .describe(`The regex pattern to search for in file contents`),
+    path: z
+        .string()
+        .describe(`The directory to search in. Defaults to the repository root.`)
+        .optional(),
+    include: z
+        .string()
+        .describe(`File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")`)
+        .optional(),
+    repo: z
+        .string()
+        .describe(`The name of the repository to search in. If not provided, searches all repositories.`)
+        .optional(),
+    ref: z
+        .string()
+        .describe(`The commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`)
+        .optional(),
+    limit: z
+        .number()
+        .describe(`The maximum number of matches to return (default: ${DEFAULT_LIMIT} when groupByRepo=false, ${DEFAULT_GROUP_BY_REPO_LIMIT} when groupByRepo=true)`)
+        .optional(),
+    groupByRepo: z
+        .boolean()
+        .optional()
+        .describe(`If true, returns a summary of match counts grouped by repository instead of individual file results.`),
+};
+
+export type GrepFile = {
+    path: string;
+    name: string;
+    repo: string;
+    revision: string;
+};
+
+export type GrepRepoInfo = {
+    name: string;
+    displayName: string;
+    codeHostType: CodeHostType;
+};
+
+export type GrepMetadata = {
+    files: GrepFile[];
+    pattern: string;
+    query: string;
+    matchCount: number;
+    repoCount: number;
+    repoInfoMap: Record<string, GrepRepoInfo>;
+    groupByRepo: boolean;
+};
+
+/**
+ * `grep` tool: builds a search query from the inputs, runs a case-sensitive regex search,
+ * and renders either per-file matching lines or (with `groupByRepo`) per-repository counts.
+ * Throws an `Error` carrying the service error message when the search fails.
+ */
+export const grepDefinition: ToolDefinition<'grep', typeof grepShape, GrepMetadata> = {
+    name: 'grep',
+    title: 'Search code',
+    isReadOnly: true,
+    isIdempotent: true,
+    description,
+    inputSchema: z.object(grepShape),
+    execute: async ({
+        pattern,
+        path,
+        include,
+        repo,
+        ref,
+        limit: _limit,
+        groupByRepo = false,
+    }, context) => {
+
+        const limit = _limit ?? (groupByRepo ? DEFAULT_GROUP_BY_REPO_LIMIT : DEFAULT_LIMIT);
+
+        logger.debug('grep', { pattern, path, include, repo, ref, limit, groupByRepo });
+
+        // Quote the pattern (escaping embedded double quotes) so it is passed as one query term.
+        const quotedPattern = `"${pattern.replace(/"/g, '\\"')}"`;
+        let query = quotedPattern;
+
+        if (path) {
+            query += ` file:${escapeStringRegexp(path)}`;
+        }
+
+        if (include) {
+            query += ` file:${globToFileRegexp(include)}`;
+        }
+
+        // An explicit repo wins; otherwise fall back to the caller's selected repos, if any.
+        if (repo) {
+            query += ` repo:${escapeStringRegexp(repo)}`;
+        } else if (context.selectedRepos && context.selectedRepos.length > 0) {
+            query += ` reposet:${context.selectedRepos.join(',')}`;
+        }
+
+        if (ref) {
+            query += ` (rev:${ref})`;
+        }
+
+        const response = await search({
+            queryType: 'string',
+            query,
+            options: {
+                matches: limit,
+                contextLines: 0,
+                isCaseSensitivityEnabled: true,
+                isRegexEnabled: true,
+            },
+            source: context.source,
+        });
+
+        if (isServiceError(response)) {
+            throw new Error(response.message);
+        }
+
+        const files = response.files.map((file) => ({
+            path: file.fileName.text,
+            name: file.fileName.text.split('/').pop() ?? file.fileName.text,
+            repo: file.repository,
+            revision: ref ?? 'HEAD',
+        } satisfies GrepFile));
+
+        const repoInfoMap = Object.fromEntries(
+            response.repositoryInfo.map((info) => [info.name, {
+                name: info.name,
+                displayName: info.displayName ?? info.name,
+                codeHostType: info.codeHostType,
+            }])
+        );
+
+        const metadata: GrepMetadata = {
+            files,
+            pattern,
+            query,
+            matchCount: response.stats.actualMatchCount,
+            repoCount: new Set(files.map((f) => f.repo)).size,
+            repoInfoMap,
+            groupByRepo,
+        };
+
+        const totalFiles = response.files.length;
+        const actualMatches = response.stats.actualMatchCount;
+
+        if (totalFiles === 0) {
+            return {
+                output: 'No files found',
+                metadata,
+            };
+        }
+
+        if (groupByRepo) {
+            const repoCounts = new Map<string, { matches: number; files: number }>();
+            for (const file of response.files) {
+                const repoName = file.repository;
+                const matchCount = file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0);
+                const existing = repoCounts.get(repoName) ?? { matches: 0, files: 0 };
+                repoCounts.set(repoName, { matches: existing.matches + matchCount, files: existing.files + 1 });
+            }
+
+            const outputLines: string[] = [
+                `Found matches in ${repoCounts.size} ${repoCounts.size === 1 ? 'repository' : 'repositories'}:`,
+            ];
+            for (const [repoName, { matches, files: fileCount }] of repoCounts) {
+                outputLines.push(` ${repoName}: ${matches} ${matches === 1 ? 'match' : 'matches'} in ${fileCount} ${fileCount === 1 ? 'file' : 'files'}`);
+            }
+
+            if (!response.isSearchExhaustive) {
+                outputLines.push('');
+                outputLines.push(TRUNCATION_MESSAGE);
+            }
+
+            return {
+                output: outputLines.join('\n'),
+                metadata,
+            };
+        }
+
+        const outputLines: string[] = [
+            `Found ${actualMatches} match${actualMatches !== 1 ? 'es' : ''} in ${totalFiles} file${totalFiles !== 1 ? 's' : ''}`,
+        ];
+
+        for (const file of response.files) {
+            outputLines.push('');
+            outputLines.push(`[${file.repository}] ${file.fileName.text}:`);
+            for (const chunk of file.chunks) {
+                // Skip blank lines; line numbers are offsets from the chunk's starting line.
+                chunk.content.split('\n').forEach((content, i) => {
+                    if (!content.trim()) {
+                        return;
+                    }
+                    const lineNum = chunk.contentStart.lineNumber + i;
+                    const line = content.length > MAX_LINE_LENGTH
+                        ? content.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX
+                        : content;
+                    outputLines.push(` ${lineNum}: ${line}`);
+                });
+            }
+        }
+
+        if (!response.isSearchExhaustive) {
+            outputLines.push('');
+            outputLines.push(TRUNCATION_MESSAGE);
+        }
+
+        return {
+            output: outputLines.join('\n'),
+            metadata,
+        };
+    },
+};
diff --git a/packages/web/src/features/tools/grep.txt b/packages/web/src/features/tools/grep.txt
new file mode 100644
index 000000000..c8f06e477
--- /dev/null
+++ b/packages/web/src/features/tools/grep.txt
@@ -0,0 +1,8 @@
+- Fast content search tool that works with any codebase size
+- Searches file contents using regular expressions
+- Supports full regex syntax (eg. "log.*Error", "function\s+\w+", etc.)
+- Filter files by pattern with the include parameter (eg. 
"*.js", "*.{ts,tsx}") +- Returns file paths and line numbers with at least one match +- Use this tool when you need to find files containing specific patterns +- When using the `repo` param, if the repository name is not known, use `list_repos` first to discover the correct name. +- Use `groupByRepo: true` when searching across many repositories and you want to identify which repos are most relevant before drilling in. This returns a per-repository summary (match and file counts) instead of individual file results, and automatically uses a higher match limit for accuracy. diff --git a/packages/web/src/features/tools/index.ts b/packages/web/src/features/tools/index.ts new file mode 100644 index 000000000..38fae0da6 --- /dev/null +++ b/packages/web/src/features/tools/index.ts @@ -0,0 +1,9 @@ +export * from './readFile'; +export * from './listCommits'; +export * from './listRepos'; +export * from './grep'; +export * from './findSymbolReferences'; +export * from './findSymbolDefinitions'; +export * from './listTree'; +export * from './adapters'; +export * from './types'; \ No newline at end of file diff --git a/packages/web/src/features/tools/listCommits.ts b/packages/web/src/features/tools/listCommits.ts new file mode 100644 index 000000000..34d61eeeb --- /dev/null +++ b/packages/web/src/features/tools/listCommits.ts @@ -0,0 +1,77 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { listCommits, SearchCommitsResult } from "@/features/git"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./listCommits.txt"; +import { CodeHostType } from "@sourcebot/db"; +import { getRepoInfoByName } from "@/actions"; + +const listCommitsShape = { + repo: z.string().describe("The repository to list commits from"), + query: z.string().describe("Search query to filter commits by message (case-insensitive)").optional(), + since: z.string().describe("Start date for commit range (e.g., '30 days ago', 
'2024-01-01', 'last week')").optional(), + until: z.string().describe("End date for commit range (e.g., 'yesterday', '2024-12-31', 'today')").optional(), + author: z.string().describe("Filter commits by author name or email (case-insensitive)").optional(), + ref: z.string().describe("Commit SHA, branch or tag name to list commits of. If not provided, uses the default branch.").optional(), + path: z.string().describe("Filter commits to only those that touched this file or directory path (relative to repo root).").optional(), + page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), + perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 50").optional().default(50), +}; + +export type ListCommitsRepoInfo = { + name: string; + displayName: string; + codeHostType: CodeHostType; +}; + +export type ListCommitsMetadata = SearchCommitsResult & { + repo: string; + repoInfo: ListCommitsRepoInfo; +}; + +export const listCommitsDefinition: ToolDefinition<"list_commits", typeof listCommitsShape, ListCommitsMetadata> = { + name: "list_commits", + title: "List commits", + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(listCommitsShape), + execute: async (params, _context) => { + logger.debug('list_commits', params); + + const { repo, query, since, until, author, ref, path, page, perPage } = params; + const skip = (page - 1) * perPage; + + const response = await listCommits({ + repo, + query, + since, + until, + author, + ref, + path, + maxCount: perPage, + skip, + }); + + if (isServiceError(response)) { + throw new Error(response.message); + } + + const repoInfoResult = await getRepoInfoByName(repo); + if (isServiceError(repoInfoResult) || !repoInfoResult) { + throw new Error(`Repository "${repo}" not found.`); + } + const repoInfo: ListCommitsRepoInfo = { + name: repoInfoResult.name, + displayName: repoInfoResult.displayName ?? 
repoInfoResult.name, + codeHostType: repoInfoResult.codeHostType, + }; + + return { + output: JSON.stringify(response), + metadata: { ...response, repo, repoInfo }, + }; + }, +}; diff --git a/packages/web/src/features/tools/listCommits.txt b/packages/web/src/features/tools/listCommits.txt new file mode 100644 index 000000000..b82afe97a --- /dev/null +++ b/packages/web/src/features/tools/listCommits.txt @@ -0,0 +1 @@ +Lists commits in a repository with optional filtering by date range, author, and commit message. diff --git a/packages/web/src/features/tools/listRepos.ts b/packages/web/src/features/tools/listRepos.ts new file mode 100644 index 000000000..8958fcec2 --- /dev/null +++ b/packages/web/src/features/tools/listRepos.ts @@ -0,0 +1,73 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { listRepos } from "@/app/api/(server)/repos/listReposApi"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from './listRepos.txt'; + +const listReposShape = { + query: z.string().describe("Filter repositories by name (case-insensitive)").optional(), + page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), + perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 30").optional().default(30), + sort: z.enum(['name', 'pushed']).describe("Sort repositories by 'name' or 'pushed' (most recent commit). Default: 'name'").optional().default('name'), + direction: z.enum(['asc', 'desc']).describe("Sort direction: 'asc' or 'desc'. 
Default: 'asc'").optional().default('asc'), +}; + +export type ListRepo = { + name: string; + url: string | null; + pushedAt: string | null; + defaultBranch: string | null; + isFork: boolean; + isArchived: boolean; +}; + +export type ListReposMetadata = { + repos: ListRepo[]; + totalCount: number; +}; + +export const listReposDefinition: ToolDefinition< + 'list_repos', + typeof listReposShape, + ListReposMetadata +> = { + name: 'list_repos', + title: 'List repositories', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(listReposShape), + execute: async ({ page, perPage, sort, direction, query }, context) => { + logger.debug('list_repos', { page, perPage, sort, direction, query }); + const reposResponse = await listRepos({ + page, + perPage, + sort, + direction, + query, + source: context.source, + }); + + if (isServiceError(reposResponse)) { + throw new Error(reposResponse.message); + } + + const metadata: ListReposMetadata = { + repos: reposResponse.data.map((repo) => ({ + name: repo.repoName, + url: repo.webUrl ?? null, + pushedAt: repo.pushedAt?.toISOString() ?? null, + defaultBranch: repo.defaultBranch ?? null, + isFork: repo.isFork, + isArchived: repo.isArchived, + })), + totalCount: reposResponse.totalCount, + }; + + return { + output: JSON.stringify(metadata), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/listRepos.txt b/packages/web/src/features/tools/listRepos.txt new file mode 100644 index 000000000..343546d27 --- /dev/null +++ b/packages/web/src/features/tools/listRepos.txt @@ -0,0 +1 @@ +Lists repositories in the organization with optional filtering and pagination. 
diff --git a/packages/web/src/features/tools/listTree.ts b/packages/web/src/features/tools/listTree.ts
new file mode 100644
index 000000000..08ae8f4b1
--- /dev/null
+++ b/packages/web/src/features/tools/listTree.ts
@@ -0,0 +1,211 @@
+import { z } from "zod";
+import { isServiceError } from "@/lib/utils";
+import { getTree } from "@/features/git";
+import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } from "@/features/mcp/utils";
+import { ToolDefinition } from "./types";
+import { logger } from "./logger";
+import description from "./listTree.txt";
+import { CodeHostType } from "@sourcebot/db";
+import { getRepoInfoByName } from "@/actions";
+
+const DEFAULT_TREE_DEPTH = 1;
+const MAX_TREE_DEPTH = 10;
+const DEFAULT_MAX_TREE_ENTRIES = 1000;
+const MAX_MAX_TREE_ENTRIES = 10000;
+
+const listTreeShape = {
+    repo: z.string().describe("The name of the repository to list files from."),
+    path: z.string().describe("Directory path (relative to repo root). If omitted, the repo root is used.").optional().default(''),
+    ref: z.string().describe("Commit SHA, branch or tag name to list files from. If not provided, uses the default branch.").optional().default('HEAD'),
+    depth: z.number().int().positive().max(MAX_TREE_DEPTH).describe(`How many directory levels to traverse below \`path\` (min 1, max ${MAX_TREE_DEPTH}, default ${DEFAULT_TREE_DEPTH}).`).optional().default(DEFAULT_TREE_DEPTH),
+    includeFiles: z.boolean().describe("Whether to include files in the output (default: true).").optional().default(true),
+    includeDirectories: z.boolean().describe("Whether to include directories in the output (default: true).").optional().default(true),
+    maxEntries: z.number().int().positive().max(MAX_MAX_TREE_ENTRIES).describe(`Maximum number of entries to return (min 1, max ${MAX_MAX_TREE_ENTRIES}, default ${DEFAULT_MAX_TREE_ENTRIES}).`).optional().default(DEFAULT_MAX_TREE_ENTRIES),
+};
+
+export type ListTreeEntry = {
+    type: 'tree' | 'blob';
+    path: string;
+    name: string;
+    parentPath: string;
+    depth: number;
+};
+
+export type ListTreeRepoInfo = {
+    name: string;
+    displayName: string;
+    codeHostType: CodeHostType;
+};
+
+export type ListTreeMetadata = {
+    repo: string;
+    repoInfo: ListTreeRepoInfo;
+    ref: string;
+    path: string;
+    entries: ListTreeEntry[];
+    totalReturned: number;
+    truncated: boolean;
+};
+
+/**
+ * `list_tree` tool: lists files/directories under `path`, one depth level at a time up to
+ * `depth`, capped at `maxEntries`, and renders them as indented text plus structured metadata.
+ * Throws an `Error` when the repository is not found or a tree lookup fails.
+ */
+export const listTreeDefinition: ToolDefinition<'list_tree', typeof listTreeShape, ListTreeMetadata> = {
+    name: 'list_tree',
+    title: 'List directory tree',
+    isReadOnly: true,
+    isIdempotent: true,
+    description,
+    inputSchema: z.object(listTreeShape),
+    execute: async ({ repo, path = '', ref = 'HEAD', depth = DEFAULT_TREE_DEPTH, includeFiles = true, includeDirectories = true, maxEntries = DEFAULT_MAX_TREE_ENTRIES }, context) => {
+        logger.debug('list_tree', { repo, path, ref, depth, includeFiles, includeDirectories, maxEntries });
+        const normalizedPath = normalizeTreePath(path);
+        const normalizedDepth = Math.min(depth, MAX_TREE_DEPTH);
+        const normalizedMaxEntries = Math.min(maxEntries, MAX_MAX_TREE_ENTRIES);
+
+        const repoInfoResult = await getRepoInfoByName(repo);
+        if (isServiceError(repoInfoResult) || !repoInfoResult) {
+            throw new Error(`Repository "${repo}" not found.`);
+        }
+        const repoInfo: ListTreeRepoInfo = {
+            name: repoInfoResult.name,
+            displayName: repoInfoResult.displayName ?? repoInfoResult.name,
+            codeHostType: repoInfoResult.codeHostType,
+        };
+
+        if (!includeFiles && !includeDirectories) {
+            const metadata: ListTreeMetadata = {
+                repo,
+                repoInfo,
+                ref,
+                path: normalizedPath,
+                entries: [],
+                totalReturned: 0,
+                truncated: false,
+            };
+            return { output: 'No entries found', metadata };
+        }
+
+        const queue: Array<{ path: string; depth: number }> = [{ path: normalizedPath, depth: 0 }];
+        const queuedPaths = new Set<string>([normalizedPath]);
+        const seenEntries = new Set<string>();
+        const entries: ListTreeEntry[] = [];
+        let truncated = false;
+
+        // Level-by-level traversal: each pass drains all queued paths of one depth into one getTree call.
+        while (queue.length > 0 && !truncated) {
+            const currentDepth = queue[0]!.depth;
+            const currentLevelPaths: string[] = [];
+
+            while (queue.length > 0 && queue[0]!.depth === currentDepth) {
+                currentLevelPaths.push(queue.shift()!.path);
+            }
+
+            const treeResult = await getTree({
+                repoName: repo,
+                revisionName: ref,
+                paths: currentLevelPaths.filter(Boolean),
+            }, { source: context.source });
+
+            if (isServiceError(treeResult)) {
+                throw new Error(treeResult.message);
+            }
+
+            const treeNodeIndex = buildTreeNodeIndex(treeResult.tree);
+
+            for (const currentPath of currentLevelPaths) {
+                const currentNode = currentPath === '' ? treeResult.tree : treeNodeIndex.get(currentPath);
+                if (!currentNode || currentNode.type !== 'tree') {
+                    continue;
+                }
+
+                for (const child of currentNode.children) {
+                    if (child.type !== 'tree' && child.type !== 'blob') {
+                        continue;
+                    }
+
+                    const childPath = joinTreePath(currentPath, child.name);
+                    const childDepth = currentDepth + 1;
+
+                    // Descend into sub-directories only while below the requested depth; enqueue each path once.
+                    if (child.type === 'tree' && childDepth < normalizedDepth && !queuedPaths.has(childPath)) {
+                        queue.push({ path: childPath, depth: childDepth });
+                        queuedPaths.add(childPath);
+                    }
+
+                    if ((child.type === 'blob' && !includeFiles) || (child.type === 'tree' && !includeDirectories)) {
+                        continue;
+                    }
+
+                    const key = `${child.type}:${childPath}`;
+                    if (seenEntries.has(key)) {
+                        continue;
+                    }
+                    seenEntries.add(key);
+
+                    // Stop once the cap is reached; `truncated` also terminates the enclosing loops.
+                    if (entries.length >= normalizedMaxEntries) {
+                        truncated = true;
+                        break;
+                    }
+
+                    entries.push({
+                        type: child.type as 'tree' | 'blob',
+                        path: childPath,
+                        name: child.name,
+                        parentPath: currentPath,
+                        depth: childDepth,
+                    });
+                }
+
+                if (truncated) {
+                    break;
+                }
+            }
+        }
+
+        const sortedEntries = sortTreeEntries(entries);
+        const metadata: ListTreeMetadata = {
+            repo, repoInfo, ref, path: normalizedPath,
+            entries: sortedEntries,
+            totalReturned: sortedEntries.length,
+            truncated,
+        };
+
+        const outputLines = [normalizedPath || '/'];
+
+        const childrenByPath = new Map<string, ListTreeEntry[]>();
+        for (const entry of sortedEntries) {
+            const siblings = childrenByPath.get(entry.parentPath) ?? [];
+            siblings.push(entry);
+            childrenByPath.set(entry.parentPath, siblings);
+        }
+
+        function renderEntries(parentPath: string) {
+            const children = childrenByPath.get(parentPath) ?? [];
+            for (const entry of children) {
+                const indent = ' '.repeat(entry.depth);
+                const label = entry.type === 'tree' ? `${entry.name}/` : entry.name;
+                outputLines.push(`${indent}${label}`);
+                if (entry.type === 'tree') {
+                    renderEntries(entry.path);
+                }
+            }
+        }
+
+        renderEntries(normalizedPath);
+
+        if (sortedEntries.length === 0) {
+            outputLines.push(' (no entries found)');
+        }
+
+        if (truncated) {
+            outputLines.push('');
+            outputLines.push(`(truncated — showing first ${normalizedMaxEntries} entries)`);
+        }
+
+        return { output: outputLines.join('\n'), metadata };
+    },
+};
diff --git a/packages/web/src/features/tools/listTree.txt b/packages/web/src/features/tools/listTree.txt
new file mode 100644
index 000000000..3737ddfd9
--- /dev/null
+++ b/packages/web/src/features/tools/listTree.txt
@@ -0,0 +1,9 @@
+Lists files and directories from a repository path. This can be used as a repo tree tool or directory listing tool. Returns a flat list of entries with path metadata and depth relative to the requested path.
+
+Usage:
+- If the repository name is not known, use `list_repos` first to discover the correct name.
+- Start with a shallow depth (default: 1) to get a high-level overview, then drill into specific subdirectories as needed.
+- Use `path` to scope the listing to a subdirectory rather than fetching the entire tree at once.
+- Set `includeFiles: false` to list only directories when you only need the directory structure.
+- Set `includeDirectories: false` to list only files when you only need leaf nodes.
+- Call this tool in parallel when you need to explore multiple directories simultaneously.
diff --git a/packages/web/src/features/tools/logger.ts b/packages/web/src/features/tools/logger.ts
new file mode 100644
index 000000000..2d1bb7dbe
--- /dev/null
+++ b/packages/web/src/features/tools/logger.ts
@@ -0,0 +1,3 @@
+import { createLogger } from "@sourcebot/shared";
+
+export const logger = createLogger('tool');
diff --git a/packages/web/src/features/tools/readFile.ts b/packages/web/src/features/tools/readFile.ts
new file mode 100644
index 000000000..0119d59aa
--- /dev/null
+++ b/packages/web/src/features/tools/readFile.ts
@@ -0,0 +1,139 @@
+import { z } from "zod";
+import { isServiceError } from "@/lib/utils";
+import { getFileSource } from "@/features/git";
+import { ToolDefinition } from "./types";
+import { logger } from "./logger";
+import description from "./readFile.txt";
+import { CodeHostType } from "@sourcebot/db";
+import { getRepoInfoByName } from "@/actions";
+
+// NOTE: if you change these values, update readFile.txt to match.
+const READ_FILES_MAX_LINES = 500;
+const MAX_LINE_LENGTH = 2000;
+const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`;
+const MAX_BYTES = 5 * 1024;
+const MAX_BYTES_LABEL = `${MAX_BYTES / 1024}KB`;
+
+const readFileShape = {
+    path: z.string().describe("The path to the file"),
+    repo: z.string().describe("The repository to read the file from"),
+    offset: z.number().int().positive()
+        .optional()
+        .describe("Line number to start reading from (1-indexed). Omit to start from the beginning."),
+    limit: z.number().int().positive()
+        .optional()
+        .describe(`Maximum number of lines to read (max: ${READ_FILES_MAX_LINES}). Omit to read up to ${READ_FILES_MAX_LINES} lines.`),
+};
+
+export type ReadFileRepoInfo = {
+    name: string;
+    displayName: string;
+    codeHostType: CodeHostType;
+};
+
+export type ReadFileMetadata = {
+    path: string;
+    repo: string;
+    repoInfo: ReadFileRepoInfo;
+    language: string;
+    startLine: number;
+    endLine: number;
+    isTruncated: boolean;
+    revision: string;
+};
+
+/**
+ * `read_file` tool: reads a file at HEAD and returns up to READ_FILES_MAX_LINES numbered lines
+ * starting at `offset`, truncating over-long lines and capping the output near MAX_BYTES.
+ * Throws an `Error` when the file cannot be read or the repository cannot be resolved.
+ */
+export const readFileDefinition: ToolDefinition<"read_file", typeof readFileShape, ReadFileMetadata> = {
+    name: "read_file",
+    title: "Read file",
+    isReadOnly: true,
+    isIdempotent: true,
+    description,
+    inputSchema: z.object(readFileShape),
+    execute: async ({ path, repo, offset, limit }, context) => {
+        logger.debug('read_file', { path, repo, offset, limit });
+        // @todo: make revision configurable.
+        const revision = "HEAD";
+
+        const fileSource = await getFileSource({
+            path,
+            repo,
+            ref: revision,
+        }, { source: context.source });
+
+        if (isServiceError(fileSource)) {
+            throw new Error(fileSource.message);
+        }
+
+        const lines = fileSource.source.split('\n');
+        const start = (offset ?? 1) - 1;
+        const end = start + Math.min(limit ?? READ_FILES_MAX_LINES, READ_FILES_MAX_LINES);
+
+        let bytes = 0;
+        let truncatedByBytes = false;
+        const slicedLines: string[] = [];
+        for (const raw of lines.slice(start, end)) {
+            const line = raw.length > MAX_LINE_LENGTH ? raw.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : raw;
+            const size = Buffer.byteLength(line, 'utf-8') + (slicedLines.length > 0 ? 1 : 0);
+            // Always emit at least one line, even if it alone exceeds MAX_BYTES; otherwise the
+            // "Use offset=N to continue" hint would repeat the same offset and never advance.
+            if (bytes + size > MAX_BYTES && slicedLines.length > 0) {
+                truncatedByBytes = true;
+                break;
+            }
+            slicedLines.push(line);
+            bytes += size;
+        }
+
+        const truncatedByLines = end < lines.length;
+        const startLine = (offset ?? 1);
+        const lastReadLine = startLine + slicedLines.length - 1;
+        const nextOffset = lastReadLine + 1;
+
+        let output = [
+            `${fileSource.repo}`,
+            `${fileSource.path}`,
+            `${fileSource.externalWebUrl}`,
+            '\n'
+        ].join('\n');
+
+        output += slicedLines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
+
+        if (truncatedByBytes) {
+            output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${startLine}-${lastReadLine} of ${lines.length}. Use offset=${nextOffset} to continue.)`;
+        } else if (truncatedByLines) {
+            output += `\n\n(Showing lines ${startLine}-${lastReadLine} of ${lines.length}. Use offset=${nextOffset} to continue.)`;
+        } else {
+            output += `\n\n(End of file - ${lines.length} lines total)`;
+        }
+
+        output += `\n`;
+
+        const repoInfoResult = await getRepoInfoByName(fileSource.repo);
+        if (isServiceError(repoInfoResult) || !repoInfoResult) {
+            throw new Error(`Repository "${fileSource.repo}" not found.`);
+        }
+        const repoInfo: ReadFileRepoInfo = {
+            name: repoInfoResult.name,
+            displayName: repoInfoResult.displayName ?? repoInfoResult.name,
+            codeHostType: repoInfoResult.codeHostType,
+        };
+
+        const metadata: ReadFileMetadata = {
+            path: fileSource.path,
+            repo: fileSource.repo,
+            repoInfo,
+            language: fileSource.language,
+            startLine,
+            endLine: lastReadLine,
+            isTruncated: truncatedByBytes || truncatedByLines,
+            revision,
+        };
+
+        return { output, metadata };
+    },
+};
diff --git a/packages/web/src/features/tools/readFile.txt b/packages/web/src/features/tools/readFile.txt
new file mode 100644
index 000000000..9e1590bb6
--- /dev/null
+++ b/packages/web/src/features/tools/readFile.txt
@@ -0,0 +1,9 @@
+Read the contents of a file in a repository.
+
+Usage:
+- Use offset/limit to read a specific portion of a file, which is strongly preferred for large files when only a specific section is needed.
+- Maximum 500 lines per call. Output is also capped at 5KB — if the cap is hit, call again with a larger offset to continue reading. 
+- Any line longer than 2000 characters is truncated.
+- The response content includes the line range read and total line count. If the output was truncated, the next offset to continue reading is also included.
+- Call this tool in parallel when you need to read multiple files simultaneously.
+- Avoid tiny repeated slices. If you need more context, read a larger window.
diff --git a/packages/web/src/features/tools/types.ts b/packages/web/src/features/tools/types.ts
new file mode 100644
index 000000000..437f17b01
--- /dev/null
+++ b/packages/web/src/features/tools/types.ts
@@ -0,0 +1,28 @@
+import { z } from "zod";
+
+/** Per-call context handed to a tool's `execute` function. */
+export interface ToolContext {
+    source?: string;
+    selectedRepos?: string[];
+}
+
+/** Describes a tool: its name, display metadata, zod input schema and async `execute` function. */
+export interface ToolDefinition<
+    TName extends string,
+    TShape extends z.ZodRawShape,
+    TMetadata = Record<string, unknown>,
+> {
+    name: TName;
+    title: string;
+    description: string;
+    inputSchema: z.ZodObject<TShape>;
+    isReadOnly: boolean;
+    isIdempotent: boolean;
+    execute: (input: z.infer<z.ZodObject<TShape>>, context: ToolContext) => Promise<ToolResult<TMetadata>>;
+}
+
+/** Result of a tool call: the text `output` plus structured `metadata`. */
+export interface ToolResult<TMetadata = Record<string, unknown>> {
+    output: string;
+    metadata: TMetadata;
+}
diff --git a/packages/web/src/lib/utils.ts b/packages/web/src/lib/utils.ts
index dd7f783e5..d61832326 100644
--- a/packages/web/src/lib/utils.ts
+++ b/packages/web/src/lib/utils.ts
@@ -507,13 +507,13 @@ export const getFormattedDate = (date: Date) => {
 /**
  * Converts a number to a string
  */
-export const getShortenedNumberDisplayString = (number: number) => {
+export const getShortenedNumberDisplayString = (number: number, fractionDigits: number = 1) => {
     if (number < 1000) {
         return number.toString();
     } else if (number < 1000000) {
-        return `${(number / 1000).toFixed(1)}k`;
+        return `${(number / 1000).toFixed(fractionDigits)}k`;
     } else {
-        return `${(number / 1000000).toFixed(1)}m`;
+        return `${(number / 1000000).toFixed(fractionDigits)}m`;
     }
 }
 
diff --git a/packages/web/tools/globToRegexpPlayground.ts b/packages/web/tools/globToRegexpPlayground.ts
new file mode 100644
index 
000000000..fc915b55b --- /dev/null +++ b/packages/web/tools/globToRegexpPlayground.ts @@ -0,0 +1,111 @@ +import globToRegexp from 'glob-to-regexp'; +import escapeStringRegexp from 'escape-string-regexp'; + +// ------------------------------------------------------- +// Playground for building Sourcebot/zoekt search queries +// from grep-style (pattern, path, include) inputs. +// +// Run with: yarn workspace @sourcebot/web tsx tools/globToRegexpPlayground.ts +// ------------------------------------------------------- + +interface SearchInput { + pattern: string; // content search term or regex + path?: string; // directory prefix, e.g. "packages/web/src" + include?: string; // glob for filenames, e.g. "*.ts" or "**/*.{ts,tsx}" +} + +function globToFileRegexp(glob: string): string { + const re = globToRegexp(glob, { extended: true, globstar: true }); + // Strip ^ anchor — Sourcebot file paths include the full repo-relative path, + // so the pattern shouldn't be anchored to the start. + return re.source.replace(/^\^/, ''); +} + +function buildRipgrepCommand({ pattern, path, include }: SearchInput): string { + const parts = ['rg', `"${pattern.replace(/"/g, '\\"')}"`]; + if (path) parts.push(path); + if (include) parts.push(`--glob "${include}"`); + return parts.join(' '); +} + +function buildZoektQuery({ pattern, path, include }: SearchInput): string { + const parts: string[] = [`"${pattern.replace(/"/g, '\\"')}"`]; + + if (path) { + parts.push(`file:${escapeStringRegexp(path)}`); + } + + if (include) { + parts.push(`file:${globToFileRegexp(include)}`); + } + + return parts.join(' '); +} + +// ------------------------------------------------------- +// Examples +// ------------------------------------------------------- + +const examples: SearchInput[] = [ + // Broad content search, no file scoping + { pattern: 'isServiceError' }, + + // Scoped to a directory + { pattern: 'isServiceError', path: 'packages/web/src' }, + + // Scoped to a file type + { pattern: 
'isServiceError', include: '*.ts' }, + + // Scoped to both + { pattern: 'isServiceError', path: 'packages/web/src', include: '*.ts' }, + + // Multiple extensions via glob + { pattern: 'useQuery', include: '**/*.{ts,tsx}' }, + + // Test files only + { pattern: 'expect\\(', include: '*.test.ts' }, + + // Specific subdirectory + extension + { pattern: 'withAuthV2', path: 'packages/web/src/app', include: '**/*.ts' }, + + // Next.js route group — parens in path are regex special chars + { pattern: 'withAuthV2', path: 'packages/web/src/app/api/(server)', include: '**/*.ts' }, + + // Next.js dynamic segment — brackets in path are regex special chars + { pattern: 'withOptionalAuthV2', path: 'packages/web/src/app/[domain]', include: '**/*.ts' }, + + // Pattern with spaces — must be quoted in zoekt query + { pattern: 'Starting scheduler', include: '**/*.ts' }, + + // Literal phrase in a txt file + { pattern: String.raw`"hello world"`, include: '**/*.txt' }, + + // Pattern with a quote character + { pattern: 'from "@/lib', include: '**/*.ts' }, + + // Pattern with a backslash — needs double-escaping in zoekt quoted terms + { pattern: String.raw`C:\\\\Windows\\\\System32`, include: '**/*.ts' }, +]; + +function truncate(str: string, width: number): string { + return str.length > width ? str.slice(0, width - 3) + '...' 
: str.padEnd(width); +} + +const col1 = 70; +const col2 = 75; +console.log(truncate('input', col1) + truncate('ripgrep', col2) + 'zoekt query'); +console.log('-'.repeat(col1 + col2 + 50)); + +function prettyPrint(example: SearchInput): string { + const fields = Object.entries(example) + .map(([k, v]) => `${k}: '${v}'`) + .join(', '); + return `{ ${fields} }`; +} + +for (const example of examples) { + const input = prettyPrint(example); + const rg = buildRipgrepCommand(example); + const zoekt = buildZoektQuery(example); + console.log(truncate(input, col1) + rg.padEnd(col2) + zoekt); +} diff --git a/packages/web/types.d.ts b/packages/web/types.d.ts new file mode 100644 index 000000000..bceb5175d --- /dev/null +++ b/packages/web/types.d.ts @@ -0,0 +1,4 @@ +declare module '*.txt' { + const content: string; + export default content; +} diff --git a/yarn.lock b/yarn.lock index 4a2dfdb3f..28671cdae 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8949,6 +8949,7 @@ __metadata: "@tanstack/react-virtual": "npm:^3.10.8" "@testing-library/dom": "npm:^10.4.1" "@testing-library/react": "npm:^16.3.0" + "@types/glob-to-regexp": "npm:^0.4.4" "@types/micromatch": "npm:^4.0.9" "@types/node": "npm:^20" "@types/nodemailer": "npm:^6.4.17" @@ -8998,6 +8999,7 @@ __metadata: eslint-plugin-react-hooks: "npm:^7.0.1" fast-deep-equal: "npm:^3.1.3" fuse.js: "npm:^7.0.0" + glob-to-regexp: "npm:^0.4.1" google-auth-library: "npm:^10.1.0" graphql: "npm:^16.9.0" http-status-codes: "npm:^2.3.0" @@ -9022,6 +9024,7 @@ __metadata: posthog-node: "npm:^5.24.15" pretty-bytes: "npm:^6.1.1" psl: "npm:^1.15.0" + raw-loader: "npm:^4.0.2" react: "npm:19.2.4" react-device-detect: "npm:^2.2.3" react-dom: "npm:19.2.4" @@ -9052,6 +9055,7 @@ __metadata: tsx: "npm:^4.19.2" typescript: "npm:^5" typescript-eslint: "npm:^8.56.1" + use-stick-to-bottom: "npm:^1.1.3" usehooks-ts: "npm:^3.1.0" vite-tsconfig-paths: "npm:^5.1.3" vitest: "npm:^2.1.5" @@ -9462,6 +9466,13 @@ __metadata: languageName: node linkType: hard 
+"@types/glob-to-regexp@npm:^0.4.4": + version: 0.4.4 + resolution: "@types/glob-to-regexp@npm:0.4.4" + checksum: 10c0/7288ff853850d8302a8770a3698b187fc3970ad12ee6427f0b3758a3e7a0ebb0bd993abc6ebaaa979d09695b4194157d2bfaa7601b0fb9ed72c688b4c1298b88 + languageName: node + linkType: hard + "@types/hast@npm:^3.0.0, @types/hast@npm:^3.0.4": version: 3.0.4 resolution: "@types/hast@npm:3.0.4" @@ -9485,7 +9496,7 @@ __metadata: languageName: node linkType: hard -"@types/json-schema@npm:^7.0.15": +"@types/json-schema@npm:^7.0.15, @types/json-schema@npm:^7.0.8": version: 7.0.15 resolution: "@types/json-schema@npm:7.0.15" checksum: 10c0/a996a745e6c5d60292f36731dd41341339d4eeed8180bb09226e5c8d23759067692b1d88e5d91d72ee83dfc00d3aca8e7bd43ea120516c17922cbcb7c3e252db @@ -10528,6 +10539,15 @@ __metadata: languageName: node linkType: hard +"ajv-keywords@npm:^3.5.2": + version: 3.5.2 + resolution: "ajv-keywords@npm:3.5.2" + peerDependencies: + ajv: ^6.9.1 + checksum: 10c0/0c57a47cbd656e8cdfd99d7c2264de5868918ffa207c8d7a72a7f63379d4333254b2ba03d69e3c035e996a3fd3eb6d5725d7a1597cca10694296e32510546360 + languageName: node + linkType: hard + "ajv@npm:^6.12.4": version: 6.12.6 resolution: "ajv@npm:6.12.6" @@ -10540,7 +10560,7 @@ __metadata: languageName: node linkType: hard -"ajv@npm:^6.14.0": +"ajv@npm:^6.12.5, ajv@npm:^6.14.0": version: 6.14.0 resolution: "ajv@npm:6.14.0" dependencies: @@ -10953,6 +10973,13 @@ __metadata: languageName: node linkType: hard +"big.js@npm:^5.2.2": + version: 5.2.2 + resolution: "big.js@npm:5.2.2" + checksum: 10c0/230520f1ff920b2d2ce3e372d77a33faa4fa60d802fe01ca4ffbc321ee06023fe9a741ac02793ee778040a16b7e497f7d60c504d1c402b8fdab6f03bb785a25f + languageName: node + linkType: hard + "bignumber.js@npm:^9.0.0": version: 9.3.0 resolution: "bignumber.js@npm:9.3.0" @@ -12722,6 +12749,13 @@ __metadata: languageName: node linkType: hard +"emojis-list@npm:^3.0.0": + version: 3.0.0 + resolution: "emojis-list@npm:3.0.0" + checksum: 
10c0/7dc4394b7b910444910ad64b812392159a21e1a7ecc637c775a440227dcb4f80eff7fe61f4453a7d7603fa23d23d30cc93fe9e4b5ed985b88d6441cd4a35117b + languageName: node + linkType: hard + "enabled@npm:2.0.x": version: 2.0.0 resolution: "enabled@npm:2.0.0" @@ -14572,6 +14606,13 @@ __metadata: languageName: node linkType: hard +"glob-to-regexp@npm:^0.4.1": + version: 0.4.1 + resolution: "glob-to-regexp@npm:0.4.1" + checksum: 10c0/0486925072d7a916f052842772b61c3e86247f0a80cc0deb9b5a3e8a1a9faad5b04fb6f58986a09f34d3e96cd2a22a24b7e9882fb1cf904c31e9a310de96c429 + languageName: node + linkType: hard + "glob@npm:^10.5.0": version: 10.5.0 resolution: "glob@npm:10.5.0" @@ -16031,7 +16072,7 @@ __metadata: languageName: node linkType: hard -"json5@npm:^2.2.1, json5@npm:^2.2.2, json5@npm:^2.2.3": +"json5@npm:^2.1.2, json5@npm:^2.2.1, json5@npm:^2.2.2, json5@npm:^2.2.3": version: 2.2.3 resolution: "json5@npm:2.2.3" bin: @@ -16280,6 +16321,17 @@ __metadata: languageName: node linkType: hard +"loader-utils@npm:^2.0.0": + version: 2.0.4 + resolution: "loader-utils@npm:2.0.4" + dependencies: + big.js: "npm:^5.2.2" + emojis-list: "npm:^3.0.0" + json5: "npm:^2.1.2" + checksum: 10c0/d5654a77f9d339ec2a03d88221a5a695f337bf71eb8dea031b3223420bb818964ba8ed0069145c19b095f6c8b8fd386e602a3fc7ca987042bd8bb1dcc90d7100 + languageName: node + linkType: hard + "locate-path@npm:^6.0.0": version: 6.0.0 resolution: "locate-path@npm:6.0.0" @@ -19124,6 +19176,18 @@ __metadata: languageName: node linkType: hard +"raw-loader@npm:^4.0.2": + version: 4.0.2 + resolution: "raw-loader@npm:4.0.2" + dependencies: + loader-utils: "npm:^2.0.0" + schema-utils: "npm:^3.0.0" + peerDependencies: + webpack: ^4.0.0 || ^5.0.0 + checksum: 10c0/981ebe65e1cee7230300d21ba6dcd8bd23ea81ef4ad2b167c0f62d93deba347f27921d330be848634baab3831cf9f38900af6082d6416c2e937fe612fa6a74ff + languageName: node + linkType: hard + "react-device-detect@npm:^2.2.3": version: 2.2.3 resolution: "react-device-detect@npm:2.2.3" @@ -20032,6 +20096,17 @@ 
__metadata: languageName: node linkType: hard +"schema-utils@npm:^3.0.0": + version: 3.3.0 + resolution: "schema-utils@npm:3.3.0" + dependencies: + "@types/json-schema": "npm:^7.0.8" + ajv: "npm:^6.12.5" + ajv-keywords: "npm:^3.5.2" + checksum: 10c0/fafdbde91ad8aa1316bc543d4b61e65ea86970aebbfb750bfb6d8a6c287a23e415e0e926c2498696b242f63af1aab8e585252637fabe811fd37b604351da6500 + languageName: node + linkType: hard + "scroll-into-view-if-needed@npm:^3.1.0": version: 3.1.0 resolution: "scroll-into-view-if-needed@npm:3.1.0" @@ -22121,6 +22196,15 @@ __metadata: languageName: node linkType: hard +"use-stick-to-bottom@npm:^1.1.3": + version: 1.1.3 + resolution: "use-stick-to-bottom@npm:1.1.3" + peerDependencies: + react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + checksum: 10c0/60408d47b4ddac959a8f170fe9806cdea94fd8a51d3b58cbcce6246ef9babde56d1a5f1a14cb12f474d351430145464992aaac008178924c60191b6c61954bf7 + languageName: node + linkType: hard + "use-sync-external-store@npm:^1.4.0": version: 1.5.0 resolution: "use-sync-external-store@npm:1.5.0"