diff --git a/examples/wiki-explorer-server/server.test.ts b/examples/wiki-explorer-server/server.test.ts
new file mode 100644
index 00000000..005ec026
--- /dev/null
+++ b/examples/wiki-explorer-server/server.test.ts
@@ -0,0 +1,110 @@
+import { describe, it, expect, beforeEach, afterEach, spyOn } from "bun:test";
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
+import { createServer } from "./server";
+
+function firstText(r: Awaited<ReturnType<Client["callTool"]>>): string {
+  return (r.content as Array<{ type: string; text: string }>)[0].text;
+}
+
+describe("wiki-explorer URL validation", () => {
+  let server: ReturnType<typeof createServer>;
+  let client: Client;
+
+  beforeEach(async () => {
+    server = createServer();
+    client = new Client({ name: "test", version: "1" });
+    const [ct, st] = InMemoryTransport.createLinkedPair();
+    await Promise.all([server.connect(st), client.connect(ct)]);
+  });
+
+  afterEach(async () => {
+    await client.close();
+    await server.close();
+  });
+
+  it("rejects non-Wikipedia URLs", async () => {
+    const r = await client.callTool({
+      name: "get-first-degree-links",
+      arguments: { url: "https://evil.com/wiki/Test" },
+    });
+    const result = JSON.parse(firstText(r));
+    expect(result.error).toBe("Not a valid Wikipedia URL");
+  });
+
+  it("rejects path traversal that escapes /wiki/", async () => {
+    // This URL passes the old regex but resolves to /w/api.php (outside /wiki/)
+    const r = await client.callTool({
+      name: "get-first-degree-links",
+      arguments: {
+        url: "https://en.wikipedia.org/wiki/../../w/api.php?action=query&list=allusers",
+      },
+    });
+    const result = JSON.parse(firstText(r));
+    expect(result.error).toBe("Not a valid Wikipedia URL");
+  });
+
+  it("rejects path traversal to API endpoints", async () => {
+    const r = await client.callTool({
+      name: "get-first-degree-links",
+      arguments: {
+        url: "https://en.wikipedia.org/wiki/../../../api/rest_v1/feed/featured/2024/01/01",
+      },
+    });
+    const result = JSON.parse(firstText(r));
+    expect(result.error).toBe("Not a valid Wikipedia URL");
+  });
+
+  it("accepts valid Wikipedia URLs", async () => {
+    // Mock fetch to avoid real network requests
+    const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      new Response("Test", {
+        status: 200,
+        headers: { "Content-Type": "text/html" },
+      }),
+    );
+
+    try {
+      const r = await client.callTool({
+        name: "get-first-degree-links",
+        arguments: {
+          url: "https://en.wikipedia.org/wiki/Model_Context_Protocol",
+        },
+      });
+      const result = JSON.parse(firstText(r));
+      expect(result.error).toBeNull();
+      expect(result.page.url).toBe(
+        "https://en.wikipedia.org/wiki/Model_Context_Protocol",
+      );
+    } finally {
+      mockFetch.mockRestore();
+    }
+  });
+
+  it("disables redirect following on fetch", async () => {
+    // Ensure fetch is called with redirect: 'error' or 'manual' to prevent
+    // following redirects to non-Wikipedia domains
+    const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce(
+      new Response("", {
+        status: 200,
+        headers: { "Content-Type": "text/html" },
+      }),
+    );
+
+    try {
+      await client.callTool({
+        name: "get-first-degree-links",
+        arguments: {
+          url: "https://en.wikipedia.org/wiki/Test_Page",
+        },
+      });
+      expect(mockFetch).toHaveBeenCalledTimes(1);
+      const fetchArgs = mockFetch.mock.calls[0];
+      // Second argument should have redirect: "error"
+      expect(fetchArgs[1]).toBeDefined();
+      expect((fetchArgs[1] as RequestInit).redirect).toBe("error");
+    } finally {
+      mockFetch.mockRestore();
+    }
+  });
+});
diff --git a/examples/wiki-explorer-server/server.ts b/examples/wiki-explorer-server/server.ts
index 59d5e18e..40bddde1 100644
--- a/examples/wiki-explorer-server/server.ts
+++ b/examples/wiki-explorer-server/server.ts
@@ -31,6 +31,37 @@ function extractTitleFromUrl(url: string): string {
   }
 }
 
+/**
+ * Validate that a URL points to a Wikipedia wiki page.
+ * Uses parsed URL components (not raw string matching) to prevent
+ * path-traversal bypasses such as `/wiki/../../w/api.php`.
+ */
+function isValidWikipediaUrl(url: string): boolean {
+  let parsed: URL;
+  try {
+    parsed = new URL(url);
+  } catch {
+    return false;
+  }
+
+  // Protocol must be HTTPS (or HTTP for dev, matching prior behaviour)
+  if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
+    return false;
+  }
+
+  // Hostname must be .wikipedia.org — language codes are lowercase ASCII
+  if (!/^[a-z]+\.wikipedia\.org$/.test(parsed.hostname)) {
+    return false;
+  }
+
+  // After URL resolution, the pathname must still start with /wiki/
+  if (!parsed.pathname.startsWith("/wiki/")) {
+    return false;
+  }
+
+  return true;
+}
+
 // Wikipedia namespace prefixes to exclude from link extraction
 const EXCLUDED_PREFIXES = [
   "Wikipedia:",
@@ -113,13 +144,13 @@ export function createServer(): McpServer {
       let title = url;
 
       try {
-        if (!url.match(/^https?:\/\/[a-z]+\.wikipedia\.org\/wiki\//)) {
+        if (!isValidWikipediaUrl(url)) {
          throw new Error("Not a valid Wikipedia URL");
        }
 
        title = extractTitleFromUrl(url);
 
-        const response = await fetch(url);
+        const response = await fetch(url, { redirect: "error" });
 
        if (!response.ok) {
          throw new Error(