These are the release notes.
+diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py
index b42c7b1f6b..dc75abd4fc 100644
--- a/src/fetch/src/mcp_server_fetch/server.py
+++ b/src/fetch/src/mcp_server_fetch/server.py
@@ -41,7 +41,15 @@ def extract_content_from_html(html: str) -> str:
     content = markdownify.markdownify(
         ret["content"],
         heading_style=markdownify.ATX,
-    )
+    ).lstrip()
+    title = ret.get("title")
+    if title:
+        title_markdown = f"# {title.strip()}"
+        # Readability often omits the document title when the article body does
+        # not repeat it. Include it once so callers can identify fetched pages,
+        # while avoiding duplicate headings when the title is already present.
+        if title_markdown.casefold() not in content[: len(title_markdown) + 32].casefold():
+            content = f"{title_markdown}\n\n{content}"
     return content
diff --git a/src/fetch/tests/test_server.py b/src/fetch/tests/test_server.py
index 96c1cb38c7..f6dd0a880a 100644
--- a/src/fetch/tests/test_server.py
+++ b/src/fetch/tests/test_server.py
@@ -67,6 +67,43 @@ def test_simple_html(self):
         # readabilipy may extract different parts depending on the content
         assert "test paragraph" in result
+    def test_html_includes_document_title_when_omitted_from_article(self):
+        """Test that simplified markdown preserves the page title."""
+        html = """
+
+
These are the release notes.
+Article body.
+