diff --git a/python/semantic_kernel/contents/chat_history.py b/python/semantic_kernel/contents/chat_history.py
index e5daec19fef5..10ac1a6b1d39 100644
--- a/python/semantic_kernel/contents/chat_history.py
+++ b/python/semantic_kernel/contents/chat_history.py
@@ -345,16 +345,44 @@ def from_rendered_prompt(cls: type[_T], rendered_prompt: str) -> _T:
         except ParseError as exc:
             logger.info(f"Could not parse prompt {prompt} as xml, treating as text, error was: {exc}")
             return cls(messages=[ChatMessageContent(role=AuthorRole.USER, content=unescape(prompt))])
-        if xml_prompt.text and xml_prompt.text.strip():
-            messages.append(ChatMessageContent(role=AuthorRole.SYSTEM, content=unescape(xml_prompt.text.strip())))
+        # Buffer free text and unrecognized HTML-like elements (e.g., <p>, <div>):
+        # they are valid XML but not chat message tags, so their content must be
+        # kept as prompt text instead of being silently dropped (#13632).
+        pending_text_parts: list[str] = []
+        if xml_prompt.text:
+            pending_text_parts.append(xml_prompt.text)
+        # Text seen before any chat element is SYSTEM content; text after one is USER.
+        pending_role = AuthorRole.SYSTEM
+
+        def flush_pending_text(role: AuthorRole) -> None:
+            """Emit the buffered text as one ``role`` message; blank text is discarded."""
+            combined = "".join(pending_text_parts).strip()
+            pending_text_parts.clear()
+            if combined:
+                messages.append(ChatMessageContent(role=role, content=unescape(combined)))
+
         for item in xml_prompt:
             if item.tag == CHAT_MESSAGE_CONTENT_TAG:
+                # Flush buffered text first so message order matches prompt order.
+                flush_pending_text(pending_role)
                 messages.append(ChatMessageContent.from_element(item))
             elif item.tag == CHAT_HISTORY_TAG:
+                flush_pending_text(pending_role)
                 for message in item:
                     messages.append(ChatMessageContent.from_element(message))
-            if item.tail and item.tail.strip():
-                messages.append(ChatMessageContent(role=AuthorRole.USER, content=unescape(item.tail.strip())))
+            else:
+                # Unrecognized element (e.g., <p>, <span>, <div>): serialize it back to
+                # XML so the original markup stays in the prompt text. tostring()
+                # already includes the element's tail, so skip the tail handling below.
+                pending_text_parts.append(tostring(item, encoding="unicode"))
+                continue
+            # Tail text after a chat element joins the buffer as USER content, so it
+            # merges with any HTML-like elements that follow it into one message.
+            pending_role = AuthorRole.USER
+            if item.tail:
+                pending_text_parts.append(item.tail)
+
+        flush_pending_text(pending_role)
         if len(messages) == 1 and messages[0].role == AuthorRole.SYSTEM:
             messages[0].role = AuthorRole.USER
         return cls(messages=messages)
diff --git a/python/tests/unit/contents/test_chat_history.py b/python/tests/unit/contents/test_chat_history.py
index ac41949a7325..b993116ec4f6 100644
--- a/python/tests/unit/contents/test_chat_history.py
+++ b/python/tests/unit/contents/test_chat_history.py
@@ -592,6 +592,66 @@ async def test_template_empty_history(chat_history: ChatHistory):
     assert chat_history_2.messages[1].role == AuthorRole.USER
 
 
+def test_chat_history_from_rendered_prompt_with_html_tags():
+    """Regression test for #13632: HTML tags in prompts caused content to be dropped.
+
+    When the prompt contains valid XML tags like <p>, <div>, etc., the XML parser
+    was treating them as elements and silently discarding their content because
+    they weren't recognized as chat message tags. The fix serializes unrecognized
+    elements back to their string representation.
+    """
+    # Prompt with HTML-like tags
+    prompt_with_html = 'Translate this: "<p>What is your name?</p>"'
+    # Same prompt without HTML tags
+    prompt_without_html = 'Translate this: "What is your name?"'
+
+    history_with_html = ChatHistory.from_rendered_prompt(prompt_with_html)
+    history_without_html = ChatHistory.from_rendered_prompt(prompt_without_html)
+
+    # Both should produce a single user message
+    assert len(history_with_html.messages) == 1
+    assert len(history_without_html.messages) == 1
+
+    # Both should contain the question text
+    assert "What is your name?" in history_with_html.messages[0].content
+    assert "What is your name?" in history_without_html.messages[0].content
+
+    # The HTML version should preserve the <p> tags
+    assert "<p>" in history_with_html.messages[0].content
+    assert "</p>" in history_with_html.messages[0].content
+
+
+def test_chat_history_from_rendered_prompt_with_nested_html():
+    """Test that nested HTML-like tags are preserved."""
+    prompt = "Format this: <div><p>Hello</p><p>World</p></div>"
+
+    history = ChatHistory.from_rendered_prompt(prompt)
+
+    assert len(history.messages) == 1
+    assert "Hello" in history.messages[0].content
+    assert "World" in history.messages[0].content
+    assert "<div>" in history.messages[0].content
+    assert "<p>" in history.messages[0].content
+
+
+def test_chat_history_from_rendered_prompt_html_after_message():
+    """Text and HTML-like tags that follow a message form a single USER message."""
+    prompt = (
+        '<message role="system">Be brief.</message>'
+        "Translate: <p>Hello</p>"
+        '<message role="assistant">Bonjour</message>'
+    )
+
+    history = ChatHistory.from_rendered_prompt(prompt)
+
+    assert [message.role for message in history.messages] == [
+        AuthorRole.SYSTEM,
+        AuthorRole.USER,
+        AuthorRole.ASSISTANT,
+    ]
+    assert history.messages[1].content == "Translate: <p>Hello</p>"
+
+
 def test_to_from_file(chat_history: ChatHistory, tmp_path):
     chat_history.add_system_message("You are an AI assistant")
     chat_history.add_user_message("What is the weather in Seattle?")