Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

- Added support for evaluator `properties` passthrough in AOAI evaluation results. When an evaluator returns a `properties` dict, it is included alongside `score`, `label`, `reason`, `threshold`, and `passed` in the result object.

### Bugs Fixed

- Fixed unsandboxed Jinja2 template rendering in `_legacy/prompty/_utils.py` and `simulator/_conversation/__init__.py` that allowed Server-Side Template Injection (SSTI) leading to potential Remote Code Execution. Templates now use `jinja2.sandbox.SandboxedEnvironment` by default, matching the PromptFlow sandboxing pattern. Set `PF_USE_SANDBOX_FOR_JINJA=false` to opt out (not recommended).

## 1.16.3 (2026-04-01)

### Features Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
)

from jinja2 import Template
from jinja2.sandbox import SandboxedEnvironment
from openai import AsyncStream
from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionUserMessageParam
from openai import APIConnectionError, APIStatusError, APITimeoutError, OpenAIError
Expand Down Expand Up @@ -244,8 +245,16 @@ def update_dict_recursively(origin_dict: Mapping[str, Any], overwrite_dict: Mapp

def render_jinja_template(template_str: str, *, trim_blocks=True, keep_trailing_newline=True, **kwargs) -> str:
try:
template = Template(template_str, trim_blocks=trim_blocks, keep_trailing_newline=keep_trailing_newline)
return template.render(**kwargs)
use_sandbox_env = os.environ.get("PF_USE_SANDBOX_FOR_JINJA", "true")
if use_sandbox_env.lower() == "false":
template = Template(template_str, trim_blocks=trim_blocks, keep_trailing_newline=keep_trailing_newline)
return template.render(**kwargs)
else:
sandbox_env = SandboxedEnvironment(
trim_blocks=trim_blocks, keep_trailing_newline=keep_trailing_newline
)
sanitized_template = sandbox_env.from_string(template_str)
return sanitized_template.render(**kwargs)
Comment on lines +256 to +257
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Variable name "sanitized_template" is misleading here: SandboxedEnvironment.from_string() returns a compiled template but does not sanitize the template content. Renaming to something like "sandboxed_template"/"template" would better reflect what the value represents and avoid confusion when reading security-related code.

Suggested change
sanitized_template = sandbox_env.from_string(template_str)
return sanitized_template.render(**kwargs)
sandboxed_template = sandbox_env.from_string(template_str)
return sandboxed_template.render(**kwargs)

Copilot uses AI. Check for mistakes.
Comment on lines 246 to +257
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a unit/regression test that verifies render_jinja_template uses SandboxedEnvironment by default and blocks a representative SSTI payload with jinja2.exceptions.SecurityError. Given this is a security fix, a test would help catch future regressions (including the PF_USE_SANDBOX_FOR_JINJA opt-out behavior).

Copilot uses AI. Check for mistakes.
except Exception as e: # pylint: disable=broad-except
raise PromptyException(f"Failed to render jinja template - {type(e).__name__}: {str(e)}") from e

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@

import copy
import logging
import os
import time
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
import base64
import re
import jinja2
from jinja2.sandbox import SandboxedEnvironment

from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
Expand All @@ -20,6 +22,18 @@
from .constants import ConversationRole


def _create_jinja_template(template_content: str) -> jinja2.Template:
"""Create a Jinja2 template, using SandboxedEnvironment by default to prevent SSTI attacks.

Set env var PF_USE_SANDBOX_FOR_JINJA=false to opt out (not recommended).
"""
use_sandbox = os.environ.get("PF_USE_SANDBOX_FOR_JINJA", "true")
if use_sandbox.lower() == "false":
Comment on lines +25 to +31
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PF_USE_SANDBOX_FOR_JINJA parsing/behavior toggle is now duplicated here and in _legacy/prompty/_utils.py. To reduce the chance of the two implementations drifting (default value, accepted falsey values, StrictUndefined config), consider centralizing this into a shared helper or constant in a common module.

Suggested change
def _create_jinja_template(template_content: str) -> jinja2.Template:
"""Create a Jinja2 template, using SandboxedEnvironment by default to prevent SSTI attacks.
Set env var PF_USE_SANDBOX_FOR_JINJA=false to opt out (not recommended).
"""
use_sandbox = os.environ.get("PF_USE_SANDBOX_FOR_JINJA", "true")
if use_sandbox.lower() == "false":
_PF_USE_SANDBOX_FOR_JINJA_ENV_VAR = "PF_USE_SANDBOX_FOR_JINJA"
_PF_USE_SANDBOX_FOR_JINJA_DEFAULT = "true"
_PF_USE_SANDBOX_FOR_JINJA_FALSEY_VALUES = {"false"}
def _use_sandbox_for_jinja() -> bool:
"""Return whether sandboxed Jinja rendering is enabled."""
use_sandbox = os.environ.get(_PF_USE_SANDBOX_FOR_JINJA_ENV_VAR, _PF_USE_SANDBOX_FOR_JINJA_DEFAULT)
return use_sandbox.lower() not in _PF_USE_SANDBOX_FOR_JINJA_FALSEY_VALUES
def _create_jinja_template(template_content: str) -> jinja2.Template:
"""Create a Jinja2 template, using SandboxedEnvironment by default to prevent SSTI attacks.
Set env var PF_USE_SANDBOX_FOR_JINJA=false to opt out (not recommended).
"""
if not _use_sandbox_for_jinja():

Copilot uses AI. Check for mistakes.
return jinja2.Template(template_content, undefined=jinja2.StrictUndefined)
sandbox_env = SandboxedEnvironment(undefined=jinja2.StrictUndefined)
return sandbox_env.from_string(template_content)
Comment on lines +25 to +34
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a focused regression test for the sandboxed Jinja path (default) to ensure a known SSTI payload (e.g., accessing `__class__`/`__subclasses__`) raises jinja2.exceptions.SecurityError, and that PF_USE_SANDBOX_FOR_JINJA=false opts out as expected. This helps prevent accidental reintroduction of unsandboxed rendering in the simulator conversation templates.

Copilot uses AI. Check for mistakes.


@dataclass
class ConversationTurn:
"""Class to represent a turn in a conversation.
Expand Down Expand Up @@ -115,9 +129,7 @@ def __init__(
) -> None:
self.role = role
self.conversation_template_orig = conversation_template
self.conversation_template: jinja2.Template = jinja2.Template(
conversation_template, undefined=jinja2.StrictUndefined
)
self.conversation_template: jinja2.Template = _create_jinja_template(conversation_template)
self.persona_template_args = instantiation_parameters
if self.role == ConversationRole.USER:
self.name: str = cast(str, self.persona_template_args.get("name", role.value))
Expand All @@ -134,9 +146,7 @@ def __init__(
self.conversation_starter = conversation_starter_content
else:
try:
self.conversation_starter = jinja2.Template(
conversation_starter_content, undefined=jinja2.StrictUndefined
)
self.conversation_starter = _create_jinja_template(conversation_starter_content)
except jinja2.exceptions.TemplateSyntaxError as e: # noqa: F841
self.conversation_starter = conversation_starter_content
else:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
"""Tests for Jinja2 sandboxing in azure-ai-evaluation (MSRC-110257)."""

import os
from unittest.mock import patch

import jinja2
import pytest

from azure.ai.evaluation._legacy.prompty._utils import render_jinja_template
from azure.ai.evaluation.simulator._conversation import (
_create_jinja_template,
ConversationBot,
ConversationRole,
OpenAIChatCompletionsModel,
)


# Common SSTI payloads that should be blocked by the sandbox.
# Each one starts from a harmless literal and climbs through dunder attributes,
# which is the attribute access the tests below expect the sandbox to reject.

# Empty tuple -> its class -> that class's base -> every subclass of the base.
SSTI_PAYLOAD_CLASS = "{{ ().__class__.__base__.__subclasses__() }}"
# Loops over the same subclass list, picks entries whose name contains
# 'catch_warnings', and tries to reach __import__ through __globals__ in order
# to run `echo pwned` via os.popen.
SSTI_PAYLOAD_IMPORT = (
"{% for x in ().__class__.__base__.__subclasses__() %}"
"{% if 'catch_warnings' in x.__name__ %}"
"{{ x().__enter__.__globals__['__builtins__']['__import__']('os').popen('echo pwned').read() }}"
"{% endif %}{% endfor %}"
)
# Same subclass enumeration, but starting from an empty string and going
# through __mro__[1] instead of __base__.
# NOTE(review): the name says GETATTR but the payload uses __mro__ — consider renaming.
SSTI_PAYLOAD_GETATTR = "{{ ''.__class__.__mro__[1].__subclasses__() }}"


class MockModel(OpenAIChatCompletionsModel):
    """Chat-completions model built from placeholder values for use in tests.

    Takes no constructor arguments; the name, endpoint URL and token manager
    handed to the parent class are all fixed dummy strings.
    """

    def __init__(self):
        placeholder_kwargs = {
            "name": "mock",
            "endpoint_url": "https://mock",
            "token_manager": "mock",
        }
        super().__init__(**placeholder_kwargs)


# ============================================================
# Tests for _legacy/prompty/_utils.py :: render_jinja_template
# ============================================================


@pytest.mark.unittest
class TestRenderJinjaTemplateSandbox:
    """Tests for render_jinja_template in _legacy/prompty/_utils.py.

    Covers three things: ordinary templates still render, known SSTI payloads
    are rejected when the sandbox is active (the default), and the
    PF_USE_SANDBOX_FOR_JINJA=false opt-out really disables the sandbox.
    """

    @pytest.fixture(autouse=True)
    def _unset_sandbox_env(self, monkeypatch):
        """Start every test with PF_USE_SANDBOX_FOR_JINJA unset.

        Without this, the "default behaviour" tests would silently depend on
        whatever value the developer's or CI's environment happens to carry.
        Tests that need an explicit value layer it on with ``patch.dict``.
        """
        monkeypatch.delenv("PF_USE_SANDBOX_FOR_JINJA", raising=False)

    @staticmethod
    def _assert_blocked(payload):
        """Assert that rendering *payload* fails because of the sandbox.

        render_jinja_template wraps the underlying error, so the check is made
        on the message text rather than on the exception type. Checking the
        message keeps the test from passing on an unrelated failure such as a
        template syntax error.
        """
        with pytest.raises(Exception) as exc_info:
            render_jinja_template(payload)
        message = str(exc_info.value)
        assert "SecurityError" in message or "unsafe" in message.lower()

    def test_normal_template_renders_with_sandbox(self):
        """Normal templates should render correctly with sandbox enabled."""
        result = render_jinja_template("Hello, {{ name }}!", name="World")
        assert result == "Hello, World!"

    def test_template_with_loop_renders(self):
        """Templates with standard Jinja2 features like loops should work."""
        template = "{% for item in items %}{{ item }} {% endfor %}"
        result = render_jinja_template(template, items=["a", "b", "c"])
        assert result == "a b c "

    def test_template_with_conditionals_renders(self):
        """Templates with conditionals should work."""
        template = "{% if show %}visible{% else %}hidden{% endif %}"
        assert render_jinja_template(template, show=True) == "visible"
        assert render_jinja_template(template, show=False) == "hidden"

    def test_ssti_class_access_blocked(self):
        """Accessing __class__ should raise SecurityError via sandbox."""
        self._assert_blocked(SSTI_PAYLOAD_CLASS)

    def test_ssti_import_blocked(self):
        """Attempting to import modules via SSTI should be blocked."""
        self._assert_blocked(SSTI_PAYLOAD_IMPORT)

    def test_ssti_mro_access_blocked(self):
        """Accessing __mro__ should be blocked by sandbox."""
        self._assert_blocked(SSTI_PAYLOAD_GETATTR)

    @patch.dict(os.environ, {"PF_USE_SANDBOX_FOR_JINJA": "true"})
    def test_sandbox_enabled_explicitly(self):
        """Sandbox should be active when PF_USE_SANDBOX_FOR_JINJA=true."""
        self._assert_blocked(SSTI_PAYLOAD_CLASS)

    @patch.dict(os.environ, {"PF_USE_SANDBOX_FOR_JINJA": "false"})
    def test_sandbox_disabled_allows_ssti(self):
        """When sandbox is explicitly disabled, SSTI payloads should NOT raise SecurityError."""
        # This is the unsafe opt-out — template renders without sandbox.
        result = render_jinja_template(SSTI_PAYLOAD_CLASS)
        # The payload evaluates to a list of classes, so the rendered text
        # must contain class reprs; a bare "is not None" could never fail.
        assert "<class" in result

    def test_sandbox_enabled_by_default(self):
        """Sandbox should be enabled by default when env var is not set."""
        # The autouse fixture has already removed the variable; make that
        # precondition explicit so the test documents what "default" means.
        assert "PF_USE_SANDBOX_FOR_JINJA" not in os.environ
        self._assert_blocked(SSTI_PAYLOAD_CLASS)


# ============================================================
# Tests for simulator/_conversation :: _create_jinja_template
# ============================================================


@pytest.mark.unittest
class TestCreateJinjaTemplateSandbox:
    """Tests for _create_jinja_template in simulator/_conversation/__init__.py.

    Verifies that templates render normally, that StrictUndefined is kept,
    that SSTI payloads raise SecurityError when sandboxed (the default), and
    that PF_USE_SANDBOX_FOR_JINJA=false switches the sandbox off.
    """

    @pytest.fixture(autouse=True)
    def _unset_sandbox_env(self, monkeypatch):
        """Start every test with PF_USE_SANDBOX_FOR_JINJA unset.

        Keeps the "default is sandboxed" tests independent of the ambient
        environment; explicit values are layered on with ``patch.dict``.
        """
        monkeypatch.delenv("PF_USE_SANDBOX_FOR_JINJA", raising=False)

    def test_normal_template_renders(self):
        """Normal templates should render correctly."""
        tmpl = _create_jinja_template("Hello, {{ name }}!")
        assert tmpl.render(name="World") == "Hello, World!"

    def test_strict_undefined_preserved(self):
        """StrictUndefined should still be enforced — missing vars raise."""
        tmpl = _create_jinja_template("Hello, {{ name }}!")
        with pytest.raises(jinja2.UndefinedError):
            tmpl.render()  # 'name' not provided

    # SecurityError is referenced through jinja2.exceptions (always loaded by
    # `import jinja2`) rather than jinja2.sandbox, which is only an attribute
    # of the package if some other module has already imported it.

    def test_ssti_class_access_blocked(self):
        """Accessing __class__ should raise SecurityError."""
        tmpl = _create_jinja_template(SSTI_PAYLOAD_CLASS)
        with pytest.raises(jinja2.exceptions.SecurityError):
            tmpl.render()

    def test_ssti_import_blocked(self):
        """Attempting to import modules via SSTI should be blocked."""
        tmpl = _create_jinja_template(SSTI_PAYLOAD_IMPORT)
        with pytest.raises(jinja2.exceptions.SecurityError):
            tmpl.render()

    @patch.dict(os.environ, {"PF_USE_SANDBOX_FOR_JINJA": "false"})
    def test_sandbox_disabled_allows_ssti(self):
        """When sandbox is disabled, SSTI should not raise SecurityError."""
        tmpl = _create_jinja_template(SSTI_PAYLOAD_CLASS)
        result = tmpl.render()
        # The payload evaluates to a list of classes; checking for a class
        # repr proves the expression was really evaluated unsandboxed.
        assert "<class" in result

    @patch.dict(os.environ, {"PF_USE_SANDBOX_FOR_JINJA": "true"})
    def test_sandbox_enabled_blocks_ssti(self):
        """When sandbox is explicitly enabled, SSTI should be blocked."""
        tmpl = _create_jinja_template(SSTI_PAYLOAD_CLASS)
        with pytest.raises(jinja2.exceptions.SecurityError):
            tmpl.render()


# ============================================================
# Tests for ConversationBot integration with sandbox
# ============================================================


@pytest.mark.unittest
class TestConversationBotSandbox:
    """Tests that ConversationBot uses sandboxed templates.

    Both templates a bot holds are checked: ``conversation_template`` and
    ``conversation_starter``. Each must render ordinary content and must raise
    SecurityError for an SSTI payload.
    """

    @pytest.fixture(autouse=True)
    def _unset_sandbox_env(self, monkeypatch):
        """Start every test with PF_USE_SANDBOX_FOR_JINJA unset.

        These tests all assert the default (sandboxed) behaviour, so they must
        not inherit an opt-out from the surrounding environment.
        """
        monkeypatch.delenv("PF_USE_SANDBOX_FOR_JINJA", raising=False)

    def test_bot_normal_template_works(self):
        """ConversationBot should render normal conversation templates."""
        bot = ConversationBot(
            role=ConversationRole.USER,
            model=MockModel(),
            conversation_template="Hello, {{ name }}!",
            instantiation_parameters={"name": "TestUser", "conversation_starter": "Hi there"},
        )
        assert isinstance(bot.conversation_template, jinja2.Template)
        # Also render it: the type check alone would pass for a template
        # that the sandbox had broken for ordinary use.
        assert bot.conversation_template.render(name="TestUser") == "Hello, TestUser!"

    # SecurityError is referenced through jinja2.exceptions (always loaded by
    # `import jinja2`) rather than jinja2.sandbox, which is only an attribute
    # of the package if some other module has already imported it.

    def test_bot_ssti_template_blocked_on_render(self):
        """ConversationBot with SSTI template should block on render."""
        bot = ConversationBot(
            role=ConversationRole.ASSISTANT,
            model=MockModel(),
            conversation_template=SSTI_PAYLOAD_CLASS,
            instantiation_parameters={"chatbot_name": "Bot"},
        )
        with pytest.raises(jinja2.exceptions.SecurityError):
            bot.conversation_template.render()

    def test_bot_conversation_starter_sandboxed(self):
        """ConversationBot conversation_starter template should also be sandboxed."""
        bot = ConversationBot(
            role=ConversationRole.USER,
            model=MockModel(),
            conversation_template="Hello {{ name }}",
            instantiation_parameters={
                "name": "User",
                "conversation_starter": "Normal starter {{ name }}",
            },
        )
        # conversation_starter should be a Template (sandboxed)
        assert isinstance(bot.conversation_starter, jinja2.Template)
        result = bot.conversation_starter.render(name="User")
        assert result == "Normal starter User"

    def test_bot_ssti_conversation_starter_blocked(self):
        """SSTI in conversation_starter should be blocked on render."""
        bot = ConversationBot(
            role=ConversationRole.USER,
            model=MockModel(),
            conversation_template="Hello {{ name }}",
            instantiation_parameters={
                "name": "User",
                "conversation_starter": SSTI_PAYLOAD_CLASS,
            },
        )
        assert isinstance(bot.conversation_starter, jinja2.Template)
        with pytest.raises(jinja2.exceptions.SecurityError):
            bot.conversation_starter.render()
Loading