Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions backend/app/services/report/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from datetime import datetime

import jinja2
import jinja2.sandbox

from app.services.report.markdown import prepare_docx_context
from app.services.report.report_data import ReportContext
Expand Down Expand Up @@ -63,7 +64,10 @@ def render_html_report(
Image data is already embedded as base64 in the context (FileReport.data_base64),
so templates handle image rendering client-side.
"""
env = jinja2.Environment(autoescape=True)
# Use a SandboxedEnvironment so that templates (which originate from an
# external, admin-seeded source via CUSTOM_DATA_URL) cannot execute
# arbitrary Python via Jinja2 SSTI when a user generates a report.
env = jinja2.sandbox.SandboxedEnvironment(autoescape=True)
env.filters["tojson"] = _tojson_filter

template = env.from_string(template_content.decode("utf-8"))
Expand All @@ -90,7 +94,10 @@ def render_docx_report(

tpl = DocxTemplate(io.BytesIO(template_content))
docx_context = prepare_docx_context(asdict(context), tpl, image_data)
tpl.render(docx_context)
# Render with a SandboxedEnvironment to prevent Jinja2 SSTI/RCE from
# externally-sourced (admin-seeded) templates. docxtpl manages XML escaping
# itself, so autoescape stays off (the default).
tpl.render(docx_context, jinja_env=jinja2.sandbox.SandboxedEnvironment())

output = io.BytesIO()
tpl.save(output)
Expand Down
74 changes: 50 additions & 24 deletions backend/app/services/report/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
from datetime import datetime

from fastapi import HTTPException, status
from jinja2.exceptions import SecurityError, TemplateError
from sqlalchemy import select
from sqlalchemy.orm import Session

from app.core.logging import app_logger
from app.enums.enums import ReportTemplateFormat
from app.models.report_template import ReportTemplate
from app.models.user import User
Expand Down Expand Up @@ -54,33 +56,57 @@ def generate_report_service(
sort_order=request.sort_order,
)

if template.format == ReportTemplateFormat.HTML:
output = render_html_report(template.template_content, context)
result = GeneratedReport(
content=output.encode("utf-8"),
media_type="text/html",
filename=template.filename,
try:
if template.format == ReportTemplateFormat.HTML:
output = render_html_report(template.template_content, context)
result = GeneratedReport(
content=output.encode("utf-8"),
media_type="text/html",
filename=template.filename,
)
del context, output
release_memory()
return result
elif template.format == ReportTemplateFormat.DOCX:
# Collect image data for DOCX embedding (markdown → InlineImage)
image_data = collect_report_images(session, assessment_id)
output = render_docx_report(template.template_content, context, image_data)

del context, image_data
release_memory()

return GeneratedReport(
content=output,
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
filename=template.filename,
)
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Unsupported format: {template.format}",
)
except SecurityError as e:
# The template tried to access a sandbox-disallowed attribute — a
# blocked SSTI attempt. Log as a security event (template id/filename
# and the non-sensitive sandbox message) but keep the client response
# generic so no payload/traceback is leaked.
app_logger.error(
f"Rejected report template id={template.id} "
f"filename={template.filename!r}: blocked unsafe expression: {e}"
)
del context, output
release_memory()
return result
elif template.format == ReportTemplateFormat.DOCX:
# Collect image data for DOCX embedding (markdown → InlineImage)
image_data = collect_report_images(session, assessment_id)
output = render_docx_report(template.template_content, context, image_data)

del context, image_data
release_memory()

return GeneratedReport(
content=output,
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
filename=template.filename,
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail="The selected report template was rejected: it contains disallowed expressions.",
)
except TemplateError as e:
# Malformed template (syntax / undefined / runtime error).
app_logger.error(
f"Failed to render report template id={template.id} "
f"filename={template.filename!r}: {e}"
)
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Unsupported format: {template.format}",
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail="The selected report template could not be rendered. Please check the template.",
)


Expand Down
77 changes: 77 additions & 0 deletions backend/tests/services/report/test_render.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Unit tests for the report render layer.

Focus: the Jinja2 SandboxedEnvironment must block SSTI/RCE payloads on both the
HTML and DOCX paths while still rendering benign templates correctly.
"""

import io
from datetime import datetime

import pytest
from docx import Document
from jinja2.exceptions import SecurityError

from app.services.report.render import render_docx_report, render_html_report
from app.services.report.report_data import AssessmentInfo, ReportContext

# Classic Jinja2 SSTI -> RCE payload (harmless `id` command).
# The expression walks from `self` through `__init__.__globals__.__builtins__`
# to `__import__('os')` and then calls `popen('id').read()`.
RCE_PAYLOAD = (
"{{ self.__init__.__globals__.__builtins__.__import__('os').popen('id').read() }}"
)


def _context() -> ReportContext:
    """Build a minimal ReportContext for rendering without touching the DB."""
    assessment_info = AssessmentInfo(
        id="1",
        name="Acme Engagement",
        description="",
        assessment_type="RedTeam",
    )
    # Fixed timestamp keeps the rendered output deterministic across runs.
    generated_at = datetime(2026, 1, 1)
    return ReportContext(
        assessment=assessment_info,
        activities_grouped=[],
        activities_flat=[],
        statistics={},
        generated_at=generated_at,
        generated_by="tester",
        template_filename="t",
    )


def _docx_bytes(paragraph_text: str) -> bytes:
    """Serialize a new Document containing *paragraph_text* to in-memory bytes."""
    document = Document()
    document.add_paragraph(paragraph_text)
    with io.BytesIO() as stream:
        document.save(stream)
        return stream.getvalue()


def test_html_sandbox_blocks_rce():
    """Rendering the SSTI payload through the HTML path raises SecurityError."""
    malicious_html = ("<pre>" + RCE_PAYLOAD + "</pre>").encode()
    with pytest.raises(SecurityError):
        render_html_report(malicious_html, _context())


def test_docx_sandbox_blocks_rce():
    """Rendering the SSTI payload through the DOCX path raises SecurityError."""
    malicious_docx = _docx_bytes(RCE_PAYLOAD)
    with pytest.raises(SecurityError):
        # No image data is supplied; only the template expression matters here.
        render_docx_report(malicious_docx, _context(), None)


def test_html_benign_renders_and_tojson_works():
    """A harmless HTML template renders and the custom tojson filter is usable."""
    heading = b"<h1>{{ assessment.name }}</h1>"
    script = b"<script>var d = {{ statistics | tojson }};</script>"

    rendered = render_html_report(heading + script, _context())

    assert "Acme Engagement" in rendered
    # tojson emits raw JSON (not HTML-entity-encoded) so the <script> stays valid.
    assert "var d = {};" in rendered


def test_docx_benign_renders():
    """A harmless DOCX template renders with the assessment name substituted."""
    docx_template = _docx_bytes("Assessment: {{ assessment.name }}")
    output_bytes = render_docx_report(docx_template, _context(), None)

    # Re-open the rendered bytes and flatten the paragraphs into one string.
    paragraphs = Document(io.BytesIO(output_bytes)).paragraphs
    body_text = "\n".join([paragraph.text for paragraph in paragraphs])
    assert "Assessment: Acme Engagement" in body_text
98 changes: 98 additions & 0 deletions backend/tests/services/report/test_report_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""
Tests for generate_report_service error handling.

A sandbox-blocked (SSTI) or malformed template must surface as a clean HTTP 422
with a generic message and be logged via app_logger — never an opaque 500 or a
leaked payload.
"""

import logging

import pytest
from fastapi import HTTPException, status
from sqlalchemy.orm import Session

from app.core.config import settings
from app.enums.enums import ReportTemplateFormat
from app.models.assessment import Assessment
from app.models.report_template import ReportTemplate
from app.models.user import User
from app.schemas.report import ReportGenerateRequest
from app.services.report.report import generate_report_service

# Jinja2 expression that tries to reach `os.popen('id')` by walking from `self`
# through `__init__.__globals__.__builtins__.__import__`; used below as the body
# of the malicious template.
RCE_PAYLOAD = (
"{{ self.__init__.__globals__.__builtins__.__import__('os').popen('id').read() }}"
)


@pytest.fixture
def assessment(session: Session, test_admin_user: User) -> Assessment:
    """Persist and return a RedTeam assessment whose creator is ``test_admin_user``."""
    attributes = {
        "name": "Test Assessment",
        "description": "A test assessment",
        "assessment_type": "RedTeam",
        "created_by": test_admin_user.id,
    }
    record = Assessment(**attributes)
    session.add(record)
    session.commit()
    return record


def _add_template(session: Session, content: str) -> ReportTemplate:
    """Store *content* as an HTML ReportTemplate named ``report.html`` and return it."""
    encoded_content = content.encode("utf-8")
    row = ReportTemplate(
        filename="report.html",
        format=ReportTemplateFormat.HTML,
        template_content=encoded_content,
    )
    session.add(row)
    session.commit()
    return row


def test_malicious_template_rejected_with_422_and_logged(
    session: Session,
    test_admin_user: User,
    assessment: Assessment,
    caplog,
):
    """An SSTI template yields a generic HTTP 422 and a server-side security log.

    The client-facing detail must not echo any part of the payload, while the
    log must identify the offending template.
    """
    template = _add_template(session, f"<pre>{RCE_PAYLOAD}</pre>")
    request = ReportGenerateRequest(template_id=template.id)

    with caplog.at_level(logging.ERROR, logger=settings.APPLICATION_NAME):
        with pytest.raises(HTTPException) as exc_info:
            generate_report_service(assessment.id, request, test_admin_user, session)

    detail = exc_info.value.detail
    assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_CONTENT
    # Client message stays generic — no payload echoed back.
    assert "disallowed expressions" in detail
    # Check distinctive payload fragments rather than a two-letter substring
    # such as "os", which would break on harmless rewording of the message.
    for fragment in (RCE_PAYLOAD, "__globals__", "__import__", "popen"):
        assert fragment not in detail
    # Security event is logged server-side with the template identity. The
    # wording check guards against the id matching unrelated log text.
    assert "blocked unsafe expression" in caplog.text
    assert str(template.id) in caplog.text


def test_malformed_template_rejected_with_422(
    session: Session,
    test_admin_user: User,
    assessment: Assessment,
):
    """A template with an unterminated expression is reported as HTTP 422."""
    broken_template = _add_template(session, "<h1>{{ unclosed </h1>")
    payload = ReportGenerateRequest(template_id=broken_template.id)

    with pytest.raises(HTTPException) as raised:
        generate_report_service(assessment.id, payload, test_admin_user, session)

    error = raised.value
    assert error.status_code == status.HTTP_422_UNPROCESSABLE_CONTENT


def test_benign_template_succeeds(
    session: Session,
    test_admin_user: User,
    assessment: Assessment,
):
    """A harmless HTML template produces an HTML report containing the assessment name."""
    safe_template = _add_template(session, "<h1>{{ assessment.name }}</h1>")
    payload = ReportGenerateRequest(template_id=safe_template.id)

    report = generate_report_service(assessment.id, payload, test_admin_user, session)

    assert report.media_type == "text/html"
    assert b"Test Assessment" in report.content
Loading