diff --git a/.github/workflows/test-and-publish.yml b/.github/workflows/test-and-publish.yml index 8fe30e1..fe5cf82 100644 --- a/.github/workflows/test-and-publish.yml +++ b/.github/workflows/test-and-publish.yml @@ -7,6 +7,8 @@ on: - main - develop - feature-* + schedule: + - cron: "0 7 * * *" release: types: - published @@ -40,7 +42,6 @@ jobs: tests: name: Tests (Python ${{ matrix.python-version }}) runs-on: ubuntu-latest - environment: ci-live strategy: fail-fast: false matrix: @@ -61,9 +62,7 @@ jobs: cache-suffix: test-and-publish cache-dependency-glob: uv.lock - name: Run tests with nox - run: uvx nox --python ${{ matrix.python-version }} --session tests -- --no-parallel - env: - PDFREST_API_KEY: ${{ secrets.PDFREST_API_KEY }} + run: uvx nox --python ${{ matrix.python-version }} --session tests -- -n 5 -m "not live" - name: Fetch base branch for diff-cover if: github.event_name == 'pull_request' run: | @@ -96,6 +95,35 @@ jobs: name: coverage-${{ matrix.python-version }} path: coverage/py${{ matrix.python-version }} + live-tests: + name: Live Tests (Python 3.11) + if: github.event_name == 'pull_request' || github.event_name == 'schedule' + runs-on: ubuntu-latest + environment: ci-live + permissions: + # Least privilege: this job holds the live API key and only checks out code, runs tests, and uploads artifacts. + contents: read + steps: + - uses: actions/checkout@v4 + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + version: 0.9.18 + python-version: "3.11" + enable-cache: true + cache-suffix: test-and-publish + cache-dependency-glob: uv.lock + - name: Run live tests with nox + run: uvx nox --python 3.11 --session tests -- -n 5 -m live + env: + PDFREST_API_KEY: ${{ secrets.PDFREST_API_KEY }} + - name: Upload live coverage reports + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-live-3.11 + path: coverage/py3.11 + examples: name: Examples (Python ${{ matrix.python-version }}) runs-on: ubuntu-latest diff --git a/TESTING_GUIDELINES.md b/TESTING_GUIDELINES.md index 08f21f4..102bddf 
100644 --- a/TESTING_GUIDELINES.md +++ b/TESTING_GUIDELINES.md @@ -19,6 +19,10 @@ iteration required. asserting method/path/headers/body). Optional payload branches (for example, `pages`, `output`, `rgb_color`, and output-prefix fields) require explicit tests so serialization differences are caught early. +- **Keep endpoint tests in their home files.** When adding or restoring coverage + for an endpoint, place the test in that endpoint's existing test module (for + example, `tests/test_convert_to_excel.py`), not in a generic cross-endpoint + coverage file. - **Check client coverage regularly.** Run `uvx nox -s class-coverage` to enforce minimum function-level coverage for `PdfRestClient` and `AsyncPdfRestClient`. @@ -95,6 +99,10 @@ iteration required. or `timeout`, add explicit tests (sync + async) proving those options propagate. Capture `request.extensions["timeout"]` and assert every component equals `pytest.approx(expected)`. +- For both sync and async endpoint helpers, ensure request-customization or + success tests also exercise endpoint-specific optional payload branches (for + example `output`, `output_prefix`, `pages`, `page_groups`, redaction payloads) + so non-live class-function coverage does not depend on live suites. 
### Validation & Payload Modeling diff --git a/pyproject.toml b/pyproject.toml index 752f4ad..a022a92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,9 @@ dev = [ minversion = "7.4" testpaths = ["tests"] addopts = "-ra" +markers = [ + "live: tests that call the live pdfRest service", +] [tool.ruff] extend-include = ["*.ipynb"] diff --git a/tests/conftest.py b/tests/conftest.py index 50fb297..e4c4e7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,8 @@ from __future__ import annotations import os +from itertools import pairwise +from pathlib import Path import httpx import pytest @@ -12,6 +14,23 @@ ) +def _is_live_test_path(path: Path) -> bool: + """Return True when the collected item lives under tests/live.""" + lowered_parts = [part.lower() for part in path.parts] + return any( + first == "tests" and second == "live" + for first, second in pairwise(lowered_parts) + ) + + +def pytest_collection_modifyitems(items: list[pytest.Item]) -> None: + """Mark all live tests so CI can include/exclude them efficiently.""" + for item in items: + item_path = getattr(item, "path", Path(str(item.fspath))) + if _is_live_test_path(item_path) or item.name.startswith("test_live_"): + item.add_marker(pytest.mark.live) + + @pytest.fixture(scope="session") def pdfrest_api_key() -> str: key = os.getenv("PDFREST_API_KEY") diff --git a/tests/live/test_live_file_downloads.py b/tests/live/test_live_file_downloads.py new file mode 100644 index 0000000..de1defa --- /dev/null +++ b/tests/live/test_live_file_downloads.py @@ -0,0 +1,298 @@ +from __future__ import annotations + +import uuid +from contextlib import AsyncExitStack, ExitStack +from dataclasses import dataclass +from pathlib import Path + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile + + +def _create_temp_text_file(tmp_path: Path, prefix: str) -> tuple[Path, str, bytes]: + filename = f"{prefix}.txt" + source_path = tmp_path / filename + 
source_content = f"{prefix}-line1\n{prefix}-line2\n" + source_path.write_text(source_content, encoding="utf-8") + return source_path, source_content, source_path.read_bytes() + + +@dataclass +class LiveFileData: + prefix: str + file: PdfRestFile + original_bytes: bytes + source_text: str + + +@pytest.fixture(scope="class") +def live_sync_file( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + tmp_path_factory: pytest.TempPathFactory, +) -> LiveFileData: + prefix = f"sync-live-{uuid.uuid4().hex}" + temp_dir = tmp_path_factory.mktemp(prefix) + source_path, source_text, source_bytes = _create_temp_text_file(temp_dir, prefix) + with PdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + with source_path.open("rb") as source_file: + uploaded_files = client.files.create([source_file]) + file_repr = uploaded_files[0] + return LiveFileData( + prefix=prefix, + file=file_repr, + original_bytes=source_bytes, + source_text=source_text, + ) + + +@pytest.fixture(scope="class") +def live_async_file( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + tmp_path_factory: pytest.TempPathFactory, +) -> LiveFileData: + prefix = f"async-live-{uuid.uuid4().hex}" + temp_dir = tmp_path_factory.mktemp(prefix) + source_path, source_text, source_bytes = _create_temp_text_file(temp_dir, prefix) + with PdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + with source_path.open("rb") as source_file: + uploaded_files = client.files.create([source_file]) + file_repr = uploaded_files[0] + return LiveFileData( + prefix=prefix, + file=file_repr, + original_bytes=source_bytes, + source_text=source_text, + ) + + +class TestLiveFileDownloads: + def test_read_bytes( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_sync_file: LiveFileData, + ) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + assert ( + client.files.read_bytes(live_sync_file.file.id) 
+ == live_sync_file.original_bytes + ) + + def test_read_text( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_sync_file: LiveFileData, + ) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + assert ( + client.files.read_text(live_sync_file.file.id) + == live_sync_file.source_text + ) + + def test_write_bytes( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + tmp_path: Path, + live_sync_file: LiveFileData, + ) -> None: + destination = tmp_path / f"{live_sync_file.prefix}-download.bin" + with PdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + written_path = client.files.write_bytes( + live_sync_file.file.id, str(destination) + ) + assert written_path == destination + assert written_path.read_bytes() == live_sync_file.original_bytes + + def test_stream_iter_raw( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_sync_file: LiveFileData, + ) -> None: + with ExitStack() as stack: + client = stack.enter_context( + PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) + ) + stream = stack.enter_context(client.files.stream(live_sync_file.file.id)) + raw_chunks = list(stream.iter_raw()) + assert b"".join(raw_chunks) == live_sync_file.original_bytes + + def test_stream_iter_bytes( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_sync_file: LiveFileData, + ) -> None: + with ExitStack() as stack: + client = stack.enter_context( + PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) + ) + stream = stack.enter_context(client.files.stream(live_sync_file.file.id)) + chunks = list(stream.iter_bytes(chunk_size=None)) + assert b"".join(chunks) == live_sync_file.original_bytes + + def test_stream_iter_text( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_sync_file: LiveFileData, + ) -> None: + with ExitStack() as stack: + client = stack.enter_context( + 
PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) + ) + stream = stack.enter_context(client.files.stream(live_sync_file.file.id)) + text_chunks = list(stream.iter_text(chunk_size=None)) + assert "".join(text_chunks) == live_sync_file.source_text + + def test_stream_iter_lines( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_sync_file: LiveFileData, + ) -> None: + with ExitStack() as stack: + client = stack.enter_context( + PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) + ) + stream = stack.enter_context(client.files.stream(live_sync_file.file.id)) + lines = list(stream.iter_lines()) + assert lines == live_sync_file.source_text.splitlines() + + +class TestLiveAsyncFileDownloads: + @pytest.mark.asyncio + async def test_read_bytes( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_async_file: LiveFileData, + ) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + assert ( + await client.files.read_bytes(live_async_file.file.id) + == live_async_file.original_bytes + ) + + @pytest.mark.asyncio + async def test_read_text( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_async_file: LiveFileData, + ) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + assert ( + await client.files.read_text(live_async_file.file.id) + == live_async_file.source_text + ) + + @pytest.mark.asyncio + async def test_write_bytes( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + tmp_path: Path, + live_async_file: LiveFileData, + ) -> None: + destination = tmp_path / f"{live_async_file.prefix}-download.bin" + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) as client: + written_path = await client.files.write_bytes( + live_async_file.file, destination + ) + assert written_path == destination + assert 
written_path.read_bytes() == live_async_file.original_bytes + + @pytest.mark.asyncio + async def test_stream_iter_raw( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_async_file: LiveFileData, + ) -> None: + async with AsyncExitStack() as stack: + client = await stack.enter_async_context( + AsyncPdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) + ) + stream_cm = await client.files.stream(live_async_file.file.id) + stream = await stack.enter_async_context(stream_cm) + raw_chunks = [chunk async for chunk in stream.iter_raw()] + assert b"".join(raw_chunks) == live_async_file.original_bytes + + @pytest.mark.asyncio + async def test_stream_iter_bytes( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_async_file: LiveFileData, + ) -> None: + async with AsyncExitStack() as stack: + client = await stack.enter_async_context( + AsyncPdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) + ) + stream_cm = await client.files.stream(live_async_file.file.id) + stream = await stack.enter_async_context(stream_cm) + chunks = [chunk async for chunk in stream.iter_bytes(chunk_size=None)] + assert b"".join(chunks) == live_async_file.original_bytes + + @pytest.mark.asyncio + async def test_stream_iter_text( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_async_file: LiveFileData, + ) -> None: + async with AsyncExitStack() as stack: + client = await stack.enter_async_context( + AsyncPdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) + ) + stream_cm = await client.files.stream(live_async_file.file.id) + stream = await stack.enter_async_context(stream_cm) + text_chunks = [chunk async for chunk in stream.iter_text(chunk_size=None)] + assert "".join(text_chunks) == live_async_file.source_text + + @pytest.mark.asyncio + async def test_stream_iter_lines( + self, + pdfrest_api_key: str, + pdfrest_live_base_url: str, + live_async_file: LiveFileData, + ) -> 
None: + async with AsyncExitStack() as stack: + client = await stack.enter_async_context( + AsyncPdfRestClient( + api_key=pdfrest_api_key, base_url=pdfrest_live_base_url + ) + ) + stream_cm = await client.files.stream(live_async_file.file.id) + stream = await stack.enter_async_context(stream_cm) + lines = [line async for line in stream.iter_lines()] + assert lines == live_async_file.source_text.splitlines() diff --git a/tests/test_blank_pdf.py b/tests/test_blank_pdf.py index 6d845bf..4fd28d5 100644 --- a/tests/test_blank_pdf.py +++ b/tests/test_blank_pdf.py @@ -668,6 +668,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert payload["custom_height"] == 100 assert payload["custom_width"] == 50 assert "page_orientation" not in payload + assert payload["output"] == "async-custom" assert payload["debug"] == "yes" return httpx.Response( 200, @@ -695,6 +696,7 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.blank_pdf( page_size={"custom_height": 100, "custom_width": 50}, page_count=1, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_convert_colors.py b/tests/test_convert_colors.py index c71d831..c93c680 100644 --- a/tests/test_convert_colors.py +++ b/tests/test_convert_colors.py @@ -277,6 +277,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert payload["color_profile"] == "custom" assert payload["profile_id"] == str(profile_file.id) assert payload["preserve_black"] == "false" + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -304,6 +305,7 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.convert_colors( input_file, color_profile=profile_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_convert_to_excel.py b/tests/test_convert_to_excel.py index 
8debea4..1ffc6ef 100644 --- a/tests/test_convert_to_excel.py +++ b/tests/test_convert_to_excel.py @@ -204,6 +204,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -230,6 +231,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.convert_to_excel( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_convert_to_pdfa.py b/tests/test_convert_to_pdfa.py index 96af385..477ec8f 100644 --- a/tests/test_convert_to_pdfa.py +++ b/tests/test_convert_to_pdfa.py @@ -227,6 +227,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["output_type"] == "PDF/A-2u" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" assert payload["extra"] == {"note": "async"} assert payload["rasterize_if_errors_encountered"] == "off" return httpx.Response( @@ -251,6 +252,7 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.convert_to_pdfa( input_file, output_type="PDF/A-2u", + output="async-custom", rasterize_if_errors_encountered="off", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, diff --git a/tests/test_convert_to_pdfx.py b/tests/test_convert_to_pdfx.py index 86f1012..8a42a0d 100644 --- a/tests/test_convert_to_pdfx.py +++ b/tests/test_convert_to_pdfx.py @@ -214,6 +214,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["output_type"] == "PDF/X-6" assert payload["id"] == str(input_file.id) + assert payload["output"] == 
"async-custom" assert payload["extra"] == {"note": "async"} return httpx.Response( 200, @@ -237,6 +238,7 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.convert_to_pdfx( input_file, output_type="PDF/X-6", + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"extra": {"note": "async"}}, diff --git a/tests/test_convert_to_powerpoint.py b/tests/test_convert_to_powerpoint.py index 2665367..f4656f1 100644 --- a/tests/test_convert_to_powerpoint.py +++ b/tests/test_convert_to_powerpoint.py @@ -204,6 +204,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -230,6 +231,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.convert_to_powerpoint( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_convert_to_word.py b/tests/test_convert_to_word.py index ba4e034..b5ae353 100644 --- a/tests/test_convert_to_word.py +++ b/tests/test_convert_to_word.py @@ -204,6 +204,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -230,6 +231,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.convert_to_word( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, 
diff --git a/tests/test_convert_xfa_to_acroforms.py b/tests/test_convert_xfa_to_acroforms.py index 0d634cc..40482e3 100644 --- a/tests/test_convert_xfa_to_acroforms.py +++ b/tests/test_convert_xfa_to_acroforms.py @@ -198,6 +198,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -224,6 +225,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.convert_xfa_to_acroforms( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_export_form_data.py b/tests/test_export_form_data.py index f590e50..dfcf084 100644 --- a/tests/test_export_form_data.py +++ b/tests/test_export_form_data.py @@ -262,6 +262,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["id"] == str(input_file.id) assert payload["data_format"] == "xml" + assert payload["output"] == "async-custom" assert payload["note"] == "details" return httpx.Response( 200, @@ -290,6 +291,7 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.export_form_data( input_file, data_format="xml", + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"note": "details"}, diff --git a/tests/test_extract_images.py b/tests/test_extract_images.py index 89ef81c..c0510d4 100644 --- a/tests/test_extract_images.py +++ b/tests/test_extract_images.py @@ -199,7 +199,7 @@ async def test_async_extract_images_request_customization( input_file = make_pdf_file(PdfRestFileID.generate(2)) output_id = str(PdfRestFileID.generate()) payload_dump = 
ExtractImagesPayload.model_validate( - {"files": [input_file], "pages": ["1-last"]} + {"files": [input_file], "pages": ["1-last"], "output": "debug-async"} ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) captured_timeout: dict[str, float | dict[str, float] | None] = {} @@ -235,6 +235,7 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.extract_images( input_file, pages=["1-last"], + output="debug-async", extra_query={"trace": "true"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": True}, diff --git a/tests/test_files.py b/tests/test_files.py index 8b3f94b..0155701 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -3,8 +3,6 @@ import json import uuid from collections.abc import AsyncIterator, Iterator -from contextlib import AsyncExitStack, ExitStack -from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path from typing import Any, cast @@ -89,68 +87,6 @@ def _assert_file_matches_payload( assert file_repr.scheduled_deletion_time_utc is None -def _create_temp_text_file(tmp_path: Path, prefix: str) -> tuple[Path, str, bytes]: - filename = f"{prefix}.txt" - source_path = tmp_path / filename - source_content = f"{prefix}-line1\n{prefix}-line2\n" - source_path.write_text(source_content, encoding="utf-8") - return source_path, source_content, source_path.read_bytes() - - -@dataclass -class LiveFileData: - prefix: str - file: PdfRestFile - original_bytes: bytes - source_text: str - - -@pytest.fixture(scope="class") -def live_sync_file( - pdfrest_api_key: str, - pdfrest_live_base_url: str, - tmp_path_factory: pytest.TempPathFactory, -) -> LiveFileData: - prefix = f"sync-live-{uuid.uuid4().hex}" - temp_dir = tmp_path_factory.mktemp(prefix) - source_path, source_text, source_bytes = _create_temp_text_file(temp_dir, prefix) - with PdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - with source_path.open("rb") as 
source_file: - uploaded_files = client.files.create([source_file]) - file_repr = uploaded_files[0] - return LiveFileData( - prefix=prefix, - file=file_repr, - original_bytes=source_bytes, - source_text=source_text, - ) - - -@pytest.fixture(scope="class") -def live_async_file( - pdfrest_api_key: str, - pdfrest_live_base_url: str, - tmp_path_factory: pytest.TempPathFactory, -) -> LiveFileData: - prefix = f"async-live-{uuid.uuid4().hex}" - temp_dir = tmp_path_factory.mktemp(prefix) - source_path, source_text, source_bytes = _create_temp_text_file(temp_dir, prefix) - with PdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - with source_path.open("rb") as source_file: - uploaded_files = client.files.create([source_file]) - file_repr = uploaded_files[0] - return LiveFileData( - prefix=prefix, - file=file_repr, - original_bytes=source_bytes, - source_text=source_text, - ) - - @pytest.mark.parametrize( "file_ref", [ @@ -1385,109 +1321,6 @@ def test_live_file_create_from_urls( assert {"report.pdf", "report.docx"} <= names -class TestLiveFileDownloads: - def test_read_bytes( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_sync_file: LiveFileData, - ) -> None: - with PdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - assert ( - client.files.read_bytes(live_sync_file.file.id) - == live_sync_file.original_bytes - ) - - def test_read_text( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_sync_file: LiveFileData, - ) -> None: - with PdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - assert ( - client.files.read_text(live_sync_file.file.id) - == live_sync_file.source_text - ) - - def test_write_bytes( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - tmp_path: Path, - live_sync_file: LiveFileData, - ) -> None: - destination = tmp_path / f"{live_sync_file.prefix}-download.bin" - with PdfRestClient( - 
api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - written_path = client.files.write_bytes( - live_sync_file.file.id, str(destination) - ) - assert written_path == destination - assert written_path.read_bytes() == live_sync_file.original_bytes - - def test_stream_iter_raw( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_sync_file: LiveFileData, - ) -> None: - with ExitStack() as stack: - client = stack.enter_context( - PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) - ) - stream = stack.enter_context(client.files.stream(live_sync_file.file.id)) - raw_chunks = list(stream.iter_raw()) - assert b"".join(raw_chunks) == live_sync_file.original_bytes - - def test_stream_iter_bytes( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_sync_file: LiveFileData, - ) -> None: - with ExitStack() as stack: - client = stack.enter_context( - PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) - ) - stream = stack.enter_context(client.files.stream(live_sync_file.file.id)) - chunks = list(stream.iter_bytes(chunk_size=None)) - assert b"".join(chunks) == live_sync_file.original_bytes - - def test_stream_iter_text( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_sync_file: LiveFileData, - ) -> None: - with ExitStack() as stack: - client = stack.enter_context( - PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) - ) - stream = stack.enter_context(client.files.stream(live_sync_file.file.id)) - text_chunks = list(stream.iter_text(chunk_size=None)) - assert "".join(text_chunks) == live_sync_file.source_text - - def test_stream_iter_lines( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_sync_file: LiveFileData, - ) -> None: - with ExitStack() as stack: - client = stack.enter_context( - PdfRestClient(api_key=pdfrest_api_key, base_url=pdfrest_live_base_url) - ) - stream = 
stack.enter_context(client.files.stream(live_sync_file.file.id)) - lines = list(stream.iter_lines()) - assert lines == live_sync_file.source_text.splitlines() - - @pytest.mark.asyncio async def test_live_async_file_create( pdfrest_api_key: str, pdfrest_live_base_url: str @@ -1525,128 +1358,6 @@ async def test_live_async_file_create_from_paths( } <= names -class TestLiveAsyncFileDownloads: - @pytest.mark.asyncio - async def test_read_bytes( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_async_file: LiveFileData, - ) -> None: - async with AsyncPdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - assert ( - await client.files.read_bytes(live_async_file.file.id) - == live_async_file.original_bytes - ) - - @pytest.mark.asyncio - async def test_read_text( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_async_file: LiveFileData, - ) -> None: - async with AsyncPdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - assert ( - await client.files.read_text(live_async_file.file.id) - == live_async_file.source_text - ) - - @pytest.mark.asyncio - async def test_write_bytes( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - tmp_path: Path, - live_async_file: LiveFileData, - ) -> None: - destination = tmp_path / f"{live_async_file.prefix}-download.bin" - async with AsyncPdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) as client: - written_path = await client.files.write_bytes( - live_async_file.file, destination - ) - assert written_path == destination - assert written_path.read_bytes() == live_async_file.original_bytes - - @pytest.mark.asyncio - async def test_stream_iter_raw( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_async_file: LiveFileData, - ) -> None: - async with AsyncExitStack() as stack: - client = await stack.enter_async_context( - AsyncPdfRestClient( - api_key=pdfrest_api_key, 
base_url=pdfrest_live_base_url - ) - ) - stream_cm = await client.files.stream(live_async_file.file.id) - stream = await stack.enter_async_context(stream_cm) - raw_chunks = [chunk async for chunk in stream.iter_raw()] - assert b"".join(raw_chunks) == live_async_file.original_bytes - - @pytest.mark.asyncio - async def test_stream_iter_bytes( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_async_file: LiveFileData, - ) -> None: - async with AsyncExitStack() as stack: - client = await stack.enter_async_context( - AsyncPdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) - ) - stream_cm = await client.files.stream(live_async_file.file.id) - stream = await stack.enter_async_context(stream_cm) - chunks = [chunk async for chunk in stream.iter_bytes(chunk_size=None)] - assert b"".join(chunks) == live_async_file.original_bytes - - @pytest.mark.asyncio - async def test_stream_iter_text( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_async_file: LiveFileData, - ) -> None: - async with AsyncExitStack() as stack: - client = await stack.enter_async_context( - AsyncPdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) - ) - stream_cm = await client.files.stream(live_async_file.file.id) - stream = await stack.enter_async_context(stream_cm) - text_chunks = [chunk async for chunk in stream.iter_text(chunk_size=None)] - assert "".join(text_chunks) == live_async_file.source_text - - @pytest.mark.asyncio - async def test_stream_iter_lines( - self, - pdfrest_api_key: str, - pdfrest_live_base_url: str, - live_async_file: LiveFileData, - ) -> None: - async with AsyncExitStack() as stack: - client = await stack.enter_async_context( - AsyncPdfRestClient( - api_key=pdfrest_api_key, base_url=pdfrest_live_base_url - ) - ) - stream_cm = await client.files.stream(live_async_file.file.id) - stream = await stack.enter_async_context(stream_cm) - lines = [line async for line in stream.iter_lines()] - assert 
lines == live_async_file.source_text.splitlines() - - @pytest.mark.asyncio async def test_live_async_file_create_from_urls( pdfrest_api_key: str, pdfrest_live_base_url: str diff --git a/tests/test_flatten_annotations.py b/tests/test_flatten_annotations.py index b34cf57..7f4ef31 100644 --- a/tests/test_flatten_annotations.py +++ b/tests/test_flatten_annotations.py @@ -210,6 +210,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -236,6 +237,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.flatten_annotations( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_flatten_layers.py b/tests/test_flatten_layers.py index 963a448..b2cb5b6 100644 --- a/tests/test_flatten_layers.py +++ b/tests/test_flatten_layers.py @@ -198,6 +198,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -224,6 +225,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.flatten_layers( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_flatten_pdf_forms.py b/tests/test_flatten_pdf_forms.py index b8f41e6..1298f39 100644 --- a/tests/test_flatten_pdf_forms.py +++ b/tests/test_flatten_pdf_forms.py @@ -195,6 +195,7 @@ def 
handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["flags"] == ["a", "b"] assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -221,6 +222,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.flatten_pdf_forms( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"flags": ["a", "b"]}, diff --git a/tests/test_flatten_transparencies.py b/tests/test_flatten_transparencies.py index 2f50ead..cf3c921 100644 --- a/tests/test_flatten_transparencies.py +++ b/tests/test_flatten_transparencies.py @@ -215,6 +215,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) assert payload["quality"] == "high" + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -241,6 +242,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.flatten_transparencies( input_file, + output="async-custom", quality="high", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, diff --git a/tests/test_import_form_data.py b/tests/test_import_form_data.py index 10519bb..24d703e 100644 --- a/tests/test_import_form_data.py +++ b/tests/test_import_form_data.py @@ -263,6 +263,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["id"] == str(input_file.id) assert payload["data_file_id"] == str(data_file.id) + assert payload["output"] == "async-custom" assert payload["note"] == "details" return httpx.Response( 200, @@ -291,6 +292,7 @@ def handler(request: httpx.Request) -> httpx.Response: 
response = await client.import_form_data( input_file, data_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"note": "details"}, diff --git a/tests/test_linearize_pdf.py b/tests/test_linearize_pdf.py index 68e25fb..e220891 100644 --- a/tests/test_linearize_pdf.py +++ b/tests/test_linearize_pdf.py @@ -195,6 +195,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["flags"] == ["a", "b"] assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-linearized-custom" return httpx.Response( 200, json={ @@ -221,6 +222,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.linearize_pdf( input_file, + output="async-linearized-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"flags": ["a", "b"]}, diff --git a/tests/test_ocr_pdf.py b/tests/test_ocr_pdf.py index a3b8b38..f6d643e 100644 --- a/tests/test_ocr_pdf.py +++ b/tests/test_ocr_pdf.py @@ -227,7 +227,7 @@ async def test_async_ocr_pdf_request_customization( monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) payload_dump = OcrPdfPayload.model_validate( - {"files": [input_file], "languages": ["English"]} + {"files": [input_file], "languages": ["English"], "output": "custom-async-ocr"} ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) output_id = str(PdfRestFileID.generate()) captured_timeout: dict[str, float | dict[str, float] | None] = {} @@ -263,6 +263,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.ocr_pdf( input_file, + output="custom-async-ocr", extra_query={"trace": "true"}, extra_headers={"X-Debug": 
"async"}, extra_body={"debug": True}, diff --git a/tests/test_pdf_redaction_preview.py b/tests/test_pdf_redaction_preview.py index 386f2b0..1244087 100644 --- a/tests/test_pdf_redaction_preview.py +++ b/tests/test_pdf_redaction_preview.py @@ -7,12 +7,17 @@ from pydantic import ValidationError from pydantic_core import to_json -from pdfrest import PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestClient from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID from pdfrest.models._internal import PdfRedactionPreviewPayload from pdfrest.types import PdfRedactionInstruction -from .graphics_test_helpers import VALID_API_KEY, build_file_info_payload, make_pdf_file +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) @pytest.mark.parametrize( @@ -124,3 +129,61 @@ def test_preview_redactions_requires_instruction( pytest.raises(ValidationError, match="at least 1 item"), ): client.preview_redactions(input_file, redactions=[]) + + +@pytest.mark.asyncio +async def test_async_preview_redactions_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + redactions: list[PdfRedactionInstruction] = [ + {"type": "literal", "value": "Sensitive"} + ] + + payload_model_dump = PdfRedactionPreviewPayload.model_validate( + { + "files": [input_file], + "redactions": redactions, + "output": "preview-output-async", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-redacted-text-preview" + ): + body = json.loads(request.content.decode("utf-8")) + assert body == payload_model_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if 
request.method == "GET" and request.url.path == f"/resource/{output_id}": + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "preview-output-async.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.preview_redactions( + input_file, + redactions=redactions, + output="preview-output-async", + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert str(response.input_id) == str(input_file.id) + assert response.output_files[0].name == "preview-output-async.pdf" + assert response.output_files[0].type == "application/pdf" + assert response.warning is None diff --git a/tests/test_rasterize_pdf.py b/tests/test_rasterize_pdf.py index d5a189e..8c6ec78 100644 --- a/tests/test_rasterize_pdf.py +++ b/tests/test_rasterize_pdf.py @@ -194,6 +194,7 @@ def handler(request: httpx.Request) -> httpx.Response: payload = json.loads(request.content.decode("utf-8")) assert payload["debug"] == "yes" assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" return httpx.Response( 200, json={ @@ -220,6 +221,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.rasterize_pdf( input_file, + output="async-custom", extra_query={"trace": "async"}, extra_headers={"X-Debug": "async"}, extra_body={"debug": "yes"}, diff --git a/tests/test_split_pdf.py b/tests/test_split_pdf.py index 2afd4c6..5d33e55 100644 --- a/tests/test_split_pdf.py +++ b/tests/test_split_pdf.py @@ -159,6 +159,7 @@ async def test_async_split_pdf(monkeypatch: pytest.MonkeyPatch) -> None: request_payload = PdfSplitPayload.model_validate( { "files": [input_file], + "page_groups": ["1-2"], "output_prefix": "async-split", } 
).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) @@ -188,6 +189,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.split_pdf( input_file, + page_groups=["1-2"], output_prefix="async-split", )