From c42d8361a7a29d6df56a510957d30914bcf79188 Mon Sep 17 00:00:00 2001 From: mscherer Date: Tue, 12 May 2026 12:11:47 +0200 Subject: [PATCH 1/2] Add CsvStreamResponse for streaming large CSV exports Memory-efficient streaming sibling of CsvView built on the new Cake\Http\Response\AbstractStreamResponse base. Emits rows to the wire as they are produced rather than building the entire CSV in memory first, so memory use stays constant regardless of dataset size and the client sees the first row after one round trip instead of after the full export is generated. Reuses CsvView's row-formatting logic (delimiter, enclosure, escape, eol, BOM, setSeparator, excel preset, iconv/mbstring transcoding with strict/ignore/transliterate modes) so the streaming and non-streaming paths produce byte-identical output for the same configuration. A future cleanup could extract the shared row formatter; this PR keeps the duplication explicit and contained. Tears cleanly on mid-stream encoding failures: logs via Log::error() and stops emitting further rows so the client receives a valid but truncated CSV rather than a corrupt one. Tests (23) cover the wire format (header/footer, extract by path / format / callable, custom delimiter+EOL, BOM-on-first-row-only, excel preset, setSeparator), input shapes (array, generator, empty), error paths (tear on unrenderable value, tear on strict transcoding failure, footer-omitted on tear), encoding (UTF-8 to ISO-8859-1 with strict and ignore modes), and the flushEvery validation inherited from the base. The cakephp/cakephp constraint temporarily points at the feature-abstract-stream-response branch on the dereuromark fork while the upstream PR is in review. Will be flipped to dev-5.next (or ^5.4 once 5.4 releases) before merge. 
--- composer.json | 10 +- src/Http/Response/CsvStreamResponse.php | 476 ++++++++++++++++++ .../Http/Response/CsvStreamResponseTest.php | 423 ++++++++++++++++ 3 files changed, 908 insertions(+), 1 deletion(-) create mode 100644 src/Http/Response/CsvStreamResponse.php create mode 100644 tests/TestCase/Http/Response/CsvStreamResponseTest.php diff --git a/composer.json b/composer.json index 00f0aef..d1b8fc8 100644 --- a/composer.json +++ b/composer.json @@ -44,12 +44,20 @@ } ], "require": { - "cakephp/cakephp": "^5.0" + "cakephp/cakephp": "dev-feature-abstract-stream-response as 5.4.x-dev" }, "require-dev": { "phpunit/phpunit": "^10.1", "cakephp/cakephp-codesniffer": "^5.0" }, + "repositories": [ + { + "type": "vcs", + "url": "https://github.com/dereuromark/cakephp.git" + } + ], + "minimum-stability": "dev", + "prefer-stable": true, "autoload": { "psr-4": { "CsvView\\": "src/" diff --git a/src/Http/Response/CsvStreamResponse.php b/src/Http/Response/CsvStreamResponse.php new file mode 100644 index 0000000..2abea89 --- /dev/null +++ b/src/Http/Response/CsvStreamResponse.php @@ -0,0 +1,476 @@ +Articles->find()->disableBufferedResults(); + * + * return new CsvStreamResponse($rows, [ + * 'header' => ['id', 'title', 'created'], + * 'extract' => ['id', 'title', ['created', '%s']], + * ]); + * } + * ``` + * + * ### Options + * + * - `header` (array|null, default: null): A flat array of header column names + * - `footer` (array|null, default: null): A flat array of footer column names + * - `extract` (array|null, default: null): Hash-compatible paths and / or + * callables describing how to flatten each row. + * - `delimiter` (string, default: ','): CSV column delimiter + * - `enclosure` (string, default: '"'): CSV value enclosure + * - `escape` (string, default: ''): CSV escape character. Empty string is + * RFC 4180 compliant and avoids PHP 8.4's deprecation warning for non-empty + * escape values. 
+ * - `newline` (string, default: "\n"): replacement for newline characters + * found inside a field + * - `eol` (string, default: PHP_EOL): end-of-line written between rows + * - `null` (string, default: ''): replacement for null cells + * - `bom` (bool, default: false): Prepend a UTF-* BOM to the response + * - `setSeparator` (bool, default: false): Emit `sep={delimiter}\n` before the + * header. Excel-only hint. + * - `csvEncoding` (string, default: 'UTF-8'): Target encoding of the response + * - `dataEncoding` (string, default: 'UTF-8'): Source encoding of the rows + * - `transcodingExtension` (string, default: 'iconv'): 'iconv' or 'mbstring' + * - `transcodingMode` (string, default: 'strict'): 'strict', 'ignore' or + * 'transliterate' — controls how unconvertible characters are handled when + * transcoding rows. + * - `excel` (bool, default: false): Shorthand for an Excel-friendly UTF-8 + * export. When true forces `bom => true`, `eol => "\r\n"`, + * `csvEncoding => 'UTF-8'`. + * - `flushEvery` (int, default: 1): Flush output buffers every N items + * (inherited from {@see AbstractStreamResponse}) + * + * ### Mid-stream errors + * + * A row that cannot be encoded (eg. a path that resolves to an unrenderable + * non-scalar value, or a transcode failure under `strict` mode) is logged + * via `Log::error()` and the stream is torn cleanly — no further rows are + * written, the footer is omitted, and the client receives a truncated CSV. + * This trades a partial response for valid up-to-the-error CSV: callers can + * detect truncation server-side via the log entry. + * + * @see \CsvView\View\CsvView The non-streaming sibling for smaller datasets. + * @see \Cake\Http\Response\AbstractStreamResponse The streaming base class. + */ +class CsvStreamResponse extends AbstractStreamResponse +{ + /** + * Iconv extension identifier. + */ + public const EXTENSION_ICONV = 'iconv'; + + /** + * Mbstring extension identifier. 
+ */ + public const EXTENSION_MBSTRING = 'mbstring'; + + /** + * Transcoding mode: throw on any unconvertible byte / character (default). + */ + public const TRANSCODING_MODE_STRICT = 'strict'; + + /** + * Transcoding mode: silently drop unconvertible characters and keep going. + * Maps to iconv's `//IGNORE` suffix and mbstring's substitute-char `'none'`. + */ + public const TRANSCODING_MODE_IGNORE = 'ignore'; + + /** + * Transcoding mode: transliterate where possible, ignore otherwise. + * Maps to iconv's `//TRANSLIT//IGNORE` suffix. For mbstring this falls + * back to ignore (mbstring has no transliteration). + */ + public const TRANSCODING_MODE_TRANSLITERATE = 'transliterate'; + + /** + * Default streaming options. + * + * @var array + */ + protected array $_defaultConfig = [ + 'extract' => null, + 'footer' => null, + 'header' => null, + 'delimiter' => ',', + 'enclosure' => '"', + 'newline' => "\n", + 'escape' => '', + 'eol' => PHP_EOL, + 'null' => '', + 'bom' => false, + 'setSeparator' => false, + 'csvEncoding' => 'UTF-8', + 'dataEncoding' => 'UTF-8', + 'transcodingExtension' => self::EXTENSION_ICONV, + 'excel' => false, + 'transcodingMode' => self::TRANSCODING_MODE_STRICT, + 'flushEvery' => 1, + ]; + + /** + * BOM byte sequences by target encoding. + * + * @var array + */ + protected array $bomMap = []; + + /** + * Whether the next row should be prefixed with a BOM. + * + * @var bool + */ + protected bool $isFirstBom = true; + + /** + * Cached `php://temp` stream reused by `generateRow()` to format each row. + * + * @var resource|null + */ + protected $fp = null; + + /** + * @param iterable $data The rows to stream (array, generator, ResultSet, …). + * @param array $options Streaming options; see the class docblock. + */ + public function __construct(iterable $data, array $options = []) + { + parent::__construct($data, $options); + + $this->bomMap = [ + 'UTF-32BE' => chr(0x00) . chr(0x00) . chr(0xFE) . chr(0xFF), + 'UTF-32LE' => chr(0xFF) . chr(0xFE) . 
chr(0x00) . chr(0x00), + 'UTF-16BE' => chr(0xFE) . chr(0xFF), + 'UTF-16LE' => chr(0xFF) . chr(0xFE), + 'UTF-8' => chr(0xEF) . chr(0xBB) . chr(0xBF), + ]; + + if ($this->getConfig('excel')) { + $this->setConfig([ + 'bom' => true, + 'eol' => "\r\n", + 'csvEncoding' => 'UTF-8', + ]); + } + + if ( + $this->getConfig('transcodingExtension') === self::EXTENSION_ICONV + && !extension_loaded(self::EXTENSION_ICONV) + ) { + $this->setConfig('transcodingExtension', self::EXTENSION_MBSTRING); + } + } + + /** + * Close the cached row-formatting stream when the response is destroyed. + */ + public function __destruct() + { + if (is_resource($this->fp)) { + fclose($this->fp); + $this->fp = null; + } + } + + /** + * @inheritDoc + */ + protected function contentType(): string + { + return 'text/csv'; + } + + /** + * @inheritDoc + */ + protected function streamData(): void + { + $header = $this->getConfig('header'); + $footer = $this->getConfig('footer'); + + if ($header !== null) { + $row = $this->generateRow($header); + if ($row !== '') { + $this->outputAndFlush($row); + } + } + + $completed = $this->streamRows(); + + if ($completed && $footer !== null) { + $row = $this->generateRow($footer); + if ($row !== '') { + $this->outputAndFlush($row, force: true); + } + } + + $this->flushOutputBuffers(); + } + + /** + * Iterate the data, emitting one CSV row per item. + * + * @return bool true if the iteration completed without an encoding failure; + * false if an error was logged and the stream was torn. + */ + protected function streamRows(): bool + { + $index = 0; + foreach ($this->data as $item) { + try { + $values = $this->extractRowValues($item); + $row = $this->generateRow($values); + } catch (Throwable $exception) { + $this->logStreamError($exception->getMessage(), $index); + + return false; + } + + if ($row !== '') { + $this->outputAndFlush($row); + } + $index++; + } + + return true; + } + + /** + * Flatten a single item into the array of values that will form one CSV row. 
+ * + * @param mixed $item The current item from the input iterable. + * @return array + * @throws \Cake\Core\Exception\CakeException When an extracted value is neither + * null, scalar nor Stringable. + */ + protected function extractRowValues(mixed $item): array + { + if ($item instanceof EntityInterface) { + $item = $item->toArray(); + } + + $extract = $this->getConfig('extract'); + if ($extract === null) { + return array_values((array)$item); + } + + $values = []; + foreach ($extract as $formatter) { + $format = null; + if (!is_string($formatter) && is_callable($formatter)) { + $value = $formatter($item); + $pathForError = ''; + } else { + $path = $formatter; + if (is_array($formatter)) { + // Tolerate the one-element `[$path]` form: a missing format stays null. + [$path, $format] = $formatter + [null, null]; + } + $pathForError = (string)$path; + + $value = Hash::get($item, $path); + } + + // Validate the raw value before sprintf(): formatting first would turn an + // array into the string "Array" and let it slip past this check. + if ( + $value !== null + && !is_scalar($value) + && !($value instanceof Stringable) + ) { + throw new CakeException(sprintf( + 'Extract path `%s` resolved to a non-scalar `%s`. ' + . 'Use a callable formatter to flatten it, or adjust the extract path.', + $pathForError, + get_debug_type($value), + )); + } + + if ($format !== null) { + $value = sprintf($format, $value); + } + + $values[] = $value; + } + + return $values; + } + + /** + * Generate a single row of CSV text from an array of cell values. + * + * Mirrors {@see \CsvView\View\CsvView::_generateRow()} so the streaming + * and non-streaming paths emit byte-identical output for the same config. + * + * @param array|null $row Row data. + * @return string CSV-formatted row including the configured `eol`, or + * empty string if `$row` is null or empty. + */ + protected function generateRow(?array $row): string + { + if (!$row) { + return ''; + } + + if ($this->fp === null) { + $stream = 'php://temp'; + $fp = fopen($stream, 'r+'); + if ($fp === false) { + throw new CakeException(sprintf('Cannot open stream `%s`', $stream)); + } + $this->fp = $fp; + + $setSeparator = $this->getConfig('setSeparator'); + if ($setSeparator) { + fwrite($this->fp, 'sep=' . $this->getConfig('delimiter') .
"\n"); + } + } else { + ftruncate($this->fp, 0); + } + + $null = $this->getConfig('null'); + if ($null !== '') { + foreach ($row as &$field) { + if ($field === null) { + $field = $null; + } + } + unset($field); + } + + $delimiter = $this->getConfig('delimiter'); + $enclosure = $this->getConfig('enclosure'); + $newline = $this->getConfig('newline'); + $escape = $this->getConfig('escape'); + + /** @phpstan-ignore-next-line */ + $row = str_replace(["\r\n", "\n", "\r"], $newline, $row); + if ($enclosure === '') { + if (fputs($this->fp, implode($delimiter, $row) . "\n") === false) { + throw new CakeException('fputs() failed writing CSV row'); + } + } else { + if (fputcsv($this->fp, $row, $delimiter, $enclosure, $escape) === false) { + throw new CakeException('fputcsv() failed writing CSV row'); + } + } + + rewind($this->fp); + + $csv = ''; + while (($buffer = fgets($this->fp, 4096)) !== false) { + $csv .= $buffer; + } + + $eol = $this->getConfig('eol'); + if ($eol !== "\n") { + $csv = str_replace("\n", $eol, $csv); + } + + $dataEncoding = $this->getConfig('dataEncoding'); + $csvEncoding = $this->getConfig('csvEncoding'); + if ($dataEncoding !== $csvEncoding) { + $csv = $this->transcode($csv, $dataEncoding, $csvEncoding); + } + + $bom = $this->getConfig('bom'); + if ($bom && $this->isFirstBom) { + $csv = $this->getBom($csvEncoding) . $csv; + $this->isFirstBom = false; + } + + return $csv; + } + + /** + * Return the BOM byte sequence for the configured target encoding, or an + * empty string for unsupported encodings. + */ + protected function getBom(string $csvEncoding): string + { + $csvEncoding = strtoupper($csvEncoding); + + return $this->bomMap[$csvEncoding] ?? ''; + } + + /** + * Transcode a CSV row between encodings honoring the configured mode. + * + * Mirrors {@see \CsvView\View\CsvView::_transcode()}. + * + * @throws \Cake\Core\Exception\CakeException When mode is `strict` and + * iconv reports a conversion failure. 
+ */ + protected function transcode(string $csv, string $dataEncoding, string $csvEncoding): string + { + $extension = $this->getConfig('transcodingExtension'); + $mode = $this->getConfig('transcodingMode'); + + if ($extension === self::EXTENSION_ICONV) { + $targetSpec = match ($mode) { + self::TRANSCODING_MODE_IGNORE => $csvEncoding . '//IGNORE', + self::TRANSCODING_MODE_TRANSLITERATE => $csvEncoding . '//TRANSLIT//IGNORE', + default => $csvEncoding, + }; + set_error_handler(static fn(): bool => true, E_NOTICE | E_WARNING); + try { + $converted = iconv($dataEncoding, $targetSpec, $csv); + } finally { + restore_error_handler(); + } + if ($converted === false) { + if ($mode === self::TRANSCODING_MODE_STRICT) { + throw new CakeException(sprintf( + 'iconv() failed to transcode row from `%s` to `%s`.', + $dataEncoding, + $csvEncoding, + )); + } + + return ''; + } + + return $converted; + } + + if ($extension === self::EXTENSION_MBSTRING) { + $previousSubstitute = null; + if ($mode !== self::TRANSCODING_MODE_STRICT) { + $previousSubstitute = mb_substitute_character(); + mb_substitute_character('none'); + } + try { + $converted = mb_convert_encoding($csv, $csvEncoding, $dataEncoding); + } finally { + if ($previousSubstitute !== null) { + mb_substitute_character($previousSubstitute); + } + } + + return $converted; + } + + return $csv; + } +} diff --git a/tests/TestCase/Http/Response/CsvStreamResponseTest.php b/tests/TestCase/Http/Response/CsvStreamResponseTest.php new file mode 100644 index 0000000..25c3a11 --- /dev/null +++ b/tests/TestCase/Http/Response/CsvStreamResponseTest.php @@ -0,0 +1,423 @@ + ArrayLog::class]); + } + + protected function tearDown(): void + { + parent::tearDown(); + Log::drop('csvstreamtest'); + } + + /** + * Capture body output emitted by the streaming callback. 
+ */ + protected function getStreamedBody(CsvStreamResponse $response): string + { + ob_start(); + try { + (string)$response->getBody(); + } catch (Throwable $exception) { + ob_end_clean(); + + throw $exception; + } + + return ob_get_clean() ?: ''; + } + + public function testSimpleArrayStreaming(): void + { + $data = [ + ['id' => 1, 'name' => 'Alice'], + ['id' => 2, 'name' => 'Bob'], + ]; + + $response = new CsvStreamResponse($data); + $body = $this->getStreamedBody($response); + + $this->assertSame("1,Alice\n2,Bob\n", $body); + $this->assertSame('text/csv; charset=UTF-8', $response->getHeaderLine('Content-Type')); + $this->assertSame('no', $response->getHeaderLine('X-Accel-Buffering')); + } + + public function testWithHeaderRow(): void + { + $data = [ + ['id' => 1, 'name' => 'Alice'], + ['id' => 2, 'name' => 'Bob'], + ]; + + $response = new CsvStreamResponse($data, [ + 'header' => ['id', 'name'], + ]); + + $this->assertSame( + "id,name\n1,Alice\n2,Bob\n", + $this->getStreamedBody($response), + ); + } + + public function testWithFooterRow(): void + { + $data = [ + ['id' => 1, 'amount' => 10], + ['id' => 2, 'amount' => 20], + ]; + + $response = new CsvStreamResponse($data, [ + 'header' => ['id', 'amount'], + 'footer' => ['total', 30], + ]); + + $this->assertSame( + "id,amount\n1,10\n2,20\ntotal,30\n", + $this->getStreamedBody($response), + ); + } + + public function testExtractByPath(): void + { + $data = [ + ['user' => ['id' => 1, 'name' => 'Alice'], 'secret' => 'x'], + ['user' => ['id' => 2, 'name' => 'Bob'], 'secret' => 'y'], + ]; + + $response = new CsvStreamResponse($data, [ + 'header' => ['id', 'name'], + 'extract' => ['user.id', 'user.name'], + ]); + + $body = $this->getStreamedBody($response); + $this->assertSame("id,name\n1,Alice\n2,Bob\n", $body); + $this->assertStringNotContainsString('secret', $body); + } + + public function testExtractWithFormat(): void + { + $data = [ + ['id' => 1, 'amount' => 5.5], + ['id' => 2, 'amount' => 42.25], + ]; + + $response 
= new CsvStreamResponse($data, [ + 'extract' => ['id', ['amount', '%.2f']], + ]); + + $this->assertSame( + "1,5.50\n2,42.25\n", + $this->getStreamedBody($response), + ); + } + + public function testExtractWithCallable(): void + { + $data = [ + (object)['first' => 'Alice', 'last' => 'Smith'], + (object)['first' => 'Bob', 'last' => 'Jones'], + ]; + + $response = new CsvStreamResponse($data, [ + 'extract' => [ + fn($row) => $row->first . ' ' . $row->last, + ], + ]); + + $this->assertSame( + "\"Alice Smith\"\n\"Bob Jones\"\n", + $this->getStreamedBody($response), + ); + } + + public function testEmptyIterableEmitsHeaderAndFooterOnly(): void + { + $response = new CsvStreamResponse([], [ + 'header' => ['id', 'name'], + 'footer' => ['done', ''], + ]); + + $this->assertSame( + "id,name\ndone,\n", + $this->getStreamedBody($response), + ); + } + + public function testEmptyIterableNoHeaderEmitsNothing(): void + { + $response = new CsvStreamResponse([]); + + $this->assertSame('', $this->getStreamedBody($response)); + } + + public function testGeneratorInput(): void + { + $generator = function () { + yield ['id' => 1]; + yield ['id' => 2]; + yield ['id' => 3]; + }; + + $response = new CsvStreamResponse($generator()); + + $this->assertSame( + "1\n2\n3\n", + $this->getStreamedBody($response), + ); + } + + public function testCustomDelimiterAndEol(): void + { + $data = [ + ['a', 'b'], + ['c', 'd'], + ]; + + $response = new CsvStreamResponse($data, [ + 'delimiter' => ';', + 'eol' => "\r\n", + ]); + + $this->assertSame("a;b\r\nc;d\r\n", $this->getStreamedBody($response)); + } + + public function testBomAddedOnFirstRowOnly(): void + { + $data = [ + ['a', 'b'], + ['c', 'd'], + ]; + + $response = new CsvStreamResponse($data, [ + 'bom' => true, + 'csvEncoding' => 'UTF-8', + ]); + + $body = $this->getStreamedBody($response); + $expectedBom = chr(0xEF) . chr(0xBB) . chr(0xBF); + + $this->assertStringStartsWith($expectedBom, $body); + $this->assertSame($expectedBom . 
"a,b\nc,d\n", $body); + // BOM appears once + $this->assertSame(1, substr_count($body, $expectedBom)); + } + + public function testExcelPresetForcesBomCrlfEolAndUtf8(): void + { + $data = [['id' => 1, 'name' => 'Alice']]; + + $response = new CsvStreamResponse($data, [ + 'header' => ['id', 'name'], + 'excel' => true, + ]); + + $body = $this->getStreamedBody($response); + $expectedBom = chr(0xEF) . chr(0xBB) . chr(0xBF); + + $this->assertStringStartsWith($expectedBom, $body); + $this->assertStringContainsString("id,name\r\n", $body); + $this->assertStringContainsString("1,Alice\r\n", $body); + } + + public function testSetSeparatorLineEmittedBeforeHeader(): void + { + $data = [['a', 'b']]; + + $response = new CsvStreamResponse($data, [ + 'delimiter' => ';', + 'setSeparator' => true, + ]); + + $body = $this->getStreamedBody($response); + $this->assertStringStartsWith("sep=;\n", $body); + $this->assertStringContainsString("a;b\n", $body); + } + + public function testEncodingTranscodeDataToCsv(): void + { + if (!extension_loaded('iconv')) { + $this->markTestSkipped('iconv is required for this test'); + } + + $data = [['Grüße', 'Café']]; + + $response = new CsvStreamResponse($data, [ + 'dataEncoding' => 'UTF-8', + 'csvEncoding' => 'ISO-8859-1', + ]); + + $body = $this->getStreamedBody($response); + + $expected = iconv('UTF-8', 'ISO-8859-1', "Grüße,Café\n"); + $this->assertSame($expected, $body); + } + + public function testTearsCleanlyOnUnrenderableExtractValue(): void + { + $data = [ + ['ok' => 'first'], + ['ok' => ['array', 'not', 'scalar']], + ['ok' => 'never reached'], + ]; + + $response = new CsvStreamResponse($data, [ + 'extract' => ['ok'], + ]); + + $body = $this->getStreamedBody($response); + + // First row written, second row triggers the error -> tear, third row never reached + $this->assertStringStartsWith("first\n", $body); + $this->assertStringNotContainsString('never', $body); + + $messages = Log::engine('csvstreamtest')->read(); + 
$this->assertNotEmpty($messages); + $this->assertStringContainsString( + 'CsvStreamResponse encoding failed at index 1', + implode("\n", $messages), + ); + } + + public function testTearOmitsFooterRow(): void + { + $data = [ + ['ok' => 'first'], + ['ok' => ['array', 'not', 'scalar']], + ]; + + $response = new CsvStreamResponse($data, [ + 'extract' => ['ok'], + 'footer' => ['total'], + ]); + + $body = $this->getStreamedBody($response); + $this->assertStringNotContainsString('total', $body); + } + + public function testNullCellReplacement(): void + { + $data = [ + ['id' => 1, 'name' => null], + ['id' => 2, 'name' => 'Bob'], + ]; + + $response = new CsvStreamResponse($data, [ + 'null' => 'NULL', + ]); + + $this->assertSame("1,NULL\n2,Bob\n", $this->getStreamedBody($response)); + } + + public function testEscapedNewlineInsideField(): void + { + $data = [ + ['id' => 1, 'note' => "line1\nline2"], + ]; + + $response = new CsvStreamResponse($data, [ + 'newline' => ' / ', + ]); + + // The embedded newline is replaced with ' / ' before the row reaches + // fputcsv(); the field is still quoted on the wire because fputcsv() + // encloses any field that contains a space.
+ $this->assertSame("1,\"line1 / line2\"\n", $this->getStreamedBody($response)); + } + + public function testCustomEnclosureWithFputcsv(): void + { + // Enclosure '"' is the default; verify it triggers when fields contain commas + $data = [ + ['hello, world', 'plain'], + ]; + + $response = new CsvStreamResponse($data); + $this->assertSame("\"hello, world\",plain\n", $this->getStreamedBody($response)); + } + + public function testInvalidFlushEveryThrowsFromAbstract(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('`flushEvery` must be an integer greater than or equal to 1'); + + new CsvStreamResponse([], ['flushEvery' => 0]); + } + + public function testStrictTranscodingFailureLogs(): void + { + if (!extension_loaded('iconv')) { + $this->markTestSkipped('iconv is required for this test'); + } + + $data = [ + ['ok' => 'first'], + // ✦ (U+2728) has no ISO-8859-1 representation; strict mode raises + ['ok' => "second \u{2728}"], + ['ok' => 'never reached'], + ]; + + $response = new CsvStreamResponse($data, [ + 'extract' => ['ok'], + 'dataEncoding' => 'UTF-8', + 'csvEncoding' => 'ISO-8859-1', + 'transcodingMode' => CsvStreamResponse::TRANSCODING_MODE_STRICT, + ]); + + $body = $this->getStreamedBody($response); + + $this->assertStringStartsWith('first', $body); + $this->assertStringNotContainsString('never', $body); + + $messages = Log::engine('csvstreamtest')->read(); + $this->assertNotEmpty($messages); + $this->assertStringContainsString( + 'CsvStreamResponse encoding failed at index 1', + implode("\n", $messages), + ); + } + + public function testIgnoreTranscodingDropsUnconvertibleCharacters(): void + { + if (!extension_loaded('iconv')) { + $this->markTestSkipped('iconv is required for this test'); + } + + // Double quotes are required: `\u{...}` is not interpreted in single-quoted strings. + $data = [["hello \u{2728} world"]]; + + $response = new CsvStreamResponse($data, [ + 'dataEncoding' => 'UTF-8', + 'csvEncoding' => 'ISO-8859-1', + 'transcodingMode' => CsvStreamResponse::TRANSCODING_MODE_IGNORE, +
]); + + $body = $this->getStreamedBody($response); + + // The ✦ character is dropped; the surrounding text survives + $this->assertStringContainsString('hello', $body); + $this->assertStringContainsString('world', $body); + } + + public function testContentTypeIsTextCsv(): void + { + $response = new CsvStreamResponse([]); + + $this->assertSame('text/csv; charset=UTF-8', $response->getHeaderLine('Content-Type')); + } +} From d772f982ac1fc817c650b8b08f58d01ffaa37dd2 Mon Sep 17 00:00:00 2001 From: mscherer Date: Tue, 12 May 2026 12:44:22 +0200 Subject: [PATCH 2/2] Document CsvStreamResponse in README Add a "Streaming large exports" section covering: when to reach for the streaming response over CsvView, the controller-only usage pattern, the full option list (row formatting reused from CsvView plus the inherited flushEvery), the excel shorthand, custom filename via withDownload(), and the tear-cleanly mid-stream error contract. --- README.md | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/README.md b/README.md index b03f5cd..b1132b5 100644 --- a/README.md +++ b/README.md @@ -362,3 +362,122 @@ $view->set(compact('data')); // And Save the file file_put_contents('/full/path/to/file.csv', $view->render()); ``` + +## Streaming large exports + +`CsvView` builds the whole CSV in memory before sending it. That is fine for a +few thousand rows but becomes a problem for very large exports — memory grows +with the row count and the user does not see the first byte until the whole +file is generated. + +For those cases the plugin provides `CsvStreamResponse`, a response class that +writes rows directly to the wire as the iterable yields them. Memory stays +constant regardless of dataset size and time-to-first-byte drops to "after the +first row". + +> Requires CakePHP **5.4+** for `Cake\Http\Response\AbstractStreamResponse`. 
+ +### Usage + +Return a `CsvStreamResponse` from the controller — no view layer involved: + +```php +use CsvView\Http\Response\CsvStreamResponse; + +public function export() +{ + $rows = $this->Articles->find()->disableBufferedResults(); + + return new CsvStreamResponse($rows, [ + 'header' => ['id', 'title', 'created'], + 'extract' => ['id', 'title', ['created', '%s']], + ]); +} +``` + +Any `iterable` works: an array, a generator, a `SelectQuery`, a `ResultSet`, +anything implementing `Traversable`. For ORM queries call +`disableBufferedResults()` so the driver streams rows one at a time instead of +loading the full result set in memory first; result formatters such as `map()` +or `combine()` buffer internally and will defeat the streaming. + +### Options + +The response accepts the same row-formatting options as `CsvView` plus a few +streaming-specific ones inherited from the base class. + +Row formatting (matches `CsvView` byte-for-byte): + +- `header` (`array|null`, default `null`) — flat array of header column names. +- `footer` (`array|null`, default `null`) — flat array of footer column names. +- `extract` (`array|null`, default `null`) — Hash-compatible paths and/or + callables describing how to flatten each row. Same shape as `CsvView`: + `[$path]`, `[$path, $sprintfFormat]`, or `fn($row) => …`. +- `delimiter` (`string`, default `','`). +- `enclosure` (`string`, default `'"'`). +- `escape` (`string`, default `''`) — empty string is RFC 4180 compliant. It is + always passed to `fputcsv()` explicitly, so PHP 8.4's deprecation of the + implicit default escape character is never triggered. +- `newline` (`string`, default `"\n"`) — replacement for newline characters + found inside a field. +- `eol` (`string`, default `PHP_EOL`) — line ending written between rows. +- `null` (`string`, default `''`) — replacement for `null` cells. +- `bom` (`bool`, default `false`) — prepend a UTF-* BOM. +- `setSeparator` (`bool`, default `false`) — emit `sep={delimiter}` before the + header (Excel hint).
+- `csvEncoding` / `dataEncoding` (`string`, default `'UTF-8'`) — transcoding + pair. Uses `iconv` if available, falls back to `mbstring`. +- `transcodingMode` (`string`, default `'strict'`) — `'strict'`, `'ignore'`, + or `'transliterate'`. Controls behavior on unconvertible characters. +- `excel` (`bool`, default `false`) — shorthand that forces `bom => true`, + `eol => "\r\n"`, `csvEncoding => 'UTF-8'` for Excel-friendly UTF-8 exports. + +Streaming behavior (inherited from `AbstractStreamResponse`): + +- `flushEvery` (`int`, default `1`) — flush output buffers every N rows. The + default flushes after every row so clients see data as soon as possible; + raise it for fewer flush syscalls at the cost of slightly delayed first-byte. + +### Excel example + +```php +return new CsvStreamResponse($rows, [ + 'header' => ['id', 'title', 'amount'], + 'extract' => ['id', 'title', ['amount', '%.2f']], + 'excel' => true, +]); +``` + +Produces a UTF-8 BOM, CRLF line endings and UTF-8 encoding — opens cleanly in +Excel on Windows. + +### Forcing a download filename + +Use the standard CakePHP response API; `CsvStreamResponse` is a regular +`Cake\Http\Response`: + +```php +return (new CsvStreamResponse($rows, $options)) + ->withDownload('articles-' . date('Y-m-d') . '.csv'); +``` + +### Error handling — tear cleanly + +If a row cannot be encoded (unrenderable extract path, strict-mode transcoding +failure, …) the stream tears: the response logs the failure via `Log::error()` +and stops emitting further rows. The client receives a valid but truncated +CSV; the footer is omitted. Server-side logging surfaces the failure in Sentry +or whatever log adapter is configured. + +The trade-off is intentional: once headers and the first byte have been sent +the HTTP status can no longer change to 500, so emitting an invalid CSV with +inline error markers would be worse than a clean truncation plus a server-side +log entry. 
+ +### When to use which + +| Need | Use | +|----------------------------------------------------------------------|----------------------| +| Small / medium export, want to keep it inside the view layer | `CsvView` (existing) | +| Large export, memory pressure, slow time-to-first-byte | `CsvStreamResponse` | +| Save the CSV to disk on the server | `CsvView` (use `ViewBuilder` as shown above) | +| Ship rows over the wire as they are computed | `CsvStreamResponse` |