From 628ce5d0d489383db3ef0df1ac237e4b1e4027ec Mon Sep 17 00:00:00 2001
From: mscherer
Date: Mon, 11 May 2026 21:15:47 +0200
Subject: [PATCH] Add `excel` shorthand for Excel-friendly UTF-8 exports.

Microsoft Excel on Windows does not recognise a CSV as UTF-8 unless the
file has a byte-order mark, uses CRLF line endings, and is written in
UTF-8 (`csvEncoding`). Users have had to remember and set all three
options individually each time. This is a recurring source of "opens as
mojibake" reports.

Add a single `excel` config key (default false). When enabled, the view
forces `bom => true`, `eol => "\r\n"`, and `csvEncoding => 'UTF-8'` at
serialize time. The preset wins for those three keys; other CSV options
(delimiter, enclosure, header, extract, setSeparator, etc.) are
independent and behave normally.

The preset runs in `_serialize()` rather than `initialize()` so it takes
effect regardless of when `excel` is set, including the common test
pattern of constructing the view and then calling `setConfig()`.

README documents the new option under a new "Excel-friendly UTF-8
export" heading in the Usage section.
---
 README.md                           | 30 ++++++++++++++++++++
 src/View/CsvView.php                | 29 +++++++++++++++++++
 tests/TestCase/View/CsvViewTest.php | 44 +++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+)

diff --git a/README.md b/README.md
index 4f506e6..b03f5cd 100644
--- a/README.md
+++ b/README.md
@@ -263,6 +263,36 @@ The currently supported encoding extensions are as follows:
 - `iconv`
 - `mbstring`
 
+#### Excel-friendly UTF-8 export
+
+Microsoft Excel on Windows does not recognise a CSV as UTF-8 unless the file has
+a byte-order mark, uses CRLF line endings, and is actually encoded as UTF-8.
+Setting all three options individually each time is repetitive and easy to get wrong.
+
+The `excel` shorthand sets the right defaults in one go:
+
+```php
+$this->viewBuilder()
+    ->setClassName('CsvView.Csv')
+    ->setOptions([
+        'serialize' => 'data',
+        'excel' => true,
+    ]);
+```
+
+`excel => true` is equivalent to:
+
+```php
+'bom' => true,
+'eol' => "\r\n",
+'csvEncoding' => 'UTF-8',
+```
+
+The shorthand always wins for the three keys it controls; if you need a
+different combination (e.g. UTF-16, no BOM) do not enable `excel` and set the
+individual keys yourself instead. Other CSV options (`delimiter`, `enclosure`,
+`setSeparator`, `header`, `extract`, etc.) are independent and behave normally.
+
 #### Setting the downloaded file name
 
 By default, the downloaded file will be named after the last segment of the URL
diff --git a/src/View/CsvView.php b/src/View/CsvView.php
index 60ec9c7..a76afc5 100644
--- a/src/View/CsvView.php
+++ b/src/View/CsvView.php
@@ -144,6 +144,10 @@ class CsvView extends SerializedView
      * - 'csvEncoding': (default 'UTF-8') CSV file encoding
      * - 'dataEncoding': (default 'UTF-8') Encoding of data to be serialized
      * - 'transcodingExtension': (default 'iconv') PHP extension to use for character encoding conversion
+     * - 'excel': (default false) Shorthand for an Excel-friendly UTF-8 export.
+     *   When true, sets `bom => true`, `eol => "\r\n"`, and `csvEncoding => 'UTF-8'`.
+     *   These specific keys are forced; if you need a different combination
+     *   do not enable `excel` and set them individually instead.
* * @var array */ @@ -163,6 +167,7 @@ class CsvView extends SerializedView 'csvEncoding' => 'UTF-8', 'dataEncoding' => 'UTF-8', 'transcodingExtension' => self::EXTENSION_ICONV, + 'excel' => false, ]; /** @@ -210,6 +215,7 @@ public static function contentType(): string protected function _serialize(array|string $serialize): string { $this->resetState(); + $this->_applyExcelPreset(); $this->_renderRow($this->getConfig('header')); $this->_renderContent(); @@ -246,6 +252,29 @@ public function __destruct() } } + /** + * Apply the `excel` shorthand if enabled: BOM + CRLF EOL + UTF-8 encoding, + * the three options Excel needs to open a UTF-8 CSV correctly on Windows. + * + * Applied at serialize-time (rather than `initialize()`) so the preset + * takes effect regardless of when `excel` is set — including the test + * pattern of constructing the view and then calling `setConfig()`. + * + * @return void + */ + protected function _applyExcelPreset(): void + { + if (!$this->getConfig('excel')) { + return; + } + + $this->setConfig([ + 'bom' => true, + 'eol' => "\r\n", + 'csvEncoding' => 'UTF-8', + ]); + } + /** * Renders the body of the data to the csv * diff --git a/tests/TestCase/View/CsvViewTest.php b/tests/TestCase/View/CsvViewTest.php index 117eb32..283ca4b 100644 --- a/tests/TestCase/View/CsvViewTest.php +++ b/tests/TestCase/View/CsvViewTest.php @@ -597,4 +597,48 @@ public function testRenderViaExtractArrayValueThrows() ); } } + + /** + * `excel => true` is a shorthand that forces the three options Excel + * needs to open a UTF-8 CSV correctly on Windows: BOM, CRLF line + * endings, and UTF-8 encoding. + * + * @return void + */ + public function testExcelPresetEmitsBomCrlfAndUtf8() + { + $data = [['Möhre', 'café'], ['ü', 'ß']]; + $this->view->set(['data' => $data]) + ->setConfig(['serialize' => 'data', 'excel' => true]); + + $bom = chr(0xEF) . chr(0xBB) . chr(0xBF); + $expected = $bom . 'Möhre,café' . "\r\n" . 'ü,ß' . 
"\r\n"; + + $this->assertSame($expected, $this->view->render()); + } + + /** + * The Excel preset wins for the three keys it controls even when the + * user has explicitly set them to other values. `excel => true` is a + * single switch; for a different combination set the individual keys + * yourself instead of enabling the preset. + * + * @return void + */ + public function testExcelPresetOverridesIndividualKeys() + { + $data = [['a', 'b']]; + $this->view->set(['data' => $data]) + ->setConfig([ + 'serialize' => 'data', + 'excel' => true, + 'bom' => false, + 'eol' => "\n", + ]); + + $output = $this->view->render(); + $bom = chr(0xEF) . chr(0xBB) . chr(0xBF); + $this->assertStringStartsWith($bom, $output); + $this->assertStringEndsWith("\r\n", $output); + } }