From e5f5befb9ed472b87b920dc014325f0bf0cc6a20 Mon Sep 17 00:00:00 2001 From: Henrique Moody Date: Sat, 31 Jan 2026 01:46:01 +0100 Subject: [PATCH 1/2] Add UppercaseFormatter with proper UTF-8 support The new UppercaseFormatter provides reliable UTF-8 aware uppercase conversion for international text, ensuring accented characters and non-Latin scripts are handled correctly using mb_strtoupper(). This formatter is essential for applications requiring proper internationalization support when manipulating text in various languages like French, German, Turkish, Greek, Cyrillic, and CJK languages. Includes comprehensive tests covering ASCII, Latin accents, non-Latin scripts, emoji, combining diacritics, right-to-left text, multi-byte characters, and mixed content scenarios. Assisted-by: OpenCode (GLM-4.7) --- README.md | 1 + docs/UppercaseFormatter.md | 88 +++++++++ src/Mixin/Builder.php | 2 + src/Mixin/Chain.php | 2 + src/UppercaseFormatter.php | 21 ++ tests/Unit/UppercaseFormatterTest.php | 269 ++++++++++++++++++++++++++ 6 files changed, 383 insertions(+) create mode 100644 docs/UppercaseFormatter.md create mode 100644 src/UppercaseFormatter.php create mode 100644 tests/Unit/UppercaseFormatterTest.php diff --git a/README.md b/README.md index 3568c6c..6ca6c69 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ See the [PlaceholderFormatter documentation](docs/PlaceholderFormatter.md) and [ | [PlaceholderFormatter](docs/PlaceholderFormatter.md) | Template interpolation with placeholder replacement | | [SecureCreditCardFormatter](docs/SecureCreditCardFormatter.md) | Masked credit card formatting for secure display | | [TimeFormatter](docs/TimeFormatter.md) | Time promotion (mil, c, dec, y, mo, w, d, h, min, s, ms, us, ns) | +| [UppercaseFormatter](docs/UppercaseFormatter.md) | Convert string to uppercase | ## Contributing diff --git a/docs/UppercaseFormatter.md b/docs/UppercaseFormatter.md new file mode 100644 index 0000000..43859bd --- /dev/null +++ 
b/docs/UppercaseFormatter.md @@ -0,0 +1,88 @@ + + +# UppercaseFormatter + +The `UppercaseFormatter` converts strings to uppercase with proper UTF-8 character support for international text. + +## Usage + +### Basic Usage + +```php +use Respect\StringFormatter\UppercaseFormatter; + +$formatter = new UppercaseFormatter(); + +echo $formatter->format('hello world'); +// Outputs: "HELLO WORLD" +``` + +### Unicode Characters + +```php +use Respect\StringFormatter\UppercaseFormatter; + +$formatter = new UppercaseFormatter(); + +echo $formatter->format('café français'); +// Outputs: "CAFÉ FRANÇAIS" + +echo $formatter->format('こんにちは'); +// Outputs: "こんにちは" (Japanese kana has no letter case, so it is returned unchanged) +``` + +### Mixed Content + +```php +use Respect\StringFormatter\UppercaseFormatter; + +$formatter = new UppercaseFormatter(); + +echo $formatter->format('Hello World 😊'); +// Outputs: "HELLO WORLD 😊" +``` + +## API + +### `UppercaseFormatter::__construct` + +- `__construct()` + +Creates a new uppercase formatter instance. + +### `format` + +- `format(string $input): string` + +Converts the input string to uppercase using UTF-8 aware conversion. + +**Parameters:** + +- `$input`: The string to convert to uppercase + +**Returns:** The uppercase string + +## Examples + +| Input | Output | Description | +| ------------ | ------------ | --------------------------------------- | +| `hello` | `HELLO` | Simple ASCII text | +| `café` | `CAFÉ` | Latin characters with accents | +| `привет` | `ПРИВЕТ` | Cyrillic text | +| `こんにちは` | `こんにちは` | Japanese text (no letter case, unchanged) | +| `Hello 😊` | `HELLO 😊` | Text with emoji | +| `éîôû` | `ÉÎÔÛ` | Multiple accented characters | + +## Notes + +- Uses `mb_strtoupper()` for proper Unicode handling +- Preserves accent marks and diacritical marks +- Converts every cased Unicode script (Latin, Cyrillic, Greek, etc.); caseless scripts such as CJK, Arabic, Hebrew, and Thai are returned unchanged 
+- Emoji and special symbols are preserved unchanged
+- Combining diacritics are properly handled
+- Numbers and special characters remain unchanged
+- Empty strings return empty strings
diff --git a/src/Mixin/Builder.php b/src/Mixin/Builder.php
index 80c5728..ae06b5f 100644
--- a/src/Mixin/Builder.php
+++ b/src/Mixin/Builder.php
@@ -48,4 +48,6 @@ public static function pattern(string $pattern): FormatterBuilder;
     public static function placeholder(array $parameters): FormatterBuilder;
 
     public static function time(string $unit): FormatterBuilder;
+
+    public static function uppercase(): FormatterBuilder;
 }
diff --git a/src/Mixin/Chain.php b/src/Mixin/Chain.php
index 00e8f17..9900edd 100644
--- a/src/Mixin/Chain.php
+++ b/src/Mixin/Chain.php
@@ -48,4 +48,6 @@ public function pattern(string $pattern): FormatterBuilder;
     public function placeholder(array $parameters): FormatterBuilder;
 
     public function time(string $unit): FormatterBuilder;
+
+    public function uppercase(): FormatterBuilder;
 }
diff --git a/src/UppercaseFormatter.php b/src/UppercaseFormatter.php
new file mode 100644
index 0000000..42a5293
--- /dev/null
+++ b/src/UppercaseFormatter.php
@@ -0,0 +1,31 @@
+<?php
+
+/*
+ * NOTE(review): the text of this header comment is missing from this copy
+ * of the patch; only its closing marker is present. Restore the original
+ * wording from the upstream repository before applying.
+ */
+
+declare(strict_types=1);
+
+namespace Respect\StringFormatter;
+
+use function mb_strtoupper;
+
+/**
+ * Converts a string to uppercase, always treating the input as UTF-8.
+ */
+final readonly class UppercaseFormatter implements Formatter
+{
+    /**
+     * Returns $input with every cased character mapped to its uppercase form.
+     *
+     * Characters that have no uppercase mapping are returned unchanged.
+     */
+    public function format(string $input): string
+    {
+        // Pin the encoding: when it is omitted, mb_strtoupper() falls back to mb_internal_encoding(),
+        // a process-wide setting the host application may have changed to something other than UTF-8.
+        return mb_strtoupper($input, 'UTF-8');
+    }
+}
diff --git a/tests/Unit/UppercaseFormatterTest.php b/tests/Unit/UppercaseFormatterTest.php
new file mode 100644
index 0000000..f198f17
--- /dev/null
+++ b/tests/Unit/UppercaseFormatterTest.php
@@ -0,0 +1,269 @@
+<?php
+
+/*
+ * NOTE(review): the text of this header comment is missing from this copy
+ * of the patch; only its closing marker is present. Restore the original
+ * wording from the upstream repository before applying.
+ */
+
+declare(strict_types=1);
+
+namespace Respect\StringFormatter\Test\Unit;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\TestCase;
+use Respect\StringFormatter\UppercaseFormatter;
+
+#[CoversClass(UppercaseFormatter::class)]
+final class UppercaseFormatterTest extends TestCase
+{
+    /** Uppercases plain ASCII samples, leaving digits and punctuation alone. */
+    #[Test]
+    #[DataProvider('providerForValidFormattedString')]
+    public function testShouldFormatString(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** An empty input produces an empty output. */
+    #[Test]
+    public function testShouldHandleEmptyString(): void
+    {
+        self::assertSame(
+            '',
+            (new UppercaseFormatter())->format(''),
+        );
+    }
+
+    /** Uppercases words from several Latin-script European languages. */
+    #[Test]
+    #[DataProvider('providerForUnicodeString')]
+    public function testShouldHandleUnicodeCharacters(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Uppercases accented Latin letters while keeping their diacritics. */
+    #[Test]
+    #[DataProvider('providerForLatinAccents')]
+    public function testShouldHandleLatinCharactersWithAccents(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Uppercases Greek and Cyrillic; caseless scripts pass through as-is. */
+    #[Test]
+    #[DataProvider('providerForNonLatinScripts')]
+    public function testShouldHandleNonLatinScripts(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Leaves emoji and symbols untouched while uppercasing nearby letters. */
+    #[Test]
+    #[DataProvider('providerForEmojiAndSpecialChars')]
+    public function testShouldHandleEmojiAndSpecialCharacters(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Uppercases base letters and keeps the combining marks that follow them. */
+    #[Test]
+    #[DataProvider('providerForCombiningDiacritics')]
+    public function testShouldHandleCombiningDiacritics(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Leaves Arabic and Hebrew unchanged, including next to Latin text. */
+    #[Test]
+    #[DataProvider('providerForRightToLeft')]
+    public function testShouldHandleRightToLeftText(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Uppercases single multi-byte letters and words that contain them. */
+    #[Test]
+    #[DataProvider('providerForMultiByte')]
+    public function testShouldHandleMultiByteCharacters(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Leaves digits, punctuation and whitespace unchanged. */
+    #[Test]
+    #[DataProvider('providerForNumbersAndSpecial')]
+    public function testShouldHandleNumbersAndSpecialChars(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** Handles inputs that mix scripts, digits and emoji. */
+    #[Test]
+    #[DataProvider('providerForMixed')]
+    public function testShouldHandleMixedContent(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new UppercaseFormatter())->format($input),
+        );
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForValidFormattedString(): array
+    {
+        return [
+            'empty string' => ['', ''],
+            'single lowercase letter' => ['a', 'A'],
+            'all lowercase' => ['hello', 'HELLO'],
+            'already uppercase' => ['HELLO', 'HELLO'],
+            'mixed case' => ['Hello World', 'HELLO WORLD'],
+            'with punctuation' => ['hello, world!', 'HELLO, WORLD!'],
+            'with numbers' => ['hello123', 'HELLO123'],
+            'single word' => ['test', 'TEST'],
+            'multiple words' => ['test string case', 'TEST STRING CASE'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForUnicodeString(): array
+    {
+        return [
+            'german umlauts' => ['über', 'ÜBER'],
+            'french accents' => ['café', 'CAFÉ'],
+            'spanish tilde' => ['niño', 'NIÑO'],
+            'portuguese' => ['coração', 'CORAÇÃO'],
+            'icelandic' => ['þingvellir', 'ÞINGVELLIR'],
+            'scandinavian' => ['ørsted', 'ØRSTED'],
+            'polish' => ['łęski', 'ŁĘSKI'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForLatinAccents(): array
+    {
+        return [
+            'c-cedilla' => ['café français', 'CAFÉ FRANÇAIS'],
+            'umlauts' => ['äöü', 'ÄÖÜ'],
+            'tilde' => ['ãñõ', 'ÃÑÕ'],
+            'circumflex' => ['êîôû', 'ÊÎÔÛ'],
+            'acute' => ['áéíóú', 'ÁÉÍÓÚ'],
+            'grave' => ['àèìòù', 'ÀÈÌÒÙ'],
+            'mixed accents' => ['résumé déjà vu', 'RÉSUMÉ DÉJÀ VU'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForNonLatinScripts(): array
+    {
+        return [
+            'greek lowercase' => ['γεια σας', 'ΓΕΙΑ ΣΑΣ'], // the final sigma (ς) is expected to become Σ as well
+            'cyrillic lowercase' => ['привет мир', 'ПРИВЕТ МИР'],
+            'arabic' => ['مرحبا', 'مرحبا'],
+            'hebrew' => ['שלום', 'שלום'],
+            'thai' => ['สวัสดี', 'สวัสดี'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForEmojiAndSpecialChars(): array
+    {
+        return [
+            'smiley face' => ['hello 😊', 'HELLO 😊'],
+            'multiple emoji' => ['hi 👋 bye 👋', 'HI 👋 BYE 👋'],
+            'hearts' => ['❤️ love ❤️', '❤️ LOVE ❤️'],
+            'special symbols' => ['© ™ ®', '© ™ ®'],
+            'math symbols' => ['∑ π ∫', '∑ Π ∫'], // π is a cased Greek letter, so only it is expected to change
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForCombiningDiacritics(): array
+    {
+        return [
+            'e with combining acute' => ["e\u{0301}", "E\u{0301}"],
+            'a with combining grave' => ["a\u{0300}", "A\u{0300}"],
+            'multiple diacritics' => ["e\u{0301}\u{0301}", "E\u{0301}\u{0301}"],
+            'word with combining marks' => ["cafe\u{0301}", "CAFE\u{0301}"],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForRightToLeft(): array
+    {
+        return [
+            'arabic word' => ['مرحبا', 'مرحبا'],
+            'hebrew word' => ['שלום', 'שלום'],
+            'mixed direction' => ['hello مرحبا', 'HELLO مرحبا'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForMultiByte(): array
+    {
+        return [
+            'e-acute' => ['é', 'É'],
+            'u-umlaut' => ['ü', 'Ü'],
+            'greek sigma' => ['σ', 'Σ'],
+            'cyrillic de' => ['д', 'Д'],
+            'polish l-stroke' => ['ł', 'Ł'],
+            'full accented word' => ['résumé', 'RÉSUMÉ'],
+            'mixed multibyte and ascii' => ['über cool', 'ÜBER COOL'],
+            'multibyte with cjk' => ['café你好', 'CAFÉ你好'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForNumbersAndSpecial(): array
+    {
+        return [
+            'digits only' => ['1234567890', '1234567890'],
+            'mixed alphanumeric' => ['abc123def', 'ABC123DEF'],
+            'special chars only' => ['!@#$%^&*()', '!@#$%^&*()'],
+            'whitespace' => [' ', ' '],
+            'tabs and newlines' => ["hello\tworld\n", "HELLO\tWORLD\n"],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForMixed(): array
+    {
+        return [
+            'unicode with numbers' => ['café123', 'CAFÉ123'],
+            'emoji with text' => ['Hello World 😊', 'HELLO WORLD 😊'],
+            'cjk with latin' => ['Hello你好', 'HELLO你好'],
+            'mixed scripts' => ['Hello 世界 Мир', 'HELLO 世界 МИР'],
+            'complex string' => ['CAFé 123 😊 你好', 'CAFÉ 123 😊 你好'],
+        ];
+    }
+}
From 7d0aa0d3813fa89b54e044d51a42eb3a4a082f20 Mon Sep 17 00:00:00 2001 From: Henrique Moody Date: Sat, 31 Jan 2026 01:46:05 +0100 Subject: [PATCH 2/2] Add LowercaseFormatter with proper UTF-8 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new LowercaseFormatter provides reliable UTF-8 aware lowercase conversion for international text, ensuring accented characters and Turkish special cases (İ/i) are handled correctly using mb_strtolower(). This formatter complements UppercaseFormatter and is essential for applications requiring proper internationalization support when manipulating text in various languages including those with special character mapping rules. Includes comprehensive tests covering ASCII, Latin accents, Turkish characters, non-Latin scripts, emoji, combining diacritics, right-to-left text, multi-byte characters, and mixed content. 
Assisted-by: OpenCode (GLM-4.7) --- README.md | 1 + docs/LowercaseFormatter.md | 88 ++++++++ src/LowercaseFormatter.php | 21 ++ src/Mixin/Builder.php | 2 + src/Mixin/Chain.php | 2 + tests/Unit/LowercaseFormatterTest.php | 290 ++++++++++++++++++++++++++ 6 files changed, 404 insertions(+) create mode 100644 docs/LowercaseFormatter.md create mode 100644 src/LowercaseFormatter.php create mode 100644 tests/Unit/LowercaseFormatterTest.php diff --git a/README.md b/README.md index 6ca6c69..5df0fbd 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ See the [PlaceholderFormatter documentation](docs/PlaceholderFormatter.md) and [ | [ImperialAreaFormatter](docs/ImperialAreaFormatter.md) | Imperial area promotion (in², ft², yd², ac, mi²) | | [ImperialLengthFormatter](docs/ImperialLengthFormatter.md) | Imperial length promotion (in, ft, yd, mi) | | [ImperialMassFormatter](docs/ImperialMassFormatter.md) | Imperial mass promotion (oz, lb, st, ton) | +| [LowercaseFormatter](docs/LowercaseFormatter.md) | Convert string to lowercase | | [MaskFormatter](docs/MaskFormatter.md) | Range-based string masking with Unicode support | | [MassFormatter](docs/MassFormatter.md) | Metric mass promotion (mg, g, kg, t) | | [MetricFormatter](docs/MetricFormatter.md) | Metric length promotion (mm, cm, m, km) | diff --git a/docs/LowercaseFormatter.md b/docs/LowercaseFormatter.md new file mode 100644 index 0000000..1becf63 --- /dev/null +++ b/docs/LowercaseFormatter.md @@ -0,0 +1,88 @@ + + +# LowercaseFormatter + +The `LowercaseFormatter` converts strings to lowercase with proper UTF-8 character support for international text. 
+ +## Usage + +### Basic Usage + +```php +use Respect\StringFormatter\LowercaseFormatter; + +$formatter = new LowercaseFormatter(); + +echo $formatter->format('HELLO WORLD'); +// Outputs: "hello world" +``` + +### Unicode Characters + +```php +use Respect\StringFormatter\LowercaseFormatter; + +$formatter = new LowercaseFormatter(); + +echo $formatter->format('CAFÉ FRANÇAIS'); +// Outputs: "café français" + +echo $formatter->format('コンニチハ'); +// Outputs: "コンニチハ" +``` + +### Mixed Content + +```php +use Respect\StringFormatter\LowercaseFormatter; + +$formatter = new LowercaseFormatter(); + +echo $formatter->format('HELLO WORLD 😊'); +// Outputs: "hello world 😊" +``` + +## API + +### `LowercaseFormatter::__construct` + +- `__construct()` + +Creates a new lowercase formatter instance. + +### `format` + +- `format(string $input): string` + +Converts the input string to lowercase using UTF-8 aware conversion. + +**Parameters:** + +- `$input`: The string to convert to lowercase + +**Returns:** The lowercase string + +## Examples + +| Input | Output | Description | +| ------------ | ------------ | ----------------------------- | +| `HELLO` | `hello` | Simple ASCII text | +| `CAFÉ` | `café` | Latin characters with accents | +| `ПРИВЕТ` | `привет` | Cyrillic text | +| `コンニチハ` | `コンニチハ` | Japanese text | +| `HELLO 😊` | `hello 😊` | Text with emoji | +| `ÉÎÔÛ` | `éîôû` | Multiple accented characters | + +## Notes + +- Uses `mb_strtolower()` for proper Unicode handling +- Preserves accent marks and diacritical marks +- Works with all Unicode scripts (Latin, Cyrillic, Greek, CJK, etc.) 
+- Emoji and special symbols are preserved unchanged
+- Combining diacritics are properly handled
+- Numbers and special characters remain unchanged
+- Empty strings return empty strings
diff --git a/src/LowercaseFormatter.php b/src/LowercaseFormatter.php
new file mode 100644
index 0000000..906e256
--- /dev/null
+++ b/src/LowercaseFormatter.php
@@ -0,0 +1,31 @@
+<?php
+
+/*
+ * NOTE(review): the text of this header comment is missing from this copy
+ * of the patch; only its closing marker is present. Restore the original
+ * wording from the upstream repository before applying.
+ */
+
+declare(strict_types=1);
+
+namespace Respect\StringFormatter;
+
+use function mb_strtolower;
+
+/**
+ * Converts a string to lowercase, always treating the input as UTF-8.
+ */
+final readonly class LowercaseFormatter implements Formatter
+{
+    /**
+     * Returns $input with every cased character mapped to its lowercase form.
+     *
+     * Characters that have no lowercase mapping are returned unchanged.
+     */
+    public function format(string $input): string
+    {
+        // Pin the encoding: when it is omitted, mb_strtolower() falls back to mb_internal_encoding(),
+        // a process-wide setting the host application may have changed to something other than UTF-8.
+        return mb_strtolower($input, 'UTF-8');
+    }
+}
diff --git a/src/Mixin/Builder.php b/src/Mixin/Builder.php
index ae06b5f..85bbfc8 100644
--- a/src/Mixin/Builder.php
+++ b/src/Mixin/Builder.php
@@ -30,6 +30,8 @@ public static function imperialMass(string $unit): FormatterBuilder;
 
     public static function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder;
 
+    public static function lowercase(): FormatterBuilder;
+
     public static function mask(string $range, string $replacement = '*'): FormatterBuilder;
 
     public static function metric(string $unit): FormatterBuilder;
diff --git a/src/Mixin/Chain.php b/src/Mixin/Chain.php
index 9900edd..745fe7d 100644
--- a/src/Mixin/Chain.php
+++ b/src/Mixin/Chain.php
@@ -30,6 +30,8 @@ public function imperialMass(string $unit): FormatterBuilder;
 
     public function date(string $format = 'Y-m-d H:i:s'): FormatterBuilder;
 
+    public function lowercase(): FormatterBuilder;
+
     public function mask(string $range, string $replacement = '*'): FormatterBuilder;
 
     public function metric(string $unit): FormatterBuilder;
diff --git a/tests/Unit/LowercaseFormatterTest.php b/tests/Unit/LowercaseFormatterTest.php
new file mode 100644
index 0000000..5cb96e2
--- /dev/null
+++ b/tests/Unit/LowercaseFormatterTest.php
@@ -0,0 +1,290 @@
+<?php
+
+/*
+ * NOTE(review): the text of this header comment is missing from this copy
+ * of the patch; only its closing marker is present. Restore the original
+ * wording from the upstream repository before applying.
+ */
+
+declare(strict_types=1);
+
+namespace Respect\StringFormatter\Test\Unit;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use 
PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\TestCase;
+use Respect\StringFormatter\LowercaseFormatter;
+
+#[CoversClass(LowercaseFormatter::class)]
+final class LowercaseFormatterTest extends TestCase
+{
+    /** Lowercases plain ASCII samples, leaving digits and punctuation alone. */
+    #[Test]
+    #[DataProvider('providerForValidFormattedString')]
+    public function testShouldFormatString(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** An empty input produces an empty output. */
+    #[Test]
+    public function testShouldHandleEmptyString(): void
+    {
+        self::assertSame(
+            '',
+            (new LowercaseFormatter())->format(''),
+        );
+    }
+
+    /** Lowercases words from several Latin-script European languages. */
+    #[Test]
+    #[DataProvider('providerForUnicodeString')]
+    public function testShouldHandleUnicodeCharacters(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Lowercases accented Latin letters while keeping their diacritics. */
+    #[Test]
+    #[DataProvider('providerForLatinAccents')]
+    public function testShouldHandleLatinCharactersWithAccents(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Lowercases Greek and Cyrillic; caseless scripts pass through as-is. */
+    #[Test]
+    #[DataProvider('providerForNonLatinScripts')]
+    public function testShouldHandleNonLatinScripts(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Leaves emoji and symbols untouched while lowercasing nearby letters. */
+    #[Test]
+    #[DataProvider('providerForEmojiAndSpecialChars')]
+    public function testShouldHandleEmojiAndSpecialCharacters(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Pins the locale-independent results for the dotted and plain capital I. */
+    #[Test]
+    #[DataProvider('providerForTurkish')]
+    public function testShouldHandleTurkishCharacters(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Lowercases base letters and keeps the combining marks that follow them. */
+    #[Test]
+    #[DataProvider('providerForCombiningDiacritics')]
+    public function testShouldHandleCombiningDiacritics(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Leaves Arabic and Hebrew unchanged, including next to Latin text. */
+    #[Test]
+    #[DataProvider('providerForRightToLeft')]
+    public function testShouldHandleRightToLeftText(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Lowercases single multi-byte letters and words that contain them. */
+    #[Test]
+    #[DataProvider('providerForMultiByte')]
+    public function testShouldHandleMultiByteCharacters(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Leaves digits, punctuation and whitespace unchanged. */
+    #[Test]
+    #[DataProvider('providerForNumbersAndSpecial')]
+    public function testShouldHandleNumbersAndSpecialChars(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** Handles inputs that mix scripts, digits and emoji. */
+    #[Test]
+    #[DataProvider('providerForMixed')]
+    public function testShouldHandleMixedContent(string $input, string $expected): void
+    {
+        self::assertSame(
+            $expected,
+            (new LowercaseFormatter())->format($input),
+        );
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForValidFormattedString(): array
+    {
+        return [
+            'empty string' => ['', ''],
+            'single uppercase letter' => ['A', 'a'],
+            'all uppercase' => ['HELLO', 'hello'],
+            'already lowercase' => ['hello', 'hello'],
+            'mixed case' => ['Hello World', 'hello world'],
+            'with punctuation' => ['Hello, World!', 'hello, world!'],
+            'with numbers' => ['Hello123', 'hello123'],
+            'single word' => ['TEST', 'test'],
+            'multiple words' => ['Test String Case', 'test string case'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForUnicodeString(): array
+    {
+        return [
+            'german umlauts' => ['ÜBER', 'über'],
+            'french accents' => ['CAFÉ', 'café'],
+            'spanish tilde' => ['NIÑO', 'niño'],
+            'portuguese' => ['CORAÇÃO', 'coração'],
+            'icelandic' => ['ÞINGVELLIR', 'þingvellir'],
+            'scandinavian' => ['ØRSTED', 'ørsted'],
+            'polish' => ['ŁĘSKI', 'łęski'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForLatinAccents(): array
+    {
+        return [
+            'c-cedilla' => ['CAFÉ FRANÇAIS', 'café français'],
+            'umlauts' => ['ÄÖÜ', 'äöü'],
+            'tilde' => ['ÃÑÕ', 'ãñõ'],
+            'circumflex' => ['ÊÎÔÛ', 'êîôû'],
+            'acute' => ['ÁÉÍÓÚ', 'áéíóú'],
+            'grave' => ['ÀÈÌÒÙ', 'àèìòù'],
+            'mixed accents' => ['RÉSUMÉ DÉJÀ VU', 'résumé déjà vu'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForNonLatinScripts(): array
+    {
+        return [
+            'greek uppercase' => ['ΓΕΙΑ ΣΑΣ', 'γεια σας'], // NOTE(review): final ς relies on PHP 8.3+ sigma casing — confirm
+            'cyrillic uppercase' => ['ПРИВЕТ МИР', 'привет мир'],
+            'arabic' => ['مرحبا', 'مرحبا'],
+            'hebrew' => ['שלום', 'שלום'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForEmojiAndSpecialChars(): array
+    {
+        return [
+            'smiley face' => ['HELLO 😊', 'hello 😊'],
+            'multiple emoji' => ['HI 👋 BYE 👋', 'hi 👋 bye 👋'],
+            'hearts' => ['❤️ LOVE ❤️', '❤️ love ❤️'],
+            'special symbols' => ['© ™ ®', '© ™ ®'],
+            'math symbols' => ['∑ π ∫', '∑ π ∫'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForTurkish(): array
+    {
+        return [
+            'turkish i' => ['İ', 'i̇'], // NOTE(review): expected value looks like "i" + combining dot above (U+0307) — confirm
+            'turkish I' => ['I', 'i'], // plain "i" is expected here, not the Turkish dotless "ı"
+            'turkish mixed' => ['İSTANBUL', 'i̇stanbul'],
+            'capital i with dot' => ['İi', 'i̇i'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForCombiningDiacritics(): array
+    {
+        return [
+            'E with combining acute' => ["E\u{0301}", "e\u{0301}"],
+            'A with combining grave' => ["A\u{0300}", "a\u{0300}"],
+            'combined character' => ['É', 'é'],
+            'word with combining marks' => ["CAFE\u{0301}", "cafe\u{0301}"],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForRightToLeft(): array
+    {
+        return [
+            'arabic word' => ['مرحبا', 'مرحبا'],
+            'hebrew word' => ['שלום', 'שלום'],
+            'mixed direction' => ['HELLO مرحبا', 'hello مرحبا'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForMultiByte(): array
+    {
+        return [
+            'e-acute' => ['É', 'é'],
+            'u-umlaut' => ['Ü', 'ü'],
+            'greek sigma' => ['Σ', 'σ'],
+            'cyrillic de' => ['Д', 'д'],
+            'polish l-stroke' => ['Ł', 'ł'],
+            'full accented word' => ['RÉSUMÉ', 'résumé'],
+            'mixed multibyte and ascii' => ['ÜBER COOL', 'über cool'],
+            'multibyte with cjk' => ['CAFÉ你好', 'café你好'],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForNumbersAndSpecial(): array
+    {
+        return [
+            'digits only' => ['1234567890', '1234567890'],
+            'mixed alphanumeric' => ['ABC123DEF', 'abc123def'],
+            'special chars only' => ['!@#$%^&*()', '!@#$%^&*()'],
+            'whitespace' => [' ', ' '],
+            'tabs and newlines' => ["HELLO\tWORLD\n", "hello\tworld\n"],
+        ];
+    }
+
+    /** @return array<string, array{string, string}> dataset name => [input, expected] */
+    public static function providerForMixed(): array
+    {
+        return [
+            'unicode with numbers' => ['CAFÉ123', 'café123'],
+            'emoji with text' => ['HELLO WORLD 😊', 'hello world 😊'],
+            'cjk with latin' => ['HELLO你好', 'hello你好'],
+            'mixed scripts' => ['HELLO 世界 МИР', 'hello 世界 мир'],
+            'complex string' => ['CAFÉ 123 😊 你好', 'café 123 😊 你好'],
+        ];
+    }
+}