From 25d50a10000103a72960f2c17d24d38ddc698394 Mon Sep 17 00:00:00 2001 From: phpstan-bot <79867460+phpstan-bot@users.noreply.github.com> Date: Sun, 3 May 2026 11:30:33 +0000 Subject: [PATCH 1/6] Truncate `sscanf`/`fscanf` format string at NUL byte before counting placeholders - In PHP's sscanf/fscanf, a NUL byte (\0) in the format string terminates parsing because the C implementation treats it as end-of-string. PHPStan was not accounting for this and counted placeholders after the NUL byte. - Truncate format string at the first NUL byte in PrintfHelper::parsePlaceholders() when $isScanf is true, fixing both parameter count validation and placeholder parsing. - Apply the same NUL truncation in SscanfFunctionDynamicReturnTypeExtension before matching format specifiers for return type inference. - Restructure the extension to return array{}|null when no specifiers are found (e.g. format starts with NUL) instead of falling through to the generic signature return type. - Verified that printf/sprintf do NOT truncate at NUL (PHP's printf implementation walks the format string by its stored length rather than stopping at a C-style terminator, so NUL is processed as data), so no changes needed for printf-family functions. 
--- src/Rules/Functions/PrintfHelper.php | 8 ++++++++ ...canfFunctionDynamicReturnTypeExtension.php | 15 ++++++++++----- tests/PHPStan/Analyser/nsrt/bug-14567.php | 19 +++++++++++++++++++ .../Functions/PrintfParametersRuleTest.php | 5 +++++ .../Rules/Functions/data/bug-14567.php | 18 ++++++++++++++++++ 5 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 tests/PHPStan/Analyser/nsrt/bug-14567.php create mode 100644 tests/PHPStan/Rules/Functions/data/bug-14567.php diff --git a/src/Rules/Functions/PrintfHelper.php b/src/Rules/Functions/PrintfHelper.php index 411972885d3..3370a8bc7e6 100644 --- a/src/Rules/Functions/PrintfHelper.php +++ b/src/Rules/Functions/PrintfHelper.php @@ -12,6 +12,7 @@ use function max; use function sprintf; use function strlen; +use function strstr; use const PREG_SET_ORDER; #[AutowiredService] @@ -45,6 +46,13 @@ public function getScanfPlaceholdersCount(string $format): ?int */ private function parsePlaceholders(string $specifiersPattern, string $format, bool $isScanf): ?array { + if ($isScanf) { + $beforeNul = strstr($format, "\0", true); + if ($beforeNul !== false) { + $format = $beforeNul; + } + } + $addSpecifier = ''; if ($this->phpVersion->supportsHhPrintfSpecifier()) { $addSpecifier .= 'hH'; diff --git a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php index de22ba0a461..0cbcb3e9098 100644 --- a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php +++ b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php @@ -21,6 +21,7 @@ use function count; use function in_array; use function preg_match_all; +use function strstr; #[AutowiredService] final class SscanfFunctionDynamicReturnTypeExtension implements DynamicFunctionReturnTypeExtension @@ -48,9 +49,15 @@ public function getTypeFromFunctionCall( return null; } - if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cdeEfosux]{1})/', $formatType->getValue(), $matches) > 0) { - $arrayBuilder = 
ConstantArrayTypeBuilder::createEmpty(); + $formatValue = $formatType->getValue(); + $beforeNul = strstr($formatValue, "\0", true); + if ($beforeNul !== false) { + $formatValue = $beforeNul; + } + + $arrayBuilder = ConstantArrayTypeBuilder::createEmpty(); + if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cdeEfosux]{1})/', $formatValue, $matches) > 0) { for ($i = 0; $i < count($matches[0]); $i++) { $length = $matches[1][$i]; $specifier = $matches[2][$i]; @@ -81,11 +88,9 @@ public function getTypeFromFunctionCall( $type = TypeCombinator::addNull($type); $arrayBuilder->setOffsetValueType(new ConstantIntegerType($i), $type); } - - return TypeCombinator::addNull($arrayBuilder->getArray()); } - return null; + return TypeCombinator::addNull($arrayBuilder->getArray()); } } diff --git a/tests/PHPStan/Analyser/nsrt/bug-14567.php b/tests/PHPStan/Analyser/nsrt/bug-14567.php new file mode 100644 index 00000000000..d838290164a --- /dev/null +++ b/tests/PHPStan/Analyser/nsrt/bug-14567.php @@ -0,0 +1,19 @@ +analyse([__DIR__ . '/data/bug-10260.php'], []); } + public function testBug14567(): void + { + $this->analyse([__DIR__ . '/data/bug-14567.php'], []); + } + } diff --git a/tests/PHPStan/Rules/Functions/data/bug-14567.php b/tests/PHPStan/Rules/Functions/data/bug-14567.php new file mode 100644 index 00000000000..127d3c577f9 --- /dev/null +++ b/tests/PHPStan/Rules/Functions/data/bug-14567.php @@ -0,0 +1,18 @@ + Date: Sun, 3 May 2026 19:17:58 +0000 Subject: [PATCH 2/6] Add missing `%n`, `%i`, `%X` specifiers to sscanf return type inference The `SscanfFunctionDynamicReturnTypeExtension` regex was missing several valid scanf specifiers (`i`, `n`, `X`) that were already handled by `PrintfHelper`. This caused incorrect return type inference for format strings using these specifiers. Also adds test cases for edge cases raised in review: empty format string, `%%` literal percent, and mixed specifiers with `%n`. 
Co-Authored-By: Claude Opus 4.6 --- ...canfFunctionDynamicReturnTypeExtension.php | 4 ++-- tests/PHPStan/Analyser/nsrt/bug-14567.php | 20 +++++++++++++++++++ .../Rules/Functions/data/bug-14567.php | 12 +++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php index 0cbcb3e9098..0ef3f62f1ed 100644 --- a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php +++ b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php @@ -57,7 +57,7 @@ public function getTypeFromFunctionCall( $arrayBuilder = ConstantArrayTypeBuilder::createEmpty(); - if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cdeEfosux]{1})/', $formatValue, $matches) > 0) { + if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cdeEfinosuxX]{1})/', $formatValue, $matches) > 0) { for ($i = 0; $i < count($matches[0]); $i++) { $length = $matches[1][$i]; $specifier = $matches[2][$i]; @@ -77,7 +77,7 @@ public function getTypeFromFunctionCall( } } - if (in_array($specifier, ['d', 'o', 'u', 'x'], true)) { + if (in_array($specifier, ['d', 'i', 'n', 'o', 'u', 'x', 'X'], true)) { $type = new IntegerType(); } diff --git a/tests/PHPStan/Analyser/nsrt/bug-14567.php b/tests/PHPStan/Analyser/nsrt/bug-14567.php index d838290164a..5843b4db717 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-14567.php +++ b/tests/PHPStan/Analyser/nsrt/bug-14567.php @@ -17,3 +17,23 @@ function fscanfNulTerminator($r) { assertType('array{int|null, string|null}|null', fscanf($r, "%d %s\0%d")); assertType('array{}|null', fscanf($r, "\0%d%s")); } + +function sscanfEdgeCases(string $s) { + // Empty format string - no placeholders + assertType('array{}|null', sscanf($s, "")); + + // %n - counts characters consumed, returns integer + assertType('array{int|null}|null', sscanf($s, "%n")); + + // %% - literal percent, not a placeholder + assertType('array{}|null', sscanf($s, "%%")); + + // %i - integer with base detection + 
assertType('array{int|null}|null', sscanf($s, "%i")); + + // %X - uppercase hex, same as %x + assertType('array{int|null}|null', sscanf($s, "%X")); + + // mixed specifiers with %n + assertType('array{int|null, int|null}|null', sscanf($s, "%d%n")); +} diff --git a/tests/PHPStan/Rules/Functions/data/bug-14567.php b/tests/PHPStan/Rules/Functions/data/bug-14567.php index 127d3c577f9..910e401e0a3 100644 --- a/tests/PHPStan/Rules/Functions/data/bug-14567.php +++ b/tests/PHPStan/Rules/Functions/data/bug-14567.php @@ -16,3 +16,15 @@ // Multiple placeholders, NUL in middle sscanf('123 456 789', "%d %d\0%d", $b, $c); + +// %n specifier - counts characters consumed, 1 placeholder +sscanf('hello', "%n", $n); + +// %% - literal percent, 0 placeholders +sscanf('100%', "100%%"); + +// %i specifier - integer with base detection, 1 placeholder +sscanf('0xff', "%i", $hex); + +// Mixed with %n +sscanf('hello world', "%s%n", $word, $pos); From 16b970d017ad68a6495975b0b3e4cef5ba500b2e Mon Sep 17 00:00:00 2001 From: phpstan-bot Date: Sun, 3 May 2026 19:30:07 +0000 Subject: [PATCH 3/6] Add missing `%g` and `%D` sscanf specifiers to return type inference and parameter counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHP's sscanf supports `%g` (general float) and `%D` (integer alias for `%d`) but both were missing from the return type extension and `%g` was missing from the parameter count helper. Verified against PHP 8.4 — `%G` is intentionally excluded as it throws ValueError. 
Co-Authored-By: Claude Opus 4.6 --- src/Rules/Functions/PrintfHelper.php | 2 +- src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php | 6 +++--- tests/PHPStan/Analyser/nsrt/bug-14567.php | 6 ++++++ tests/PHPStan/Rules/Functions/data/bug-14567.php | 6 ++++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/Rules/Functions/PrintfHelper.php b/src/Rules/Functions/PrintfHelper.php index 3370a8bc7e6..5455ba7edc0 100644 --- a/src/Rules/Functions/PrintfHelper.php +++ b/src/Rules/Functions/PrintfHelper.php @@ -38,7 +38,7 @@ public function getPrintfPlaceholders(string $format): ?array public function getScanfPlaceholdersCount(string $format): ?int { - return $this->getPlaceholdersCount('(?[cdDeEfinosuxX%s]|\[[^\]]+\])', $format, true); + return $this->getPlaceholdersCount('(?[cdDeEfginosuxX%s]|\[[^\]]+\])', $format, true); } /** diff --git a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php index 0ef3f62f1ed..6506e29c313 100644 --- a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php +++ b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php @@ -57,7 +57,7 @@ public function getTypeFromFunctionCall( $arrayBuilder = ConstantArrayTypeBuilder::createEmpty(); - if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cdeEfinosuxX]{1})/', $formatValue, $matches) > 0) { + if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cDdeEfginosuxX]{1})/', $formatValue, $matches) > 0) { for ($i = 0; $i < count($matches[0]); $i++) { $length = $matches[1][$i]; $specifier = $matches[2][$i]; @@ -77,11 +77,11 @@ public function getTypeFromFunctionCall( } } - if (in_array($specifier, ['d', 'i', 'n', 'o', 'u', 'x', 'X'], true)) { + if (in_array($specifier, ['d', 'D', 'i', 'n', 'o', 'u', 'x', 'X'], true)) { $type = new IntegerType(); } - if (in_array($specifier, ['e', 'E', 'f'], true)) { + if (in_array($specifier, ['e', 'E', 'f', 'g'], true)) { $type = new FloatType(); } diff --git a/tests/PHPStan/Analyser/nsrt/bug-14567.php 
b/tests/PHPStan/Analyser/nsrt/bug-14567.php index 5843b4db717..0ad11425867 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-14567.php +++ b/tests/PHPStan/Analyser/nsrt/bug-14567.php @@ -34,6 +34,12 @@ function sscanfEdgeCases(string $s) { // %X - uppercase hex, same as %x assertType('array{int|null}|null', sscanf($s, "%X")); + // %D - uppercase alias for %d + assertType('array{int|null}|null', sscanf($s, "%D")); + + // %g - general float + assertType('array{float|null}|null', sscanf($s, "%g")); + // mixed specifiers with %n assertType('array{int|null, int|null}|null', sscanf($s, "%d%n")); } diff --git a/tests/PHPStan/Rules/Functions/data/bug-14567.php b/tests/PHPStan/Rules/Functions/data/bug-14567.php index 910e401e0a3..60adb979633 100644 --- a/tests/PHPStan/Rules/Functions/data/bug-14567.php +++ b/tests/PHPStan/Rules/Functions/data/bug-14567.php @@ -28,3 +28,9 @@ // Mixed with %n sscanf('hello world', "%s%n", $word, $pos); + +// %D specifier - uppercase alias for %d, 1 placeholder +sscanf('42', "%D", $dval); + +// %g specifier - general float, 1 placeholder +sscanf('1.5', "%g", $gval); From 24cc3b76f875886fe01f190aa56cef5f14eae0ba Mon Sep 17 00:00:00 2001 From: phpstan-bot Date: Sun, 3 May 2026 19:30:54 +0000 Subject: [PATCH 4/6] Fix `%u` sscanf specifier return type to `int|string` PHP's sscanf `%u` specifier returns a string when the parsed unsigned value exceeds PHP_INT_MAX. The other integer specifiers (%d, %i, %o, %x, %X) clamp to PHP_INT_MAX/MIN and always return int, but %u wraps the large value into a string representation instead. 
Co-Authored-By: Claude Opus 4.6 --- src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php | 6 +++++- tests/PHPStan/Analyser/nsrt/bug-14567.php | 3 +++ tests/PHPStan/Analyser/nsrt/sscanf.php | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php index 6506e29c313..2be637b7e7b 100644 --- a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php +++ b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php @@ -77,10 +77,14 @@ public function getTypeFromFunctionCall( } } - if (in_array($specifier, ['d', 'D', 'i', 'n', 'o', 'u', 'x', 'X'], true)) { + if (in_array($specifier, ['d', 'D', 'i', 'n', 'o', 'x', 'X'], true)) { $type = new IntegerType(); } + if ($specifier === 'u') { + $type = TypeCombinator::union(new IntegerType(), new StringType()); + } + if (in_array($specifier, ['e', 'E', 'f', 'g'], true)) { $type = new FloatType(); } diff --git a/tests/PHPStan/Analyser/nsrt/bug-14567.php b/tests/PHPStan/Analyser/nsrt/bug-14567.php index 0ad11425867..327fba81d7c 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-14567.php +++ b/tests/PHPStan/Analyser/nsrt/bug-14567.php @@ -40,6 +40,9 @@ function sscanfEdgeCases(string $s) { // %g - general float assertType('array{float|null}|null', sscanf($s, "%g")); + // %u - unsigned integer, can return string for values > PHP_INT_MAX + assertType('array{int|string|null}|null', sscanf($s, "%u")); + // mixed specifiers with %n assertType('array{int|null, int|null}|null', sscanf($s, "%d%n")); } diff --git a/tests/PHPStan/Analyser/nsrt/sscanf.php b/tests/PHPStan/Analyser/nsrt/sscanf.php index 484febdf9b4..49bc59c7854 100644 --- a/tests/PHPStan/Analyser/nsrt/sscanf.php +++ b/tests/PHPStan/Analyser/nsrt/sscanf.php @@ -20,7 +20,7 @@ function sscanfFormatInference(string $s) { assertType('array{float|null}|null', sscanf($s, '%f')); assertType('array{int|null}|null', sscanf($s, '%o')); 
assertType('array{string|null}|null', sscanf($s, '%s')); - assertType('array{int|null}|null', sscanf($s, '%u')); + assertType('array{int|string|null}|null', sscanf($s, '%u')); assertType('array{int|null}|null', sscanf($s, '%x')); $mandate = "January 01 2000"; From fea90275fee80e8e1d2dc33c24f8d758c306534d Mon Sep 17 00:00:00 2001 From: phpstan-bot Date: Mon, 4 May 2026 08:15:09 +0000 Subject: [PATCH 5/6] Add comprehensive sscanf format validation test with C ValidateFormat port Port ValidateFormat() and per-specifier return type dispatch from php_sscanf_internal() in php-src ext/standard/scanf.c to PHP, and cross-validate against PrintfHelper, the extension regex, and PHP runtime sscanf across 60+ format strings from production and test code. Ported from php-src: File: ext/standard/scanf.c Commit: 5164621436e8eb84952c9fdb4c931cd9a50754d9 Blob: 980009c30640a0dee171d11155a8d7ae09f174ff The ported code (validateFormatC, specifierReturnTypeC) is used under the BSD-3-Clause license per php-src COPYING. Attribution and SPDX identifiers are in the file header. Co-Authored-By: Claude Opus 4.6 --- .../Functions/SscanfFormatValidationTest.php | 1074 +++++++++++++++++ 1 file changed, 1074 insertions(+) create mode 100644 tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php diff --git a/tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php b/tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php new file mode 100644 index 00000000000..e7e70d4944d --- /dev/null +++ b/tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php @@ -0,0 +1,1074 @@ + + * SPDX-License-Identifier: BSD-3-Clause + * See https://www.php.net/license/ + * + * The validateFormatC() and specifierReturnTypeC() methods are PHP ports + * of ValidateFormat() and the per-specifier dispatch in php_sscanf_internal() + * from php-src, used under the BSD-3-Clause license. 
+ * + * Source: ext/standard/scanf.c + * Commit: 5164621436e8eb84952c9fdb4c931cd9a50754d9 + * Blob: 980009c30640a0dee171d11155a8d7ae09f174ff + */ + +namespace PHPStan\Rules\Functions; + +use Override; +use PHPStan\Php\PhpVersion; +use PHPStan\Testing\PHPStanTestCase; +use PHPUnit\Framework\Attributes\RequiresPhp; +use const PHP_VERSION_ID; + +/** + * Comprehensive test comparing PHPStan's sscanf format string parsing + * against the authoritative C implementation (ValidateFormat in ext/standard/scanf.c). + * + * Algorithms compared: + * 1. C reference (ValidateFormat ported to PHP) + * 2. PrintfHelper::getScanfPlaceholdersCount (parameter count rule) + * 3. SscanfFunctionDynamicReturnTypeExtension regex (return type inference) + * 4. Runtime sscanf with matching input (PHP's own implementation) + */ +class SscanfFormatValidationTest extends PHPStanTestCase +{ + + private PrintfHelper $printfHelper; + + #[Override] + protected function setUp(): void + { + $this->printfHelper = new PrintfHelper(new PhpVersion(PHP_VERSION_ID)); + } + + // --------------------------------------------------------------- + // PHP port of PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs) + // from ext/standard/scanf.c (php-src). 
+ // + // This function iterates the format string character by character, + // exactly replicating the C logic: + // - C strings terminate at NUL (\0); this port checks explicitly + // - %% is a literal percent, not a placeholder + // - %* is assignment suppression (placeholder parsed but not counted) + // - Digits after % may be either XPG3 positional (%n$) or a width + // - Size modifiers (l, L, h) are consumed and ignored + // - The switch on the specifier character is the definitive list + // of valid scanf specifiers: n d D i o x X u f e E g s c [ + // - Character sets ([...]) handle ] as the first character and ^ + // + // Returns: + // count = number of capturing (non-suppressed) placeholders + // error = error message string on failure, null on success + // --------------------------------------------------------------- + + /** + * @return array{count: int|null, error: string|null} + */ + public static function validateFormatC(string $format): array + { + $len = strlen($format); + $pos = 0; + $objIndex = 0; + $gotXpg = false; + $gotSequential = false; + $xpgSize = 0; + + while ($pos < $len && $format[$pos] !== "\0") { + $ch = $format[$pos]; + $pos++; + + if ($ch !== '%') { + continue; + } + + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Bad scan conversion character ""']; + } + + $ch = $format[$pos]; + $pos++; + + if ($ch === '%') { + continue; + } + + $suppress = false; + + if ($ch === '*') { + $suppress = true; + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Bad scan conversion character ""']; + } + $ch = $format[$pos]; + $pos++; + } elseif (ctype_digit($ch)) { + $numStart = $pos - 1; + while ($pos < $len && ctype_digit($format[$pos])) { + $pos++; + } + if ($pos < $len && $format[$pos] === '$') { + $value = (int) substr($format, $numStart, $pos - $numStart); + $pos++; + $gotXpg = true; + if ($gotSequential) { + return ['count' => null, 'error' => 'cannot mix "%" and "%n$" 
conversion specifiers']; + } + if ($value < 1 || $value > 255) { + return ['count' => null, 'error' => '"%n$" argument index out of range']; + } + $xpgSize = max($xpgSize, $value); + $objIndex = $value - 1; + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Bad scan conversion character ""']; + } + $ch = $format[$pos]; + $pos++; + } else { + $pos = $numStart + 1; + $gotSequential = true; + if ($gotXpg) { + return ['count' => null, 'error' => 'cannot mix "%" and "%n$" conversion specifiers']; + } + } + } else { + $gotSequential = true; + if ($gotXpg) { + return ['count' => null, 'error' => 'cannot mix "%" and "%n$" conversion specifiers']; + } + } + + if (ctype_digit($ch)) { + while ($pos < $len && ctype_digit($format[$pos])) { + $pos++; + } + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Bad scan conversion character ""']; + } + $ch = $format[$pos]; + $pos++; + } + + if ($ch === 'l' || $ch === 'L' || $ch === 'h') { + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Bad scan conversion character ""']; + } + $ch = $format[$pos]; + $pos++; + } + + switch ($ch) { + case 'n': + case 'd': + case 'D': + case 'i': + case 'o': + case 'x': + case 'X': + case 'u': + case 'f': + case 'e': + case 'E': + case 'g': + case 's': + case 'c': + break; + + case '[': + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Unmatched [ in format string']; + } + $setCh = $format[$pos]; + $pos++; + if ($setCh === '^') { + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Unmatched [ in format string']; + } + $setCh = $format[$pos]; + $pos++; + } + if ($setCh === ']') { + if ($pos >= $len || $format[$pos] === "\0") { + return ['count' => null, 'error' => 'Unmatched [ in format string']; + } + $setCh = $format[$pos]; + $pos++; + } + while ($setCh !== ']') { + if ($pos >= $len || $format[$pos] === "\0") { + return 
['count' => null, 'error' => 'Unmatched [ in format string']; + } + $setCh = $format[$pos]; + $pos++; + } + break; + + default: + return ['count' => null, 'error' => sprintf('Bad scan conversion character "%s"', $ch)]; + } + + if (!$suppress) { + $objIndex++; + } + } + + if ($xpgSize > 0) { + return ['count' => $xpgSize, 'error' => null]; + } + + return ['count' => $objIndex, 'error' => null]; + } + + // --------------------------------------------------------------- + // PHP port of per-specifier return type logic from php_sscanf_internal. + // + // Derived from the switch(op) dispatch in the C source: + // %n → add_index_long → int + // %d %D %i → ZEND_STRTOL (signed) → int + // %o %x %X → ZEND_STRTOL (signed) → int + // %u → ZEND_STRTOUL (unsigned) → int|string + // (string when value > PHP_INT_MAX, via snprintf) + // %f %e %E %g → zend_strtod → float + // %s %c → string copy → string + // %[...] → CharSet matching → string + // --------------------------------------------------------------- + + /** @return 'int'|'int|string'|'float'|'string' */ + public static function specifierReturnTypeC(string $specifier): string + { + switch ($specifier) { + case 'n': + case 'd': + case 'D': + case 'i': + case 'o': + case 'x': + case 'X': + return 'int'; + case 'u': + return 'int|string'; + case 'f': + case 'e': + case 'E': + case 'g': + return 'float'; + default: + return 'string'; // s, c, [...] + } + } + + /** + * Extracts specifier types using the same regex and mapping logic + * as SscanfFunctionDynamicReturnTypeExtension::getTypeFromFunctionCall. 
+ * + * @return array{count: int, types: list} + */ + public static function extensionRegexParse(string $format): array + { + $beforeNul = strstr($format, "\0", true); + if ($beforeNul !== false) { + $format = $beforeNul; + } + + $types = []; + if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cDdeEfginosuxX]{1})/', $format, $matches) > 0) { + for ($i = 0; $i < count($matches[0]); $i++) { + $specifier = $matches[2][$i]; + + if (in_array($specifier, ['d', 'D', 'i', 'n', 'o', 'x', 'X'], true)) { + $types[] = 'int'; + } elseif ($specifier === 'u') { + $types[] = 'int|string'; + } elseif (in_array($specifier, ['e', 'E', 'f', 'g'], true)) { + $types[] = 'float'; + } else { + $types[] = 'string'; + } + } + } + + return ['count' => count($types), 'types' => $types]; + } + + /** + * Uses PHP runtime sscanf with crafted input to determine placeholder count. + * + * Note: sscanf('', $format) returns null for any format with specifiers + * (except %n) because the C code checks *string == '\0' before each + * conversion attempt and triggers underflow. A string with sufficient + * matching data is needed. This method uses a numeric string that + * satisfies most specifier types. + * + * @return array{count: int|null, error: string|null} + */ + public static function runtimeSscanfCount(string $format, string $input = '999 999 999 999 999 999 999 999 999 999'): array + { + try { + $result = @sscanf($input, $format); + if ($result === null) { + return ['count' => null, 'error' => null]; + } + + return ['count' => count($result), 'error' => null]; + } catch (\ValueError $e) { + return ['count' => null, 'error' => $e->getMessage()]; + } + } + + /** + * All scanf format strings found in phpstan-src production and test code, + * plus hakre's review test cases, specifier coverage, and edge cases. 
+ * + * Each entry documents: + * format — the scanf format string + * count — expected number of capturing placeholders (from C ValidateFormat) + * error — error message from C ValidateFormat (null if valid) + * types — expected per-specifier return types (from C php_sscanf_internal) + * runtimeInput — optional input string for runtime verification + * + * @return array, runtimeInput?: string}> + */ + public static function allFormatStrings(): array + { + return [ + // ============================================= + // hakre's 5 test cases from review + // ============================================= + 'hakre #01: empty format' => [ + 'format' => '', + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => '', + ], + 'hakre #02: lone percent' => [ + 'format' => '%', + 'count' => null, + 'error' => 'Bad scan conversion character ""', + 'types' => [], + ], + 'hakre #03: %n specifier' => [ + 'format' => '%n', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'hello', + ], + 'hakre #04: %% literal' => [ + 'format' => '%%', + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => '%', + ], + 'hakre #05: unmatched [' => [ + 'format' => '%[', + 'count' => null, + 'error' => 'Unmatched [ in format string', + 'types' => [], + ], + + // ============================================= + // All 15 specifiers from ValidateFormat switch + // (the definitive set from ext/standard/scanf.c) + // ============================================= + 'spec %n (chars consumed)' => [ + 'format' => '%n', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'hello', + ], + 'spec %d (signed decimal)' => [ + 'format' => '%d', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '42', + ], + 'spec %D (alias for %d)' => [ + 'format' => '%D', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '42', + ], + 'spec %i (base-detecting int)' => [ + 'format' => '%i', + 'count' 
=> 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '0xff', + ], + 'spec %o (octal)' => [ + 'format' => '%o', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '77', + ], + 'spec %x (hex lowercase)' => [ + 'format' => '%x', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'ff', + ], + 'spec %X (hex uppercase)' => [ + 'format' => '%X', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'FF', + ], + 'spec %u (unsigned)' => [ + 'format' => '%u', + 'count' => 1, + 'error' => null, + 'types' => ['int|string'], + 'runtimeInput' => '42', + ], + 'spec %f (float)' => [ + 'format' => '%f', + 'count' => 1, + 'error' => null, + 'types' => ['float'], + 'runtimeInput' => '3.14', + ], + 'spec %e (scientific)' => [ + 'format' => '%e', + 'count' => 1, + 'error' => null, + 'types' => ['float'], + 'runtimeInput' => '1.5e2', + ], + 'spec %E (scientific uc)' => [ + 'format' => '%E', + 'count' => 1, + 'error' => null, + 'types' => ['float'], + 'runtimeInput' => '1.5E2', + ], + 'spec %g (general float)' => [ + 'format' => '%g', + 'count' => 1, + 'error' => null, + 'types' => ['float'], + 'runtimeInput' => '1.5', + ], + 'spec %s (string)' => [ + 'format' => '%s', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'hello', + ], + 'spec %c (character)' => [ + 'format' => '%c', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'x', + ], + 'spec %[a-z] (char class)' => [ + 'format' => '%[a-z]', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'hello', + ], + 'spec %[^/] (negated class)' => [ + 'format' => '%[^/]', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'hello', + ], + + // ============================================= + // Assignment suppression (%*) + // ============================================= + 'suppress %*d' => [ + 'format' => '%*d', + 'count' => 0, + 'error' 
=> null, + 'types' => [], + 'runtimeInput' => '42', + ], + 'suppress %*s' => [ + 'format' => '%*s', + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => 'hello', + ], + 'suppress %*[a-z]' => [ + 'format' => '%*[a-z]', + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => 'hello', + ], + 'suppress mixed: %*d %d' => [ + 'format' => '%*d %d', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '10 20', + ], + + // ============================================= + // Width specifiers + // ============================================= + 'width %0s' => [ + 'format' => '%0s', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'hello', + ], + 'width %2x' => [ + 'format' => '%2x', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'ff', + ], + 'width %20s' => [ + 'format' => '%20s', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'hello', + ], + + // ============================================= + // NUL byte termination + // ============================================= + "nul: %d\\0%d" => [ + 'format' => "%d\0%d", + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '42', + ], + "nul: %d %s\\0%d" => [ + 'format' => "%d %s\0%d", + 'count' => 2, + 'error' => null, + 'types' => ['int', 'string'], + 'runtimeInput' => '42 hello', + ], + "nul: \\0%d%s (nul at start)" => [ + 'format' => "\0%d%s", + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => '', + ], + + // ============================================= + // Error cases + // ============================================= + 'error: %z (bad specifier)' => [ + 'format' => '%z', + 'count' => null, + 'error' => 'Bad scan conversion character "z"', + 'types' => [], + ], + 'error: %b (bad specifier)' => [ + 'format' => '%b', + 'count' => null, + 'error' => 'Bad scan conversion character "b"', + 'types' => [], + ], + 'error: %[abc (unmatched)' 
=> [ + 'format' => '%[abc', + 'count' => null, + 'error' => 'Unmatched [ in format string', + 'types' => [], + ], + 'error: %[^abc (unmatched)' => [ + 'format' => '%[^abc', + 'count' => null, + 'error' => 'Unmatched [ in format string', + 'types' => [], + ], + + // ============================================= + // Production code format strings (src/) + // ============================================= + 'prod: RegexGroupParser {%d,%d}' => [ + 'format' => '{%d,%d}', + 'count' => 2, + 'error' => null, + 'types' => ['int', 'int'], + 'runtimeInput' => '{10,20}', + ], + 'prod: RegexGroupParser {%d,}' => [ + 'format' => '{%d,}', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '{10,}', + ], + 'prod: RegexGroupParser {%d}' => [ + 'format' => '{%d}', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '{10}', + ], + + // ============================================= + // Test data format strings (tests/) + // ============================================= + 'test: sscanf.php %d-%d' => [ + 'format' => '%d-%d', + 'count' => 2, + 'error' => null, + 'types' => ['int', 'int'], + 'runtimeInput' => '20-20', + ], + 'test: sscanf.php %s %d %d' => [ + 'format' => '%s %d %d', + 'count' => 3, + 'error' => null, + 'types' => ['string', 'int', 'int'], + 'runtimeInput' => 'January 01 2000', + ], + 'test: sscanf.php %1s' => [ + 'format' => '%1s', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'x', + ], + 'test: sscanf.php %2s' => [ + 'format' => '%2s', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'xy', + ], + 'test: sscanf.php %2x%2x%2x' => [ + 'format' => '%2x%2x%2x', + 'count' => 3, + 'error' => null, + 'types' => ['int', 'int', 'int'], + 'runtimeInput' => '00ccff', + ], + 'test: sscanf.php %*s %d' => [ + 'format' => '%*s %d', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'skip 42', + ], + 'test: sscanf.php %*d %s' => [ + 'format' => 
'%*d %s', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => '42 hello', + ], + 'test: sscanf.php %*[a-z]%d' => [ + 'format' => '%*[a-z]%d', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'abc42', + ], + 'test: bug-7764 %[^/]/%[^/]/%s' => [ + 'format' => '%[^/]/%[^/]/%s', + 'count' => 3, + 'error' => null, + 'types' => ['string', 'string', 'string'], + 'runtimeInput' => 'hello/world/foo', + ], + 'test: bug-7563 %[1234567890.]%s' => [ + 'format' => '%[1234567890.]%s', + 'count' => 2, + 'error' => null, + 'types' => ['string', 'string'], + 'runtimeInput' => '123.45cm', + ], + 'test: bug-7563 %s [%d] at %[^:]:%d: %[^[]]' => [ + 'format' => '%s [%d] at %[^:]:%d: %[^[]]', + 'count' => 5, + 'error' => null, + 'types' => ['string', 'int', 'string', 'int', 'string'], + 'runtimeInput' => 'Exception [1234] at /path:42: message', + ], + 'test: bug-7563 %[%[]' => [ + 'format' => '%[%[]', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => '%[test', + ], + 'test: printf.php %d%d' => [ + 'format' => '%d%d', + 'count' => 2, + 'error' => null, + 'types' => ['int', 'int'], + 'runtimeInput' => '12 34', + ], + 'test: printf.php %20[^,],%d' => [ + 'format' => '%20[^,],%d', + 'count' => 2, + 'error' => null, + 'types' => ['string', 'int'], + 'runtimeInput' => 'hello,42', + ], + "test: printf.php %20[^\\n]\\n%d" => [ + 'format' => "%20[^\n]\n%d", + 'count' => 2, + 'error' => null, + 'types' => ['string', 'int'], + 'runtimeInput' => "hello\n42", + ], + 'test: printf.php %20[^abcde]a%d' => [ + 'format' => '%20[^abcde]a%d', + 'count' => 2, + 'error' => null, + 'types' => ['string', 'int'], + 'runtimeInput' => 'xyz a42', + ], + 'test: printf.php %[A-Z]%d' => [ + 'format' => '%[A-Z]%d', + 'count' => 2, + 'error' => null, + 'types' => ['string', 'int'], + 'runtimeInput' => 'ABC123', + ], + 'test: bug-10260 %*[a-z]_day_%s' => [ + 'format' => '%*[a-z]_day_%s', + 'count' => 1, + 'error' => null, + 'types' => 
['string'], + 'runtimeInput' => 'appletone_day_1', + ], + 'test: bug-10260 %*s %*d %s' => [ + 'format' => '%*s %*d %s', + 'count' => 1, + 'error' => null, + 'types' => ['string'], + 'runtimeInput' => 'foo 123 bar', + ], + 'test: bug-10260 %*[A-Z]%d' => [ + 'format' => '%*[A-Z]%d', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'ABC123', + ], + 'test: bug-10260 %s %*s %d' => [ + 'format' => '%s %*s %d', + 'count' => 2, + 'error' => null, + 'types' => ['string', 'int'], + 'runtimeInput' => 'hello world 42', + ], + 'test: bug-10260 %*d %*s' => [ + 'format' => '%*d %*s', + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => '123 abc', + ], + 'test: param-out %d:%d:%d' => [ + 'format' => '%d:%d:%d', + 'count' => 3, + 'error' => null, + 'types' => ['int', 'int', 'int'], + 'runtimeInput' => '10:05:03', + ], + 'test: param-out %s %s' => [ + 'format' => '%s %s', + 'count' => 2, + 'error' => null, + 'types' => ['string', 'string'], + 'runtimeInput' => '42 psalm', + ], + 'test: bug-14567 %d%n' => [ + 'format' => '%d%n', + 'count' => 2, + 'error' => null, + 'types' => ['int', 'int'], + 'runtimeInput' => '42', + ], + + // ============================================= + // Combination and edge cases + // ============================================= + 'combo: all integer specifiers' => [ + 'format' => '%d %D %i %o %x %X %u %n', + 'count' => 8, + 'error' => null, + 'types' => ['int', 'int', 'int', 'int', 'int', 'int', 'int|string', 'int'], + 'runtimeInput' => '1 2 3 4 5 6 7 8', + ], + 'combo: all float specifiers' => [ + 'format' => '%e %E %f %g', + 'count' => 4, + 'error' => null, + 'types' => ['float', 'float', 'float', 'float'], + 'runtimeInput' => '1.0 2.0 3.0 4.0', + ], + 'combo: mixed types' => [ + 'format' => '%d %f %s', + 'count' => 3, + 'error' => null, + 'types' => ['int', 'float', 'string'], + 'runtimeInput' => '42 3.14 hello', + ], + 'edge: literal text only' => [ + 'format' => 'hello world', + 'count' => 0, + 'error' => 
null, + 'types' => [], + 'runtimeInput' => 'hello world', + ], + 'edge: %%%%' => [ + 'format' => '%%%%', + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => '%%', + ], + 'edge: %%%d' => [ + 'format' => '%%%d', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '%42', + ], + ]; + } + + /** + * Subset of format strings where the extension regex has known limitations. + * These are excluded from the regex comparison test but documented here. + * + * The regex \[[^\]]+\] cannot match character sets where ] is the first + * character (e.g., %[]abc]), because [^\]]+ requires at least one non-] + * character. The C code handles this via special-casing: if the first + * character after [ (or [^) is ], it's treated as a literal member of + * the set rather than the closing bracket. + * + * @return array<string, array{format: string, count: int, types: list<string>}> + */ + public static function regexKnownLimitations(): array + { + // The regex \[[^\]]+\] cannot match %[]abc] where ] is the first + // character in the set: [^\]]+ requires at least one non-] character + // before the closing ], but in []abc] the ] comes immediately. + // The C code special-cases this in BuildCharSet. + // + // %[^]abc] is NOT affected: [^\]]+ matches ^ (non-] char), then \] + // matches the first ], yielding [^] as the parsed set — wrong parse, + // but correct count (1 placeholder). So it's excluded from here. 
+ return [ + '%[]abc] — ] as first char in set' => [ + 'format' => '%[]abc]', + 'count' => 1, + 'types' => ['string'], + ], + ]; + } + + // ============================================= + // Test methods + // ============================================= + + public function testValidateFormatCPort(): void + { + foreach (self::allFormatStrings() as $label => $entry) { + $result = self::validateFormatC($entry['format']); + + if ($entry['error'] !== null) { + self::assertNotNull( + $result['error'], + sprintf('[%s] C port should report error for %s', $label, self::esc($entry['format'])), + ); + } else { + self::assertNull( + $result['error'], + sprintf('[%s] C port unexpected error for %s: %s', $label, self::esc($entry['format']), $result['error'] ?? ''), + ); + self::assertSame( + $entry['count'], + $result['count'], + sprintf('[%s] C port count mismatch for %s', $label, self::esc($entry['format'])), + ); + } + } + } + + #[RequiresPhp('>= 8.0')] + public function testValidateFormatCPortMatchesRuntime(): void + { + + foreach (self::allFormatStrings() as $label => $entry) { + $cResult = self::validateFormatC($entry['format']); + + if ($entry['error'] !== null) { + // Runtime should also error + $runtime = self::runtimeSscanfCount($entry['format']); + self::assertNotNull( + $runtime['error'], + sprintf('[%s] Runtime should error for %s', $label, self::esc($entry['format'])), + ); + continue; + } + + // For valid formats with runtimeInput, verify count matches + if (!isset($entry['runtimeInput'])) { + continue; + } + $runtime = self::runtimeSscanfCount($entry['format'], $entry['runtimeInput']); + self::assertNull($runtime['error'], sprintf('[%s] Runtime error: %s', $label, $runtime['error'] ?? 
'')); + self::assertSame( + $entry['count'], + $runtime['count'], + sprintf('[%s] Runtime count mismatch for %s with input %s', $label, self::esc($entry['format']), self::esc($entry['runtimeInput'])), + ); + } + } + + public function testPrintfHelperMatchesCReference(): void + { + foreach (self::allFormatStrings() as $label => $entry) { + if ($entry['error'] !== null) { + continue; + } + + $count = $this->printfHelper->getScanfPlaceholdersCount($entry['format']); + + self::assertSame( + $entry['count'], + $count, + sprintf('[%s] PrintfHelper count mismatch for %s', $label, self::esc($entry['format'])), + ); + } + } + + public function testExtensionRegexMatchesCReference(): void + { + $knownLimitations = array_map( + static fn (array $e): string => $e['format'], + self::regexKnownLimitations(), + ); + + foreach (self::allFormatStrings() as $label => $entry) { + if ($entry['error'] !== null) { + continue; + } + if (in_array($entry['format'], $knownLimitations, true)) { + continue; + } + + $result = self::extensionRegexParse($entry['format']); + + self::assertSame( + $entry['count'], + $result['count'], + sprintf('[%s] Regex count mismatch for %s', $label, self::esc($entry['format'])), + ); + self::assertSame( + $entry['types'], + $result['types'], + sprintf('[%s] Regex types mismatch for %s', $label, self::esc($entry['format'])), + ); + } + } + + public function testExtensionRegexKnownLimitations(): void + { + foreach (self::regexKnownLimitations() as $label => $entry) { + $result = self::extensionRegexParse($entry['format']); + + // Document the current (incorrect) behavior + self::assertNotSame( + $entry['count'], + $result['count'], + sprintf('[%s] Regex limitation appears to be fixed — move from regexKnownLimitations to allFormatStrings', $label), + ); + } + } + + public function testSpecifierReturnTypes(): void + { + $expected = [ + 'n' => 'int', 'd' => 'int', 'D' => 'int', 'i' => 'int', + 'o' => 'int', 'x' => 'int', 'X' => 'int', + 'u' => 'int|string', + 'f' => 
'float', 'e' => 'float', 'E' => 'float', 'g' => 'float', + 's' => 'string', 'c' => 'string', + ]; + + foreach ($expected as $spec => $type) { + self::assertSame($type, self::specifierReturnTypeC($spec), sprintf('%%%s', $spec)); + } + } + + /** + * Cross-validate: C port, PrintfHelper, and Extension regex must agree on + * placeholder count for all valid, non-limitation format strings. + */ + public function testStaticAlgorithmsAgree(): void + { + $knownLimitations = array_map( + static fn (array $e): string => $e['format'], + self::regexKnownLimitations(), + ); + + $mismatches = []; + + foreach (self::allFormatStrings() as $label => $entry) { + if ($entry['error'] !== null) { + continue; + } + if (in_array($entry['format'], $knownLimitations, true)) { + continue; + } + + $cCount = self::validateFormatC($entry['format'])['count']; + $helperCount = $this->printfHelper->getScanfPlaceholdersCount($entry['format']); + $regexResult = self::extensionRegexParse($entry['format']); + + if ($cCount !== $helperCount || $cCount !== $regexResult['count']) { + $mismatches[] = sprintf( + '[%s] %s C=%s Helper=%s Regex=%s', + $label, + self::esc($entry['format']), + self::descCount($cCount), + self::descCount($helperCount), + self::descCount($regexResult['count']), + ); + } + } + + self::assertSame([], $mismatches, "Algorithm mismatches:\n" . implode("\n", $mismatches)); + } + + private static function esc(string $s): string + { + return '"' . addcslashes($s, "\0\n\r\t\"\\") . '"'; + } + + private static function descCount(int|null $c): string + { + return $c === null ? 'null' : (string) $c; + } + +} From f7a725de2d4d83c80b0623dd9d97facb66aeba39 Mon Sep 17 00:00:00 2001 From: phpstan-bot Date: Mon, 4 May 2026 09:16:30 +0000 Subject: [PATCH 6/6] Handle `l`/`L`/`h` scanf size modifiers in format parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHP's scanf inherits size modifier syntax from C (e.g. %ld, %lf, %Lf, %hd). 
In ValidateFormat (ext/standard/scanf.c), these are consumed before the specifier character: if (*ch == 'l' || *ch == 'L' || *ch == 'h') { ch++; } They have no effect on PHP's result types (PHP always uses zend_long and double regardless of modifier) but must be accepted as valid syntax. Both the SscanfFunctionDynamicReturnTypeExtension regex and the PrintfHelper scanf pattern now include optional [lLh]? before the specifier character class. Also corrects the runtimeSscanfCount documentation: %n (and %*n) always provides a value even with empty input because it records characters consumed without requiring matching data. This means count(sscanf("", "%*n" . $format)) is a reliable counting method — %*n increments nconversions internally, preventing the null return path that triggers only when underflow AND nconversions==0. Co-Authored-By: Claude Opus 4.6 --- src/Rules/Functions/PrintfHelper.php | 2 +- ...canfFunctionDynamicReturnTypeExtension.php | 2 +- tests/PHPStan/Analyser/nsrt/bug-14567.php | 8 ++ .../Functions/SscanfFormatValidationTest.php | 93 +++++++++++++++++-- .../Rules/Functions/data/bug-14567.php | 15 +++ 5 files changed, 111 insertions(+), 9 deletions(-) diff --git a/src/Rules/Functions/PrintfHelper.php b/src/Rules/Functions/PrintfHelper.php index 5455ba7edc0..19d76accf4c 100644 --- a/src/Rules/Functions/PrintfHelper.php +++ b/src/Rules/Functions/PrintfHelper.php @@ -38,7 +38,7 @@ public function getPrintfPlaceholders(string $format): ?array public function getScanfPlaceholdersCount(string $format): ?int { - return $this->getPlaceholdersCount('(?[cdDeEfginosuxX%s]|\[[^\]]+\])', $format, true); + return $this->getPlaceholdersCount('(?:[lLh]?(?[cdDeEfginosuxX%s]|\[[^\]]+\]))', $format, true); } /** diff --git a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php index 2be637b7e7b..efaec2f7552 100644 --- a/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php +++ 
b/src/Type/Php/SscanfFunctionDynamicReturnTypeExtension.php @@ -57,7 +57,7 @@ public function getTypeFromFunctionCall( $arrayBuilder = ConstantArrayTypeBuilder::createEmpty(); - if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cDdeEfginosuxX]{1})/', $formatValue, $matches) > 0) { + if (preg_match_all('/%(\d*)[lLh]?(\[[^\]]+\]|[cDdeEfginosuxX])/', $formatValue, $matches) > 0) { for ($i = 0; $i < count($matches[0]); $i++) { $length = $matches[1][$i]; $specifier = $matches[2][$i]; diff --git a/tests/PHPStan/Analyser/nsrt/bug-14567.php b/tests/PHPStan/Analyser/nsrt/bug-14567.php index 327fba81d7c..51ce2b42223 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-14567.php +++ b/tests/PHPStan/Analyser/nsrt/bug-14567.php @@ -45,4 +45,12 @@ function sscanfEdgeCases(string $s) { // mixed specifiers with %n assertType('array{int|null, int|null}|null', sscanf($s, "%d%n")); + + // Size modifiers (l, L, h) — consumed by ValidateFormat, no effect on PHP type + assertType('array{int|null}|null', sscanf($s, "%ld")); + assertType('array{float|null}|null', sscanf($s, "%lf")); + assertType('array{float|null}|null', sscanf($s, "%Lf")); + assertType('array{int|null}|null', sscanf($s, "%hd")); + assertType('array{int|string|null}|null', sscanf($s, "%lu")); + assertType('array{int|null, float|null, string|null}|null', sscanf($s, "%ld %lf %s")); } diff --git a/tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php b/tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php index e7e70d4944d..b48759a84b8 100644 --- a/tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php +++ b/tests/PHPStan/Rules/Functions/SscanfFormatValidationTest.php @@ -60,7 +60,12 @@ protected function setUp(): void // - %% is a literal percent, not a placeholder // - %* is assignment suppression (placeholder parsed but not counted) // - Digits after % may be either XPG3 positional (%n$) or a width - // - Size modifiers (l, L, h) are consumed and ignored + // - Size modifiers (l, L, h) are consumed and ignored — these 
are + // inherited from C's scanf where they denote storage size (long, + // long double, short). In PHP they have no effect on the result + // type since PHP uses its own type system (zend_long, double), + // but they must be accepted as valid syntax. The C code simply + // advances past them: if (*ch == 'l' || *ch == 'L' || *ch == 'h') // - The switch on the specifier character is the definitive list // of valid scanf specifiers: n d D i o x X u f e E g s c [ // - Character sets ([...]) handle ] as the first character and ^ @@ -279,7 +284,7 @@ public static function extensionRegexParse(string $format): array } $types = []; - if (preg_match_all('/%(\d*)(\[[^\]]+\]|[cDdeEfginosuxX]{1})/', $format, $matches) > 0) { + if (preg_match_all('/%(\d*)[lLh]?(\[[^\]]+\]|[cDdeEfginosuxX])/', $format, $matches) > 0) { for ($i = 0; $i < count($matches[0]); $i++) { $specifier = $matches[2][$i]; @@ -301,11 +306,15 @@ public static function extensionRegexParse(string $format): array /** * Uses PHP runtime sscanf with crafted input to determine placeholder count. * - * Note: sscanf('', $format) returns null for any format with specifiers - * (except %n) because the C code checks *string == '\0' before each - * conversion attempt and triggers underflow. A string with sufficient - * matching data is needed. This method uses a numeric string that - * satisfies most specifier types. + * %n always provides a value (characters consumed) even with empty input, + * and %*n (suppressed) still increments nconversions internally. This means + * count(sscanf("", "%*n" . $format)) reliably returns the number of + * capturing placeholders for any valid format string — %*n prevents the + * null return path (which triggers only when underflow AND nconversions==0). + * + * For specifiers other than %n, sscanf("", $format) returns null when the + * first non-%n specifier encounters empty input before any conversion has + * succeeded. This method uses crafted input that satisfies the specifiers. 
* * @return array{count: int|null, error: string|null} */ @@ -550,6 +559,76 @@ public static function allFormatStrings(): array 'runtimeInput' => 'hello', ], + // ============================================= + // Size modifiers (l, L, h) + // ValidateFormat consumes these before the specifier character. + // They have no effect on PHP's type — %ld behaves identically + // to %d — but they must be accepted as valid format syntax. + // ============================================= + 'size: %ld (long int)' => [ + 'format' => '%ld', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '42', + ], + 'size: %lf (long float/double)' => [ + 'format' => '%lf', + 'count' => 1, + 'error' => null, + 'types' => ['float'], + 'runtimeInput' => '3.14', + ], + 'size: %Lf (long double)' => [ + 'format' => '%Lf', + 'count' => 1, + 'error' => null, + 'types' => ['float'], + 'runtimeInput' => '3.14', + ], + 'size: %hd (short int)' => [ + 'format' => '%hd', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '42', + ], + 'size: %lu (long unsigned)' => [ + 'format' => '%lu', + 'count' => 1, + 'error' => null, + 'types' => ['int|string'], + 'runtimeInput' => '42', + ], + 'size: %lx (long hex)' => [ + 'format' => '%lx', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => 'ff', + ], + 'size: %10ld (width + size modifier)' => [ + 'format' => '%10ld', + 'count' => 1, + 'error' => null, + 'types' => ['int'], + 'runtimeInput' => '42', + ], + 'size: %*ld (suppressed + size modifier)' => [ + 'format' => '%*ld', + 'count' => 0, + 'error' => null, + 'types' => [], + 'runtimeInput' => '42', + ], + 'size: %ld %lf %s (mixed with size mods)' => [ + 'format' => '%ld %lf %s', + 'count' => 3, + 'error' => null, + 'types' => ['int', 'float', 'string'], + 'runtimeInput' => '42 3.14 hello', + ], + // ============================================= // NUL byte termination // ============================================= diff --git 
a/tests/PHPStan/Rules/Functions/data/bug-14567.php b/tests/PHPStan/Rules/Functions/data/bug-14567.php index 60adb979633..8b2f01218ba 100644 --- a/tests/PHPStan/Rules/Functions/data/bug-14567.php +++ b/tests/PHPStan/Rules/Functions/data/bug-14567.php @@ -34,3 +34,18 @@ // %g specifier - general float, 1 placeholder sscanf('1.5', "%g", $gval); + +// Size modifiers (l, L, h) - consumed before specifier, 1 placeholder each +sscanf('42', "%ld", $long); +sscanf('3.14', "%lf", $longf); +sscanf('3.14', "%Lf", $longdouble); +sscanf('42', "%hd", $short); + +// Size modifier with width +sscanf('42', "%10ld", $widelong); + +// Size modifier with suppression - 0 capturing placeholders +sscanf('42 hello', "%*ld %s", $afterskip); + +// Mixed size modifiers +sscanf('42 3.14 hello', "%ld %lf %s", $mix1, $mix2, $mix3);