Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 169 additions & 44 deletions include/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,12 +337,24 @@ report_parse_error(UC const *p, parse_error error) {
// spans (read only by the rare digit_comp slow path) are not materialized,
// which keeps the fat parsed_number_string_t off the hot path. The caller
// re-parses with store_spans=true if the slow path is actually reached.
template <bool basic_json_fmt, typename UC>
//
// has_separator is a *compile-time* flag (the opposite choice from store_spans,
// and deliberately so): the separator-aware code paths are an opt-in feature
// that the vast majority of callers never enable. Gating them on a template
// parameter means the has_separator==false instantiation -- the default that
// everybody uses -- compiles to exactly the same code as if the feature did not
// exist: no separator comparison ever enters a digit loop, and the SIMD
// eight-digit fast path stays intact. The has_separator==true instantiation is
// cold code that default callers never execute. See parse_number_string_options
// for the runtime->compile-time dispatch.
template <bool basic_json_fmt, bool has_separator, typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
bool store_spans = true) noexcept {
chars_format const fmt = detail::adjust_for_feature_macros(options.format);
UC const decimal_point = options.decimal_point;
UC const separator = options.digit_separator;
(void)separator; // unused when has_separator == false

parsed_number_string_t<UC> answer;
answer.valid = false;
Expand Down Expand Up @@ -375,16 +387,19 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
UC const *const start_digits = p;

uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)

// Straight-line unroll of the integer-part scan: most integer parts are
// 1-5 digits, so peeling the first iterations eliminates the loop back-edge
// for the common case. Semantics are identical to the original `while` loop:
// i = 10*i + digit, advancing p.
if ((p != pend) && is_integer(*p)) {
i = uint64_t(*p - UC('0'));
++p;
int64_t digit_count = 0;
// Points at the first actual digit (== start_digits when no separator
// precedes it). Used only by the basic_json leading-zero check.
UC const *first_digit_ptr = start_digits;
(void)first_digit_ptr; // only read in the basic_json_fmt path

FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
// Straight-line unroll of the integer-part scan: most integer parts are
// 1-5 digits, so peeling the first iterations eliminates the loop back-edge
// for the common case. Semantics are identical to the original `while`
// loop: i = 10*i + digit, advancing p.
if ((p != pend) && is_integer(*p)) {
i = 10 * i + uint64_t(*p - UC('0'));
i = uint64_t(*p - UC('0'));
++p;
if ((p != pend) && is_integer(*p)) {
i = 10 * i + uint64_t(*p - UC('0'));
Expand All @@ -395,29 +410,58 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
if ((p != pend) && is_integer(*p)) {
i = 10 * i + uint64_t(*p - UC('0'));
++p;
while ((p != pend) && is_integer(*p)) {
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
i = 10 * i +
uint64_t(*p - UC('0')); // might overflow, handled later
if ((p != pend) && is_integer(*p)) {
i = 10 * i + uint64_t(*p - UC('0'));
++p;
while ((p != pend) && is_integer(*p)) {
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
i = 10 * i +
uint64_t(*p - UC('0')); // might overflow, handled later
++p;
}
}
}
}
}
}
digit_count = int64_t(p - start_digits);
}
else {
// Separator-aware scan: a configured digit separator (e.g. '\'') may appear
// between digits. It is skipped and does not contribute to the value or the
// digit count, but it is retained in the integer span below so the overflow
// re-scan can re-tokenize correctly.
while (p != pend) {
if (*p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
if (digit_count == 0) {
first_digit_ptr = p;
}
i = 10 * i + uint64_t(*p - UC('0')); // might overflow, handled later
++p;
++digit_count;
}
}
UC const *const end_of_integer_part = p;
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
if (store_spans) {
answer.integer = span<UC const>(start_digits, size_t(digit_count));
// The span keeps the raw characters (separators included) so the overflow
// re-scan below can re-tokenize correctly; for has_separator == false the
// length equals digit_count.
answer.integer = span<UC const>(start_digits,
size_t(end_of_integer_part - start_digits));
}
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
// at least 1 digit in integer part, without leading zeros
if (digit_count == 0) {
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
}
if ((start_digits[0] == UC('0') && digit_count > 1)) {
if ((*first_digit_ptr == UC('0') && digit_count > 1)) {
return report_parse_error<UC>(start_digits,
parse_error::leading_zeros_in_integer_part);
}
Expand All @@ -428,20 +472,40 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
if (has_decimal_point) {
++p;
UC const *before = p;
// can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck.
loop_parse_if_eight_digits(p, pend, i);
int64_t fractional_digit_count = 0;
FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
// can occur at most twice without overflowing, but let it occur more,
// since for integers with many digits, digit parsing is the primary
// bottleneck.
loop_parse_if_eight_digits(p, pend, i);

while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - UC('0'));
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - UC('0'));
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
}
fractional_digit_count = int64_t(p - before);
}
exponent = before - p;
else {
while (p != pend) {
if (*p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
uint8_t digit = uint8_t(*p - UC('0'));
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
++fractional_digit_count;
}
}
exponent = -fractional_digit_count;
if (store_spans) {
answer.fraction = span<UC const>(before, size_t(p - before));
}
digit_count -= exponent;
digit_count += fractional_digit_count;
}
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
// at least 1 digit in fractional part
Expand Down Expand Up @@ -483,12 +547,30 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
// Otherwise, we will be ignoring the 'e'.
p = location_of_e;
} else {
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - UC('0'));
if (exp_number < 0x10000000) {
exp_number = 10 * exp_number + digit;
FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - UC('0'));
if (exp_number < 0x10000000) {
exp_number = 10 * exp_number + digit;
}
++p;
}
}
else {
while (p != pend) {
if (*p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
uint8_t digit = uint8_t(*p - UC('0'));
if (exp_number < 0x10000000) {
exp_number = 10 * exp_number + digit;
}
++p;
}
++p;
}
if (neg_exp) {
exp_number = -exp_number;
Expand All @@ -514,9 +596,12 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
// It is possible that the integer had an overflow.
// We have to handle the case where we have 0.0000somenumber.
// We need to be mindful of the case where we only have zeroes...
// E.g., 0.000000000...000.
// E.g., 0.000000000...000. The `has_separator &&` guard below is a
// compile-time constant, so this loop is identical to the original when the
// feature is disabled.
UC const *start = start_digits;
while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
while ((start != pend) && (*start == UC('0') || *start == decimal_point ||
(has_separator && *start == separator))) {
if (*start == UC('0')) {
digit_count--;
}
Expand All @@ -537,20 +622,60 @@ parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
p = answer.integer.ptr;
UC const *int_end = p + answer.integer.len();
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
FASTFLOAT_IF_CONSTEXPR17(!has_separator) {
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
exponent = end_of_integer_part - p + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const *frac_end = p + answer.fraction.len();
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
exponent = answer.fraction.ptr - p + exp_number;
}
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
exponent = end_of_integer_part - p + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const *frac_end = p + answer.fraction.len();
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
else {
// Separator-aware re-scan: separators are skipped and excluded from
// the digit counts that determine the exponent.
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
if (*p == separator) {
++p;
continue;
}
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
exponent = answer.fraction.ptr - p + exp_number;
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
int64_t remaining_integer_digits = 0;
while (p != int_end) {
if (*p == separator) {
++p;
continue;
}
++p;
++remaining_integer_digits;
}
exponent = remaining_integer_digits + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const *frac_end = p + answer.fraction.len();
int64_t fraction_digits_consumed = 0;
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
if (*p == separator) {
++p;
continue;
}
i = i * 10 + uint64_t(*p - UC('0'));
++p;
++fraction_digits_consumed;
}
exponent = exp_number - fraction_digits_consumed;
}
}
// We have now corrected both exponent and i, to a truncated value
}
Expand Down
24 changes: 22 additions & 2 deletions include/fast_float/float_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,35 @@ using from_chars_result = from_chars_result_t<char>;

// Bundle of parsing options handed (by value) to the parsing entry points:
// accepted number formats, decimal point character, optional digit separator,
// a bitmask of extra format options, and the integer base.
template <typename UC> struct parse_options_t {
// Parameter -> member mapping: fmt -> format, dot -> decimal_point,
// b -> base, sep -> digit_separator, opts -> format_options.
// sep and opts are trailing defaulted parameters, so calls that pass only
// (fmt, dot, b) remain valid. The initializer list follows the member
// declaration order below.
constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
UC dot = UC('.'), int b = 10)
: format(fmt), decimal_point(dot), base(b) {}
UC dot = UC('.'), int b = 10,
UC sep = UC('\0'), uint8_t opts = 0)
: format(fmt), decimal_point(dot), digit_separator(sep),
format_options(opts), base(b) {}

// Member order is chosen so that, for the common UC == char case, the two
// new single-byte fields land in the padding that already existed between
// decimal_point and base. This keeps sizeof(parse_options_t<char>) == 16, so
// the struct is still passed in registers (ARM64/x86-64) and the default
// parse path is unaffected. Reordering would grow the struct and force it
// onto the stack at the call boundary.

/** Which number formats are accepted */
chars_format format;
/** The character used as decimal point */
UC decimal_point;
/** The character used as digit separator (e.g. '\''). Use '\0' to disable.
* When disabled (the default), the parser compiles to the exact same code as
* if this option did not exist: separator handling is gated on a compile-time
* template parameter, so the default hot path carries no extra branches. */
UC digit_separator;
/** Additional format options (bitmask), see the static flags below. */
uint8_t format_options;
/** The base used for integers */
int base;

/** Skip a leading base prefix (0x/0X, 0b/0B) before parsing. Decimal-only:
* the digits are still parsed in base 10, the prefix is merely consumed. */
// Bit flag intended to be OR-ed into format_options.
static constexpr uint8_t skip_prefix = 1;
};

using parse_options = parse_options_t<char>;
Expand Down
37 changes: 33 additions & 4 deletions include/fast_float/parse_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,30 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
return answer;
}

// Runtime -> compile-time dispatch over both boolean knobs of
// parse_number_string. basic_json_fmt was already dispatched this way; the
// digit separator is selected here too so that the separator-aware code paths
// stay confined to the (cold) has_separator==true instantiation. Callers that
// never set a separator -- the overwhelming majority -- run the
// has_separator==false instantiation, which is byte-for-byte the original
// separator-free parser.
// Maps the two runtime switches -- `bjf` (basic JSON format) and "is a digit
// separator configured?" (options.digit_separator != '\0') -- onto the
// matching <basic_json_fmt, has_separator> instantiation of
// parse_number_string, forwarding every other argument unchanged.
template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
parse_number_string_options(UC const *first, UC const *last,
                            parse_options_t<UC> options, bool bjf,
                            bool store_spans) noexcept {
  // No separator configured: use the separator-free instantiations.
  if (options.digit_separator == UC('\0')) {
    if (bjf) {
      return parse_number_string<true, false, UC>(first, last, options,
                                                  store_spans);
    }
    return parse_number_string<false, false, UC>(first, last, options,
                                                 store_spans);
  }

  // A separator is configured: use the separator-aware instantiations.
  if (bjf) {
    return parse_number_string<true, true, UC>(first, last, options,
                                               store_spans);
  }
  return parse_number_string<false, true, UC>(first, last, options,
                                              store_spans);
}

// Slow path: re-parse materializing the integer/fraction spans the hot no-span
// parse skipped, then run the full algorithm. The two callers reach it only
// through a fastfloat_unlikely branch, so the optimizer keeps this re-parse off
Expand All @@ -301,8 +325,7 @@ FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
parse_number_slow_path(UC const *first, UC const *last, T &value,
parse_options_t<UC> options, bool bjf) noexcept {
parsed_number_string_t<UC> pns =
bjf ? parse_number_string<true, UC>(first, last, options, true)
: parse_number_string<false, UC>(first, last, options, true);
parse_number_string_options(first, last, options, bjf, true);
return from_chars_advanced(pns, value);
}

Expand Down Expand Up @@ -336,8 +359,7 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
// parsed_number_string_t off the hot path. store_spans is a runtime argument,
// so this reuses the same parse_number_string instantiation as the slow path
// (the one selected from bjf and the configured digit separator).
parsed_number_string_t<UC> pns =
bjf ? parse_number_string<true, UC>(first, last, options, false)
: parse_number_string<false, UC>(first, last, options, false);
parse_number_string_options(first, last, options, bjf, false);
if (!pns.valid) {
if (uint64_t(fmt & chars_format::no_infnan)) {
answer.ec = std::errc::invalid_argument;
Expand Down Expand Up @@ -539,6 +561,13 @@ template <typename T, typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_advanced(UC const *first, UC const *last, T &value,
parse_options_t<UC> options) noexcept {
if (((options.format_options & parse_options_t<UC>::skip_prefix) != 0) &&
(last - first >= 2) && (*first == UC('0'))) {
UC const c_low = UC(first[1] | UC(0x20));
if (c_low == UC('x') || c_low == UC('b')) {
first += 2;
}
}
return from_chars_advanced_caller<
size_t(is_supported_float_type<T>::value) +
2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,
Expand Down
Loading
Loading