From 03377a41a907cec5c95b37f9f8ba85168db959b2 Mon Sep 17 00:00:00 2001 From: Pierluigi Lenoci Date: Tue, 17 Mar 2026 00:36:47 +0100 Subject: [PATCH 1/3] fix(scripts): encode residual control chars as \uXXXX instead of stripping json_escape() was silently deleting control characters (U+0000-U+001F) that were not individually handled (\n, \t, \r, \b, \f). Per RFC 8259, these must be encoded as \uXXXX sequences to preserve data integrity. Replace the tr -d strip with a char-by-char loop that emits proper \uXXXX escapes for any remaining control characters. --- scripts/bash/common.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/scripts/bash/common.sh b/scripts/bash/common.sh index 826e740f00..2917bcc9e1 100644 --- a/scripts/bash/common.sh +++ b/scripts/bash/common.sh @@ -171,9 +171,21 @@ json_escape() { s="${s//$'\r'/\\r}" s="${s//$'\b'/\\b}" s="${s//$'\f'/\\f}" - # Strip remaining control characters (U+0000–U+001F) not individually escaped above - s=$(printf '%s' "$s" | tr -d '\000-\007\013\016-\037') - printf '%s' "$s" + # Escape any remaining U+0000-U+001F control characters as \uXXXX. + # Only single-byte characters can be JSON control chars; multi-byte UTF-8 + # sequences have first-byte values >= 0xC0 and are never control characters. + local i char code + local out="" + for (( i=0; i<${#s}; i++ )); do + char="${s:$i:1}" + code=$(LC_ALL=C printf '%d' "'$char" 2>/dev/null || echo 256) + if (( code >= 0 && code <= 31 )); then + out+=$(printf '\\u%04x' "$code") + else + out+="$char" + fi + done + printf '%s' "$out" } check_file() { [[ -f "$1" ]] && echo " ✓ $2" || echo " ✗ $2"; } From 1b7ce3bec2d6ff4c7f39020dd2b83d68c74c88d9 Mon Sep 17 00:00:00 2001 From: Pierluigi Lenoci Date: Tue, 17 Mar 2026 20:18:37 +0100 Subject: [PATCH 2/3] fix(scripts): address Copilot review on json_escape control char loop - Set LC_ALL=C for the entire loop (not just printf) so that ${#s} and ${s:$i:1} operate on bytes deterministically across locales - Fix comment: U+0000 (NUL) cannot exist in bash strings, range is U+0001-U+001F; adjust code guard accordingly (code >= 1) - Emit directly to stdout instead of accumulating in a variable, avoiding quadratic string concatenation on longer inputs --- scripts/bash/common.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/bash/common.sh b/scripts/bash/common.sh index 2917bcc9e1..644474aaeb 100644 --- a/scripts/bash/common.sh +++ b/scripts/bash/common.sh @@ -171,21 +171,21 @@ json_escape() { s="${s//$'\r'/\\r}" s="${s//$'\b'/\\b}" s="${s//$'\f'/\\f}" - # Escape any remaining U+0000-U+001F control characters as \uXXXX. - # Only single-byte characters can be JSON control chars; multi-byte UTF-8 - # sequences have first-byte values >= 0xC0 and are never control characters. + # Escape any remaining U+0001-U+001F control characters as \uXXXX. + # (U+0000/NUL cannot appear in bash strings and is excluded.) + # LC_ALL=C ensures ${#s} counts bytes and ${s:$i:1} yields single bytes, + # so multi-byte UTF-8 sequences (first byte >= 0xC0) pass through intact. + local LC_ALL=C local i char code - local out="" for (( i=0; i<${#s}; i++ )); do char="${s:$i:1}" - code=$(LC_ALL=C printf '%d' "'$char" 2>/dev/null || echo 256) - if (( code >= 0 && code <= 31 )); then - out+=$(printf '\\u%04x' "$code") + code=$(printf '%d' "'$char" 2>/dev/null || echo 256) + if (( code >= 1 && code <= 31 )); then + printf '\\u%04x' "$code" else - out+="$char" + printf '%s' "$char" fi done - printf '%s' "$out" } check_file() { [[ -f "$1" ]] && echo " ✓ $2" || echo " ✗ $2"; } From 0b1f005bb3305780e30eff9079cd029c3b2c0205 Mon Sep 17 00:00:00 2001 From: Pierluigi Lenoci Date: Tue, 17 Mar 2026 22:23:27 +0100 Subject: [PATCH 3/3] perf(scripts): use printf -v to avoid subshell in json_escape loop Replace code=$(printf ...) with printf -v code to assign the character code without spawning a subshell on every byte, reducing overhead for longer inputs. --- scripts/bash/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bash/common.sh b/scripts/bash/common.sh index 644474aaeb..40f1c96e7d 100644 --- a/scripts/bash/common.sh +++ b/scripts/bash/common.sh @@ -179,7 +179,7 @@ json_escape() { local i char code for (( i=0; i<${#s}; i++ )); do char="${s:$i:1}" - code=$(printf '%d' "'$char" 2>/dev/null || echo 256) + printf -v code '%d' "'$char" 2>/dev/null || code=256 if (( code >= 1 && code <= 31 )); then printf '\\u%04x' "$code" else