From c9d898f892b92274e8aae3024298f1432b47ccae Mon Sep 17 00:00:00 2001 From: He-Pin Date: Thu, 18 Jun 2026 18:29:43 +0800 Subject: [PATCH 1/5] fix: std.parseYaml YAML 1.2 octal (0o777) and document marker handling Motivation: Two issues in std.parseYaml diverging from go-jsonnet: 1. SnakeYAML's SafeConstructor uses YAML 1.1 type resolution which does not recognize the 0o prefix for octal integers (YAML 1.2), causing unquoted 0o777 to be parsed as string "0o777" instead of 511. 2. Explicit document start markers (---) caused single-doc YAML to be returned directly instead of wrapped in an array as go-jsonnet does. Modification: Replaced SafeConstructor-based parsing with composeAll() + custom yamlNodeToJson() that handles YAML 1.2 octal (0o prefix) for plain scalars while preserving quoted values as strings. Added YamlDocStartPattern regex to detect explicit --- and wrap single-doc results in an array. Result: std.parseYaml now matches go-jsonnet for both YAML 1.2 octal syntax and document start marker handling. | YAML input | go-jsonnet v0.22.0 | jrsonnet 0.5.0-pre99 | sjsonnet (before) | sjsonnet (after) | |-----------|-------------------|---------------------|-------------------|-----------------| | 0o777 | 511 | 511 | "0o777" (bug) | 511 | | -0o777 | -511 | -511 | "-0o777" (bug) | -511 | | "0o777" | "0o777" | "0o777" | "0o777" | "0o777" | | "---" | [null] | null | null (bug) | [null] | | "---\na: 1" | [{a:1}] | {a:1} | {a:1} (bug) | [{a:1}] | | "a: 1" | {a:1} | {a:1} | {a:1} | {a:1} | Note: jrsonnet does NOT wrap --- in array; sjsonnet aligns with go-jsonnet. 
--- sjsonnet/src-js/sjsonnet/Platform.scala | 40 +++++- sjsonnet/src-jvm/sjsonnet/Platform.scala | 133 +++++++++++++----- sjsonnet/src-native/sjsonnet/Platform.scala | 39 ++++- .../go_test_suite/parseYaml.jsonnet.golden | 4 +- .../parseyaml_doc_marker.jsonnet | 9 ++ .../parseyaml_doc_marker.jsonnet.golden | 1 + .../parseyaml_yaml12_octal.jsonnet | 20 +++ .../parseyaml_yaml12_octal.jsonnet.golden | 1 + .../test/src/sjsonnet/ParseYamlTests.scala | 12 +- 9 files changed, 204 insertions(+), 55 deletions(-) create mode 100644 sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet create mode 100644 sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet.golden create mode 100644 sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet create mode 100644 sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet.golden diff --git a/sjsonnet/src-js/sjsonnet/Platform.scala b/sjsonnet/src-js/sjsonnet/Platform.scala index 5d15694b2..302bd7992 100644 --- a/sjsonnet/src-js/sjsonnet/Platform.scala +++ b/sjsonnet/src-js/sjsonnet/Platform.scala @@ -41,10 +41,21 @@ object Platform { def isAsciiJsonSafe(s: String, from: Int, to: Int): Boolean = CharSWAR.isAsciiJsonSafe(s, from, to) - private def nodeToJson(node: Node): ujson.Value = node match { - case _: Node.ScalarNode => - YamlDecoder.forAny.construct(node).getOrElse("") match { + private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7]+") + + private def nodeToJson(node: Node, input: String): ujson.Value = node match { + case sn: Node.ScalarNode => + val constructed = YamlDecoder.forAny.construct(sn).getOrElse("") + constructed match { case null | None => ujson.Null + case v: String + if sn.tag == Tag.str && Yaml12OctalPattern.matcher(v).matches() && + !isQuotedScalar(sn, input) => + val negative = v.charAt(0) == '-' + val octalPart = + if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) + val result = java.lang.Long.parseLong(octalPart, 8) + 
ujson.Num((if (negative) -result else result).toDouble) case v: String => ujson.read(s"\"${v.replace("\"", "\\\"").replace("\n", "\\n")}\"", false) case v: Boolean => ujson.Bool(v) case v: Int => ujson.Num(v.toDouble) @@ -59,7 +70,7 @@ object Platform { case Node.SequenceNode(nodes, _) => val buf = new mutable.ArrayBuffer[ujson.Value](nodes.size) for (n <- nodes) { - buf += nodeToJson(n) + buf += nodeToJson(n, input) } ujson.Arr(buf) case Node.MappingNode(mappings, _) => @@ -67,7 +78,7 @@ object Platform { buf.sizeHint(mappings.size) for ((key, value) <- mappings) { key match { - case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value) + case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value, input) case _ => Error.fail("Invalid YAML mapping key class: " + key.getClass.getSimpleName) } } @@ -76,6 +87,20 @@ object Platform { Error.fail("Unsupported YAML node type: " + node.getClass.getSimpleName) } + private def isQuotedScalar(sn: Node.ScalarNode, input: String): Boolean = { + sn.pos match { + case Some(range) => + val offset = range.start.offset + offset >= 0 && offset < input.length && { + val c = input.charAt(offset) + c == '"' || c == '\'' + } + case None => false + } + } + + private val YamlDocStartPattern = Pattern.compile("\\A\\s*---(?:[ \\t\\n\\r]|\\z)") + def yamlToJson(s: String): ujson.Value = { if (s.trim.isEmpty) return ujson.Null @@ -83,16 +108,17 @@ object Platform { // since scala-yaml's parseManyYamls can't handle empty documents // (DocumentStart immediately followed by DocumentEnd). 
val preprocessed = addExplicitNullsForEmptyDocs(s) + val hasExplicitDocStart = YamlDocStartPattern.matcher(s).find() parseManyYamls(preprocessed) match { case Right(documents) => documents.size match { case 0 => ujson.Null - case 1 => nodeToJson(documents.head) + case 1 if !hasExplicitDocStart => nodeToJson(documents.head, preprocessed) case _ => val buf = new mutable.ArrayBuffer[ujson.Value](documents.size) for (doc <- documents) { - buf += nodeToJson(doc) + buf += nodeToJson(doc, preprocessed) } ujson.Arr(buf) } diff --git a/sjsonnet/src-jvm/sjsonnet/Platform.scala b/sjsonnet/src-jvm/sjsonnet/Platform.scala index 4ba3e07b7..e1fdc250a 100644 --- a/sjsonnet/src-jvm/sjsonnet/Platform.scala +++ b/sjsonnet/src-jvm/sjsonnet/Platform.scala @@ -10,8 +10,8 @@ import com.google.re2j.Pattern import net.jpountz.xxhash.{StreamingXXHash64, XXHashFactory} import org.tukaani.xz.LZMA2Options import org.tukaani.xz.XZOutputStream -import org.yaml.snakeyaml.{LoaderOptions, Yaml} -import org.yaml.snakeyaml.constructor.SafeConstructor +import org.yaml.snakeyaml.{DumperOptions, LoaderOptions, Yaml} +import org.yaml.snakeyaml.nodes.{MappingNode, Node, ScalarNode, SequenceNode, Tag} import scala.annotation.nowarn import scala.collection.compat.* @@ -73,48 +73,113 @@ object Platform { xzBytes(s.getBytes(UTF_8), compressionLevel) } - private def nodeToJson(node: Any): ujson.Value = node match { - case m: java.util.List[?] 
=> - val buf = new mutable.ArrayBuffer[ujson.Value](m.size) - for (n <- m.asScala) { - buf += nodeToJson(n) + private val Yaml12OctalPattern = java.util.regex.Pattern.compile("[-+]?0o[0-7]+") + + private def yamlNodeToJson(node: Node): ujson.Value = node match { + case sn: ScalarNode => + val value = sn.getValue + val tag = sn.getTag + val isPlain = sn.getScalarStyle == DumperOptions.ScalarStyle.PLAIN + + if (isPlain && Yaml12OctalPattern.matcher(value).matches()) { + val negative = value.charAt(0) == '-' + val octalPart = + if (negative || value.charAt(0) == '+') value.substring(3) else value.substring(2) + val result = java.lang.Long.parseUnsignedLong(octalPart, 8) + val signed = if (negative) -result else result + ujson.Num(signed.toDouble) + } else if (tag == Tag.INT) { + val cleaned = value.replace("_", "") + val result: Long = + if (cleaned.startsWith("0x") || cleaned.startsWith("-0x") || cleaned.startsWith("+0x")) { + val negative = cleaned.startsWith("-") + val hex = + if (negative || cleaned.startsWith("+")) cleaned.substring(3) + else cleaned.substring(2) + val v = java.lang.Long.parseUnsignedLong(hex, 16) + if (negative) -v else v + } else if ( + cleaned.startsWith("0b") || cleaned.startsWith("-0b") || cleaned.startsWith("+0b") + ) { + val negative = cleaned.startsWith("-") + val bin = + if (negative || cleaned.startsWith("+")) cleaned.substring(3) + else cleaned.substring(2) + val v = java.lang.Long.parseUnsignedLong(bin, 2) + if (negative) -v else v + } else if (cleaned.length > 1 && cleaned.startsWith("0") && !cleaned.contains(".")) { + val negative = cleaned.startsWith("-") + val oct = if (negative || cleaned.startsWith("+")) cleaned.substring(1) else cleaned + val v = java.lang.Long.parseUnsignedLong(oct, 8) + if (negative) -v else v + } else if (cleaned.contains(":")) { + val parts = cleaned.split(":") + parts.foldLeft(0L)((acc, p) => acc * 60 + p.trim.toLong) + } else { + cleaned.toLong + } + ujson.Num(result.toDouble) + } else if (tag == 
Tag.FLOAT) { + val cleaned = value.replace("_", "") + val result = cleaned match { + case ".inf" | ".Inf" | ".INF" => Double.PositiveInfinity + case "-.inf" | "-.Inf" | "-.INF" => Double.NegativeInfinity + case ".nan" | ".NaN" | ".NAN" => Double.NaN + case s if s.contains(":") => + s.split(":").foldLeft(0.0)((acc, p) => acc * 60 + p.trim.toDouble) + case s => s.toDouble + } + ujson.Num(result) + } else if (tag == Tag.BOOL) { + ujson.Bool(value.toLowerCase match { + case "true" | "yes" | "on" => true + case "false" | "no" | "off" => false + case _ => Error.fail("Invalid YAML boolean: " + value) + }) + } else if (tag == Tag.NULL) { + ujson.Null + } else { + ujson.Str(value) } - ujson.Arr(buf) - case m: java.util.Map[?, ?] => + + case mn: MappingNode => val buf = upickle.core.LinkedHashMap[String, ujson.Value]() - buf.sizeHint(m.size) - for ((key, value) <- m.asScala) { - key match { - case k: String => buf(k) = nodeToJson(value) - case _ => Error.fail("Invalid YAML mapping key class: " + key.getClass.getSimpleName) + buf.sizeHint(mn.getValue.size) + for (tuple <- mn.getValue.asScala) { + val key = tuple.getKeyNode match { + case sn: ScalarNode => sn.getValue + case other => Error.fail("Invalid YAML mapping key type: " + other.getTag) } + buf(key) = yamlNodeToJson(tuple.getValueNode) } ujson.Obj(buf) - case null => ujson.Null - case v: String => ujson.Str(v) - case v: Boolean => ujson.Bool(v) - case v: Int => ujson.Num(v.toDouble) - case v: Long => ujson.Num(v.toDouble) - case v: Double => ujson.Num(v) - case v: Float => ujson.Num(v.toDouble) - case v: BigDecimal => ujson.Num(v.toDouble) - case v: BigInt => ujson.Num(v.toDouble) - case v: Short => ujson.Num(v.toDouble) - case _ => + + case sn: SequenceNode => + val buf = new mutable.ArrayBuffer[ujson.Value](sn.getValue.size) + for (n <- sn.getValue.asScala) { + buf += yamlNodeToJson(n) + } + ujson.Arr(buf) + + case _ => Error.fail("Unsupported YAML node type: " + node.getClass.getSimpleName) } + private val 
YamlDocStartPattern = + java.util.regex.Pattern.compile("\\A\\s*---(?:[ \\t\\n\\r]|\\z)") + def yamlToJson(yamlString: String): ujson.Value = { try { - val yaml = - new Yaml(new SafeConstructor(new LoaderOptions())).loadAll(yamlString).asScala.toSeq - yaml.size match { - case 0 => ujson.Null - case 1 => nodeToJson(yaml.head) - case _ => - val buf = new mutable.ArrayBuffer[ujson.Value](yaml.size) - for (doc <- yaml) { - buf += nodeToJson(doc) + val yaml = new Yaml(new LoaderOptions()) + val docs = yaml.composeAll(new java.io.StringReader(yamlString)).asScala.toSeq + val hasExplicitDocStart = YamlDocStartPattern.matcher(yamlString).find() + docs.size match { + case 0 => ujson.Null + case 1 if !hasExplicitDocStart => yamlNodeToJson(docs.head) + case _ => + val buf = new mutable.ArrayBuffer[ujson.Value](docs.size) + for (doc <- docs) { + buf += yamlNodeToJson(doc) } ujson.Arr(buf) } diff --git a/sjsonnet/src-native/sjsonnet/Platform.scala b/sjsonnet/src-native/sjsonnet/Platform.scala index ab8f580e9..e74558a4f 100644 --- a/sjsonnet/src-native/sjsonnet/Platform.scala +++ b/sjsonnet/src-native/sjsonnet/Platform.scala @@ -64,12 +64,22 @@ object Platform { throw new Exception("XZ not implemented in Scala Native") } - private def nodeToJson(node: Node): ujson.Value = node match { - case _: Node.ScalarNode => - YamlDecoder.forAny.construct(node) match { + private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7]+") + + private def nodeToJson(node: Node, input: String): ujson.Value = node match { + case sn: Node.ScalarNode => + YamlDecoder.forAny.construct(sn) match { case Right(v) => v match { case null | None => ujson.Null + case v: String + if sn.tag == Tag.str && Yaml12OctalPattern.matcher(v).matches() && + !isQuotedScalar(sn, input) => + val negative = v.charAt(0) == '-' + val octalPart = + if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) + val result = java.lang.Long.parseLong(octalPart, 8) + ujson.Num((if (negative) -result else 
result).toDouble) case v: String => ujson.read(s"\"${v.replace("\"", "\\\"").replace("\n", "\\n")}\"", false) case v: Boolean => ujson.Bool(v) @@ -92,7 +102,7 @@ object Platform { case Node.SequenceNode(nodes, _) => val buf = new mutable.ArrayBuffer[ujson.Value](nodes.size) for (n <- nodes) { - buf += nodeToJson(n) + buf += nodeToJson(n, input) } ujson.Arr(buf) case Node.MappingNode(mappings, _) => @@ -100,7 +110,7 @@ object Platform { buf.sizeHint(mappings.size) for ((key, value) <- mappings) { key match { - case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value) + case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value, input) case _ => Error.fail("Invalid YAML mapping key class: " + key.getClass.getSimpleName) } } @@ -109,6 +119,20 @@ object Platform { Error.fail("Unsupported YAML node type: " + node.getClass.getSimpleName) } + private def isQuotedScalar(sn: Node.ScalarNode, input: String): Boolean = { + sn.pos match { + case Some(range) => + val offset = range.start.offset + offset >= 0 && offset < input.length && { + val c = input.charAt(offset) + c == '"' || c == '\'' + } + case None => false + } + } + + private val YamlDocStartPattern = Pattern.compile("\\A\\s*---(?:[ \\t\\n\\r]|\\z)") + def yamlToJson(s: String): ujson.Value = { if (s.trim.isEmpty) return ujson.Null @@ -116,16 +140,17 @@ object Platform { // since scala-yaml's parseManyYamls can't handle empty documents // (DocumentStart immediately followed by DocumentEnd). 
val preprocessed = addExplicitNullsForEmptyDocs(s) + val hasExplicitDocStart = YamlDocStartPattern.matcher(s).find() parseManyYamls(preprocessed) match { case Right(documents) => documents.size match { case 0 => ujson.Null - case 1 => nodeToJson(documents.head) + case 1 if !hasExplicitDocStart => nodeToJson(documents.head, preprocessed) case _ => val buf = new mutable.ArrayBuffer[ujson.Value](documents.size) for (doc <- documents) { - buf += nodeToJson(doc) + buf += nodeToJson(doc, preprocessed) } ujson.Arr(buf) } diff --git a/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden b/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden index 6082f3374..a5eb13dd6 100644 --- a/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden +++ b/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden @@ -52,5 +52,7 @@ null, 2 ], - null + [ + null + ] ] diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet new file mode 100644 index 000000000..506bac3a0 --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet @@ -0,0 +1,9 @@ +// Test that explicit --- document start markers cause single-doc YAML +// to be wrapped in an array, matching go-jsonnet behavior. 
+std.assertEqual(std.parseYaml("---"), [null]) && +std.assertEqual(std.parseYaml("---\n"), [null]) && +std.assertEqual(std.parseYaml("---\na: 1"), [{a: 1}]) && +std.assertEqual(std.parseYaml("--- 3\n"), [3]) && +std.assertEqual(std.parseYaml("---a: 1"), {"---a": 1}) && +std.assertEqual(std.parseYaml("a: 1"), {a: 1}) && +true diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet.golden new file mode 100644 index 000000000..27ba77dda --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet.golden @@ -0,0 +1 @@ +true diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet new file mode 100644 index 000000000..e40a6d0f9 --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet @@ -0,0 +1,20 @@ +// Test YAML 1.2 modern octal syntax (0o prefix) for unquoted scalars. +// Quoted values must remain strings. Legacy octal (0 prefix) still works. 
+local yaml = std.parseYaml(||| + a: 0777 + b: 0o777 + c: 0 + d: 0o10 + e: -0o777 + f: "0o777" + g: '0o777' +|||); + +std.assertEqual(yaml.a, 511) && +std.assertEqual(yaml.b, 511) && +std.assertEqual(yaml.c, 0) && +std.assertEqual(yaml.d, 8) && +std.assertEqual(yaml.e, -511) && +std.assertEqual(yaml.f, "0o777") && +std.assertEqual(yaml.g, "0o777") && +true diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet.golden new file mode 100644 index 000000000..27ba77dda --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet.golden @@ -0,0 +1 @@ +true diff --git a/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala b/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala index 880bc2854..bf9be541d 100644 --- a/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala +++ b/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala @@ -46,12 +46,12 @@ object ParseYamlTests extends TestSuite { } test { // Scalar documents can start on the same line as the document-start marker - // "--- 3" as standalone - eval("std.parseYaml('--- 3\\n')") ==> ujson.Value("""3""") + // "--- 3" as standalone (explicit doc start → always array) + eval("std.parseYaml('--- 3\\n')") ==> ujson.Value("""[3]""") } test { - // Folded scalar as document - eval("std.parseYaml('--- >\\n hello\\n world\\n')") ==> ujson.Value(""""hello world\n"""") + // Folded scalar as document (explicit doc start → always array) + eval("std.parseYaml('--- >\\n hello\\n world\\n')") ==> ujson.Value("""["hello world\n"]""") } test { // Combined: scalar docs on same line as marker @@ -66,8 +66,8 @@ object ParseYamlTests extends TestSuite { ) } test { - // Bare document separator - eval("""std.parseYaml("---")""") ==> ujson.Value("""null""") + // Bare document separator → explicit doc start, always returns array + eval("""std.parseYaml("---")""") ==> ujson.Value("""[null]""") } test { // Folded scalar without 
document marker (directly) From f4069e19d44839962e0428870df88fd07d5ac7e5 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Thu, 18 Jun 2026 20:44:24 +0800 Subject: [PATCH 2/5] style: fix scalafmt in src-js/Platform.scala --- sjsonnet/src-js/sjsonnet/Platform.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sjsonnet/src-js/sjsonnet/Platform.scala b/sjsonnet/src-js/sjsonnet/Platform.scala index 302bd7992..6337ffe11 100644 --- a/sjsonnet/src-js/sjsonnet/Platform.scala +++ b/sjsonnet/src-js/sjsonnet/Platform.scala @@ -50,7 +50,7 @@ object Platform { case null | None => ujson.Null case v: String if sn.tag == Tag.str && Yaml12OctalPattern.matcher(v).matches() && - !isQuotedScalar(sn, input) => + !isQuotedScalar(sn, input) => val negative = v.charAt(0) == '-' val octalPart = if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) @@ -113,9 +113,9 @@ object Platform { parseManyYamls(preprocessed) match { case Right(documents) => documents.size match { - case 0 => ujson.Null + case 0 => ujson.Null case 1 if !hasExplicitDocStart => nodeToJson(documents.head, preprocessed) - case _ => + case _ => val buf = new mutable.ArrayBuffer[ujson.Value](documents.size) for (doc <- documents) { buf += nodeToJson(doc, preprocessed) From e06f12e10cf4cba714317afd09c13f6addda3b9a Mon Sep 17 00:00:00 2001 From: He-Pin Date: Thu, 18 Jun 2026 21:06:01 +0800 Subject: [PATCH 3/5] style: fix scalafmt in src-native/Platform.scala --- sjsonnet/src-native/sjsonnet/Platform.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sjsonnet/src-native/sjsonnet/Platform.scala b/sjsonnet/src-native/sjsonnet/Platform.scala index e74558a4f..2a9428cab 100644 --- a/sjsonnet/src-native/sjsonnet/Platform.scala +++ b/sjsonnet/src-native/sjsonnet/Platform.scala @@ -74,13 +74,13 @@ object Platform { case null | None => ujson.Null case v: String if sn.tag == Tag.str && Yaml12OctalPattern.matcher(v).matches() && - !isQuotedScalar(sn, input) => + 
!isQuotedScalar(sn, input) => val negative = v.charAt(0) == '-' val octalPart = if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) val result = java.lang.Long.parseLong(octalPart, 8) ujson.Num((if (negative) -result else result).toDouble) - case v: String => + case v: String => ujson.read(s"\"${v.replace("\"", "\\\"").replace("\n", "\\n")}\"", false) case v: Boolean => ujson.Bool(v) case v: Byte => ujson.Num(v.toDouble) @@ -145,9 +145,9 @@ object Platform { parseManyYamls(preprocessed) match { case Right(documents) => documents.size match { - case 0 => ujson.Null + case 0 => ujson.Null case 1 if !hasExplicitDocStart => nodeToJson(documents.head, preprocessed) - case _ => + case _ => val buf = new mutable.ArrayBuffer[ujson.Value](documents.size) for (doc <- documents) { buf += nodeToJson(doc, preprocessed) From 5ec66300e85e2cace09bb93bc6a83e9c984442d8 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Fri, 19 Jun 2026 01:42:07 +0800 Subject: [PATCH 4/5] fix: use parseLong for YAML numeric parsing and support underscores in 0o octal Motivation: The JVM implementation used parseUnsignedLong which diverges from JS/Native (parseLong), causing potential inconsistency for large values. The Yaml12OctalPattern regex also didn't match YAML numbers with underscores (e.g. 0o7_7_7), causing runtime errors on JVM. Additionally, +.inf was not handled in float parsing. Modification: - Replace parseUnsignedLong with parseLong in JVM octal/hex/binary parsing for consistency with JS/Native and go-jsonnet - Update Yaml12OctalPattern regex to accept underscores: [-+]?0o[0-7][0-7_]* - Strip underscores before parseLong in YAML 1.2 octal path - Add +.inf/+.Inf/+.INF handling in Tag.FLOAT branch Result: Consistent overflow behavior across JVM/JS/Native. 0o7_7_7 no longer causes runtime error. +.inf parses correctly as Infinity. 
--- sjsonnet/src-js/sjsonnet/Platform.scala | 4 ++-- sjsonnet/src-jvm/sjsonnet/Platform.scala | 15 ++++++++------- sjsonnet/src-native/sjsonnet/Platform.scala | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sjsonnet/src-js/sjsonnet/Platform.scala b/sjsonnet/src-js/sjsonnet/Platform.scala index 6337ffe11..964268064 100644 --- a/sjsonnet/src-js/sjsonnet/Platform.scala +++ b/sjsonnet/src-js/sjsonnet/Platform.scala @@ -41,7 +41,7 @@ object Platform { def isAsciiJsonSafe(s: String, from: Int, to: Int): Boolean = CharSWAR.isAsciiJsonSafe(s, from, to) - private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7]+") + private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7][0-7_]*") private def nodeToJson(node: Node, input: String): ujson.Value = node match { case sn: Node.ScalarNode => @@ -54,7 +54,7 @@ object Platform { val negative = v.charAt(0) == '-' val octalPart = if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) - val result = java.lang.Long.parseLong(octalPart, 8) + val result = java.lang.Long.parseLong(octalPart.replace("_", ""), 8) ujson.Num((if (negative) -result else result).toDouble) case v: String => ujson.read(s"\"${v.replace("\"", "\\\"").replace("\n", "\\n")}\"", false) case v: Boolean => ujson.Bool(v) diff --git a/sjsonnet/src-jvm/sjsonnet/Platform.scala b/sjsonnet/src-jvm/sjsonnet/Platform.scala index e1fdc250a..d057cf19f 100644 --- a/sjsonnet/src-jvm/sjsonnet/Platform.scala +++ b/sjsonnet/src-jvm/sjsonnet/Platform.scala @@ -73,7 +73,7 @@ object Platform { xzBytes(s.getBytes(UTF_8), compressionLevel) } - private val Yaml12OctalPattern = java.util.regex.Pattern.compile("[-+]?0o[0-7]+") + private val Yaml12OctalPattern = java.util.regex.Pattern.compile("[-+]?0o[0-7][0-7_]*") private def yamlNodeToJson(node: Node): ujson.Value = node match { case sn: ScalarNode => @@ -85,7 +85,7 @@ object Platform { val negative = value.charAt(0) == '-' val octalPart = if (negative || value.charAt(0) == '+') 
value.substring(3) else value.substring(2) - val result = java.lang.Long.parseUnsignedLong(octalPart, 8) + val result = java.lang.Long.parseLong(octalPart.replace("_", ""), 8) val signed = if (negative) -result else result ujson.Num(signed.toDouble) } else if (tag == Tag.INT) { @@ -96,7 +96,7 @@ object Platform { val hex = if (negative || cleaned.startsWith("+")) cleaned.substring(3) else cleaned.substring(2) - val v = java.lang.Long.parseUnsignedLong(hex, 16) + val v = java.lang.Long.parseLong(hex, 16) if (negative) -v else v } else if ( cleaned.startsWith("0b") || cleaned.startsWith("-0b") || cleaned.startsWith("+0b") @@ -105,12 +105,12 @@ object Platform { val bin = if (negative || cleaned.startsWith("+")) cleaned.substring(3) else cleaned.substring(2) - val v = java.lang.Long.parseUnsignedLong(bin, 2) + val v = java.lang.Long.parseLong(bin, 2) if (negative) -v else v } else if (cleaned.length > 1 && cleaned.startsWith("0") && !cleaned.contains(".")) { val negative = cleaned.startsWith("-") val oct = if (negative || cleaned.startsWith("+")) cleaned.substring(1) else cleaned - val v = java.lang.Long.parseUnsignedLong(oct, 8) + val v = java.lang.Long.parseLong(oct, 8) if (negative) -v else v } else if (cleaned.contains(":")) { val parts = cleaned.split(":") @@ -122,8 +122,9 @@ object Platform { } else if (tag == Tag.FLOAT) { val cleaned = value.replace("_", "") val result = cleaned match { - case ".inf" | ".Inf" | ".INF" => Double.PositiveInfinity - case "-.inf" | "-.Inf" | "-.INF" => Double.NegativeInfinity + case ".inf" | ".Inf" | ".INF" => Double.PositiveInfinity + case "+.inf" | "+.Inf" | "+.INF" => Double.PositiveInfinity + case "-.inf" | "-.Inf" | "-.INF" => Double.NegativeInfinity case ".nan" | ".NaN" | ".NAN" => Double.NaN case s if s.contains(":") => s.split(":").foldLeft(0.0)((acc, p) => acc * 60 + p.trim.toDouble) diff --git a/sjsonnet/src-native/sjsonnet/Platform.scala b/sjsonnet/src-native/sjsonnet/Platform.scala index 2a9428cab..08756ae47 100644 --- 
a/sjsonnet/src-native/sjsonnet/Platform.scala +++ b/sjsonnet/src-native/sjsonnet/Platform.scala @@ -64,7 +64,7 @@ object Platform { throw new Exception("XZ not implemented in Scala Native") } - private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7]+") + private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7][0-7_]*") private def nodeToJson(node: Node, input: String): ujson.Value = node match { case sn: Node.ScalarNode => @@ -78,7 +78,7 @@ object Platform { val negative = v.charAt(0) == '-' val octalPart = if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) - val result = java.lang.Long.parseLong(octalPart, 8) + val result = java.lang.Long.parseLong(octalPart.replace("_", ""), 8) ujson.Num((if (negative) -result else result).toDouble) case v: String => ujson.read(s"\"${v.replace("\"", "\\\"").replace("\n", "\\n")}\"", false) From 98c4b340d20e0835dd9c02266cdc4540c4690031 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Fri, 19 Jun 2026 01:58:46 +0800 Subject: [PATCH 5/5] style: fix scalafmt in Platform.scala float parsing alignment --- sjsonnet/src-jvm/sjsonnet/Platform.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sjsonnet/src-jvm/sjsonnet/Platform.scala b/sjsonnet/src-jvm/sjsonnet/Platform.scala index d057cf19f..f973016cc 100644 --- a/sjsonnet/src-jvm/sjsonnet/Platform.scala +++ b/sjsonnet/src-jvm/sjsonnet/Platform.scala @@ -122,9 +122,9 @@ object Platform { } else if (tag == Tag.FLOAT) { val cleaned = value.replace("_", "") val result = cleaned match { - case ".inf" | ".Inf" | ".INF" => Double.PositiveInfinity - case "+.inf" | "+.Inf" | "+.INF" => Double.PositiveInfinity - case "-.inf" | "-.Inf" | "-.INF" => Double.NegativeInfinity + case ".inf" | ".Inf" | ".INF" => Double.PositiveInfinity + case "+.inf" | "+.Inf" | "+.INF" => Double.PositiveInfinity + case "-.inf" | "-.Inf" | "-.INF" => Double.NegativeInfinity case ".nan" | ".NaN" | ".NAN" => Double.NaN case s if s.contains(":") => 
s.split(":").foldLeft(0.0)((acc, p) => acc * 60 + p.trim.toDouble)