diff --git a/sjsonnet/src-js/sjsonnet/Platform.scala b/sjsonnet/src-js/sjsonnet/Platform.scala index 5d15694b..96426806 100644 --- a/sjsonnet/src-js/sjsonnet/Platform.scala +++ b/sjsonnet/src-js/sjsonnet/Platform.scala @@ -41,10 +41,21 @@ object Platform { def isAsciiJsonSafe(s: String, from: Int, to: Int): Boolean = CharSWAR.isAsciiJsonSafe(s, from, to) - private def nodeToJson(node: Node): ujson.Value = node match { - case _: Node.ScalarNode => - YamlDecoder.forAny.construct(node).getOrElse("") match { + private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7][0-7_]*") + + private def nodeToJson(node: Node, input: String): ujson.Value = node match { + case sn: Node.ScalarNode => + val constructed = YamlDecoder.forAny.construct(sn).getOrElse("") + constructed match { case null | None => ujson.Null + case v: String + if sn.tag == Tag.str && Yaml12OctalPattern.matcher(v).matches() && + !isQuotedScalar(sn, input) => + val negative = v.charAt(0) == '-' + val octalPart = + if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) + val result = java.lang.Long.parseLong(octalPart.replace("_", ""), 8) + ujson.Num((if (negative) -result else result).toDouble) case v: String => ujson.read(s"\"${v.replace("\"", "\\\"").replace("\n", "\\n")}\"", false) case v: Boolean => ujson.Bool(v) case v: Int => ujson.Num(v.toDouble) @@ -59,7 +70,7 @@ object Platform { case Node.SequenceNode(nodes, _) => val buf = new mutable.ArrayBuffer[ujson.Value](nodes.size) for (n <- nodes) { - buf += nodeToJson(n) + buf += nodeToJson(n, input) } ujson.Arr(buf) case Node.MappingNode(mappings, _) => @@ -67,7 +78,7 @@ object Platform { buf.sizeHint(mappings.size) for ((key, value) <- mappings) { key match { - case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value) + case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value, input) case _ => Error.fail("Invalid YAML mapping key class: " + key.getClass.getSimpleName) } } @@ -76,6 +87,20 @@ object Platform { Error.fail("Unsupported YAML 
node type: " + node.getClass.getSimpleName) } + private def isQuotedScalar(sn: Node.ScalarNode, input: String): Boolean = { + sn.pos match { + case Some(range) => + val offset = range.start.offset + offset >= 0 && offset < input.length && { + val c = input.charAt(offset) + c == '"' || c == '\'' + } + case None => false + } + } + + private val YamlDocStartPattern = Pattern.compile("\\A\\s*---(?:[ \\t\\n\\r]|\\z)") + def yamlToJson(s: String): ujson.Value = { if (s.trim.isEmpty) return ujson.Null @@ -83,16 +108,17 @@ object Platform { // since scala-yaml's parseManyYamls can't handle empty documents // (DocumentStart immediately followed by DocumentEnd). val preprocessed = addExplicitNullsForEmptyDocs(s) + val hasExplicitDocStart = YamlDocStartPattern.matcher(s).find() parseManyYamls(preprocessed) match { case Right(documents) => documents.size match { - case 0 => ujson.Null - case 1 => nodeToJson(documents.head) - case _ => + case 0 => ujson.Null + case 1 if !hasExplicitDocStart => nodeToJson(documents.head, preprocessed) + case _ => val buf = new mutable.ArrayBuffer[ujson.Value](documents.size) for (doc <- documents) { - buf += nodeToJson(doc) + buf += nodeToJson(doc, preprocessed) } ujson.Arr(buf) } diff --git a/sjsonnet/src-jvm/sjsonnet/Platform.scala b/sjsonnet/src-jvm/sjsonnet/Platform.scala index 4ba3e07b..f973016c 100644 --- a/sjsonnet/src-jvm/sjsonnet/Platform.scala +++ b/sjsonnet/src-jvm/sjsonnet/Platform.scala @@ -10,8 +10,8 @@ import com.google.re2j.Pattern import net.jpountz.xxhash.{StreamingXXHash64, XXHashFactory} import org.tukaani.xz.LZMA2Options import org.tukaani.xz.XZOutputStream -import org.yaml.snakeyaml.{LoaderOptions, Yaml} -import org.yaml.snakeyaml.constructor.SafeConstructor +import org.yaml.snakeyaml.{DumperOptions, LoaderOptions, Yaml} +import org.yaml.snakeyaml.nodes.{MappingNode, Node, ScalarNode, SequenceNode, Tag} import scala.annotation.nowarn import scala.collection.compat.* @@ -73,48 +73,114 @@ object Platform { 
xzBytes(s.getBytes(UTF_8), compressionLevel) } - private def nodeToJson(node: Any): ujson.Value = node match { - case m: java.util.List[?] => - val buf = new mutable.ArrayBuffer[ujson.Value](m.size) - for (n <- m.asScala) { - buf += nodeToJson(n) + private val Yaml12OctalPattern = java.util.regex.Pattern.compile("[-+]?0o[0-7][0-7_]*") + + private def yamlNodeToJson(node: Node): ujson.Value = node match { + case sn: ScalarNode => + val value = sn.getValue + val tag = sn.getTag + val isPlain = sn.getScalarStyle == DumperOptions.ScalarStyle.PLAIN + + if (isPlain && Yaml12OctalPattern.matcher(value).matches()) { + val negative = value.charAt(0) == '-' + val octalPart = + if (negative || value.charAt(0) == '+') value.substring(3) else value.substring(2) + val result = java.lang.Long.parseLong(octalPart.replace("_", ""), 8) + val signed = if (negative) -result else result + ujson.Num(signed.toDouble) + } else if (tag == Tag.INT) { + val cleaned = value.replace("_", "") + val result: Long = + if (cleaned.startsWith("0x") || cleaned.startsWith("-0x") || cleaned.startsWith("+0x")) { + val negative = cleaned.startsWith("-") + val hex = + if (negative || cleaned.startsWith("+")) cleaned.substring(3) + else cleaned.substring(2) + val v = java.lang.Long.parseLong(hex, 16) + if (negative) -v else v + } else if ( + cleaned.startsWith("0b") || cleaned.startsWith("-0b") || cleaned.startsWith("+0b") + ) { + val negative = cleaned.startsWith("-") + val bin = + if (negative || cleaned.startsWith("+")) cleaned.substring(3) + else cleaned.substring(2) + val v = java.lang.Long.parseLong(bin, 2) + if (negative) -v else v + } else if (cleaned.matches("[-+]?0[0-9]+")) { /* legacy octal: the leading 0 may follow a sign, so -0777 and +0777 must land here too */ + val negative = cleaned.startsWith("-") + val oct = if (negative || cleaned.startsWith("+")) cleaned.substring(1) else cleaned + val v = java.lang.Long.parseLong(oct, 8) + if (negative) -v else v + } else if (cleaned.contains(":")) { + val magnitude = cleaned.stripPrefix("-").stripPrefix("+").split(":").foldLeft(0L)((acc, p) => acc * 60 + p.trim.toLong) /* base-60 groups, folded after the sign is removed */ + 
if (cleaned.startsWith("-")) -magnitude else magnitude /* the sign negates the whole value, not only the first group */ + } else { + cleaned.toLong + } + ujson.Num(result.toDouble) + } else if (tag == Tag.FLOAT) { + val cleaned = value.replace("_", "") + val result = cleaned match { + case ".inf" | ".Inf" | ".INF" => Double.PositiveInfinity + case "+.inf" | "+.Inf" | "+.INF" => Double.PositiveInfinity + case "-.inf" | "-.Inf" | "-.INF" => Double.NegativeInfinity + case ".nan" | ".NaN" | ".NAN" => Double.NaN + case s if s.contains(":") => + (if (s.startsWith("-")) -1.0 else 1.0) * s.stripPrefix("-").stripPrefix("+").split(":").foldLeft(0.0)((acc, p) => acc * 60 + p.trim.toDouble) /* sign applies to the whole base-60 value */ + case s => s.toDouble + } + ujson.Num(result) + } else if (tag == Tag.BOOL) { + ujson.Bool(value.toLowerCase match { + case "true" | "yes" | "on" => true + case "false" | "no" | "off" => false + case _ => Error.fail("Invalid YAML boolean: " + value) + }) + } else if (tag == Tag.NULL) { + ujson.Null + } else { + ujson.Str(value) } - ujson.Arr(buf) - case m: java.util.Map[?, ?] => + + case mn: MappingNode => val buf = upickle.core.LinkedHashMap[String, ujson.Value]() - buf.sizeHint(m.size) - for ((key, value) <- m.asScala) { - key match { - case k: String => buf(k) = nodeToJson(value) - case _ => Error.fail("Invalid YAML mapping key class: " + key.getClass.getSimpleName) + buf.sizeHint(mn.getValue.size) + for (tuple <- mn.getValue.asScala) { + val key = tuple.getKeyNode match { + case sn: ScalarNode => sn.getValue + case other => Error.fail("Invalid YAML mapping key type: " + other.getTag) } + buf(key) = yamlNodeToJson(tuple.getValueNode) } ujson.Obj(buf) - case null => ujson.Null - case v: String => ujson.Str(v) - case v: Boolean => ujson.Bool(v) - case v: Int => ujson.Num(v.toDouble) - case v: Long => ujson.Num(v.toDouble) - case v: Double => ujson.Num(v) - case v: Float => ujson.Num(v.toDouble) - case v: BigDecimal => ujson.Num(v.toDouble) - case v: BigInt => ujson.Num(v.toDouble) - case v: Short => ujson.Num(v.toDouble) - case _ => + + case sn: SequenceNode => + val buf = new 
mutable.ArrayBuffer[ujson.Value](sn.getValue.size) + for (n <- sn.getValue.asScala) { + buf += yamlNodeToJson(n) + } + ujson.Arr(buf) + + case _ => Error.fail("Unsupported YAML node type: " + node.getClass.getSimpleName) } + private val YamlDocStartPattern = + java.util.regex.Pattern.compile("\\A\\s*---(?:[ \\t\\n\\r]|\\z)") + def yamlToJson(yamlString: String): ujson.Value = { try { - val yaml = - new Yaml(new SafeConstructor(new LoaderOptions())).loadAll(yamlString).asScala.toSeq - yaml.size match { - case 0 => ujson.Null - case 1 => nodeToJson(yaml.head) - case _ => - val buf = new mutable.ArrayBuffer[ujson.Value](yaml.size) - for (doc <- yaml) { - buf += nodeToJson(doc) + val yaml = new Yaml(new LoaderOptions()) + val docs = yaml.composeAll(new java.io.StringReader(yamlString)).asScala.toSeq + val hasExplicitDocStart = YamlDocStartPattern.matcher(yamlString).find() + docs.size match { + case 0 => ujson.Null + case 1 if !hasExplicitDocStart => yamlNodeToJson(docs.head) + case _ => + val buf = new mutable.ArrayBuffer[ujson.Value](docs.size) + for (doc <- docs) { + buf += yamlNodeToJson(doc) } ujson.Arr(buf) } diff --git a/sjsonnet/src-native/sjsonnet/Platform.scala b/sjsonnet/src-native/sjsonnet/Platform.scala index ab8f580e..08756ae4 100644 --- a/sjsonnet/src-native/sjsonnet/Platform.scala +++ b/sjsonnet/src-native/sjsonnet/Platform.scala @@ -64,13 +64,23 @@ object Platform { throw new Exception("XZ not implemented in Scala Native") } - private def nodeToJson(node: Node): ujson.Value = node match { - case _: Node.ScalarNode => - YamlDecoder.forAny.construct(node) match { + private val Yaml12OctalPattern = Pattern.compile("[-+]?0o[0-7][0-7_]*") + + private def nodeToJson(node: Node, input: String): ujson.Value = node match { + case sn: Node.ScalarNode => + YamlDecoder.forAny.construct(sn) match { case Right(v) => v match { case null | None => ujson.Null - case v: String => + case v: String + if sn.tag == Tag.str && Yaml12OctalPattern.matcher(v).matches() && + 
!isQuotedScalar(sn, input) => + val negative = v.charAt(0) == '-' + val octalPart = + if (negative || v.charAt(0) == '+') v.substring(3) else v.substring(2) + val result = java.lang.Long.parseLong(octalPart.replace("_", ""), 8) + ujson.Num((if (negative) -result else result).toDouble) + case v: String => ujson.read(s"\"${v.replace("\"", "\\\"").replace("\n", "\\n")}\"", false) case v: Boolean => ujson.Bool(v) case v: Byte => ujson.Num(v.toDouble) @@ -92,7 +102,7 @@ object Platform { case Node.SequenceNode(nodes, _) => val buf = new mutable.ArrayBuffer[ujson.Value](nodes.size) for (n <- nodes) { - buf += nodeToJson(n) + buf += nodeToJson(n, input) } ujson.Arr(buf) case Node.MappingNode(mappings, _) => @@ -100,7 +110,7 @@ object Platform { buf.sizeHint(mappings.size) for ((key, value) <- mappings) { key match { - case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value) + case Node.ScalarNode(k, _) => buf(k) = nodeToJson(value, input) case _ => Error.fail("Invalid YAML mapping key class: " + key.getClass.getSimpleName) } } @@ -109,6 +119,20 @@ object Platform { Error.fail("Unsupported YAML node type: " + node.getClass.getSimpleName) } + private def isQuotedScalar(sn: Node.ScalarNode, input: String): Boolean = { + sn.pos match { + case Some(range) => + val offset = range.start.offset + offset >= 0 && offset < input.length && { + val c = input.charAt(offset) + c == '"' || c == '\'' + } + case None => false + } + } + + private val YamlDocStartPattern = Pattern.compile("\\A\\s*---(?:[ \\t\\n\\r]|\\z)") + def yamlToJson(s: String): ujson.Value = { if (s.trim.isEmpty) return ujson.Null @@ -116,16 +140,17 @@ object Platform { // since scala-yaml's parseManyYamls can't handle empty documents // (DocumentStart immediately followed by DocumentEnd). 
val preprocessed = addExplicitNullsForEmptyDocs(s) + val hasExplicitDocStart = YamlDocStartPattern.matcher(s).find() parseManyYamls(preprocessed) match { case Right(documents) => documents.size match { - case 0 => ujson.Null - case 1 => nodeToJson(documents.head) - case _ => + case 0 => ujson.Null + case 1 if !hasExplicitDocStart => nodeToJson(documents.head, preprocessed) + case _ => val buf = new mutable.ArrayBuffer[ujson.Value](documents.size) for (doc <- documents) { - buf += nodeToJson(doc) + buf += nodeToJson(doc, preprocessed) } ujson.Arr(buf) } diff --git a/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden b/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden index 6082f337..a5eb13dd 100644 --- a/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden +++ b/sjsonnet/test/resources/go_test_suite/parseYaml.jsonnet.golden @@ -52,5 +52,7 @@ null, 2 ], - null + [ + null + ] ] diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet new file mode 100644 index 00000000..506bac3a --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet @@ -0,0 +1,9 @@ +// Test that explicit --- document start markers cause single-doc YAML +// to be wrapped in an array, matching go-jsonnet behavior. 
+std.assertEqual(std.parseYaml("---"), [null]) && +std.assertEqual(std.parseYaml("---\n"), [null]) && +std.assertEqual(std.parseYaml("---\na: 1"), [{a: 1}]) && +std.assertEqual(std.parseYaml("--- 3\n"), [3]) && +std.assertEqual(std.parseYaml("---a: 1"), {"---a": 1}) && +std.assertEqual(std.parseYaml("a: 1"), {a: 1}) && +true diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet.golden new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_doc_marker.jsonnet.golden @@ -0,0 +1 @@ +true diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet new file mode 100644 index 00000000..e40a6d0f --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet @@ -0,0 +1,20 @@ +// Test YAML 1.2 modern octal syntax (0o prefix) for unquoted scalars. +// Quoted values must remain strings. Legacy octal (0 prefix) still works. 
+local yaml = std.parseYaml(||| + a: 0777 + b: 0o777 + c: 0 + d: 0o10 + e: -0o777 + f: "0o777" + g: '0o777' +|||); + +std.assertEqual(yaml.a, 511) && +std.assertEqual(yaml.b, 511) && +std.assertEqual(yaml.c, 0) && +std.assertEqual(yaml.d, 8) && +std.assertEqual(yaml.e, -511) && +std.assertEqual(yaml.f, "0o777") && +std.assertEqual(yaml.g, "0o777") && +true diff --git a/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet.golden new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/parseyaml_yaml12_octal.jsonnet.golden @@ -0,0 +1 @@ +true diff --git a/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala b/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala index 880bc285..bf9be541 100644 --- a/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala +++ b/sjsonnet/test/src/sjsonnet/ParseYamlTests.scala @@ -46,12 +46,12 @@ object ParseYamlTests extends TestSuite { } test { // Scalar documents can start on the same line as the document-start marker - // "--- 3" as standalone - eval("std.parseYaml('--- 3\\n')") ==> ujson.Value("""3""") + // "--- 3" as standalone (explicit doc start → always array) + eval("std.parseYaml('--- 3\\n')") ==> ujson.Value("""[3]""") } test { - // Folded scalar as document - eval("std.parseYaml('--- >\\n hello\\n world\\n')") ==> ujson.Value(""""hello world\n"""") + // Folded scalar as document (explicit doc start → always array) + eval("std.parseYaml('--- >\\n hello\\n world\\n')") ==> ujson.Value("""["hello world\n"]""") } test { // Combined: scalar docs on same line as marker @@ -66,8 +66,8 @@ object ParseYamlTests extends TestSuite { ) } test { - // Bare document separator - eval("""std.parseYaml("---")""") ==> ujson.Value("""null""") + // Bare document separator → explicit doc start, always returns array + eval("""std.parseYaml("---")""") ==> ujson.Value("""[null]""") } test { // Folded scalar without document 
marker (directly)