From 6e47a409914d9f2ccb169482252f142c9fb503b8 Mon Sep 17 00:00:00 2001 From: adonovan Date: Mon, 19 Oct 2020 15:16:15 -0700 Subject: [PATCH] starlark/lib/json: a JSON module for Starlark This CL ports the go.starlark.net/starlarkjson module from Go to Java. The json module provides four functions: json.decode json.encode json.indent (not yet implemented) json.encode_indent (not yet implemented) It is tested through eval.ScriptTest, which adds the json module to its environment, along with 'struct', a simple struct-like type. Some tests are commented out, awaiting StarlarkFloat, or richer string support. This is a first step towards removing Bazel's struct.to_json. Updates bazelbuild/starlark#83 PiperOrigin-RevId: 337944489 --- src/BUILD | 1 + .../java/net/starlark/java/lib/json/BUILD | 21 + .../java/net/starlark/java/lib/json/Json.java | 618 ++++++++++++++++++ src/test/java/net/starlark/java/eval/BUILD | 3 + .../net/starlark/java/eval/ScriptTest.java | 50 ++ .../net/starlark/java/eval/testdata/json.sky | 238 +++++++ 6 files changed, 931 insertions(+) create mode 100644 src/main/java/net/starlark/java/lib/json/BUILD create mode 100644 src/main/java/net/starlark/java/lib/json/Json.java create mode 100644 src/test/java/net/starlark/java/eval/testdata/json.sky diff --git a/src/BUILD b/src/BUILD index fe7bb28cc5a23b..88acc042fcec9f 100644 --- a/src/BUILD +++ b/src/BUILD @@ -449,6 +449,7 @@ filegroup( "//src/main/java/com/google/devtools/common/options:srcs", "//src/main/java/net/starlark/java/cmd:srcs", "//src/main/java/net/starlark/java/spelling:srcs", + "//src/main/java/net/starlark/java/lib/json:srcs", "//src/main/native:srcs", "//src/main/protobuf:srcs", "//src/main/tools:srcs", diff --git a/src/main/java/net/starlark/java/lib/json/BUILD b/src/main/java/net/starlark/java/lib/json/BUILD new file mode 100644 index 00000000000000..bb76f94bebea5f --- /dev/null +++ b/src/main/java/net/starlark/java/lib/json/BUILD @@ -0,0 +1,21 @@ +load("@rules_java//java:defs.bzl", 
"java_library") + +licenses(["notice"]) + +filegroup( + name = "srcs", + srcs = glob(["**"]), + visibility = ["//src:__subpackages__"], +) + +# Starlark json module +java_library( + name = "json", + srcs = ["Json.java"], + visibility = ["//src/main/java/net/starlark/java:clients"], + deps = [ + "//src/main/java/net/starlark/java/annot", + "//src/main/java/net/starlark/java/eval", + "//src/main/java/net/starlark/java/syntax", + ], +) diff --git a/src/main/java/net/starlark/java/lib/json/Json.java b/src/main/java/net/starlark/java/lib/json/Json.java new file mode 100644 index 00000000000000..a5fd566c8d65f5 --- /dev/null +++ b/src/main/java/net/starlark/java/lib/json/Json.java @@ -0,0 +1,618 @@ +// Copyright 2020 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package net.starlark.java.lib.json; + +import java.util.Arrays; +import java.util.Map; +import net.starlark.java.annot.Param; +import net.starlark.java.annot.StarlarkBuiltin; +import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.ClassObject; +import net.starlark.java.eval.Dict; +import net.starlark.java.eval.EvalException; +import net.starlark.java.eval.Mutability; +import net.starlark.java.eval.Starlark; +import net.starlark.java.eval.StarlarkInt; +import net.starlark.java.eval.StarlarkIterable; +import net.starlark.java.eval.StarlarkList; +import net.starlark.java.eval.StarlarkThread; +import net.starlark.java.eval.StarlarkValue; +import net.starlark.java.syntax.Location; + +// Tests at //src/test/java/net/starlark/java/eval:testdata/json.sky + +/** + * Json defines the Starlark {@code json} module, which provides functions for encoding/decoding + * Starlark values as JSON (https://tools.ietf.org/html/rfc8259). + */ +@StarlarkBuiltin( + name = "json", + category = "core.lib", + doc = "Module json is a Starlark module of JSON-related functions.") +public final class Json implements StarlarkValue { + + private Json() {} + + /** Call {@code Starlark.addModule(env, Json.INSTANCE)} to add json to the environment. */ + public static final Json INSTANCE = new Json(); + + /** An interface for StarlarkValue subclasses to define their own JSON encoding. */ + public interface Encodable { + String encodeJSON(); + } + + /** + * Encodes a Starlark value as JSON. + * + *

<p>An application-defined subclass of StarlarkValue may define its own JSON encoding by + * implementing the {@link Encodable} interface. Otherwise, the encoder tests for the {@link Map}, + * {@link StarlarkIterable}, and {@link ClassObject} interfaces, in that order, resulting in + * dict-like, list-like, and struct-like encoding, respectively. See the Starlark documentation + * annotation for more detail. + * + *

<p>Encoding any other value yields an error. + */ + @StarlarkMethod( + name = "encode", + doc = + "

The encode function accepts one required positional argument, which it converts to" + + " JSON by cases:\n" + + "

\n" + + "An application-defined type may define its own JSON encoding.\n" + + "Encoding any other value yields an error.\n", + parameters = {@Param(name = "x")}) + public String encode(Object x) throws EvalException { + Encoder enc = new Encoder(); + try { + enc.encode(x); + } catch (StackOverflowError unused) { + throw Starlark.errorf("nesting depth limit exceeded"); + } + return enc.out.toString(); + } + + private static final class Encoder { + + private final StringBuilder out = new StringBuilder(); + + private void encode(Object x) throws EvalException { + if (x == Starlark.NONE) { + out.append("null"); + return; + } + + if (x instanceof String) { + appendQuoted((String) x); + return; + } + + if (x instanceof Boolean || x instanceof StarlarkInt) { + out.append(x); + return; + } + + // if (x instanceof StarlarkFloat) { + // if (!Double.isFinite(((StarlarkFloat) x).toDouble())) { + // throw Starlark.errorf("cannot encode non-finite float %s", x); + // } + // out.append(x.toString()); // always contains a decimal point or exponent + // return; + // } + + if (x instanceof Encodable) { + // Application-defined Starlark value types + // may define their own JSON encoding. + out.append(((Encodable) x).encodeJSON()); + return; + } + + // e.g. dict (must have string keys) + if (x instanceof Map) { + Map m = (Map) x; + + // Sort keys for determinism. + Object[] keys = m.keySet().toArray(); + for (Object key : keys) { + if (!(key instanceof String)) { + throw Starlark.errorf( + "%s has %s key, want string", Starlark.type(x), Starlark.type(key)); + } + } + Arrays.sort(keys); + + // emit object + out.append('{'); + String sep = ""; + for (Object key : keys) { + out.append(sep); + sep = ","; + appendQuoted((String) key); + out.append(':'); + try { + encode(m.get(key)); + } catch (EvalException ex) { + throw Starlark.errorf( + "in %s key %s: %s", Starlark.type(x), Starlark.repr(key), ex.getMessage()); + } + } + out.append('}'); + return; + } + + // e.g. 
tuple, list + if (x instanceof StarlarkIterable) { + out.append('['); + String sep = ""; + int i = 0; + for (Object elem : (StarlarkIterable) x) { + out.append(sep); + sep = ","; + try { + encode(elem); + } catch (EvalException ex) { + throw Starlark.errorf("at %s index %d: %s", Starlark.type(x), i, ex.getMessage()); + } + i++; + } + out.append(']'); + return; + } + + // e.g. struct + if (x instanceof ClassObject) { + ClassObject obj = (ClassObject) x; + + // Sort keys for determinism. + String[] fields = obj.getFieldNames().toArray(new String[0]); + Arrays.sort(fields); + + out.append('{'); + String sep = ""; + for (String field : fields) { + out.append(sep); + sep = ","; + appendQuoted(field); + out.append(":"); + try { + Object v = obj.getValue(field); // may fail (field not defined) + encode(v); // may fail (unexpected type) + } catch (EvalException ex) { + throw Starlark.errorf("in %s field .%s: %s", Starlark.type(x), field, ex.getMessage()); + } + } + out.append('}'); + return; + } + + throw Starlark.errorf("cannot encode %s as JSON", Starlark.type(x)); + } + + private void appendQuoted(String s) { + // We use String's code point iterator so that we can map + // unpaired surrogates to U+FFFD in the output. + // TODO(adonovan): if we ever get an isPrintable(codepoint) + // function, use uXXXX escapes for non-printables. + out.append('"'); + for (int i = 0, n = s.length(); i < n; ) { + int cp = s.codePointAt(i); + + // ASCII control code? + if (cp < 0x20) { + switch (cp) { + case '\b': + out.append("\\b"); + break; + case '\f': + out.append("\\f"); + break; + case '\n': + out.append("\\n"); + break; + case '\r': + out.append("\\r"); + break; + case '\t': + out.append("\\t"); + break; + default: + out.append("\\u00"); + out.append(HEX[(cp >> 4) & 0xf]); + out.append(HEX[cp & 0xf]); + } + i++; + continue; + } + + // printable ASCII (or DEL 0x7f)? 
(common case) + if (cp < 0x80) { + if (cp == '"' || cp == '\\') { + out.append('\\'); + } + out.append((char) cp); + i++; + continue; + } + + // non-ASCII + if (Character.MIN_SURROGATE <= cp && cp <= Character.MAX_SURROGATE) { + cp = 0xFFFD; // unpaired surrogate + } + out.appendCodePoint(cp); + i += Character.charCount(cp); + } + out.append('"'); + } + } + + private static final char[] HEX = "0123456789abcdef".toCharArray(); + + /** Parses a JSON string as a Starlark value. */ + @StarlarkMethod( + name = "decode", + doc = + "The decode function accepts one positional parameter, a JSON string.\n" + + "It returns the Starlark value that the string denotes.\n" + + "\n" + + "Decoding fails if x is not a valid JSON encoding.\n", + parameters = {@Param(name = "x")}, + useStarlarkThread = true) + public Object decode(String x, StarlarkThread thread) throws EvalException { + return new Decoder(thread.mutability(), x).decode(); + } + + private static final class Decoder { + + // The decoder necessarily makes certain representation choices + // such as list vs tuple, struct vs dict, int vs float. + // In principle, we could parameterize it to allow the caller to + // control the returned types, but there's no compelling need yet. + + private final Mutability mu; + private final String s; // the input string + private int i = 0; // current index in s + + private Decoder(Mutability mu, String s) { + this.mu = mu; + this.s = s; + } + + // decode is the entry point into the decoder. + private Object decode() throws EvalException { + try { + Object x = parse(); + if (skipSpace()) { + throw Starlark.errorf("unexpected character %s after value", quoteChar(s.charAt(i))); + } + return x; + } catch (StackOverflowError unused) { + throw Starlark.errorf("nesting depth limit exceeded"); + } catch (EvalException ex) { + throw Starlark.errorf("at offset %d, %s", i, ex.getMessage()); + } + } + + // Returns a Starlark string literal that denotes c. 
+    private static String quoteChar(char c) {
+      return Starlark.repr("" + c);
+    }
+
+    // parse returns the next JSON value from the input.
+    // It consumes leading but not trailing whitespace.
+    //
+    // Mapping of JSON values to results, as implemented below:
+    //   string -> String, null -> Starlark.NONE, true/false -> Boolean,
+    //   array -> StarlarkList, object -> Dict keyed by String, number -> see parseNumber.
+    // Lists and dicts are created with the decoder's Mutability (mu).
+    //
+    // Throws EvalException on malformed input. The method recurses once per
+    // level of array/object nesting.
+    private Object parse() throws EvalException {
+      char c = next();
+      switch (c) {
+        case '"':
+          return parseString();
+
+        case 'n':
+          if (s.startsWith("null", i)) {
+            i += "null".length();
+            return Starlark.NONE;
+          }
+          break;
+
+        case 't':
+          if (s.startsWith("true", i)) {
+            i += "true".length();
+            return true;
+          }
+          break;
+
+        case 'f':
+          if (s.startsWith("false", i)) {
+            i += "false".length();
+            return false;
+          }
+          break;
+
+        case '[':
+          // array
+          StarlarkList<Object> list = StarlarkList.newList(mu);
+
+          i++; // '['
+          c = next();
+          if (c != ']') {
+            // Non-empty array: value (',' value)* ']'. A trailing comma is rejected
+            // because the recursive parse() then sees ']' where a value must start.
+            while (true) {
+              Object elem = parse();
+              list.add(elem, (Location) null); // can't fail
+              c = next();
+              if (c != ',') {
+                if (c != ']') {
+                  throw Starlark.errorf("got %s, want ',' or ']'", quoteChar(c));
+                }
+                break;
+              }
+              i++; // ','
+            }
+          }
+          i++; // ']'
+          return list;
+
+        case '{':
+          // object
+          Dict<String, Object> dict = Dict.of(mu);
+
+          i++; // '{'
+          c = next();
+          if (c != '}') {
+            // Non-empty object: key ':' value (',' key ':' value)* '}'.
+            while (true) {
+              // The key is parsed as an arbitrary value and then type-checked,
+              // so that a non-string key yields a descriptive error.
+              Object key = parse();
+              if (!(key instanceof String)) {
+                throw Starlark.errorf("got %s for object key, want string", Starlark.type(key));
+              }
+              c = next();
+              if (c != ':') {
+                throw Starlark.errorf("after object key, got %s, want ':' ", quoteChar(c));
+              }
+              i++; // ':'
+              Object value = parse();
+              // Detect duplicate keys by checking whether the insertion grew the dict.
+              int sz = dict.size();
+              dict.put((String) key, value, (Location) null); // can't fail
+              if (dict.size() == sz) {
+                throw Starlark.errorf("object has duplicate key: %s", Starlark.repr(key));
+              }
+              c = next();
+              if (c != ',') {
+                if (c != '}') {
+                  throw Starlark.errorf("in object, got %s, want ',' or '}'", quoteChar(c));
+                }
+                break;
+              }
+              i++; // ','
+            }
+          }
+          i++; // '}'
+          return dict;
+
+        default:
+          // number?
+          if (isdigit(c) || c == '-') {
+            return parseNumber(c);
+          }
+          break;
+      }
+      throw Starlark.errorf("unexpected character %s", quoteChar(c));
+    }
+
+    // parseString parses the JSON string literal whose opening quote is at s[i]
+    // and returns the string it denotes, leaving i just past the closing quote.
+    //
+    // It rejects unescaped control characters (<= 0x1F), unknown escapes, and
+    // input that ends before the closing quote. Each four-hex-digit escape is
+    // appended as a single UTF-16 code unit, so an escaped surrogate pair is
+    // reassembled simply by concatenation, and an unpaired surrogate is kept as is.
+    private String parseString() throws EvalException {
+      i++; // '"'
+      StringBuilder str = new StringBuilder();
+      while (i < s.length()) {
+        char c = s.charAt(i);
+
+        // end quote?
+        if (c == '"') {
+          i++; // skip '"'
+          return str.toString();
+        }
+
+        // literal char?
+        if (c != '\\') {
+          // reject unescaped control codes
+          if (c <= 0x1F) {
+            throw Starlark.errorf("invalid character '\\x%02x' in string literal", (int) c);
+          }
+          i++; // consume
+          str.append(c);
+          continue;
+        }
+
+        // escape: uXXXX or [\/bfnrt"]
+        i++; // '\\'
+        if (i == s.length()) {
+          throw Starlark.errorf("incomplete escape");
+        }
+        c = s.charAt(i);
+        i++; // consume c
+        switch (c) {
+          case '\\':
+          case '/':
+          case '"':
+            str.append(c);
+            break;
+          case 'b':
+            str.append('\b');
+            break;
+          case 'f':
+            str.append('\f');
+            break;
+          case 'n':
+            str.append('\n');
+            break;
+          case 'r':
+            str.append('\r');
+            break;
+          case 't':
+            str.append('\t');
+            break;
+          case 'u': // \ uXXXX
+            // Require the four hex digits plus at least one more character: a
+            // well-formed literal still needs its closing quote, so anything
+            // shorter is reported as an incomplete escape rather than as a bad
+            // hex digit.
+            if (i + 4 >= s.length()) {
+              throw Starlark.errorf("incomplete \\uXXXX escape");
+            }
+            int hex = 0;
+            for (int j = 0; j < 4; j++) {
+              c = s.charAt(i + j);
+              int nybble = 0;
+              if (isdigit(c)) {
+                nybble = c - '0';
+              } else if ('a' <= c && c <= 'f') {
+                nybble = 10 + c - 'a';
+              } else if ('A' <= c && c <= 'F') {
+                nybble = 10 + c - 'A';
+              } else {
+                throw Starlark.errorf("invalid hex char %s in \\uXXXX escape", quoteChar(c));
+              }
+              hex = (hex << 4) | nybble;
+            }
+            str.append((char) hex);
+            i += 4;
+            break;
+          default:
+            throw Starlark.errorf("invalid escape '\\%s'", c);
+        }
+      }
+      throw Starlark.errorf("unclosed string literal");
+    }
+
+    // parseNumber parses the JSON number that starts at s[i]; c is s[i], which
+    // parse() has already checked to be a digit or '-'.
+    //
+    // An integer literal is returned as a StarlarkInt. A syntactically valid
+    // float literal currently fails with "floats not yet supported". Anything
+    // else fails with "invalid number". On success, or on the float error, i is
+    // left just past the number; on "invalid number" i still points at its start.
+    private Object parseNumber(char c) throws EvalException {
+      // For now, allow any sequence of [0-9.eE+-]*.
+      boolean isfloat = false; // whether digit string contains [.Ee+-] (other than leading minus)
+      int j = i + 1; // s[i] itself needs no further scanning
+      for (; j < s.length(); j++) {
+        c = s.charAt(j);
+        if (isdigit(c)) {
+          // ok
+        } else if (c == '.' || c == 'e' || c == 'E' || c == '+' || c == '-') {
+          isfloat = true;
+        } else {
+          break;
+        }
+      }
+
+      String num = s.substring(i, j);
+
+      int digits = i; // s[digits:j] is the digit string
+      if (s.charAt(i) == '-') {
+        digits++;
+      }
+
+      // Structural checks not performed by parse routines below.
+      // Unlike most C-like languages,
+      // JSON disallows a leading zero before a digit.
+      // Double.parseDouble is more lenient than JSON about a decimal point with
+      // no fraction digits, so those forms are rejected here, for either case of
+      // the exponent marker.
+      if (digits == j // "-"
+          || s.charAt(digits) == '.' // ".5"
+          || s.charAt(j - 1) == '.' // "0."
+          || num.contains(".e") // "5.e1"
+          || num.contains(".E") // "5.E1"
+          || (s.charAt(digits) == '0' && j - digits > 1 && isdigit(s.charAt(digits + 1)))) { // "01"
+        throw Starlark.errorf("invalid number: %s", num);
+      }
+
+      i = j;
+
+      // parse number literal
+      try {
+        if (isfloat) {
+          // Called only for validation: a malformed literal such as "1e1e1"
+          // raises NumberFormatException, reported below as "invalid number".
+          Double.parseDouble(num);
+          throw Starlark.errorf("floats not yet supported");
+          // return StarlarkFloat.of(x);
+        } else {
+          return StarlarkInt.parse(num, 10);
+        }
+      } catch (NumberFormatException unused) {
+        throw Starlark.errorf("invalid number: %s", num);
+      }
+    }
+
+    // skipSpace consumes leading spaces, and reports whether there is more input.
+    // Only the four JSON whitespace characters (space, tab, LF, CR) are skipped.
+    private boolean skipSpace() {
+      for (; i < s.length(); i++) {
+        char c = s.charAt(i);
+        if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    // next consumes leading spaces and returns the first non-space.
+ private char next() throws EvalException { + if (skipSpace()) { + return s.charAt(i); + } + throw Starlark.errorf("unexpected end of file"); + } + + private static boolean isdigit(char c) { + return c >= '0' && c <= '9'; + } + } + + @StarlarkMethod( + name = "indent", + doc = + "The indent function returns the indented form of a valid JSON-encoded string.\n" + + "Each array element or object field appears on a new line, beginning with" + + " the prefix string followed by one or more copies of the indent string, according" + + " to its nesting depth.\n" + + "The function accepts one required positional parameter, the JSON string,\n" + + "and two optional keyword-only string parameters, prefix and indent,\n" + + "that specify a prefix of each new line, and the unit of indentation.", + parameters = { + @Param(name = "s"), + @Param(name = "prefix", positional = false, named = true, defaultValue = "''"), + @Param(name = "indent", positional = false, named = true, defaultValue = "'\\t'") + }) + public String indent(String s, String prefix, String indent) throws EvalException { + // Indentation can be efficiently implemented in a single pass, independent of encoding, + // with no state other than a depth counter. This separation enables efficient indentation + // of values obtained from, say, reading a file, without the need for decoding. + throw Starlark.errorf("not yet implemented"); + } + + @StarlarkMethod( + name = "encode_indent", + doc = + "The encode_indent function is equivalent to json.indent(json.encode(x)," + + " ...). 
See indent for description of formatting parameters.", + parameters = { + @Param(name = "x"), + @Param(name = "prefix", positional = false, named = true, defaultValue = "''"), + @Param(name = "indent", positional = false, named = true, defaultValue = "'\\t'"), + }) + public String encodeIndent(Object x, String prefix, String indent) throws EvalException { + return indent(encode(x), prefix, indent); + } +} diff --git a/src/test/java/net/starlark/java/eval/BUILD b/src/test/java/net/starlark/java/eval/BUILD index d4328134ec4e3b..e6b8e193d7b5de 100644 --- a/src/test/java/net/starlark/java/eval/BUILD +++ b/src/test/java/net/starlark/java/eval/BUILD @@ -56,6 +56,7 @@ java_test( "testdata/function.sky", "testdata/int.sky", "testdata/int_constructor.sky", + "testdata/json.sky", "testdata/list_mutation.sky", "testdata/list_slices.sky", "testdata/min_max.sky", @@ -72,10 +73,12 @@ java_test( "testdata/string_splitlines.sky", "testdata/string_test_characters.sky", ], + jvm_flags = ["-Dfile.encoding=UTF8"], use_testrunner = False, deps = [ "//src/main/java/net/starlark/java/annot", "//src/main/java/net/starlark/java/eval", + "//src/main/java/net/starlark/java/lib/json", "//src/main/java/net/starlark/java/syntax", "//third_party:guava", ], diff --git a/src/test/java/net/starlark/java/eval/ScriptTest.java b/src/test/java/net/starlark/java/eval/ScriptTest.java index 03584ae0b93c8e..bd3c5b9dd1c105 100644 --- a/src/test/java/net/starlark/java/eval/ScriptTest.java +++ b/src/test/java/net/starlark/java/eval/ScriptTest.java @@ -17,6 +17,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableMap; import com.google.common.io.Files; import java.io.File; @@ -24,7 +25,9 @@ import java.util.List; import java.util.Map; import net.starlark.java.annot.Param; +import net.starlark.java.annot.StarlarkBuiltin; import net.starlark.java.annot.StarlarkMethod; 
+import net.starlark.java.lib.json.Json; import net.starlark.java.syntax.FileOptions; import net.starlark.java.syntax.ParserInput; import net.starlark.java.syntax.SyntaxError; @@ -90,6 +93,12 @@ public Object assertEq(Object x, Object y, StarlarkThread thread) throws EvalExc return Starlark.NONE; } + // Constructor for simple structs, for testing. + @StarlarkMethod(name = "struct", documented = false, extraKeywords = @Param(name = "kwargs")) + public SimpleStruct struct(Dict kwargs) throws EvalException { + return new SimpleStruct(ImmutableMap.copyOf(kwargs)); + } + private static boolean ok = true; public static void main(String[] args) throws Exception { @@ -136,6 +145,8 @@ public static void main(String[] args) throws Exception { ParserInput input = ParserInput.fromString(buf.toString(), file.toString()); ImmutableMap.Builder predeclared = ImmutableMap.builder(); Starlark.addMethods(predeclared, new ScriptTest()); // e.g. assert_eq + Starlark.addModule(predeclared, Json.INSTANCE); // json + StarlarkSemantics semantics = StarlarkSemantics.DEFAULT; Module module = Module.withPredeclared(semantics, predeclared.build()); try (Mutability mu = Mutability.create("test")) { @@ -225,4 +236,43 @@ private static int newlines(String s) { } return n; } + + // A trivial struct-like class with Starlark fields defined by a map. + @StarlarkBuiltin(name = "struct") + private static class SimpleStruct implements StarlarkValue, ClassObject { + private final ImmutableMap fields; + + SimpleStruct(ImmutableMap fields) { + this.fields = fields; + } + + @Override + public ImmutableCollection getFieldNames() { + return fields.keySet(); + } + + @Override + public Object getValue(String name) { + return fields.get(name); + } + + @Override + public String getErrorMessageForUnknownField(String name) { + return null; + } + + @Override + public void repr(Printer p) { + // This repr function prints only the fields. + // Any methods are still accessible through dir/getattr/hasattr. 
+ p.append(Starlark.type(this)); + p.append("("); + String sep = ""; + for (Map.Entry e : fields.entrySet()) { + p.append(sep).append(e.getKey()).append(" = ").repr(e.getValue()); + sep = ", "; + } + p.append(")"); + } + } } diff --git a/src/test/java/net/starlark/java/eval/testdata/json.sky b/src/test/java/net/starlark/java/eval/testdata/json.sky new file mode 100644 index 00000000000000..13f18bf8f8a6ad --- /dev/null +++ b/src/test/java/net/starlark/java/eval/testdata/json.sky @@ -0,0 +1,238 @@ +# tests of JSON encoding/decoding + +# TODO(adonovan): +# - implement indent, float, assert.fails + +assert_eq(dir(json), ["decode", "encode", "encode_indent", "indent"]) + +# Some of these cases were inspired by github.com/nst/JSONTestSuite. + +## json.encode + +assert_eq(json.encode(None), "null") +assert_eq(json.encode(True), "true") +assert_eq(json.encode(False), "false") +assert_eq(json.encode(-123), "-123") +assert_eq(json.encode(12345 * 12345 * 12345 * 12345 * 12345 * 12345), "3539537889086624823140625") +# assert_eq(json.encode(float(12345*12345*12345*12345*12345*12345)), "3.539537889086625e+24") +# assert_eq(json.encode(12.345e67), "1.2345e+68") + +assert_eq(json.encode("hello"), '"hello"') +# TODO(adonovan): test more control codes when Starlark/Java has string escapes +assert_eq(json.encode("\t"), r'"\t"') +assert_eq(json.encode("\r"), r'"\r"') +assert_eq(json.encode("\n"), r'"\n"') +assert_eq(json.encode("'"), '"\'"') +assert_eq(json.encode("\""), r'"\""') +assert_eq(json.encode("/"), '"/"') +assert_eq(json.encode("\\"), r'"\\"') +assert_eq(json.encode(""), '""') +assert_eq(json.encode("😹"[:1]), '"οΏ½"') # invalid UTF-16 -> replacement char U+FFFD + +assert_eq(json.encode([1, 2, 3]), "[1,2,3]") +assert_eq(json.encode((1, 2, 3)), "[1,2,3]") +assert_eq(json.encode(range(3)), "[0,1,2]") # a built-in iterable +assert_eq(json.encode(dict(x = 1, y = "two")), '{"x":1,"y":"two"}') +assert_eq(json.encode(dict(y = "two", x = 1)), '{"x":1,"y":"two"}') # key, not 
insertion, order +assert_eq(json.encode(struct(x = 1, y = "two")), '{"x":1,"y":"two"}') # a value with fields +assert_eq(json.encode(struct(y = "two", x = 1, )), '{"x":1,"y":"two"}') # field name order +assert_eq(json.encode(struct(**{'\t': 0})), '{"\\t":0}') # struct keys are escaped too + +# json.encode(float("NaN")) ## cannot encode non-finite float NaN +--- +json.encode({1: "two"}) ### dict has int key, want string +--- +json.encode(len) ### cannot encode function as JSON +--- +json.encode(struct(x = [1, len])) ### in struct field .x: at list index 1: cannot encode function as JSON +--- +json.encode(struct(x = [1, {"x": len}])) ### in struct field .x: at list index 1: in dict key "x": cannot encode function as JSON +--- +def f(deep): + for x in range(10000): + deep = [deep] + json.encode(deep) ### nesting depth limit exceeded +f(None) +--- +## json.decode + +assert_eq(json.decode("null"), None) +assert_eq(json.decode("true"), True) +assert_eq(json.decode("false"), False) +assert_eq(json.decode("-123"), -123) +assert_eq(json.decode("-0"), 0) +assert_eq(json.decode("3539537889086624823140625"), 3539537889086624823140625) +#assert_eq(json.decode("3539537889086624823140625.0"), float(3539537889086624823140625)) +#assert_eq(json.decode("3.539537889086625e+24"), 3.539537889086625e+24) +#assert_eq(json.decode("0e+1"), 0) +#assert_eq(json.decode("-0.0"), -0.0) +#assert_eq(json.decode( +# "-0.000000000000000000000000000000000000000000000000000000000000000000000000000001"), +# -0.000000000000000000000000000000000000000000000000000000000000000000000000000001) +# TODO(adonovan): test "5e-1" "5e1" "5.0e1" ".5e1" +assert_eq(json.decode('[]'), []) +assert_eq(json.decode('[1]'), [1]) +assert_eq(json.decode('[1,2,3]'), [1, 2, 3]) +assert_eq(json.decode('{"one": 1, "two": 2}'), dict(one=1, two=2)) +assert_eq(json.decode('{"foo\\u0000bar": 42}'), {"foo\0bar": 42}) +assert_eq(json.decode('"\\ud83d\\ude39\\ud83d\\udc8d"'), "😹💍") +assert_eq(json.decode('"\\u0123"'), 'ģ')
+#assert_eq(json.decode('"\x7f"'), "\x7f") +assert_eq(json.decode('\t[\t1,\r2,\n3]\n'), [1, 2, 3]) # whitespace other than ' ' +assert_eq(json.decode('\n{\t"a":\r1\t}\n'), {'a': 1}) # same, with dict +assert_eq(json.decode(r'"\\\/\"\n\r\t"'), "\\/\"\n\r\t") # TODO(adonovan): test \b\f when Starlark/Java supports them + +# We accept UTF-16 strings that have been arbitrarily truncated, +# as many Java and JavaScript programs emit them. +assert_eq(json.decode('"<' + "😹"[:1] + '>"'), '<' + "😹"[:1] + '>') + +# Lists and dicts are mutable. +mutable = json.decode('[{}]') +mutable.append(3) +mutable[0][1] = 2 +assert_eq(str(mutable), "[{1: 2}, 3]") + +# def decode_error(expr, error): +# assert.fails(lambda: json.decode(expr), error) + +# decode_error('truefalse', +# "json.decode: at offset 4, unexpected character "f" after value") +json.decode('truefalse') ### at offset 4, unexpected character "f" after value +--- +json.decode('"abc') ### unclosed string literal +--- +json.decode('"ab\\gc"') ### invalid escape '\g' +--- +json.decode("'abc'") ### unexpected character "'" +--- +json.decode("1.2.3") ### invalid number: 1.2.3 +--- +json.decode("+1") ### unexpected character "+" +--- +json.decode("-abc") ### invalid number: - +--- +json.decode("-") ### invalid number: - +--- +json.decode("-00") ### invalid number: -00 +--- +json.decode("00") ### invalid number: 00 +--- +json.decode("--1") ### invalid number: --1 +--- +json.decode("-+1") ### invalid number: -+1 +--- +json.decode("1e1e1") ### invalid number: 1e1e1 +--- +json.decode("5.") ### invalid number: 5. +--- +json.decode(".5") ### unexpected character "."
+--- +json.decode("5.e1") ### invalid number: 5.e1 +--- +json.decode("5e") ### invalid number: 5e +--- +json.decode("5ee1") ### invalid number +--- +json.decode("0123") ### invalid number: 0123 +--- +json.decode("000.123") ### invalid number: 000.123 +--- +json.decode("-0123") ### invalid number: -0123 +--- +json.decode("-000.123") ### invalid number: -000.123 +--- +json.decode("0x123") ### unexpected character "x" after value +--- +json.decode('[1, 2 ') ### unexpected end of file +--- +json.decode('[1, 2, ') ### unexpected end of file +--- +json.decode('[1, 2, ]') ### unexpected character "]" +--- +json.decode('[1, 2, }') ### unexpected character "}" +--- +json.decode('[1, 2}') ### got "}", want ',' or ']' +--- +json.decode('{"one": 1') ### unexpected end of file +--- +json.decode('{"one" 1') ### after object key, got "1", want ':' +--- +json.decode('{"one": 1 "two": 2') ### in object, got "\"", want ',' or '}' +--- +json.decode('{"x": 1, "x": 2}') ### object has duplicate key: "x" +--- +json.decode('{1:2}') ### got int for object key, want string +--- +json.decode('{"one": 1,') ### unexpected end of file +--- +json.decode('{"one": 1, }') ### unexpected character "}" +--- +json.decode('{"one": 1]') ### in object, got "]", want ',' or '}' +--- +json.decode('[' * 10000) ### nesting depth limit exceeded +--- +# Unescaped control codes (even tabs) are forbidden in strings. +json.decode('"\t"') ### invalid character '\x09' in string literal +--- +json.decode('"\\u123"') ### incomplete \uXXXX escape +--- +json.decode('"\\u123') ### incomplete \uXXXX escape +--- +json.decode('"\\u1') ### incomplete \uXXXX escape +--- + +def codec(x): + return json.decode(json.encode(x)) + +# string round-tripping +strings = [ + "\t", + "'", + "\"", + "/", + "\\", + "", + "😿", # U+1F63F CRYING_CAT_FACE + "🐱‍👤", # CAT FACE + ZERO WIDTH JOINER + BUST IN SILHOUETTE +] +assert_eq(codec(strings), strings) + +# # codepoints is a string with every valid non-surrogate 16-bit code point.
+# TODO(adonovan): enable once %c is supported. +# codepoints = ''.join(['%c' % c for c in range(65536) if c < 0xD800 or c > 0xDFFF]) +# assert_eq(codec(codepoints), codepoints) + +# number round-tripping +numbers = [ + 0, 1, -1, +1, + # 0.0, -0.0, 1.0, -1.0, +1.0, 1e6, -1e6, 1.23e45, -1.23e-45, + 3539537889086624823140625, +# float(3539537889086624823140625), +] +assert_eq(codec(numbers), numbers) + +## json.indent + +s = json.encode(dict(x = 1, y = ["one", "two"])) + +# assert_eq(json.indent(s), '''\ +# { +# "x": 1, +# "y": [ +# "one", +# "two" +# ] +# }''') + +# assert_eq(json.decode(json.indent(s)), {"x": 1, "y": ["one", "two"]}) + +# assert_eq(json.indent(s, prefix='¶', indent='–––'), '''\ +# { +# ¶–––"x": 1, +# ¶–––"y": [ +# ¶––––––"one", +# ¶––––––"two" +# ¶–––] +# ¶}''') + +# assert.fails(lambda: json.indent("!@#$%^& this is not json"), 'invalid character')