diff --git a/starlark/eval_test.go b/starlark/eval_test.go
index 49792a1a..393df0e3 100644
--- a/starlark/eval_test.go
+++ b/starlark/eval_test.go
@@ -16,6 +16,8 @@ import (
 	"go.starlark.net/internal/chunkedfile"
 	"go.starlark.net/resolve"
 	"go.starlark.net/starlark"
+	"go.starlark.net/starlarkjson"
+	"go.starlark.net/starlarkstruct"
 	"go.starlark.net/starlarktest"
 	"go.starlark.net/syntax"
 )
@@ -112,6 +114,7 @@ func TestExecFile(t *testing.T) {
 		"testdata/float.star",
 		"testdata/function.star",
 		"testdata/int.star",
+		"testdata/json.star",
 		"testdata/list.star",
 		"testdata/misc.star",
 		"testdata/set.star",
@@ -125,6 +128,7 @@ func TestExecFile(t *testing.T) {
 			predeclared := starlark.StringDict{
 				"hasfields": starlark.NewBuiltin("hasfields", newHasFields),
 				"fibonacci": fib{},
+				"struct":    starlark.NewBuiltin("struct", starlarkstruct.Make),
 			}
 
 			setOptions(chunk.Source)
@@ -179,6 +183,9 @@ func load(thread *starlark.Thread, module string) (starlark.StringDict, error) {
 	if module == "assert.star" {
 		return starlarktest.LoadAssertModule()
 	}
+	if module == "json.star" {
+		return starlark.StringDict{"json": starlarkjson.Module}, nil
+	}
 
 	// TODO(adonovan): test load() using this execution path.
 	filename := filepath.Join(filepath.Dir(thread.Caller().Position().Filename()), module)
diff --git a/starlark/testdata/json.star b/starlark/testdata/json.star
new file mode 100644
index 00000000..4ed1d861
--- /dev/null
+++ b/starlark/testdata/json.star
@@ -0,0 +1,72 @@
+# Tests of json module.
+# option:float
+
+load("assert.star", "assert")
+load("json.star", "json")
+
+assert.eq(dir(json), ["decode", "encode", "indent"])
+
+## json.encode
+
+assert.eq(json.encode(None), "null")
+assert.eq(json.encode(True), "true")
+assert.eq(json.encode(-123), "-123")
+assert.eq(json.encode(12345*12345*12345*12345*12345*12345), "3539537889086624823140625")
+assert.eq(json.encode(12.345e67), "1.2345e+68")
+assert.eq(json.encode("hello"), '"hello"')
+assert.eq(json.encode([1, 2, 3]), "[1,2,3]")
+assert.eq(json.encode((1, 2, 3)), "[1,2,3]")
+assert.eq(json.encode(range(3)), "[0,1,2]") # a built-in iterable
+assert.eq(json.encode(dict(x = 1, y = "two")), '{"x":1,"y":"two"}')
+assert.eq(json.encode(struct(x = 1, y = "two")), '{"x":1,"y":"two"}') # a user-defined HasAttrs
+
+# errors
+assert.fails(lambda: json.encode(float("NaN")), "cannot encode non-finite float NaN")
+assert.fails(lambda: json.encode({1: "two"}), "dict has int key, want string")
+assert.fails(lambda: json.encode(len), "cannot encode builtin_function_or_method as JSON")
+assert.fails(lambda: json.encode(struct(x=[1, {"x": len}])), # nested failure
+             'in field .x: at list index 1: in dict key "x": cannot encode...')
+
+## json.decode
+
+assert.eq(json.decode("null"), None)
+assert.eq(json.decode("true"), True)
+assert.eq(json.decode("-123"), -123)
+assert.eq(json.decode("3539537889086624823140625"), float(3539537889086624823140625))
+assert.eq(json.decode('[]'), ())
+assert.eq(json.decode('[1]'), (1,))
+assert.eq(json.decode('[1,2,3]'), (1, 2, 3))
+assert.eq(json.decode('{"one": 1, "two": 2}'), dict(one=1, two=2))
+assert.eq(json.decode('{"b": 1, "a": 2}').keys(), ["a", "b"]) # keys are inserted in sorted order
+
+# Exercise JSON string coding by round-tripping a string with every 16-bit code point.
+def codec(x):
+    return json.decode(json.encode(x))
+codepoints = ''.join(['%c' %c for c in range(65536)])
+assert.eq(codec(codepoints), codepoints)
+
+# DEL (U+007F) on its own must round-trip too: Go-style quoting would emit an escape that JSON rejects.
+assert.eq(codec('%c' % 127), '%c' % 127)
+
+## json.indent
+
+s = json.encode(dict(x = 1, y = ["one", "two"]))
+
+assert.eq(json.indent(s), '''{
+	"x": 1,
+	"y": [
+		"one",
+		"two"
+	]
+}''')
+
+assert.eq(json.indent(s, prefix='¶', indent='–––'), '''{
+¶–––"x": 1,
+¶–––"y": [
+¶––––––"one",
+¶––––––"two"
+¶–––]
+¶}''')
+
+assert.fails(lambda: json.indent("!@#$%^& this is not json"), 'invalid character')
+---
diff --git a/starlarkjson/json.go b/starlarkjson/json.go
new file mode 100644
index 00000000..c9dda932
--- /dev/null
+++ b/starlarkjson/json.go
@@ -0,0 +1,322 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package starlarkjson defines utilities for converting Starlark values
+// to/from JSON strings. See www.json.org.
+package starlarkjson // import "go.starlark.net/starlarkjson"
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"math"
+	"sort"
+	"strconv"
+
+	"go.starlark.net/starlark"
+	"go.starlark.net/starlarkstruct"
+)
+
+// Module is a Starlark module of JSON-related functions.
+//
+//   json = module(
+//      encode,
+//      decode,
+//      indent,
+//   )
+//
+// def encode(x):
+//
+// The encode function accepts one required positional argument,
+// which it converts to JSON by cases:
+// - Starlark None, bool, int, float, and string values are
+//   encoded as their corresponding JSON atoms.
+//   JSON has only one number data type.
+//   It is an error to encode a non-finite floating-point value.
+// - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
+//   It is an error if any key is not a string.
+// - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
+// - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
+// If a user-defined type matches multiple interfaces (e.g. Iterable and
+// HasAttrs), the earliest case described above wins.
+// An application-defined Starlark value type that implements the
+// standard json.Marshaler Go interface defines its own JSON encoding.
+// Encoding any other value yields an error.
+//
+// def decode(x):
+//
+// The decode function accepts one positional parameter, a JSON string.
+// It returns the Starlark value that the string denotes.
+// - Numbers are parsed as floats (the current decoder never yields int).
+// - JSON objects are parsed as Starlark dicts, with keys in sorted order.
+// - JSON arrays are parsed as Starlark tuples.
+// Decoding fails if x is not a valid JSON string.
+//
+// def indent(str, *, prefix="", indent="\t"):
+//
+// The indent function pretty-prints a valid JSON encoding,
+// and returns a string containing the indented form.
+// It accepts one required positional parameter, the JSON string,
+// and two optional keyword-only string parameters, prefix and indent,
+// that specify a prefix of each new line, and the unit of indentation.
+//
+var Module = &starlarkstruct.Module{
+	Name: "json",
+	Members: starlark.StringDict{
+		"encode": starlark.NewBuiltin("json.encode", encode),
+		"decode": starlark.NewBuiltin("json.decode", decode),
+		"indent": starlark.NewBuiltin("json.indent", indent),
+	},
+}
+
+// encode implements the json.encode built-in. It takes exactly one
+// positional argument and returns its JSON encoding as a Starlark string.
+// Any failure is returned as an error prefixed with the built-in's name and
+// the path (field, key, or index) leading to the offending value.
+func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var x starlark.Value
+	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
+		return nil, err
+	}
+
+	buf := new(bytes.Buffer)
+
+	// emit appends the JSON encoding of x to buf. The order of the cases
+	// determines which encoding wins for a value that satisfies several
+	// of the interfaces.
+	var emit func(x starlark.Value) error
+	emit = func(x starlark.Value) error {
+		switch x := x.(type) {
+		case json.Marshaler:
+			// Application-defined starlark.Value types
+			// may define their own JSON encoding.
+			data, err := x.MarshalJSON()
+			if err != nil {
+				return err
+			}
+			buf.Write(data)
+
+		case starlark.NoneType:
+			buf.WriteString("null")
+
+		case starlark.Bool:
+			fmt.Fprintf(buf, "%t", x)
+
+		case starlark.Int:
+			// JSON imposes no limit on numbers,
+			// but the standard Go decoder may switch to float.
+			fmt.Fprint(buf, x)
+
+		case starlark.Float:
+			if !isFinite(float64(x)) {
+				return fmt.Errorf("cannot encode non-finite float %v", x)
+			}
+			fmt.Fprintf(buf, "%g", x)
+
+		case starlark.String:
+			quote(buf, string(x))
+
+		case starlark.IterableMapping:
+			// e.g. dict (must have string keys)
+			buf.WriteByte('{')
+			iter := x.Iterate()
+			defer iter.Done()
+			var k starlark.Value
+			for i := 0; iter.Next(&k); i++ {
+				if i > 0 {
+					buf.WriteByte(',')
+				}
+				s, ok := starlark.AsString(k)
+				if !ok {
+					return fmt.Errorf("%s has %s key, want string", x.Type(), k.Type())
+				}
+				v, found, err := x.Get(k)
+				if err != nil || !found {
+					// An inconsistent application-defined mapping is a bug in
+					// that type; report it instead of exiting the host process.
+					return fmt.Errorf("internal error: mapping %s has %s among keys but value lookup fails", x.Type(), k)
+				}
+
+				quote(buf, s)
+				buf.WriteByte(':')
+				if err := emit(v); err != nil {
+					return fmt.Errorf("in %s key %s: %v", x.Type(), k, err)
+				}
+			}
+			buf.WriteByte('}')
+
+		case starlark.Iterable:
+			// e.g. tuple, list
+			buf.WriteByte('[')
+			iter := x.Iterate()
+			defer iter.Done()
+			var elem starlark.Value
+			for i := 0; iter.Next(&elem); i++ {
+				if i > 0 {
+					buf.WriteByte(',')
+				}
+				if err := emit(elem); err != nil {
+					return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
+				}
+			}
+			buf.WriteByte(']')
+
+		case starlark.HasAttrs:
+			// e.g. struct
+			buf.WriteByte('{')
+			var names []string
+			names = append(names, x.AttrNames()...)
+			sort.Strings(names)
+			for i, name := range names {
+				v, err := x.Attr(name)
+				if err != nil || v == nil {
+					// As for mappings: return an error rather than kill the process.
+					return fmt.Errorf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name)
+				}
+				if i > 0 {
+					buf.WriteByte(',')
+				}
+				quote(buf, name)
+				buf.WriteByte(':')
+				if err := emit(v); err != nil {
+					return fmt.Errorf("in field .%s: %v", name, err)
+				}
+			}
+			buf.WriteByte('}')
+
+		default:
+			return fmt.Errorf("cannot encode %s as JSON", x.Type())
+		}
+		return nil
+	}
+
+	if err := emit(x); err != nil {
+		return nil, fmt.Errorf("%s: %v", b.Name(), err)
+	}
+	return starlark.String(buf.String()), nil
+}
+
+// quote appends s to buf as a double-quoted JSON string literal.
+func quote(buf *bytes.Buffer, s string) {
+	if goQuoteIsSafe(s) {
+		var quoteSpace [128]byte
+		buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
+	} else {
+		// vanishingly rare for text strings
+		data, _ := json.Marshal(s) // marshaling a string cannot fail
+		buf.Write(data)
+	}
+}
+
+// goQuoteIsSafe reports whether Go's strconv quoting of s is known to be
+// valid JSON. It is conservative: it may report false for a safe string.
+func goQuoteIsSafe(s string) bool {
+	for _, r := range s {
+		// JSON doesn't like Go's \xHH escapes, which strconv uses for
+		// ASCII control codes, DEL (0x7f), and invalid UTF-8 bytes
+		// (which range reports as U+FFFD), nor its \UHHHHHHHH escapes
+		// for runes >16 bits.
+		if r < 0x20 || r == 0x7f || r == '\uFFFD' || r >= 0x10000 {
+			return false
+		}
+	}
+	return true
+}
+
+// indent implements the json.indent built-in: it re-indents the JSON text
+// str using the optional keyword-only prefix and indent strings.
+func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	prefix, indent := "", "\t" // keyword-only
+	if err := starlark.UnpackArgs(b.Name(), nil, kwargs,
+		"prefix?", &prefix,
+		"indent?", &indent,
+	); err != nil {
+		return nil, err
+	}
+	var str string // positional-only
+	if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil {
+		return nil, err
+	}
+
+	buf := new(bytes.Buffer)
+	if err := json.Indent(buf, []byte(str), prefix, indent); err != nil {
+		return nil, fmt.Errorf("%s: %v", b.Name(), err)
+	}
+	return starlark.String(buf.String()), nil
+}
+
+// decode implements the json.decode built-in: it parses the JSON text in
+// its sole positional argument and returns the equivalent Starlark value.
+func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var str string
+	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &str); err != nil {
+		return nil, err
+	}
+
+	// TODO(adonovan): design a mechanism whereby the caller can
+	// control the types instantiated by the decoder (e.g. list
+	// instead of tuple, or struct instead of dict; or any type that
+	// satisfies json.Unmarshaler).
+
+	// This implementation is just a sketch.
+	// TODO(adonovan) reimplement it independent of the Go decoder.
+	// For example, we could decode large integers to bigint, not float.
+	var x interface{}
+	if err := json.Unmarshal([]byte(str), &x); err != nil {
+		return nil, fmt.Errorf("%s: %v", b.Name(), err)
+	}
+	var decode func(x interface{}) (starlark.Value, error)
+	decode = func(x interface{}) (starlark.Value, error) {
+		switch x := x.(type) {
+		case nil:
+			return starlark.None, nil
+		case bool:
+			return starlark.Bool(x), nil
+		case float64:
+			// json.Unmarshal into interface{} yields float64 for every number.
+			return starlark.Float(x), nil
+		case string:
+			return starlark.String(x), nil
+		case map[string]interface{}: // object
+			// Go randomizes map iteration order, so insert the keys in
+			// sorted order to make the dict's iteration order deterministic.
+			keys := make([]string, 0, len(x))
+			for k := range x {
+				keys = append(keys, k)
+			}
+			sort.Strings(keys)
+			dict := new(starlark.Dict)
+			for _, k := range keys {
+				vv, err := decode(x[k])
+				if err != nil {
+					return nil, fmt.Errorf("in object field .%s, %v", k, err)
+				}
+				dict.SetKey(starlark.String(k), vv) // can't fail
+			}
+			return dict, nil
+		case []interface{}: // array
+			tuple := make(starlark.Tuple, len(x))
+			for i, v := range x {
+				vv, err := decode(v)
+				if err != nil {
+					return nil, fmt.Errorf("at array index %d, %v", i, err)
+				}
+				tuple[i] = vv
+			}
+			return tuple, nil
+		}
+		panic(x) // unreachable
+	}
+	v, err := decode(x)
+	if err != nil {
+		return nil, fmt.Errorf("%s: %v", b.Name(), err)
+	}
+	return v, nil
+}
+
+// isFinite reports whether f represents a finite rational value.
+// It is equivalent to !math.IsNaN(f) && !math.IsInf(f, 0).
+func isFinite(f float64) bool {
+	return math.Abs(f) <= math.MaxFloat64
+}