From 8ea283396ffab5487a97927b3f55284c130930a4 Mon Sep 17 00:00:00 2001 From: George Papanikolaou Date: Wed, 17 Jul 2024 11:20:23 +0300 Subject: [PATCH] feat: conditionally validate names of Record, Enums and Fixed. (#415) --- README.md | 16 ++++++++++++++ ocf/ocf.go | 2 +- ocf/ocf_test.go | 37 +++++++++++++++++++++++++++++++++ ocf/testdata/invalid-name.avro | Bin 0 -> 226 bytes reader.go | 2 +- schema.go | 4 ++++ schema_internal_test.go | 10 +++++++++ schema_parse.go | 9 +++++++- 8 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 ocf/testdata/invalid-name.avro diff --git a/README.md b/README.md index 9b9bad0..7cc0671 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,8 @@ Or use it as a lib in internal commands, it's the `gen` package ## Avro schema validation +### avrosv + A small Avro schema validation command-line utility is also available. This simple tool leverages the schema parsing functionality of the library, showing validation errors or optionally dumping parsed schemas to the console. It can be used in CI/CD pipelines to validate schema changes in a repository. @@ -223,6 +225,20 @@ Check the options and usage with `-h`: avrosv -h ``` +### Name Validation + +Avro names are validated according to the +[Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names). + +However, the official Java library does not validate said names accordingly, resulting to some files out in the wild +to have invalid names. Thus, this library has a configuration option to allow for these invalid names to be parsed. + +```go +avro.SkipNameValidation = true +``` + +Note that this variable is global, so ideally you'd need to unset it after you're done with the invalid schema. + ## Go Version Support This library supports the last two versions of Go. While the minimum Go version is diff --git a/ocf/ocf.go b/ocf/ocf.go index f258bde..137cc03 100644 --- a/ocf/ocf.go +++ b/ocf/ocf.go @@ -155,7 +155,7 @@ type encoderConfig struct { EncodingConfig avro.API } -// EncoderFunc represents an configuration function for Encoder. +// EncoderFunc represents a configuration function for Encoder. type EncoderFunc func(cfg *encoderConfig) // WithBlockLength sets the block length on the encoder. diff --git a/ocf/ocf_test.go b/ocf/ocf_test.go index 102a1da..5eee2bb 100644 --- a/ocf/ocf_test.go +++ b/ocf/ocf_test.go @@ -195,6 +195,43 @@ func TestDecoder_WithDeflate(t *testing.T) { assert.Equal(t, 1, count) } +func TestDecoder_InvalidName(t *testing.T) { + type record struct { + Hello int `avro:"hello"` + What string `avro:"what"` + } + want := record{ + What: "yes", + Hello: 1, + } + + f, err := os.Open("testdata/invalid-name.avro") + if err != nil { + t.Error(err) + return + } + t.Cleanup(func() { _ = f.Close() }) + + avro.SkipNameValidation = true + defer func() { avro.SkipNameValidation = false }() + + dec, err := ocf.NewDecoder(f) + require.NoError(t, err) + + var count int + for dec.HasNext() { + count++ + var got record + err = dec.Decode(&got) + + require.NoError(t, err) + assert.Equal(t, want, got) + } + + require.NoError(t, dec.Error()) + assert.Equal(t, 1, count) +} + func TestDecoder_WithDeflateHandlesInvalidData(t *testing.T) { f, err := os.Open("testdata/deflate-invalid-data.avro") if err != nil { diff --git a/ocf/testdata/invalid-name.avro b/ocf/testdata/invalid-name.avro new file mode 100644 index 0000000000000000000000000000000000000000..cf8e2491baecddfdd24f848c244967bd6d9a263e GIT binary patch literal 226 zcmeZI%3@>@ODrqO*DFrWNX<=L$5gFUQdy9yWTjM;nw(#hqNJmgmzWFU=jj3|2(P#x zF&QM3UzDzwoS$2em{+L>G6X1{lAjEea8xKsOv(W%Ps>crNhwycimry4laZQ}15ydK zCNr-@sa6LrTb_|vf{-jODay=CSE`Mz6#+XkIX@*enIo?>Cx?MaWXBZ=)#{A$m}SX7 Q -1 { return fmt.Errorf("invalid name %s", name) } diff --git a/schema_internal_test.go b/schema_internal_test.go index 7e029b2..36a3f1a 100644 --- a/schema_internal_test.go +++ b/schema_internal_test.go @@ -96,6 +96,16 @@ func TestProperties_PropGetsFromEmptySet(t *testing.T) { assert.Nil(t, p.Prop("test")) } +func TestName_InvalidNameFirstCharButValidationSkipped(t *testing.T) { + SkipNameValidation = true + t.Cleanup(func() { + SkipNameValidation = false + }) + + _, err := newName("+bar", "foo", nil) + assert.NoError(t, err) +} + func TestIsValidDefault(t *testing.T) { tests := []struct { name string diff --git a/schema_parse.go b/schema_parse.go index 2be5a37..76e21e1 100644 --- a/schema_parse.go +++ b/schema_parse.go @@ -15,6 +15,13 @@ import ( // DefaultSchemaCache is the default cache for schemas. var DefaultSchemaCache = &SchemaCache{} +// SkipNameValidation sets whether to skip name validation. +// Avro spec incurs a strict naming convention for names and aliases, however official Avro tools do not follow that +// More info: +// https://lists.apache.org/thread/39v98os6wdpyr6w31xdkz0yzol51fsrr +// https://github.com/apache/avro/pull/1995 +var SkipNameValidation = false + // Parse parses a schema string. func Parse(schema string) (Schema, error) { return ParseBytes([]byte(schema)) @@ -25,7 +32,7 @@ func ParseWithCache(schema, namespace string, cache *SchemaCache) (Schema, error return ParseBytesWithCache([]byte(schema), namespace, cache) } -// MustParse parses a schema string, panicing if there is an error. +// MustParse parses a schema string, panicking if there is an error. func MustParse(schema string) Schema { parsed, err := Parse(schema) if err != nil {