Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gen): implement struct generation from schema registry subjects and versions #498

Merged
merged 13 commits into from
Feb 6, 2025
82 changes: 67 additions & 15 deletions cmd/avrogen/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ package main

import (
"bytes"
"context"
"errors"
"flag"
"fmt"
"github.com/hamba/avro/v2/registry"
"io"
"os"
"path/filepath"
"strconv"
"strings"

"github.com/hamba/avro/v2"
Expand All @@ -18,15 +21,16 @@ import (
type config struct {
TemplateFileName string

Pkg string
PkgDoc string
Out string
Tags string
FullName bool
Encoders bool
FullSchema bool
StrictTypes bool
Initialisms string
Pkg string
PkgDoc string
Out string
Tags string
FullName bool
Encoders bool
FullSchema bool
StrictTypes bool
Initialisms string
SchemaRegistry string
}

func main() {
Expand All @@ -47,6 +51,7 @@ func realMain(args []string, stdout, stderr io.Writer) int {
flgs.BoolVar(&cfg.StrictTypes, "strict-types", false, "Use strict type sizes (e.g. int32) during generation.")
flgs.StringVar(&cfg.Initialisms, "initialisms", "", "Custom initialisms <VAL>[,...] for struct and field names.")
flgs.StringVar(&cfg.TemplateFileName, "template-filename", "", "Override output template with one loaded from file.")
flgs.StringVar(&cfg.SchemaRegistry, "schemaregistry", "", "The URL to schema registry, e.g.: http://localhost:8081.")
flgs.Usage = func() {
_, _ = fmt.Fprintln(stderr, "Usage: avrogen [options] schemas")
_, _ = fmt.Fprintln(stderr, "Options:")
Expand Down Expand Up @@ -88,14 +93,47 @@ func realMain(args []string, stdout, stderr io.Writer) int {
gen.WithStrictTypes(cfg.StrictTypes),
gen.WithFullSchema(cfg.FullSchema),
}

g := gen.NewGenerator(cfg.Pkg, tags, opts...)
for _, file := range flgs.Args() {
schema, err := avro.ParseFiles(filepath.Clean(file))
if err != nil {
_, _ = fmt.Fprintf(stderr, "Error: %v\n", err)
return 2

ctx := context.Background()
ekazakas marked this conversation as resolved.
Show resolved Hide resolved

for _, entry := range flgs.Args() {
var schema avro.Schema
var schemaMetadata *gen.SchemaMetadata

if cfg.SchemaRegistry != "" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A switch here would be better, as else is in general a code smell.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

client, err := registry.NewClient(cfg.SchemaRegistry)
if err != nil {
_, _ = fmt.Fprintf(stderr, "Error: %v\n", err)
return 2
}

subject, version, err := parseSubjectVersion(entry)
if err != nil {
_, _ = fmt.Fprintf(stderr, "Error: %v\n", err)
return 2
}

schemaMetadata = &gen.SchemaMetadata{
Subject: subject,
Version: version,
}

schema, err = client.GetSchemaByVersion(ctx, subject, version)
if err != nil {
_, _ = fmt.Fprintf(stderr, "Error: %v\n", err)
return 2
}
} else {
schema, err = avro.ParseFiles(filepath.Clean(entry))
if err != nil {
_, _ = fmt.Fprintf(stderr, "Error: %v\n", err)
return 2
}
}
g.Parse(schema)

g.Parse(schema, schemaMetadata)
}

var buf bytes.Buffer
Expand Down Expand Up @@ -205,3 +243,17 @@ func loadTemplate(templateFileName string) ([]byte, error) {
}
return os.ReadFile(filepath.Clean(templateFileName))
}

func parseSubjectVersion(entry string) (string, int, error) {
parts := strings.Split(entry, ":")
if len(parts) != 2 {
return "", -1, errors.New("entry must be of format subject:version")
}

version, err := strconv.Atoi(parts[1])
if err != nil {
return "", -1, err
}

return parts[0], version, nil
}
82 changes: 52 additions & 30 deletions gen/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ type Config struct {
LogicalTypes []LogicalType
}

// SchemaMetadata contains schema registry metadata
type SchemaMetadata struct {
Subject string
Version int
}

// TagStyle defines the styling for a tag.
type TagStyle string

Expand Down Expand Up @@ -65,15 +71,24 @@ var (

// Struct generates Go structs based on the schema and writes them to w.
func Struct(s string, w io.Writer, cfg Config) error {
return StructWithMetadata(s, nil, w, cfg)
}

func StructFromSchema(schema avro.Schema, w io.Writer, cfg Config) error {
return StructFromSchemaWithMetadata(schema, nil, w, cfg)
}

// StructWithMetadata generates Go structs based on the schema and writes them to w.
func StructWithMetadata(s string, schemaMetadata *SchemaMetadata, w io.Writer, cfg Config) error {
schema, err := avro.Parse(s)
if err != nil {
return err
}
return StructFromSchema(schema, w, cfg)
return StructFromSchemaWithMetadata(schema, schemaMetadata, w, cfg)
}

// StructFromSchema generates Go structs based on the schema and writes them to w.
func StructFromSchema(schema avro.Schema, w io.Writer, cfg Config) error {
// StructFromSchemaWithMetadata generates Go structs based on the schema and writes them to w.
func StructFromSchemaWithMetadata(schema avro.Schema, schemaMetadata *SchemaMetadata, w io.Writer, cfg Config) error {
rec, ok := schema.(*avro.RecordSchema)
if !ok {
return errors.New("can only generate Go code from Record Schemas")
Expand All @@ -90,7 +105,7 @@ func StructFromSchema(schema avro.Schema, w io.Writer, cfg Config) error {
opts = append(opts, WithLogicalType(opt))
}
g := NewGenerator(strcase.ToSnake(cfg.PackageName), cfg.Tags, opts...)
g.Parse(rec)
g.Parse(rec, schemaMetadata)

buf := &bytes.Buffer{}
if err := g.Write(buf); err != nil {
Expand Down Expand Up @@ -258,16 +273,21 @@ func (g *Generator) Reset() {
}

// Parse parses an avro schema into Go types.
func (g *Generator) Parse(schema avro.Schema) {
_ = g.generate(schema)
func (g *Generator) Parse(schema avro.Schema, schemaMetadata *SchemaMetadata) {
ekazakas marked this conversation as resolved.
Show resolved Hide resolved
_ = g.generate(schema, schemaMetadata, 0)
}

func (g *Generator) generate(schema avro.Schema) string {
func (g *Generator) generate(schema avro.Schema, schemaMetadata *SchemaMetadata, level int) string {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not entirely sure I see the purpose of schemaMetadata. It is a very intrusive change for something that brings little value. They are also values that are meaningless outside of a specific registry.

Copy link
Contributor Author

@ekazakas ekazakas Feb 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I see your point. It does feel intrusive. These values are mostly useful when using Schema registry. The metadata provides information about the subject and the version of some specific schema that is fetched from Schema registry.

However the benefit of these is that the subject and version can be embedded in the generated struct. Whoever will be working with schema registry might certainly appreciate the extra information provided by the generated struct.

I wonder if there could be some other way to embed this information into struct?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to attach it to the first struct is not really correct in this case. For instance a schema that is a union -> array -> record would attach it in a weird place. IMO it would be better to make them consts in the package, meaning it could be attached to the Generator itself, and handle in the Write. This would be simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is a good idea. This will keep the method signatures backwards compatible and still provide information about subject and version. I will implement these changes tomorrow and ask again for a review.

Copy link
Contributor Author

@ekazakas ekazakas Feb 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made the changes, the API is backwards compatible, but there are a few things and limitations I would like to point out and clarify.

At the moment, I can only generate structs for a single subject and version pair, because the SchemaMetadata can only be attached on the generator config, so only once. With files, we can still do multiple files per single generate command.

The subject and version are now constants on the package, but I would really like to attach them on the "root" type, the reason being that it would allow us to work with schemaregistry (de)serializer, e.g.:

type AvroMessage interface {
    Schema() avro.Schema
    Subject() string
    Version() int
}

func (s *Deserializer) DeserializeInto(payload []byte, msg interface{}) error {
        avroMsg, ok := v.(AvroMessage)
	if !ok {
		return nil, errors.WithStack(NoAvroMessageError{v})
	}
	
	schemaMetadata, err := m.schemaRegistryClient.GetSchemaMetadata(avroMsg.Subject(), avroMsg.Version())
	if err != nil {
		return nil, err
	}
	
	fmt.Println(schemaMetadata.ID)
   <...>
}

func (m *AvroMarshaler) Marshal(v interface{}) ([]byte, error) {
	avroMsg, ok := v.(AvroMessage)
	if !ok {
		return nil, errors.WithStack(NoAvroMessageError{v})
	}

	schemaMetadata, err := m.schemaRegistryClient.GetSchemaMetadata(avroMsg.Subject(), avroMsg.Version())
	if err != nil {
		return nil, err
	}

	config := avrov2.NewSerializerConfig()
	config.AutoRegisterSchemas = false
	config.UseSchemaID = schemaMetadata.ID

	serializer, err := avrov2.NewSerializer(m.schemaRegistryClient, serde.ValueSerde, config)
	if err != nil {
		return nil, err
	}

	b, err := serializer.Serialize(avroMsg.Subject(), avroMsg)
	if err != nil {
		return nil, err
	}

	return b, nil
}

I am not sure what would be the best way to achieve this and whether it's possible at all. I could also generate the Subject() and Version() functions on each of the types that come from current subject and version pair

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe one of the options here would be to provide a map that could be used to generate custom functions on specified types. E.g.:

Customizations: map[string]map[string]any

Where root map key would be type name, child map key would be function name and value would be the value it returns.

e.g.:

	test := map[string]map[string]any{
		"SomeType": map[string]string{
			"AFunction": "a",
			"BFunction": "b",
		},
	}
============================================
	type SomeType struct {
		// SomeString is a string.
		SomeString string `avro:"someString"`
		SomeInt    int    `avro:"someInt"`
	}
	
	func (o *SomeType) AFunction() string {
		return "a"
	}
	
	func (o *SomeType) BFunction() string {
		return "b"
	}	

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What you are hoping to generate seems out of the scope of avrogen, but you could use gen and a custom template to achieve what you need. In this case, the only change necessary is a permissive (read any) attachment of metadata in the generator.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it's really a very specific usecase... I guess it's a bit too much to ask without affecting other users.

I have updated avrogen to support reading from schema registry subject versions. It can do it with multiple entries.

And I have also added the any metadata field for gen to be able to implement a custom version of avrogen

switch s := schema.(type) {
case *avro.RefSchema:
return g.resolveRefSchema(s)
return g.resolveRefSchema(s, level)
case *avro.RecordSchema:
return g.resolveRecordSchema(s)
var actualSchemaMetadata *SchemaMetadata
if level < 1 {
actualSchemaMetadata = schemaMetadata
}

return g.resolveRecordSchema(s, actualSchemaMetadata, level)
case *avro.PrimitiveSchema:
typ := primitiveMappings[s.Type()]
if ls := s.Logical(); ls != nil {
Expand All @@ -280,7 +300,7 @@ func (g *Generator) generate(schema avro.Schema) string {
}
return typ
case *avro.ArraySchema:
return "[]" + g.generate(s.Items())
return "[]" + g.generate(s.Items(), nil, level+1)
case *avro.EnumSchema:
return "string"
case *avro.FixedSchema:
Expand All @@ -290,9 +310,9 @@ func (g *Generator) generate(schema avro.Schema) string {
}
return typ
case *avro.MapSchema:
return "map[string]" + g.generate(s.Values())
return "map[string]" + g.generate(s.Values(), nil, level+1)
case *avro.UnionSchema:
return g.resolveUnionTypes(s)
return g.resolveUnionTypes(s, level)
default:
return ""
}
Expand All @@ -305,16 +325,16 @@ func (g *Generator) resolveTypeName(s avro.NamedSchema) string {
return g.nameCaser.ToPascal(s.Name())
}

func (g *Generator) resolveRecordSchema(schema *avro.RecordSchema) string {
func (g *Generator) resolveRecordSchema(schema *avro.RecordSchema, schemaMetadata *SchemaMetadata, level int) string {
fields := make([]field, len(schema.Fields()))
for i, f := range schema.Fields() {
typ := g.generate(f.Type())
typ := g.generate(f.Type(), nil, level+1)
fields[i] = g.newField(g.nameCaser.ToPascal(f.Name()), typ, f.Doc(), f.Name(), f.Props())
}

typeName := g.resolveTypeName(schema)
if !g.hasTypeDef(typeName) {
g.typedefs = append(g.typedefs, newType(typeName, schema.Doc(), fields, g.rawSchema(schema), schema.Props()))
g.typedefs = append(g.typedefs, newType(typeName, schema.Doc(), fields, g.rawSchema(schema), schemaMetadata, schema.Props()))
}
return typeName
}
Expand All @@ -340,20 +360,20 @@ func (g *Generator) hasTypeDef(name string) bool {
return false
}

func (g *Generator) resolveRefSchema(s *avro.RefSchema) string {
func (g *Generator) resolveRefSchema(s *avro.RefSchema, level int) string {
if sx, ok := s.Schema().(*avro.RecordSchema); ok {
return g.resolveTypeName(sx)
}
return g.generate(s.Schema())
return g.generate(s.Schema(), nil, level+1)
}

func (g *Generator) resolveUnionTypes(s *avro.UnionSchema) string {
func (g *Generator) resolveUnionTypes(s *avro.UnionSchema, level int) string {
types := make([]string, 0, len(s.Types()))
for _, elem := range s.Types() {
if _, ok := elem.(*avro.NullSchema); ok {
continue
}
types = append(types, g.generate(elem))
types = append(types, g.generate(elem, nil, level+1))
}
if s.Nullable() {
return "*" + types[0]
Expand Down Expand Up @@ -461,20 +481,22 @@ func (g *Generator) Write(w io.Writer) error {
}

type typedef struct {
Name string
Doc string
Fields []field
Schema string
Props map[string]any
Name string
Doc string
Fields []field
Schema string
SchemaMetadata *SchemaMetadata
Props map[string]any
}

func newType(name, doc string, fields []field, schema string, props map[string]any) typedef {
func newType(name, doc string, fields []field, schema string, schemaMetadata *SchemaMetadata, props map[string]any) typedef {
return typedef{
Name: name,
Doc: ensureTrailingPeriod(doc),
Fields: fields,
Schema: schema,
Props: props,
Name: name,
Doc: ensureTrailingPeriod(doc),
Fields: fields,
Schema: schema,
SchemaMetadata: schemaMetadata,
Props: props,
}
}

Expand Down
16 changes: 10 additions & 6 deletions gen/gen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ import (
var update = flag.Bool("update", false, "Update golden files")

func TestStruct_InvalidSchemaYieldsErr(t *testing.T) {
err := gen.Struct(`asd`, &bytes.Buffer{}, gen.Config{})
err := gen.StructWithMetadata(`asd`, nil, &bytes.Buffer{}, gen.Config{})

assert.Error(t, err)
}

func TestStruct_NonRecordSchemasAreNotSupported(t *testing.T) {
err := gen.Struct(`{"type": "string"}`, &bytes.Buffer{}, gen.Config{})
err := gen.StructWithMetadata(`{"type": "string"}`, nil, &bytes.Buffer{}, gen.Config{})

require.Error(t, err)
assert.Contains(t, strings.ToLower(err.Error()), "only")
Expand Down Expand Up @@ -286,8 +286,8 @@ func TestGenerator(t *testing.T) {
require.NoError(t, err)

g := gen.NewGenerator("something", map[string]gen.TagStyle{})
g.Parse(unionSchema)
g.Parse(mainSchema)
g.Parse(unionSchema, nil)
g.Parse(mainSchema, nil)

var buf bytes.Buffer
err = g.Write(&buf)
Expand All @@ -311,7 +311,11 @@ func generate(t *testing.T, schema string, gc gen.Config) ([]byte, []string) {
t.Helper()

buf := &bytes.Buffer{}
err := gen.Struct(schema, buf, gc)
schemaMetadata := gen.SchemaMetadata{
Subject: "test",
Version: 1,
}
err := gen.StructWithMetadata(schema, &schemaMetadata, buf, gc)
require.NoError(t, err)

b := make([]byte, buf.Len())
Expand All @@ -335,5 +339,5 @@ func removeSpaceAndEmptyLines(goCode []byte) []string {
// removeMoreThanOneConsecutiveSpaces replaces all sequences of more than one space, with a single one
func removeMoreThanOneConsecutiveSpaces(lineBytes []byte) string {
lines := strings.TrimSpace(string(lineBytes))
return strings.Join(regexp.MustCompile("\\s+|\\t+").Split(lines, -1), " ")
return strings.Join(regexp.MustCompile(`\s+|\t+`).Split(lines, -1), " ")
}
12 changes: 12 additions & 0 deletions gen/output_template.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ package {{ .PackageName }}
return schema{{ .Name }}
}

{{- if .SchemaMetadata }}
// Subject returns the schema registry subject for {{ .Name }}.
func (o *{{ .Name }}) Subject() string {
return "{{ .SchemaMetadata.Subject }}"
}

// Version returns the schema registry version for {{ .Name }}.
func (o *{{ .Name }}) Version() int {
return {{ .SchemaMetadata.Version }}
}
{{- end }}

// Unmarshal decodes b into the receiver.
func (o *{{ .Name }}) Unmarshal(b []byte) error {
return avro.Unmarshal(o.Schema(), b, o)
Expand Down
10 changes: 10 additions & 0 deletions gen/testdata/golden_encoders.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions gen/testdata/golden_encoders_fullschema.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.