Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix scvals to print in json string #305

Merged
merged 14 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
738 changes: 738 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
[workspace]
resolver = "2"
members = [
"lib/ffi",
"lib/xdr2json"
]

[workspace.package]
rust-version = "1.79.0"

[workspace.dependencies.soroban-env-host-prev]
package = "soroban-env-host"
version = "=21.2.1"

[workspace.dependencies.soroban-env-host-curr]
package = "soroban-env-host"
version = "=22.0.0"
#git = "https://github.com/stellar/rs-soroban-env"
#rev = "0497816694bef2b103494c8c61b7c8a06a72c7d3"

[workspace.dependencies.soroban-simulation-prev]
package = "soroban-simulation"
version = "=21.2.1"

[workspace.dependencies.soroban-simulation-curr]
package = "soroban-simulation"
version = "=22.0.0"
# git = "https://github.com/stellar/rs-soroban-env"
# rev = "0497816694bef2b103494c8c61b7c8a06a72c7d3"

[workspace.dependencies.stellar-xdr]
version = "=22.0.0"
features = [ "serde" ]

[workspace.dependencies]
base64 = "0.22.0"
sha2 = "0.10.7"
libc = "0.2.147"
anyhow = "1.0.75"
rand = { version = "0.8.5", features = [] }
serde_json = "1.0"

[profile.release-with-panic-unwind]
inherits = 'release'
panic = 'unwind'
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ BUILD_DATE := $(shell date -u +%FT%TZ)
DEFAULT_ETLHASH := stellar/stellar-etl:$(shell git rev-parse --short=9 HEAD)
ETLHASH ?= $(DEFAULT_ETLHASH)

# Build rust xdr2json
CARGO_BUILD_TARGET ?= $(shell rustc -vV | sed -n 's|host: ||p')

# update the Cargo.lock every time the Cargo.toml changes.
Cargo.lock: Cargo.toml
cargo update --workspace

build-libs: Cargo.lock
cd lib/xdr2json && \
cargo build --target $(CARGO_BUILD_TARGET) --profile release-with-panic-unwind

docker-build:
chowbao marked this conversation as resolved.
Show resolved Hide resolved
$(SUDO) docker build --platform linux/amd64 --pull --no-cache --label org.opencontainers.image.created="$(BUILD_DATE)" \
-t $(ETLHASH) -t stellar/stellar-etl:latest -f ./docker/Dockerfile .
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ require (
github.com/spf13/cobra v1.7.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.17.0
github.com/stellar/go v0.0.0-20240905180041-acfaa0686213
github.com/stellar/go v0.0.0-20240924182550-69667b25baf4
github.com/stretchr/testify v1.9.0
github.com/xitongsys/parquet-go v1.6.2
github.com/xitongsys/parquet-go-source v0.0.0-20240122235623-d6294584ab18
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -641,8 +641,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.17.0 h1:I5txKw7MJasPL/BrfkbA0Jyo/oELqVmux4pR/UxOMfI=
github.com/spf13/viper v1.17.0/go.mod h1:BmMMMLQXSbcHK6KAOiFLz0l5JHrU89OdIRHvsk0+yVI=
github.com/stellar/go v0.0.0-20240905180041-acfaa0686213 h1:224VUCwV1xmmeTru1zCmTHxvi2RECoHdfdWgd9ni518=
github.com/stellar/go v0.0.0-20240905180041-acfaa0686213/go.mod h1:rrFK7a8i2h9xad9HTfnSN/dTNEqXVHKAbkFeR7UxAgs=
github.com/stellar/go v0.0.0-20240924182550-69667b25baf4 h1:Kd4ivg3hCG8AfFQpxjUjhEXKc40Ux+piUWL03dBB/sw=
github.com/stellar/go v0.0.0-20240924182550-69667b25baf4/go.mod h1:rrFK7a8i2h9xad9HTfnSN/dTNEqXVHKAbkFeR7UxAgs=
github.com/stellar/go-xdr v0.0.0-20231122183749-b53fb00bcac2 h1:OzCVd0SV5qE3ZcDeSFCmOWLZfEWZ3Oe8KtmSOYKEVWE=
github.com/stellar/go-xdr v0.0.0-20231122183749-b53fb00bcac2/go.mod h1:yoxyU/M8nl9LKeWIoBrbDPQ7Cy+4jxRcWcOayZ4BMps=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
Expand Down
10 changes: 8 additions & 2 deletions internal/transform/contract_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,14 @@ func (t *TransformContractDataStruct) TransformContractData(ledgerChange ingest.

ledgerSequence := header.Header.LedgerSeq

outputKey, outputKeyDecoded := serializeScVal(contractData.Key)
outputVal, outputValDecoded := serializeScVal(contractData.Val)
outputKey, outputKeyDecoded, err := serializeScVal(contractData.Key)
if err != nil {
return ContractDataOutput{}, err, false
}
outputVal, outputValDecoded, err := serializeScVal(contractData.Val)
if err != nil {
return ContractDataOutput{}, err, false
}

outputContractDataXDR, err := xdr.MarshalBase64(contractData)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions internal/transform/contract_data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func makeContractDataTestOutput() []ContractDataOutput {

keyDecoded := map[string]string{
"type": "Instance",
"value": "0000000000000000000000000000000000000000000000000000000000000000: [{a a}]",
"value": "{\"Instance\":{\"Executable\":{\"Type\":0,\"WasmHash\":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},\"Storage\":[{\"Key\":{\"Str\":\"a\",\"Type\":14},\"Val\":{\"Str\":\"a\",\"Type\":14}}]},\"Type\":19}",
}

val := map[string]string{
Expand All @@ -145,7 +145,7 @@ func makeContractDataTestOutput() []ContractDataOutput {

valDecoded := map[string]string{
"type": "B",
"value": "true",
"value": "{\"B\":true,\"Type\":0}",
}

return []ContractDataOutput{
Expand Down
42 changes: 32 additions & 10 deletions internal/transform/contract_events.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package transform

import (
"encoding/base64"
"encoding/json"
"fmt"

"github.com/stellar/stellar-etl/internal/toid"
"github.com/stellar/stellar-etl/internal/utils"
"github.com/stellar/stellar-etl/internal/xdr2json"

"github.com/stellar/go/ingest"
"github.com/stellar/go/strkey"
Expand Down Expand Up @@ -48,12 +50,18 @@ func TransformContractEvent(transaction ingest.LedgerTransaction, lhe xdr.Ledger
outputTypeString := event.Type.String()

eventTopics := getEventTopics(event.Body)
outputTopics, outputTopicsDecoded := serializeScValArray(eventTopics)
outputTopics, outputTopicsDecoded, err := serializeScValArray(eventTopics)
if err != nil {
return []ContractEventOutput{}, err
}
outputTopicsJson["topics"] = outputTopics
outputTopicsDecodedJson["topics_decoded"] = outputTopicsDecoded

eventData := getEventData(event.Body)
outputData, outputDataDecoded := serializeScVal(eventData)
outputData, outputDataDecoded, err := serializeScVal(eventData)
if err != nil {
return []ContractEventOutput{}, err
}

// Convert the xdrContactId to string
// TODO: https://stellarorg.atlassian.net/browse/HUBBLE-386 this should be a stellar/go/xdr function
Expand Down Expand Up @@ -117,7 +125,7 @@ func getEventData(eventBody xdr.ContractEventBody) xdr.ScVal {
}

// TODO this should also be used in the operations processor
func serializeScVal(scVal xdr.ScVal) (map[string]string, map[string]string) {
func serializeScVal(scVal xdr.ScVal) (map[string]string, map[string]string, error) {
serializedData := map[string]string{}
serializedData["value"] = "n/a"
serializedData["type"] = "n/a"
Expand All @@ -127,27 +135,41 @@ func serializeScVal(scVal xdr.ScVal) (map[string]string, map[string]string) {
serializedDataDecoded["type"] = "n/a"

if scValTypeName, ok := scVal.ArmForSwitch(int32(scVal.Type)); ok {
var err error
var raw []byte
var jsonMessage json.RawMessage
serializedData["type"] = scValTypeName
serializedDataDecoded["type"] = scValTypeName
if raw, err := scVal.MarshalBinary(); err == nil {
serializedData["value"] = base64.StdEncoding.EncodeToString(raw)
serializedDataDecoded["value"] = scVal.String()
raw, err = scVal.MarshalBinary()
if err != nil {
return nil, nil, err
}

serializedData["value"] = base64.StdEncoding.EncodeToString(raw)
jsonMessage, err = xdr2json.ConvertBytes(xdr.ScVal{}, raw)
if err != nil {
return nil, nil, err
}

serializedDataDecoded["value"] = string(jsonMessage)
Copy link
Contributor Author

@chowbao chowbao Jan 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the xdr2json. It works pretty well and will now output like

{
  "type": "Instance",
  "value": "{\"contract_instance\":{\"executable\":\"stellar_asset\",\"storage\":[{\"key\":{\"symbol\":\"METADATA\"},\"val\":{\"map\":[{\"key\":{\"symbol\":\"decimal\"},\"val\":{\"u32\":7}},{\"key\":{\"symbol\":\"name\"},\"val\":{\"string\":\"EURMTL:GACKTN5DAZGWXRWB2WLM6OPBDHAMT6SJNGLJZPQMEZBUR4JUGBX2UK7V\"}},{\"key\":{\"symbol\":\"symbol\"},\"val\":{\"string\":\"EURMTL\"}}]}},{\"key\":{\"vec\":[{\"symbol\":\"Admin\"}]},\"val\":{\"address\":\"GACKTN5DAZGWXRWB2WLM6OPBDHAMT6SJNGLJZPQMEZBUR4JUGBX2UK7V\"}},{\"key\":{\"vec\":[{\"symbol\":\"AssetInfo\"}]},\"val\":{\"vec\":[{\"symbol\":\"AlphaNum12\"},{\"map\":[{\"key\":{\"symbol\":\"asset_code\"},\"val\":{\"string\":\"EURMTL\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\"}},{\"key\":{\"symbol\":\"issuer\"},\"val\":{\"bytes\":\"04a9b7a3064d6bc6c1d596cf39e119c0c9fa4969969cbe0c264348f134306faa\"}}]}]}}]}}"
}

How does this look now @sydneynotthecity @leighmcculloch

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah i like this better than cli. i don't like that we've copied over xdr2json and think that that should be published as its own package but that's outside the scope of this work.

do you think there is merit in us using xdr2json to parse operation details in the future? this looks much simpler to parse vs what we have to do today

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it depends. For the nested params (also ScVals) for invoke host function, yes, this makes sense. For some of the other operations where we do some math or add extra non-xdr labels, maybe 🤷‍♀️
I think we could technically redesign the operations.Details field as a whole too.

Definitely worth a spike though

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does the value have two layers of JSON? The outer layer doesn't look like the canonical xdr-json format.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The outer layer is not. There are a few things that need to be reworked (I mentioned it should be put back in draft).

I think it's debatable whether to remove that outer layer.

Pros with the extra Type/Value outer layer

  • Gives a fast/easy way for big query to filter by Type (e.g., I'm only looking for ScMaps)
  • The value in the outer layer Type is equivalent to ArmForSwitch(ScVal.Type) strings rather than the string names from xdr2json (e.g., Instance vs contract_instance). I don't know if there is an existing list of the type names like in the xdr const or ArmForSwitch()
  • iirc the JSON type in BQ is weird and can't pass the xdr2json json directly to the JSON type column in BQ. This is why we have like a BQ column named topics_decoded but the JSON inside also starts with topics_decoded meaning to access it you have to select topics_decoded.topics_decoded.<the json field you want>

Cons

  • Technically the JSON by itself will probably be good enough for anything the data team or anyone else wants to do.
  • 1 less column means storage/cost savings in BQ

One thing I should do: I think we can save it as JSON instead of string(json). This will depend on how many schema changes are needed and whether it's okay to change to a generic interface{}.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI the json isn't stable between protocol versions. I mean it won't change unnecessarily, but xdr structure can change in ways that is binary backwards compatible but not structurally backwards compatible.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can't pass the xdr2json json directly to the JSON type column in BQ

Would be helpful to get some feedback about the structure of the json and what you'd change if anything.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI the json isn't stable between protocol versions. I mean it won't change unnecessarily, but xdr structure can change in ways that is binary backwards compatible but not structurally backwards compatible.

That's unfortunate but makes sense (aka unified event changes)

Yup I'll revise and update with the proposed structure on Monday for y'all's review. I'll add some examples SQL queries to show what it'll look like in BQ as well

Copy link
Contributor Author

@chowbao chowbao Jan 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking and playing with the ScVal column in BQ I think it makes sense to just make everything a generic interface (use whatever xdr2json spits out). The main reason I liked the outer layer was for easier filtering BUT I think that filtering ScVal types will be rare but still possible in BQ. And if it is a pain this change could be added downstream (dbt model for enriched contract events) keeping the history_contract_events table at its rawest form.

Another benefit is the human names for the ScVal types from xdr2json are nicer than the ArmForSwitch. Not sure why they diverged but I would say it'd be easy enough to make a new map/const/enum for the xdr2json names if needed (or I wonder if we can update ArmForSwitch).

Example traversing column in BQ is still the same except with the xdr2json names

select
  val_decoded.contract_instance.storage[0].val.map.symbol
from history_contract_events

Example filtering in BQ

select
  val_decoded
from history_contract_events
where true
  -- filter for only contract_instance isn't that difficult but less straightforward
  and json_extract(val_decoded, '$.contract_instance') is not null

Original filtering in BQ

select
  val_decoded
where true
  -- ArmForSwitch name for Type
  and val_decoded.Type in ("Instance")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why they diverged

There's a few things that the Rust xdr lib does that we didn't implement in other xdr libs:

  • Arm names are reduced to their unique text, so if they have a long prefix shared among them, that prefix disappears. Unfortunately this doesn't work for single value enums 😅
  • Anything representable by a strkey is rendered as the strkey instead of the structure it contains.
  • Asset codes are rendered as escaped ascii instead of hex, base64, or hope-for-the-best utf8.

}

return serializedData, serializedDataDecoded
return serializedData, serializedDataDecoded, nil
}

// TODO this should also be used in the operations processor
func serializeScValArray(scVals []xdr.ScVal) ([]map[string]string, []map[string]string) {
func serializeScValArray(scVals []xdr.ScVal) ([]map[string]string, []map[string]string, error) {
data := make([]map[string]string, 0, len(scVals))
dataDecoded := make([]map[string]string, 0, len(scVals))

for _, scVal := range scVals {
serializedData, serializedDataDecoded := serializeScVal(scVal)
serializedData, serializedDataDecoded, err := serializeScVal(scVal)
if err != nil {
return nil, nil, err
}
data = append(data, serializedData)
dataDecoded = append(dataDecoded, serializedDataDecoded)
}

return data, dataDecoded
return data, dataDecoded, nil
}
4 changes: 2 additions & 2 deletions internal/transform/contract_events_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func makeContractEventTestOutput() (output [][]ContractEventOutput, err error) {
topicsDecoded["topics_decoded"] = []map[string]string{
{
"type": "B",
"value": "true",
"value": "{\"B\":true,\"Type\":0}",
chowbao marked this conversation as resolved.
Show resolved Hide resolved
},
}

Expand All @@ -68,7 +68,7 @@ func makeContractEventTestOutput() (output [][]ContractEventOutput, err error) {

dataDecoded := map[string]string{
"type": "B",
"value": "true",
"value": "{\"B\":true,\"Type\":0}",
}

output = [][]ContractEventOutput{{
Expand Down
90 changes: 90 additions & 0 deletions internal/xdr2json/conversion.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//nolint:lll
package xdr2json

/*
// See preflight.go for add'l explanations:
// Note: no blank lines allowed.
#include <stdlib.h>
#include "../../lib/xdr2json.h"
#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/../../target/x86_64-pc-windows-gnu/release-with-panic-unwind/ -lxdr2json -lntdll -static -lws2_32 -lbcrypt -luserenv
#cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/../../target/x86_64-apple-darwin/release-with-panic-unwind/ -lxdr2json -ldl -lm
#cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/../../target/aarch64-apple-darwin/release-with-panic-unwind/ -lxdr2json -ldl -lm
#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/../../target/x86_64-unknown-linux-gnu/release-with-panic-unwind/ -lxdr2json -ldl -lm
#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/../../target/aarch64-unknown-linux-gnu/release-with-panic-unwind/ -lxdr2json -ldl -lm
*/
import "C"

import (
"encoding"
"encoding/json"
"reflect"
"unsafe"

"github.com/pkg/errors"
)

// ConvertBytes takes an XDR object (`xdr`) and its serialized bytes (`field`)
// and returns the raw JSON-formatted serialization of that object.
// It can be unmarshalled to a proper JSON structure, but the raw bytes are
// returned to avoid unnecessary round-trips. If there is an
// error, it returns an empty string.
//
// The `xdr` object does not need to actually be initialized/valid:
// we only use it to determine the name of the structure. We could just
// accept a string, but that would make mistakes likelier than passing the
// structure itself (by reference).
func ConvertBytes(xdr interface{}, field []byte) (json.RawMessage, error) {
if len(field) == 0 {
return []byte(""), nil
}

xdrTypeName := reflect.TypeOf(xdr).Name()
return convertAnyBytes(xdrTypeName, field)
}

// ConvertInterface takes a valid XDR object (`xdr`) and returns
// the raw JSON-formatted serialization of that object. If the object cannot
// be serialized, an empty message and a wrapped error are returned.
//
// Unlike `ConvertBytes`, the value here needs to be valid and
// serializable. Both values and pointers are accepted: pointers are
// dereferenced when resolving the type name, because reflect reports an empty
// name for pointer types. A nil `xdr` yields an error rather than a panic.
func ConvertInterface(xdr encoding.BinaryMarshaler) (json.RawMessage, error) {
	xdrType := reflect.TypeOf(xdr)
	if xdrType == nil {
		// A nil interface has no dynamic type and nothing to marshal.
		return []byte(""), errors.New("cannot serialize a nil XDR value")
	}
	// Unwrap pointers so that &T{} resolves to the same name as T{}.
	for xdrType.Kind() == reflect.Ptr {
		xdrType = xdrType.Elem()
	}
	xdrTypeName := xdrType.Name()

	data, err := xdr.MarshalBinary()
	if err != nil {
		return []byte(""), errors.Wrapf(err, "failed to serialize XDR type '%s'", xdrTypeName)
	}

	return convertAnyBytes(xdrTypeName, data)
}

// convertAnyBytes hands the XDR type name and the serialized bytes to the
// native xdr_to_json routine and copies its JSON and error strings back into
// Go memory.
//
// Every C allocation made here is released before returning: the type-name
// string, the copy of the XDR payload made by CXDR, and the conversion result.
// If the native side reports a non-empty error string it is returned as an
// error, together with whatever JSON text was produced.
func convertAnyBytes(xdrTypeName string, field []byte) (json.RawMessage, error) {
	var jsonStr, errStr string
	// scope just added to show matching alloc/frees
	{
		goRawXdr := CXDR(field)
		b := C.CString(xdrTypeName)

		result := C.xdr_to_json(b, goRawXdr)
		C.free(unsafe.Pointer(b))
		// CXDR copies the payload with C.CBytes (malloc); the Go garbage
		// collector never reclaims that buffer, so free it explicitly.
		// NOTE(review): assumes xdr_to_json copies its input rather than
		// taking ownership of the buffer - confirm against the Rust side.
		C.free(unsafe.Pointer(goRawXdr.xdr))

		// C.GoString copies, so the result can be freed right afterwards.
		jsonStr = C.GoString(result.json)
		errStr = C.GoString(result.error)

		C.free_conversion_result(result)
	}

	if errStr != "" {
		return json.RawMessage(jsonStr), errors.New(errStr)
	}

	return json.RawMessage(jsonStr), nil
}

// CXDR is ripped directly from preflight.go to avoid a dependency.
//
// It copies the given bytes into a buffer obtained from C.CBytes and pairs
// that buffer with its length in a C.xdr_t. Per the cgo documentation the
// buffer is malloc-allocated, so the caller is responsible for freeing the
// `xdr` field once the C side is done with it.
func CXDR(xdr []byte) C.xdr_t {
	var wrapped C.xdr_t
	wrapped.xdr = (*C.uchar)(C.CBytes(xdr))
	wrapped.len = C.size_t(len(xdr))
	return wrapped
}
47 changes: 47 additions & 0 deletions internal/xdr2json/conversion_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package xdr2json

import (
"encoding/json"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/stellar/go/keypair"
"github.com/stellar/go/xdr"
)

// TestConversion encodes a randomly issued credit asset through both
// ConvertInterface and ConvertBytes and checks that each JSON result exposes
// the asset under "credit_alphanum4" with the expected issuer address.
func TestConversion(t *testing.T) {
	// Build the structure that will be encoded.
	kp := keypair.MustRandom()
	creditAsset := xdr.MustNewCreditAsset("ABCD", kp.Address())

	// Path 1: let the converter marshal the value itself.
	fromInterface, err := ConvertInterface(creditAsset)
	require.NoError(t, err)

	// Path 2: marshal up front and pass the bytes plus a type witness.
	encoded, err := creditAsset.MarshalBinary()
	require.NoError(t, err)
	fromBytes, err := ConvertBytes(xdr.Asset{}, encoded)
	require.NoError(t, err)

	outputs := []json.RawMessage{fromInterface, fromBytes}
	for i := range outputs {
		var decoded map[string]interface{}
		require.NoError(t, json.Unmarshal(outputs[i], &decoded))

		require.Contains(t, decoded, "credit_alphanum4")
		inner := decoded["credit_alphanum4"]
		require.Contains(t, inner, "asset_code")
		require.Contains(t, inner, "issuer")
		require.IsType(t, map[string]interface{}{}, inner)

		fields, ok := inner.(map[string]interface{})
		if assert.True(t, ok) {
			require.Equal(t, kp.Address(), fields["issuer"])
		}
	}
}

// TestEmptyConversion verifies that ConvertBytes turns an empty byte slice
// into an empty JSON message without reporting an error.
func TestEmptyConversion(t *testing.T) {
	noBytes := []byte{}

	converted, err := ConvertBytes(xdr.SorobanTransactionData{}, noBytes)

	require.NoError(t, err)
	require.Equal(t, "", string(converted))
}
11 changes: 11 additions & 0 deletions lib/ffi/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "ffi"
version = "22.0.0"
publish = false
edition = "2021"

[lib]
crate-type = ["lib"]

[dependencies]
libc = { workspace = true }
Loading
Loading