From fa51d44e4cdc43b22af51331f6c49c79b9ec8129 Mon Sep 17 00:00:00 2001 From: Aman Mangal Date: Wed, 8 Mar 2023 01:22:38 +0530 Subject: [PATCH] opt(snapshot): use full table copy when streaming the entire data (#7870) When streaming the entire data in the snapshot (snap.SinceTs=0), we can do an entire table copy instead of iterating over the KVs. This brings about a 3x performance improvement and leaves the sender's CPU idle. Refer to dgraph-io/badger#1700 for more details. --- go.mod | 2 +- go.sum | 4 ++-- worker/snapshot.go | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 65080e1966e..f3be3f213a5 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/Masterminds/semver/v3 v3.1.0 github.com/Shopify/sarama v1.27.2 github.com/blevesearch/bleve v1.0.13 - github.com/dgraph-io/badger/v4 v4.1.0 + github.com/dgraph-io/badger/v4 v4.0.2-0.20230517044213-e39ebb5bc915 github.com/dgraph-io/dgo/v230 v230.0.1-rc1 github.com/dgraph-io/gqlgen v0.13.2 github.com/dgraph-io/gqlparser/v2 v2.2.1 diff --git a/go.sum b/go.sum index 0b34aa04252..8784d7b2cd3 100644 --- a/go.sum +++ b/go.sum @@ -157,8 +157,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= -github.com/dgraph-io/badger/v4 v4.1.0 h1:E38jc0f+RATYrycSUf9LMv/t47XAy+3CApyYSq4APOQ= -github.com/dgraph-io/badger/v4 v4.1.0/go.mod h1:P50u28d39ibBRmIJuQC/NSdBOg46HnHw7al2SW5QRHg= +github.com/dgraph-io/badger/v4 v4.0.2-0.20230517044213-e39ebb5bc915 h1:wRQYXpjlGp2LfQtllJHNtgXJU8Plx/GIEiNCO0dg0dM= +github.com/dgraph-io/badger/v4 v4.0.2-0.20230517044213-e39ebb5bc915/go.mod h1:P50u28d39ibBRmIJuQC/NSdBOg46HnHw7al2SW5QRHg= github.com/dgraph-io/dgo/v230 v230.0.1-rc1 
h1:ppvzfijQQ9o4xQDoODLVTLQrOX1eC/aTSkZ1A71BcxM= github.com/dgraph-io/dgo/v230 v230.0.1-rc1/go.mod h1:5FerO2h4LPOxR2XTkOAtqUUPaFdQ+5aBOHXPBJ3nT10= github.com/dgraph-io/gqlgen v0.13.2 h1:TNhndk+eHKj5qE7BenKKSYdSIdOGhLqxR1rCiMso9KM= diff --git a/worker/snapshot.go b/worker/snapshot.go index f75130ff730..72b71f2ee40 100644 --- a/worker/snapshot.go +++ b/worker/snapshot.go @@ -210,6 +210,11 @@ func doStreamSnapshot(snap *pb.Snapshot, out pb.Worker_StreamSnapshotServer) err // Use the default implementation. We no longer try to generate a rolled up posting list here. // Instead, we just stream out all the versions as they are. stream.KeyToList = nil + stream.SinceTs = snap.SinceTs + if snap.SinceTs == 0 { + // Do full table copy when streaming the entire data. + stream.FullCopy = true + } stream.Send = func(buf *z.Buffer) error { kvs := &pb.KVS{Data: buf.Bytes()} return out.Send(kvs)