From 5ac38c506200322fa7e2cafcefe697537f9e77f5 Mon Sep 17 00:00:00 2001 From: Aman Mangal Date: Wed, 8 Mar 2023 01:22:38 +0530 Subject: [PATCH] opt(snapshot): use full table copy when streaming the entire data (#7870) When streaming the entire data in the snapshot (snap.SinceTs=0), we can do an entire table copy instead of iterating over the KVs. This brings about a 3x performance improvement and also leaves the CPU of the sender idle. Refer to dgraph-io/badger#1700 for more details. --- go.mod | 2 +- go.sum | 4 ++-- worker/snapshot.go | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index cbf027325fc..e42354ce795 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/Masterminds/semver/v3 v3.1.0 github.com/Shopify/sarama v1.27.2 github.com/blevesearch/bleve v1.0.13 - github.com/dgraph-io/badger/v4 v4.1.0 + github.com/dgraph-io/badger/v4 v4.0.2-0.20230607143725-32230b14274f github.com/dgraph-io/dgo/v230 v230.0.1 github.com/dgraph-io/gqlgen v0.13.2 github.com/dgraph-io/gqlparser/v2 v2.2.1 diff --git a/go.sum b/go.sum index ad45126e357..18838deeb89 100644 --- a/go.sum +++ b/go.sum @@ -145,8 +145,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= -github.com/dgraph-io/badger/v4 v4.1.0 h1:E38jc0f+RATYrycSUf9LMv/t47XAy+3CApyYSq4APOQ= -github.com/dgraph-io/badger/v4 v4.1.0/go.mod h1:P50u28d39ibBRmIJuQC/NSdBOg46HnHw7al2SW5QRHg= +github.com/dgraph-io/badger/v4 v4.0.2-0.20230607143725-32230b14274f h1:WbfxvJEWWFRuQ/rMvV1AVok9A9HjV76KeUnR/lEpHnk= +github.com/dgraph-io/badger/v4 v4.0.2-0.20230607143725-32230b14274f/go.mod h1:P50u28d39ibBRmIJuQC/NSdBOg46HnHw7al2SW5QRHg= github.com/dgraph-io/dgo/v230 v230.0.1 
h1:kR7gI7/ZZv0jtG6dnedNgNOCxe1cbSG8ekF+pNfReks= github.com/dgraph-io/dgo/v230 v230.0.1/go.mod h1:5FerO2h4LPOxR2XTkOAtqUUPaFdQ+5aBOHXPBJ3nT10= github.com/dgraph-io/gqlgen v0.13.2 h1:TNhndk+eHKj5qE7BenKKSYdSIdOGhLqxR1rCiMso9KM= diff --git a/worker/snapshot.go b/worker/snapshot.go index f75130ff730..72b71f2ee40 100644 --- a/worker/snapshot.go +++ b/worker/snapshot.go @@ -210,6 +210,11 @@ func doStreamSnapshot(snap *pb.Snapshot, out pb.Worker_StreamSnapshotServer) err // Use the default implementation. We no longer try to generate a rolled up posting list here. // Instead, we just stream out all the versions as they are. stream.KeyToList = nil + stream.SinceTs = snap.SinceTs + if snap.SinceTs == 0 { + // Do full table copy when streaming the entire data. + stream.FullCopy = true + } stream.Send = func(buf *z.Buffer) error { kvs := &pb.KVS{Data: buf.Bytes()} return out.Send(kvs)