-
Notifications
You must be signed in to change notification settings - Fork 979
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Apply buckets optimization #4634
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -120,7 +120,8 @@ template <class BucketT> | |||||
DiskIndex<BucketT>::DiskIndex(BucketManager& bm, | ||||||
std::filesystem::path const& filename, | ||||||
std::streamoff pageSize, Hash const& hash, | ||||||
asio::io_context& ctx) | ||||||
asio::io_context& ctx, | ||||||
std::optional<SHA256>& hasher) | ||||||
: mBloomLookupMeter(bm.getBloomLookupMeter<BucketT>()) | ||||||
, mBloomMissMeter(bm.getBloomMissMeter<BucketT>()) | ||||||
{ | ||||||
|
@@ -147,8 +148,9 @@ DiskIndex<BucketT>::DiskIndex(BucketManager& bm, | |||||
|
||||||
std::vector<uint64_t> keyHashes; | ||||||
auto seed = shortHash::getShortHashInitKey(); | ||||||
SHA256* hasherPtr = hasher.has_value() ? &hasher.value() : nullptr; | ||||||
|
||||||
while (in && in.readOne(be)) | ||||||
while (in && in.readOne(be, hasherPtr)) | ||||||
{ | ||||||
// periodically check if bucket manager is exiting to stop indexing | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// gracefully | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,7 +50,8 @@ InMemoryBucketState::scan(IterT start, LedgerKey const& searchKey) const | |
} | ||
|
||
InMemoryIndex::InMemoryIndex(BucketManager const& bm, | ||
std::filesystem::path const& filename) | ||
std::filesystem::path const& filename, | ||
std::optional<SHA256>& hasher) | ||
{ | ||
XDRInputFileStream in; | ||
in.open(filename.string()); | ||
|
@@ -59,8 +60,9 @@ InMemoryIndex::InMemoryIndex(BucketManager const& bm, | |
std::streamoff lastOffset = 0; | ||
std::optional<std::streamoff> firstOffer; | ||
std::optional<std::streamoff> lastOffer; | ||
SHA256* hasherPtr = hasher.has_value() ? &hasher.value() : nullptr; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: Why not pass the pointer all around? Pointer is still the best we have now for |
||
|
||
while (in && in.readOne(be)) | ||
while (in && in.readOne(be, hasherPtr)) | ||
{ | ||
if (++iter >= 1000) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
#include "bucket/BucketManager.h" | ||
#include "bucket/DiskIndex.h" | ||
#include "bucket/LiveBucket.h" | ||
#include "crypto/SHA.h" | ||
#include "util/Fs.h" | ||
#include "util/Logging.h" | ||
#include "util/UnorderedSet.h" | ||
|
@@ -86,9 +87,10 @@ IndexBucketsWork::IndexWork::postWork() | |
if (!self->mIndex) | ||
{ | ||
// TODO: Fix this when archive BucketLists assume state | ||
self->mIndex = | ||
createIndex<LiveBucket>(bm, self->mBucket->getFilename(), | ||
self->mBucket->getHash(), ctx); | ||
std::optional<SHA256> empty{}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not pass |
||
self->mIndex = createIndex<LiveBucket>( | ||
bm, self->mBucket->getFilename(), self->mBucket->getHash(), | ||
ctx, empty); | ||
} | ||
|
||
app.postOnMainThread( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
#include "historywork/VerifyBucketWork.h" | ||
#include "bucket/LiveBucketIndex.h" | ||
#include "crypto/Hex.h" | ||
#include "crypto/SHA.h" | ||
#include "main/Application.h" | ||
|
@@ -20,13 +21,13 @@ | |
namespace stellar | ||
{ | ||
|
||
VerifyBucketWork::VerifyBucketWork(Application& app, | ||
std::string const& bucketFile, | ||
uint256 const& hash, | ||
OnFailureCallback failureCb) | ||
VerifyBucketWork::VerifyBucketWork( | ||
Application& app, std::string const& bucketFile, uint256 const& hash, | ||
std::unique_ptr<LiveBucketIndex const>& index, OnFailureCallback failureCb) | ||
: BasicWork(app, "verify-bucket-hash-" + bucketFile, BasicWork::RETRY_NEVER) | ||
, mBucketFile(bucketFile) | ||
, mHash(hash) | ||
, mIndex(index) | ||
, mOnFailure(failureCb) | ||
{ | ||
} | ||
|
@@ -57,8 +58,8 @@ VerifyBucketWork::spawnVerifier() | |
std::weak_ptr<VerifyBucketWork> weak( | ||
std::static_pointer_cast<VerifyBucketWork>(shared_from_this())); | ||
app.postOnBackgroundThread( | ||
[&app, filename, weak, hash]() { | ||
SHA256 hasher; | ||
[&app, filename, weak, hash, &index = mIndex]() { | ||
auto hasher = std::make_optional<SHA256>(); | ||
asio::error_code ec; | ||
|
||
// No point in verifying buckets if things are shutting down | ||
|
@@ -71,24 +72,15 @@ VerifyBucketWork::spawnVerifier() | |
try | ||
{ | ||
ZoneNamedN(verifyZone, "bucket verify", true); | ||
CLOG_INFO(History, "Verifying bucket {}", binToHex(hash)); | ||
CLOG_INFO(History, "Verifying and indexing bucket {}", | ||
binToHex(hash)); | ||
|
||
// ensure that the stream gets its own scope to avoid race with | ||
// main thread | ||
std::ifstream in(filename, std::ifstream::binary); | ||
if (!in) | ||
{ | ||
throw std::runtime_error(fmt::format( | ||
FMT_STRING("Error opening file {}"), filename)); | ||
} | ||
in.exceptions(std::ios::badbit); | ||
char buf[4096]; | ||
while (in) | ||
{ | ||
in.read(buf, sizeof(buf)); | ||
hasher.add(ByteSlice(buf, in.gcount())); | ||
} | ||
uint256 vHash = hasher.finish(); | ||
index = createIndex<LiveBucket>( | ||
app.getBucketManager(), filename, hash, | ||
app.getWorkerIOContext(), hasher); | ||
releaseAssertOrThrow(index); | ||
|
||
uint256 vHash = hasher->finish(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nitpick: Assert |
||
if (vHash == hash) | ||
{ | ||
CLOG_DEBUG(History, "Verified hash ({}) for {}", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a little confused about this. It looks like `VerifyBucketWork` stores a reference to a unique pointer and this map exists to ensure that by the time that task runs, the pointee hasn't been deallocated due to this function going out of scope. Why not have `VerifyBucketWork` take ownership of the pointer completely and get rid of this map?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1, this seems sketchy