Skip to content

Commit

Permalink
Track Jemalloc external fragmentation to bound the cache
Browse files Browse the repository at this point in the history
Summary:
Currently, size-awareness does not track Jemalloc external fragmentation, and severe external fragmentation can potentially cause OOM issues.

In this diff, we utilize two Jemalloc counters `stats.allocated` and `stats.active` to track the external fragmentation:
```
external fragmentation = stats.active - stats.allocated.
```
The total active object size will then be:
```
totalActiveObjSize = totalAllocatedObjSize / stats.allocated * stats.active
```
This diff also adds two Jemalloc counters:
- `objcache.jemalloc_active_bytes`
- `objcache.jemalloc_allocated_bytes`

Reviewed By: therealgymmy, jaesoo-fb

Differential Revision: D45460371

fbshipit-source-id: 71b5bc261581d9cc0c2f914237aa3766bf54ff12
  • Loading branch information
Jiayue Bao authored and facebook-github-bot committed May 8, 2023
1 parent b2785af commit 94907cc
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cachelib/experimental/objcache2/ObjectCache-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ void ObjectCache<AllocatorT>::getObjectCacheCounters(
visitor("objcache.evictions", evictions_.get(),
util::CounterVisitor::CounterType::RATE);
visitor("objcache.object_size_bytes", getTotalObjectSize());
if (sizeController_) {
sizeController_->getCounters(visitor);
}
}

template <typename AllocatorT>
Expand Down
19 changes: 19 additions & 0 deletions cachelib/experimental/objcache2/ObjectCacheConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ struct ObjectCacheConfig {
SerializeCb serializeCallback,
DeserializeCb deserializeCallback);

// Enable tracking Jemalloc external fragmentation.
ObjectCacheConfig& enableFragmentationTracking();

ObjectCacheConfig& setItemReaperInterval(std::chrono::milliseconds interval);

ObjectCacheConfig& setEvictionPolicyConfig(
Expand Down Expand Up @@ -164,6 +167,10 @@ struct ObjectCacheConfig {
// If this is enabled, user has to pass the object size upon insertion
bool objectSizeTrackingEnabled{false};

// If this is enabled, we will track Jemalloc external fragmentation and add
// the fragmentation bytes on top of total object size to bound the cache
bool fragmentationTrackingEnabled{false};

// Period to fire size controller in milliseconds. 0 means size controller is
// disabled.
int sizeControllerIntervalMs{0};
Expand Down Expand Up @@ -278,6 +285,12 @@ ObjectCacheConfig<T>& ObjectCacheConfig<T>::setSizeControllerThrottlerConfig(
return *this;
}

// Switches on the fragmentationTrackingEnabled flag. The config object itself
// is returned so that further setter calls can be chained onto this one.
template <typename T>
ObjectCacheConfig<T>& ObjectCacheConfig<T>::enableFragmentationTracking() {
  this->fragmentationTrackingEnabled = true;
  return *this;
}

template <typename T>
ObjectCacheConfig<T>& ObjectCacheConfig<T>::setEventTracker(
EventTrackerSharedPtr&& ptr) {
Expand Down Expand Up @@ -394,6 +407,12 @@ const ObjectCacheConfig<T>& ObjectCacheConfig<T>::validate() const {
"Only one of sizeControllerIntervalMs and cacheSizeLimit is set");
}
}

if (fragmentationTrackingEnabled && !objectSizeTrackingEnabled) {
throw std::invalid_argument(
"Object size tracking has to be enabled to have fragmentation "
"tracking");
}
return *this;
}

Expand Down
21 changes: 21 additions & 0 deletions cachelib/experimental/objcache2/ObjectCacheSizeController-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,16 @@ void ObjectCacheSizeController<AllocatorT>::work() {
return;
}
auto totalObjSize = objCache_.getTotalObjectSize();
if (objCache_.config_.fragmentationTrackingEnabled &&
folly::usingJEMalloc()) {
auto [jemallocAllocatedBytes, jemallocActiveBytes] =
trackJemallocMemStats();
// proportionally add Jemalloc external fragmentation bytes (i.e.
// jemallocActiveBytes - jemallocAllocatedBytes)
totalObjSize = static_cast<size_t>(
1.0 * totalObjSize / jemallocAllocatedBytes * jemallocActiveBytes);
}

// Do the calculation only when total object size or total object number
// achieves the threshold. This is to avoid unreliable calculation of average
// object size when the cache is new and only has a few objects.
Expand Down Expand Up @@ -107,6 +117,17 @@ void ObjectCacheSizeController<AllocatorT>::expandCacheByEntriesNum(
entries, before, objCache_.getNumPlaceholders(), currentEntriesLimit_);
}

// Exports the Jemalloc memory counters through `visitor`.
//
// Nothing is reported unless folly::usingJEMalloc() returns true.
//
// @param visitor  callback invoked once per counter with (name, value)
template <typename AllocatorT>
void ObjectCacheSizeController<AllocatorT>::getCounters(
    const util::CounterVisitor& visitor) const {
  if (!folly::usingJEMalloc()) {
    return;
  }

  // trackJemallocMemStats() yields {allocated bytes, active bytes}.
  const auto memStats = trackJemallocMemStats();
  visitor("objcache.jemalloc_active_bytes", memStats.second);
  visitor("objcache.jemalloc_allocated_bytes", memStats.first);
}

template <typename AllocatorT>
ObjectCacheSizeController<AllocatorT>::ObjectCacheSizeController(
ObjectCache& objCache, const util::Throttler::Config& throttlerConfig)
Expand Down
16 changes: 16 additions & 0 deletions cachelib/experimental/objcache2/ObjectCacheSizeController.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

#pragma once

#include <cstdint>

#include <folly/memory/Malloc.h>

#include "cachelib/common/PeriodicWorker.h"

namespace facebook {
Expand All @@ -35,12 +37,26 @@ class ObjectCacheSizeController : public PeriodicWorker {
return currentEntriesLimit_.load(std::memory_order_relaxed);
}

void getCounters(const util::CounterVisitor& visitor) const;

private:
void work() override final;

void shrinkCacheByEntriesNum(size_t entries);
void expandCacheByEntriesNum(size_t entries);

// Reads Jemalloc's global memory statistics via mallctl().
//
// @return  {stats.allocated, stats.active}, both in bytes. If either counter
//          cannot be read, {0, 0} is returned instead of indeterminate
//          values.
//
// NOTE(review): callers that divide by the allocated byte count should guard
// against a zero value.
std::pair<size_t, size_t> trackJemallocMemStats() const {
  // Jemalloc serves statistics from a cached snapshot; writing to "epoch"
  // refreshes it. The control is documented as uint64_t, so use that exact
  // type rather than size_t (which differs in width on 32-bit targets).
  uint64_t epoch = 1;
  // Best effort: if the refresh fails the counters below are merely stale,
  // so the result is deliberately ignored.
  (void)mallctl("epoch", nullptr, nullptr, &epoch, sizeof(epoch));

  size_t jemallocAllocatedBytes = 0;
  size_t sz = sizeof(jemallocAllocatedBytes);
  if (mallctl("stats.allocated", &jemallocAllocatedBytes, &sz, nullptr, 0) !=
      0) {
    return {0, 0};
  }

  size_t jemallocActiveBytes = 0;
  // mallctl() may overwrite the length argument, so reset it before reuse.
  sz = sizeof(jemallocActiveBytes);
  if (mallctl("stats.active", &jemallocActiveBytes, &sz, nullptr, 0) != 0) {
    return {0, 0};
  }

  return {jemallocAllocatedBytes, jemallocActiveBytes};
}

// threshold in percentage to determine whether the size-controller should do
// the calculation
const size_t kSizeControllerThresholdPct = 50;
Expand Down

0 comments on commit 94907cc

Please sign in to comment.