Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FAISS with cuVS enabled in cuvs-bench #561

Merged
merged 33 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
689716c
fix
tarang-jain Dec 19, 2024
8e3a02e
updates
tarang-jain Jan 6, 2025
27dda5f
Merge branch 'branch-25.02' of https://github.com/rapidsai/cuvs into …
tarang-jain Jan 6, 2025
10618fb
Merge branch 'branch-25.02' of https://github.com/rapidsai/cuvs into …
tarang-jain Jan 8, 2025
c0ff9ba
more changes
tarang-jain Jan 8, 2025
21f92fd
working
tarang-jain Jan 9, 2025
20e462c
update yaml
tarang-jain Jan 10, 2025
3f86225
Merge branch 'branch-25.02' of https://github.com/rapidsai/cuvs into …
tarang-jain Jan 13, 2025
8d098bd
update configs
tarang-jain Jan 14, 2025
f8beec6
cagra wrapper
tarang-jain Jan 15, 2025
63ecfde
Merge branch 'branch-25.02' into faiss-cuvs
tarang-jain Jan 15, 2025
c37f028
cagra wrapper
tarang-jain Jan 21, 2025
1cb5b73
merge upstream
tarang-jain Jan 22, 2025
61fd247
Merge branch 'branch-25.02' of https://github.com/rapidsai/cuvs into …
tarang-jain Jan 22, 2025
6d8c396
Merge branch 'faiss-cuvs' of https://github.com/tarang-jain/cuvs into…
tarang-jain Jan 22, 2025
c11c317
automatic pool resource management
tarang-jain Jan 22, 2025
25765ed
rm algo-specific in benchmark.hpp
tarang-jain Jan 22, 2025
26fbe30
refine
tarang-jain Jan 22, 2025
508bd5c
Merge branch 'branch-25.02' into faiss-cuvs
tarang-jain Jan 22, 2025
e4d97db
kHost queries
tarang-jain Jan 22, 2025
fed8c3b
raft_log_level
tarang-jain Jan 22, 2025
52eac3b
cleanup
tarang-jain Jan 22, 2025
9066e2d
rm raft::raft
tarang-jain Jan 23, 2025
bc5714f
Merge branch 'branch-25.02' of https://github.com/rapidsai/cuvs into …
tarang-jain Jan 23, 2025
986f476
conditionally link raft::raft
tarang-jain Jan 23, 2025
ddd711a
Merge branch 'branch-25.02' of https://github.com/rapidsai/cuvs into …
tarang-jain Jan 23, 2025
b3511fa
conditionally link raft::raft only for faiss_gpu
tarang-jain Jan 24, 2025
880966a
Merge branch 'branch-25.02' of https://github.com/rapidsai/cuvs into …
tarang-jain Jan 24, 2025
4a82d13
inherit faiss_gpu
tarang-jain Jan 27, 2025
18d1e62
raft::raft in individual targets
tarang-jain Jan 27, 2025
4b07e90
merge upstream
tarang-jain Jan 27, 2025
6cffe02
style
tarang-jain Jan 27, 2025
d288548
Merge branch 'branch-25.02' into faiss-cuvs
tarang-jain Jan 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ function(ConfigureAnnBench)
target_link_libraries(
${BENCH_NAME}
PRIVATE ${ConfigureAnnBench_LINKS}
raft::raft
nlohmann_json::nlohmann_json
Threads::Threads
$<TARGET_NAME_IF_EXISTS:raft::raft_logger>
Expand Down
8 changes: 8 additions & 0 deletions cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,14 @@ void register_search(std::shared_ptr<const dataset<T>> dataset,
->MeasureProcessCPUTime()
->UseRealTime();

if (metric_objective == Mode::kThroughput) {
tarang-jain marked this conversation as resolved.
Show resolved Hide resolved
if (index.algo.find("faiss_gpu") != std::string::npos) {
tarang-jain marked this conversation as resolved.
Show resolved Hide resolved
log_warn(
"FAISS GPU does not work in throughput mode because the underlying "
"StandardGpuResources object is not thread-safe. This might give unexpected results");
}
b->ThreadRange(threads[0], threads[1]);
}
if (metric_objective == Mode::kThroughput) { b->ThreadRange(threads[0], threads[1]); }
tarang-jain marked this conversation as resolved.
Show resolved Hide resolved
}
}
Expand Down
30 changes: 29 additions & 1 deletion cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ void parse_build_param(const nlohmann::json& conf,
typename cuvs::bench::faiss_gpu_ivf_flat<T>::build_param& param)
{
parse_base_build_param<T>(conf, param);
if (conf.contains("use_cuvs")) {
param.use_cuvs = conf.at("use_cuvs");
} else {
param.use_cuvs = false;
}
}

template <typename T>
Expand All @@ -60,6 +65,16 @@ void parse_build_param(const nlohmann::json& conf,
} else {
param.use_float16 = false;
}
if (conf.contains("use_cuvs")) {
param.use_cuvs = conf.at("use_cuvs");
} else {
param.use_cuvs = false;
}
if (conf.contains("bitsPerCode")) {
param.bitsPerCode = conf.at("bitsPerCode");
} else {
param.bitsPerCode = 8;
}
}

template <typename T>
Expand Down Expand Up @@ -138,5 +153,18 @@ REGISTER_ALGO_INSTANCE(std::uint8_t);

#ifdef ANN_BENCH_BUILD_MAIN
#include "../common/benchmark.hpp"
int main(int argc, char** argv) { return cuvs::bench::run_main(argc, argv); }
int main(int argc, char** argv)
{
// Benchmark entry point: installs an RMM pool allocator for the duration of
// the run so repeated device allocations are served from the pool rather
// than individual cudaMalloc/cudaFree calls, then restores the previous
// allocator before exiting.
rmm::mr::cuda_memory_resource cuda_mr;
// Construct a resource that uses a coalescing best-fit pool allocator
// and is initially sized to half of free device memory.
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
&cuda_mr, rmm::percent_of_free_device_memory(50)};
// Updates the current device resource pointer to `pool_mr`; the previous
// resource is returned so it can be reinstated below.
auto old_mr = rmm::mr::set_current_device_resource(&pool_mr);
auto ret = cuvs::bench::run_main(argc, argv);
// Restores the current device resource pointer to its previous value
// (pool_mr is about to go out of scope, so it must not stay current).
rmm::mr::set_current_device_resource(old_mr);
return ret;
}
tarang-jain marked this conversation as resolved.
Show resolved Hide resolved
#endif
99 changes: 82 additions & 17 deletions cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include "../common/ann_types.hpp"
#include "../common/util.hpp"
#include <cuvs/neighbors/refine.hpp>
#include "../cuvs/cuvs_ann_bench_utils.h"

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
Expand All @@ -32,6 +34,10 @@
#include <faiss/index_io.h>
#include <omp.h>

#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/host_mdarray.hpp>

#include <cassert>
#include <iostream>
#include <memory>
Expand All @@ -41,7 +47,7 @@

namespace {

auto parse_metric_type(cuvs::bench::Metric metric) -> faiss::MetricType
auto parse_metric_faiss(cuvs::bench::Metric metric) -> faiss::MetricType
{
if (metric == cuvs::bench::Metric::kInnerProduct) {
return faiss::METRIC_INNER_PRODUCT;
Expand Down Expand Up @@ -93,7 +99,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
faiss_gpu(Metric metric, int dim, const build_param& param)
: algo<T>(metric, dim),
gpu_resource_{std::make_shared<faiss::gpu::StandardGpuResources>()},
metric_type_(parse_metric_type(metric)),
metric_type_(parse_metric_faiss(metric)),
nlist_{param.nlist},
training_sample_fraction_{1.0 / double(param.ratio)}
{
Expand Down Expand Up @@ -160,6 +166,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
int device_;
double training_sample_fraction_;
std::shared_ptr<faiss::SearchParameters> search_params_;
std::shared_ptr<faiss::IndexRefineSearchParameters> refine_search_params_{nullptr};
const T* dataset_;
float refine_ratio_ = 1.0;
};
Expand Down Expand Up @@ -199,19 +206,65 @@ template <typename T>
void faiss_gpu<T>::search(
const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
{
// ASSERT(Mode::kLatency, "l2Knn: rowMajorIndex and rowMajorQuery should have same layout");
using IdxT = faiss::idx_t;
static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
"sizes of size_t and faiss::idx_t are different");

if (this->refine_ratio_ > 1.0) {
// TODO(snanditale): FAISS changed their search APIs to accept the search parameters as a struct
// object but their refine API doesn't allow the struct to be passed in. Once this is fixed, we
// need to re-enable refinement below
// index_refine_->search(batch_size, queries, k, distances,
// reinterpret_cast<faiss::idx_t*>(neighbors), this->search_params_.get()); Related FAISS issue:
// https://github.com/facebookresearch/faiss/issues/3118
throw std::runtime_error(
"FAISS doesn't support refinement in their new APIs so this feature is disabled in the "
"benchmarks for the time being.");
if (refine_ratio_ > 1.0) {
if (raft::get_device_for_address(queries) >= 0) {
uint32_t k0 = static_cast<uint32_t>(refine_ratio_ * k);
auto distances_tmp = raft::make_device_matrix<float, IdxT>(
gpu_resource_->getRaftHandle(device_), batch_size, k0);
auto candidates =
raft::make_device_matrix<IdxT, IdxT>(gpu_resource_->getRaftHandle(device_), batch_size, k0);
index_->search(batch_size,
queries,
k0,
distances_tmp.data_handle(),
candidates.data_handle(),
this->search_params_.get());

auto queries_host = raft::make_host_matrix<T, IdxT>(batch_size, index_->d);
auto candidates_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k0);
auto neighbors_host = raft::make_host_matrix<IdxT, IdxT>(batch_size, k);
auto distances_host = raft::make_host_matrix<float, IdxT>(batch_size, k);
auto dataset_v = raft::make_host_matrix_view<const T, faiss::idx_t>(
this->dataset_, index_->ntotal, index_->d);

raft::device_resources handle_ = gpu_resource_->getRaftHandle(device_);

raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream());
raft::copy(candidates_host.data_handle(),
candidates.data_handle(),
candidates_host.size(),
handle_.get_stream());

// wait for the queries to copy to host in 'stream`
handle_.sync_stream();

cuvs::neighbors::refine(handle_,
dataset_v,
queries_host.view(),
candidates_host.view(),
neighbors_host.view(),
distances_host.view(),
parse_metric_type(this->metric_));

raft::copy(neighbors,
neighbors_host.data_handle(),
neighbors_host.size(),
handle_.get_stream());
raft::copy(
distances, distances_host.data_handle(), distances_host.size(), handle_.get_stream());
} else {
index_refine_->search(batch_size,
queries,
k,
distances,
reinterpret_cast<faiss::idx_t*>(neighbors),
this->refine_search_params_.get());
}
} else {
index_->search(batch_size,
queries,
Expand Down Expand Up @@ -253,15 +306,18 @@ void faiss_gpu<T>::load_(const std::string& file)
template <typename T>
class faiss_gpu_ivf_flat : public faiss_gpu<T> {
public:
using typename faiss_gpu<T>::build_param;
struct build_param : public faiss_gpu<T>::build_param {
bool use_cuvs;
};
using typename faiss_gpu<T>::search_param_base;

faiss_gpu_ivf_flat(Metric metric, int dim, const build_param& param)
: faiss_gpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFFlatConfig config;
config.device = this->device_;
this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
config.device = this->device_;
config.use_cuvs = param.use_cuvs;
this->index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(
this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config);
}

Expand Down Expand Up @@ -298,6 +354,8 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
int m;
bool use_float16;
bool use_precomputed;
bool use_cuvs;
int bitsPerCode;
};
using typename faiss_gpu<T>::search_param_base;

Expand All @@ -307,14 +365,16 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
faiss::gpu::GpuIndexIVFPQConfig config;
config.useFloat16LookupTables = param.use_float16;
config.usePrecomputedTables = param.use_precomputed;
config.device = this->device_;
config.use_cuvs = param.use_cuvs;
if (param.use_cuvs) { config.interleavedLayout = param.use_cuvs; }
config.device = this->device_;

this->index_ =
std::make_shared<faiss::gpu::GpuIndexIVFPQ>(this->gpu_resource_.get(),
dim,
param.nlist,
param.m,
8, // FAISS only supports bitsPerCode=8
param.bitsPerCode,
this->metric_type_,
config);
}
Expand All @@ -334,6 +394,11 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
this->index_refine_ =
std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
this->index_refine_.get()->k_factor = sp.refine_ratio;
faiss::IndexRefineSearchParameters faiss_refine_search_params;
faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor;
faiss_refine_search_params.base_index_params = this->search_params_.get();
this->refine_search_params_ =
std::make_unique<faiss::IndexRefineSearchParameters>(faiss_refine_search_params);
}
}

Expand Down
1 change: 1 addition & 0 deletions cpp/cmake/thirdparty/get_faiss.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ function(find_and_configure_faiss)
EXCLUDE_FROM_ALL ${exclude}
OPTIONS
"FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}"
"FAISS_ENABLE_CUVS ${PKG_ENABLE_GPU}"
"FAISS_ENABLE_PYTHON OFF"
"FAISS_OPT_LEVEL ${CUVS_FAISS_OPT_LEVEL}"
"FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ def faiss_gpu_ivf_pq_build(params, dims):
ret = params["M"] <= dims and dims % params["M"] == 0
if "use_cuvs" in params and params["use_cuvs"]:
return ret
pq_bits = 8
if "bitsPerCode" in params:
pq_bits = params["bitsPerCode"]
pq_bits = params.get("bitsPerCode", 8)
lookup_table_size = 4
if "useFloat16" in params and params["useFloat16"]:
lookup_table_size = 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ groups:
nlist: [2048]
ratio: [10]
useFloat16: [False, True]
use_raft: [False]
use_cuvs: [False]
search:
nprobe: [1, 5, 10, 50, 100, 200]
refine_ratio: [1]
groups:
baseraft:
basecuvs:
build:
nlist: [2048]
ratio: [10]
useFloat16: [False, True]
use_raft: [True]
use_cuvs: [True]
search:
nprobe: [1, 5, 10, 50, 100, 200]
refine_ratio: [1]
57 changes: 28 additions & 29 deletions python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,71 +6,70 @@ groups:
base:
build:
nlist: [1024, 2048, 4096, 8192]
M: [64, 32, 16]
ratio: [10]
M: [64, 96]
ratio: [4]
usePrecomputed: [False, True]
useFloat16: [False, True]
use_raft: [False]
use_cuvs: [False]
bitsPerCode: [8]
search:
nprobe: [1, 5, 10, 50, 100, 200]
nprobe: [10, 50, 100, 200]
refine_ratio: [1, 2, 4]
baseraft:
basecuvs:
build:
nlist: [1024, 2048, 4096, 8192]
M: [64, 32, 16]
ratio: [10]
M: [96, 192, 384]
ratio: [4]
usePrecomputed: [False]
useFloat16: [False, True]
use_raft: [True]
bitsPerCode: [8, 6, 5, 4]
use_cuvs: [True]
bitsPerCode: [8]
search:
nprobe: [1, 5, 10, 50, 100, 200]
nprobe: [10, 50, 100, 200]
refine_ratio: [1, 2, 4]
large:
build:
nlist: [8192, 16384, 32768, 65536]
M: [48, 32, 16]
nlist: [16384, 32768, 65536]
M: [64, 96]
ratio: [4]
usePrecomputed: [False, True]
useFloat16: [False, True]
use_raft: [False]
useFloat16: [True]
use_cuvs: [False]
bitsPerCode: [8]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
refine_ratio: [1, 2, 4]
largeraft:
largecuvs:
build:
nlist: [8192, 16384, 32768, 65536]
M: [48, 32, 16]
nlist: [16384, 32768, 65536]
M: [96, 192, 384]
ratio: [4]
usePrecomputed: [False]
useFloat16: [False, True]
use_raft: [True]
use_cuvs: [True]
bitsPerCode: [8, 6, 5, 4]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
refine_ratio: [1, 2, 4]
100M:
build:
nlist: [50000]
M: [48]
ratio: [10]
nlist: [100000]
M: [64, 96]
ratio: [4]
usePrecomputed: [False, True]
useFloat16: [False, True]
use_raft: [False]
useFloat16: [True]
use_cuvs: [False]
bitsPerCode: [8]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
refine_ratio: [1]
100Mraft:
100Mcuvs:
build:
nlist: [50000]
M: [48]
ratio: [10]
usePrecomputed: [False, True]
nlist: [100000]
M: [192, 384]
ratio: [4]
useFloat16: [False, True]
use_raft: [True]
use_cuvs: [True]
bitsPerCode: [8, 6, 5, 4]
search:
nprobe: [20, 30, 40, 50, 100, 200, 500, 1000]
Expand Down
Loading
Loading