From abafcf418aab678c6bfe26c9a354e9313c17c94e Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Tue, 29 Oct 2024 15:43:26 +0400 Subject: [PATCH 01/12] In some cases in postgres 17 the `ldb_get_operator_oids` function called from post_parse_analyze hook was causing an error `ERROR: ResourceOwnerEnlarge called after release started`. This was coming from the function call `LookupOperName` which uses `SearchCatCacheList` (src/backend/utils/cache/catcache.c:1754) to search in catalog and if a match is found it will call `ResourceOwnerEnlarge` to grow the hash if needed. This error seems to happen when the transaction is reverted because of an error and the list needs to grow. We will now keep the found operator oids in `CacheMemoryContext` and look them up again only if the cached list does not exist. We no longer free the list; it is released when the `CacheMemoryContext` is reset --- lantern_hnsw/src/hooks/post_parse.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lantern_hnsw/src/hooks/post_parse.c b/lantern_hnsw/src/hooks/post_parse.c index 7c27d1ce8..4e1b55f52 100644 --- a/lantern_hnsw/src/hooks/post_parse.c +++ b/lantern_hnsw/src/hooks/post_parse.c @@ -10,12 +10,15 @@ #include #include #include +#include #include "../hnsw/options.h" #include "utils.h" post_parse_analyze_hook_type original_post_parse_analyze_hook = NULL; +List *oidList; + typedef struct { List *oidList; @@ -176,7 +179,13 @@ void post_parse_analyze_hook_with_operator_check(ParseState *pstate, return; } - List *oidList = ldb_get_operator_oids(); + if(!oidList) { + elog(WARNING, "this hook is experimental and can cause undefined behaviour"); + MemoryContext oldCtx = MemoryContextSwitchTo(CacheMemoryContext); + oidList = ldb_get_operator_oids(); + MemoryContextSwitchTo(oldCtx); + } + Node *query_as_node = (Node *)query; if(is_operator_used(query_as_node, oidList)) { List *sort_group_refs = get_sort_group_refs(query_as_node); @@ -185,5 +194,4 @@ void
post_parse_analyze_hook_with_operator_check(ParseState *pstate, } list_free(sort_group_refs); } - list_free(oidList); } From 2519d273cd3e9f600bffdc6b39763ddf9b84463a Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Tue, 29 Oct 2024 16:07:34 +0400 Subject: [PATCH 02/12] add postgres 17 support in CI --- .github/workflows/build.yaml | 4 ++-- .github/workflows/publish-docker.yaml | 1 + .github/workflows/sanitizer-build-and-test.yaml | 2 +- .github/workflows/test.yaml | 4 ++-- lantern_extras/Cargo.toml | 3 ++- lantern_extras/src/lib.rs | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f825f3d40..a92764e71 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, ubicloud-standard-4-arm, macos-13] - postgres: [12, 13, 14, 15, 16] + postgres: [12, 13, 14, 15, 16, 17] steps: - uses: actions/checkout@v4 with: @@ -53,7 +53,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, ubicloud-standard-4-arm, macos-13] - postgres: [12, 13, 14, 15, 16] + postgres: [12, 13, 14, 15, 16, 17] steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/publish-docker.yaml b/.github/workflows/publish-docker.yaml index 442d0afaa..a089a5f2d 100644 --- a/.github/workflows/publish-docker.yaml +++ b/.github/workflows/publish-docker.yaml @@ -24,6 +24,7 @@ jobs: fail-fast: false matrix: include: + - postgres: 17 - postgres: 16 - postgres: 15 - postgres: 14 diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index b10ffb9b8..f66be6a58 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -56,7 +56,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-22.04"] - pg: ["12.16", "13.12", "14.9", "15.4", "16.0"] + pg: ["12.16", "13.12", "14.9", "15.4", "16.0", "17.0"] steps: - name: Enable UBSan if 
this is a release if: ${{ github.event_name == 'release' }} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 7d00f8bb3..93ee33e1c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, ubicloud-standard-4-arm, macos-13] - postgres: [12, 13, 14, 15, 16] + postgres: [12, 13, 14, 15, 16, 17] steps: - uses: actions/checkout@v4 with: @@ -148,7 +148,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, ubicloud-standard-4-arm] - postgres: [15] + postgres: [15, 17] steps: - uses: actions/checkout@v4 with: diff --git a/lantern_extras/Cargo.toml b/lantern_extras/Cargo.toml index ccd47c043..1f183a1f5 100644 --- a/lantern_extras/Cargo.toml +++ b/lantern_extras/Cargo.toml @@ -8,12 +8,13 @@ crate-type = ["cdylib", "lib"] doctest = false [features] -default = ["pg15"] +default = ["pg17"] pg12 = ["pgrx/pg12", "pgrx-tests/pg12"] pg13 = ["pgrx/pg13", "pgrx-tests/pg13"] pg14 = ["pgrx/pg14", "pgrx-tests/pg14"] pg15 = ["pgrx/pg15", "pgrx-tests/pg15"] pg16 = ["pgrx/pg16", "pgrx-tests/pg16"] +pg17 = ["pgrx/pg17", "pgrx-tests/pg17"] pg_test = [] [dependencies] diff --git a/lantern_extras/src/lib.rs b/lantern_extras/src/lib.rs index e613f46ad..5f1af1ec3 100644 --- a/lantern_extras/src/lib.rs +++ b/lantern_extras/src/lib.rs @@ -133,7 +133,7 @@ pub mod pg_test { pub fn postgresql_conf_options() -> Vec<&'static str> { vec![ - "shared_preload_libraries='lantern_extras.so'", + "shared_preload_libraries='lantern_extras'", "lantern_extras.daemon_databases='pgrx_tests'", "lantern_extras.enable_daemon=true", ] From 0928e30e8762b0ff9c476d90130b834bee8ff253 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Tue, 29 Oct 2024 16:33:22 +0400 Subject: [PATCH 03/12] update pg_cron in CI to v1.6.4 --- ci/scripts/utils.sh | 2 +- docker/Dockerfile.dev | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/utils.sh b/ci/scripts/utils.sh index 31008b393..de15df758 
100644 --- a/ci/scripts/utils.sh +++ b/ci/scripts/utils.sh @@ -9,7 +9,7 @@ function setup_environment() { export GITHUB_OUTPUT=${GITHUB_OUTPUT:-/dev/null} export PGVECTOR_VERSION=0.7.4-lanterncloud #fix pg_cron at the latest commit of the time - export PG_CRON_COMMIT_SHA=7e91e72b1bebc5869bb900d9253cc9e92518b33f + export PG_CRON_COMMIT_SHA=9490f9cc9803f75105f2f7d89839a998f011f8d8 } function setup_rust() { diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 371a2041b..e12b9d0cd 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -1,7 +1,7 @@ ARG VERSION=15 ARG PGVECTOR_VERSION=0.5.1 #fix pg_cron at the latest commit of the time -ARG PG_CRON_COMMIT_SHA=7e91e72b1bebc5869bb900d9253cc9e92518b33f +ARG PG_CRON_COMMIT_SHA=9490f9cc9803f75105f2f7d89839a998f011f8d8 # If you want to build the base image for different versions use Dockerfile.pg # To use GDB inside container run docker like this: From 1a7fee8762c20d91d36100d3f4d0c4f402635771 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Tue, 29 Oct 2024 16:56:36 +0400 Subject: [PATCH 04/12] fix pgrx tests in CI for different pg versions --- .github/workflows/test.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 93ee33e1c..b63455c08 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -177,6 +177,7 @@ jobs: run: | cargo install cargo-pgrx --version 0.12.7 cargo pgrx init "--pg$PG_VERSION" /usr/bin/pg_config + sed -i -e "s/default = .*/default=[\"pg${PG_VERSION}\"]/" lantern_extras/Cargo.toml RUSTFLAGS="--cfg profile=\"ci-build\"" cargo pgrx install --sudo --pg-config /usr/bin/pg_config --package lantern_extras env: PG_VERSION: ${{ matrix.postgres }} @@ -186,8 +187,10 @@ jobs: PG_VERSION: ${{ matrix.postgres }} - name: Setup permissions run: | - sudo chmod 777 -R /usr/lib/postgresql/15/lib/ - sudo chmod 777 -R /usr/share/postgresql/15/extension/ + sudo chmod 777 -R 
"/usr/lib/postgresql/${PG_VERSION}/lib/" + sudo chmod 777 -R "/usr/share/postgresql/${PG_VERSION}/extension/" + env: + PG_VERSION: ${{ matrix.postgres }} - name: Run tests run: cargo llvm-cov --workspace --lcov --output-path lantern-extras-lcov.info env: @@ -196,7 +199,7 @@ jobs: DB_URL: "postgres://postgres@127.0.0.1:5432/postgres" - name: Upload lantern_extras coverage uses: actions/upload-artifact@v4 - if: ${{ startsWith(matrix.os, 'ubuntu') }} + if: ${{ startsWith(matrix.os, 'ubuntu') && matrix.postgres == 15}} with: name: lantern-extras-lcov.info path: ./lantern-extras-lcov.info From f2a7ccc858a2f67634daa2c515fa70431b65f7e8 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Tue, 29 Oct 2024 23:09:20 +0400 Subject: [PATCH 05/12] Replace `TypenameGetTypid` with custom `TypenameGetVectorTypid` function. After release of Postgres 17, search_path is restricted to (pg_catalog, pg_temp) for maintenance operations [ref: https://github.com/postgres/postgres/commit/2af07e2f749a9208ca1ed84fa1d8fe0e75833288] So when pgvector was installed in public schema and we will try to get the oid for vector type using `TypenameGetTypid("vector")` it would return `InvalidOid` By this change the function `TypenameGetVectorTypid` will call SQL function `get_vector_type_oid` which will query pg_type table and return the vector type oid --- lantern_extras/Cargo.toml | 2 +- lantern_hnsw/CMakeLists.txt | 3 +- lantern_hnsw/sql/lantern.sql | 16 +++++++++++ lantern_hnsw/sql/updates/0.4.1--0.4.2.sql | 16 +++++++++++ lantern_hnsw/src/hnsw.c | 2 +- lantern_hnsw/src/hnsw/utils.c | 28 ++++++++++++++++++- lantern_hnsw/src/hnsw/utils.h | 1 + lantern_hnsw/test/expected/ext_relocation.out | 3 +- lantern_hnsw/test/expected/hnsw_correct.out | 1 + .../test/expected/hnsw_cost_estimate.out | 1 + lantern_hnsw/test/expected/hnsw_create.out | 1 + .../test/expected/hnsw_create_expr.out | 6 ++++ .../test/expected/hnsw_create_unlogged.out | 1 + lantern_hnsw/test/expected/hnsw_dist_func.out | 1 + 
lantern_hnsw/test/expected/hnsw_extras.out | 1 + .../test/expected/hnsw_index_from_file.out | 1 + lantern_hnsw/test/expected/hnsw_insert.out | 1 + .../test/expected/hnsw_insert_unlogged.out | 1 + lantern_hnsw/test/expected/hnsw_operators.out | 1 + lantern_hnsw/test/expected/hnsw_select.out | 1 + lantern_hnsw/test/expected/hnsw_todo.out | 1 + lantern_hnsw/test/expected/hnsw_vector.out | 3 +- lantern_hnsw/test/sql/hnsw_vector.sql | 2 +- 23 files changed, 87 insertions(+), 7 deletions(-) create mode 100644 lantern_hnsw/sql/updates/0.4.1--0.4.2.sql diff --git a/lantern_extras/Cargo.toml b/lantern_extras/Cargo.toml index 1f183a1f5..df5429d66 100644 --- a/lantern_extras/Cargo.toml +++ b/lantern_extras/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lantern_extras" -version = "0.4.1" +version = "0.4.2" edition = "2021" [lib] diff --git a/lantern_hnsw/CMakeLists.txt b/lantern_hnsw/CMakeLists.txt index 5b0e4ee8a..ca087bef2 100644 --- a/lantern_hnsw/CMakeLists.txt +++ b/lantern_hnsw/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.3) include(CheckSymbolExists) -set(LANTERN_VERSION 0.4.1) +set(LANTERN_VERSION 0.4.2) project( LanternDB @@ -267,6 +267,7 @@ set (_update_files sql/updates/0.3.3--0.3.4.sql sql/updates/0.3.4--0.4.0.sql sql/updates/0.4.0--0.4.1.sql + sql/updates/0.4.1--0.4.2.sql ) # Generate version information for the binary diff --git a/lantern_hnsw/sql/lantern.sql b/lantern_hnsw/sql/lantern.sql index ab115bb57..418574fbe 100644 --- a/lantern_hnsw/sql/lantern.sql +++ b/lantern_hnsw/sql/lantern.sql @@ -924,3 +924,19 @@ BEGIN RETURN jsonb_pretty(_lantern_internal.mask_order_by_in_plan(explain_output)); END $$ LANGUAGE plpgsql; +-- Get vector type oid +CREATE FUNCTION _lantern_internal.get_vector_type_oid() RETURNS OID AS $$ +DECLARE + type_oid OID; + pg_version INT; +BEGIN + pg_version := (SELECT setting FROM pg_settings WHERE name = 'server_version_num'); + + type_oid := (SELECT pg_type.oid FROM pg_type + JOIN pg_depend ON pg_type.oid = pg_depend.objid 
+ JOIN pg_extension ON pg_depend.refobjid = pg_extension.oid + WHERE typname='vector' AND extname='vector' + LIMIT 1); + RETURN COALESCE(type_oid, 0); +END; +$$ LANGUAGE plpgsql; diff --git a/lantern_hnsw/sql/updates/0.4.1--0.4.2.sql b/lantern_hnsw/sql/updates/0.4.1--0.4.2.sql new file mode 100644 index 000000000..c16aae550 --- /dev/null +++ b/lantern_hnsw/sql/updates/0.4.1--0.4.2.sql @@ -0,0 +1,16 @@ +-- Get vector type oid +CREATE FUNCTION _lantern_internal.get_vector_type_oid() RETURNS OID AS $$ +DECLARE + type_oid OID; + pg_version INT; +BEGIN + pg_version := (SELECT setting FROM pg_settings WHERE name = 'server_version_num'); + + type_oid := (SELECT pg_type.oid FROM pg_type + JOIN pg_depend ON pg_type.oid = pg_depend.objid + JOIN pg_extension ON pg_depend.refobjid = pg_extension.oid + WHERE typname='vector' AND extname='vector' + LIMIT 1); + RETURN COALESCE(type_oid, 0); +END; +$$ LANGUAGE plpgsql; diff --git a/lantern_hnsw/src/hnsw.c b/lantern_hnsw/src/hnsw.c index 40921e6a8..b47d68676 100644 --- a/lantern_hnsw/src/hnsw.c +++ b/lantern_hnsw/src/hnsw.c @@ -452,7 +452,7 @@ HnswColumnType GetColumnTypeFromOid(Oid oid) if(oid == FLOAT4ARRAYOID) { return REAL_ARRAY; - } else if(oid == TypenameGetTypid("vector")) { + } else if(oid == TypenameGetVectorTypid()) { return VECTOR; } else if(oid == INT4ARRAYOID) { return INT_ARRAY; diff --git a/lantern_hnsw/src/hnsw/utils.c b/lantern_hnsw/src/hnsw/utils.c index 637851d45..631134278 100644 --- a/lantern_hnsw/src/hnsw/utils.c +++ b/lantern_hnsw/src/hnsw/utils.c @@ -2,14 +2,18 @@ #include "utils.h" +#include #include -#include +#include +#include #include #include #include #include #include #include +#include +#include #if PG_VERSION_NUM >= 130000 #include @@ -271,3 +275,25 @@ usearch_metric_kind_t GetMetricKindFromStr(char *metric_kind_str) elog(ERROR, "Unsupported metric kind: %s . 
Should be one of (l2sq, cos, hamming)", metric_kind_str); } + +/* + * We do not use the existing TypenameGetTypid because, since Postgres 17, + * maintenance operations run with search_path restricted to the namespaces (pg_catalog, pg_temp). + * Thus, if the type is installed in the public schema, TypenameGetTypid cannot find it. + * Instead we call the SQL function defined in lantern.sql, which looks up the pg_type relation. + */ +Oid TypenameGetVectorTypid() +{ + Oid function_oid = GetSysCacheOid(PROCNAMEARGSNSP, + Anum_pg_proc_oid, + CStringGetDatum("get_vector_type_oid"), + PointerGetDatum(buildoidvector(NULL, 0)), + ObjectIdGetDatum(get_namespace_oid("_lantern_internal", false)), + 0); + + if(!OidIsValid(function_oid)) { + elog(ERROR, "Please update lantern extension"); + } + + return DatumGetObjectId(OidFunctionCall0(function_oid)); +} diff --git a/lantern_hnsw/src/hnsw/utils.h b/lantern_hnsw/src/hnsw/utils.h index 9d8248472..24ee23c96 100644 --- a/lantern_hnsw/src/hnsw/utils.h +++ b/lantern_hnsw/src/hnsw/utils.h @@ -17,6 +17,7 @@ void CheckExtensionVersions(); uint32 EstimateRowCount(Relation heap); int32 GetColumnAttributeNumber(Relation rel, const char *columnName); usearch_metric_kind_t GetMetricKindFromStr(char *metric_kind_str); +Oid TypenameGetVectorTypid(); // hoping to throw the error via an assertion, if those are on, before elog(ERROR)-ing as a last resort // We prefer Assert() because this function is used in contexts where the stack contains non-POD types diff --git a/lantern_hnsw/test/expected/ext_relocation.out b/lantern_hnsw/test/expected/ext_relocation.out index 71fb2cd6c..c15cd8251 100644 --- a/lantern_hnsw/test/expected/ext_relocation.out +++ b/lantern_hnsw/test/expected/ext_relocation.out @@ -38,6 +38,7 @@ ORDER BY 1, 3, 2; schema1 | create_pq_codebook | _lantern_internal schema1 | failure_point_enable | _lantern_internal schema1 | forbid_table_change | _lantern_internal + schema1 | get_vector_type_oid | _lantern_internal schema1 |
mask_arrays | _lantern_internal schema1 | mask_order_by_in_plan | _lantern_internal schema1 | quantize_vector | _lantern_internal @@ -62,7 +63,7 @@ ORDER BY 1, 3, 2; schema1 | ldb_pqvec_send | schema1 schema1 | quantize_table | schema1 schema1 | quantize_vector | schema1 -(28 rows) +(29 rows) -- show all the extension operators SELECT ne.nspname AS extschema, op.oprname, np.nspname AS proschema diff --git a/lantern_hnsw/test/expected/hnsw_correct.out b/lantern_hnsw/test/expected/hnsw_correct.out index 32bf7a07b..9e633d497 100644 --- a/lantern_hnsw/test/expected/hnsw_correct.out +++ b/lantern_hnsw/test/expected/hnsw_correct.out @@ -22,6 +22,7 @@ SELECT l2sq_dist(v, '{0,0}') AS dist FROM small_world; +WARNING: this hook is experimental and can cause undefined behaviour -- Get the results with the index CREATE TEMP TABLE results_w_index AS SELECT diff --git a/lantern_hnsw/test/expected/hnsw_cost_estimate.out b/lantern_hnsw/test/expected/hnsw_cost_estimate.out index b0ed80099..d98faeb40 100644 --- a/lantern_hnsw/test/expected/hnsw_cost_estimate.out +++ b/lantern_hnsw/test/expected/hnsw_cost_estimate.out @@ -63,6 +63,7 @@ INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors SET _lantern_internal.is_test = true; +WARNING: this hook is experimental and can cause undefined behaviour SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v ''{1,2}'' LIMIT 10', 0.47); DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- diff --git a/lantern_hnsw/test/expected/hnsw_create.out b/lantern_hnsw/test/expected/hnsw_create.out index 8ed1fab6e..668a19b2d 100644 --- a/lantern_hnsw/test/expected/hnsw_create.out +++ b/lantern_hnsw/test/expected/hnsw_create.out @@ -71,6 +71,7 @@ CREATE TABLE IF NOT EXISTS sift_base10k ( \copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v 
dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); +WARNING: this hook is experimental and can cause undefined behaviour INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors diff --git a/lantern_hnsw/test/expected/hnsw_create_expr.out b/lantern_hnsw/test/expected/hnsw_create_expr.out index 168ccfe36..b13ba88bf 100644 --- a/lantern_hnsw/test/expected/hnsw_create_expr.out +++ b/lantern_hnsw/test/expected/hnsw_create_expr.out @@ -67,10 +67,16 @@ SET enable_seqscan = false; SET lantern.pgvector_compat=FALSE; -- This should success CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_real_array(id)) WITH (M=2); +WARNING: this hook is experimental and can cause undefined behaviour +WARNING: this hook is experimental and can cause undefined behaviour +WARNING: this hook is experimental and can cause undefined behaviour +WARNING: this hook is experimental and can cause undefined behaviour +WARNING: this hook is experimental and can cause undefined behaviour INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors SELECT _lantern_internal.validate_index('test_table_int_to_fixed_binary_real_array_idx', false); +WARNING: this hook is experimental and can cause undefined behaviour INFO: validate_index() start for test_table_int_to_fixed_binary_real_array_idx INFO: validate_index() done, no issues found. 
validate_index diff --git a/lantern_hnsw/test/expected/hnsw_create_unlogged.out b/lantern_hnsw/test/expected/hnsw_create_unlogged.out index 3c62b2788..79bf53fe9 100644 --- a/lantern_hnsw/test/expected/hnsw_create_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_create_unlogged.out @@ -51,6 +51,7 @@ CREATE UNLOGGED TABLE IF NOT EXISTS sift_base10k ( \copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); +WARNING: this hook is experimental and can cause undefined behaviour INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors diff --git a/lantern_hnsw/test/expected/hnsw_dist_func.out b/lantern_hnsw/test/expected/hnsw_dist_func.out index 3414a212d..35853a2ce 100644 --- a/lantern_hnsw/test/expected/hnsw_dist_func.out +++ b/lantern_hnsw/test/expected/hnsw_dist_func.out @@ -38,6 +38,7 @@ SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; -- Verify that the distance functions work (check distances) SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v '{0,1,0}'; +WARNING: this hook is experimental and can cause undefined behaviour round ------- 0.00 diff --git a/lantern_hnsw/test/expected/hnsw_extras.out b/lantern_hnsw/test/expected/hnsw_extras.out index ca1d4459b..7abb8147c 100644 --- a/lantern_hnsw/test/expected/hnsw_extras.out +++ b/lantern_hnsw/test/expected/hnsw_extras.out @@ -51,6 +51,7 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <-> :'v777' LIMIT 10 SET lantern.pgvector_compat=FALSE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10; +WARNING: this hook is experimental and can cause undefined behaviour QUERY PLAN 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit diff --git a/lantern_hnsw/test/expected/hnsw_index_from_file.out b/lantern_hnsw/test/expected/hnsw_index_from_file.out index 4e60ad9f3..739cf6990 100644 --- a/lantern_hnsw/test/expected/hnsw_index_from_file.out +++ b/lantern_hnsw/test/expected/hnsw_index_from_file.out @@ -55,6 +55,7 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset +WARNING: this hook is experimental and can cause undefined behaviour EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/lantern_hnsw/test/expected/hnsw_insert.out b/lantern_hnsw/test/expected/hnsw_insert.out index 5bdee5204..dd63ec5ce 100644 --- a/lantern_hnsw/test/expected/hnsw_insert.out +++ b/lantern_hnsw/test/expected/hnsw_insert.out @@ -68,6 +68,7 @@ SET enable_seqscan = false; SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); +WARNING: this hook is experimental and can cause undefined behaviour INSERT INTO small_world (v) VALUES (NULL); -- Inserting vectors of different dimension should fail \set ON_ERROR_STOP 
off diff --git a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out index 97ce44ddf..d6b960a62 100644 --- a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out @@ -68,6 +68,7 @@ SET enable_seqscan = false; SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); +WARNING: this hook is experimental and can cause undefined behaviour INSERT INTO small_world (v) VALUES (NULL); -- Inserting vectors of different dimension should fail \set ON_ERROR_STOP off diff --git a/lantern_hnsw/test/expected/hnsw_operators.out b/lantern_hnsw/test/expected/hnsw_operators.out index acc95be8c..b2d0260d2 100644 --- a/lantern_hnsw/test/expected/hnsw_operators.out +++ b/lantern_hnsw/test/expected/hnsw_operators.out @@ -8,6 +8,7 @@ INFO: done saving 2 vectors -- should rewrite operator SET lantern.pgvector_compat=FALSE; SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; +WARNING: this hook is experimental and can cause undefined behaviour v --------- {1,1,1} diff --git a/lantern_hnsw/test/expected/hnsw_select.out b/lantern_hnsw/test/expected/hnsw_select.out index 09dc1717f..1f2f7b092 100644 --- a/lantern_hnsw/test/expected/hnsw_select.out +++ b/lantern_hnsw/test/expected/hnsw_select.out @@ -43,6 +43,7 @@ SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; -- Verify that basic queries still work given our query parser and planner hooks SELECT 0 + 1; +WARNING: this hook is experimental and can cause undefined behaviour ?column? 
---------- 1 diff --git a/lantern_hnsw/test/expected/hnsw_todo.out b/lantern_hnsw/test/expected/hnsw_todo.out index 8b701d53e..2d94916e1 100644 --- a/lantern_hnsw/test/expected/hnsw_todo.out +++ b/lantern_hnsw/test/expected/hnsw_todo.out @@ -22,6 +22,7 @@ INFO: done init usearch index INFO: inserted 8 elements INFO: done saving 8 vectors SELECT _lantern_internal.validate_index('small_world_l2_vector_idx', false); +WARNING: this hook is experimental and can cause undefined behaviour INFO: validate_index() start for small_world_l2_vector_idx INFO: validate_index() done, no issues found. validate_index diff --git a/lantern_hnsw/test/expected/hnsw_vector.out b/lantern_hnsw/test/expected/hnsw_vector.out index 5c2b925ef..14733cc28 100644 --- a/lantern_hnsw/test/expected/hnsw_vector.out +++ b/lantern_hnsw/test/expected/hnsw_vector.out @@ -10,6 +10,7 @@ RESET client_min_messages; SET lantern.pgvector_compat=FALSE; -- Verify basic functionality of pgvector SELECT '[1,2,3]'::vector; +WARNING: this hook is experimental and can cause undefined behaviour vector --------- [1,2,3] @@ -185,7 +186,7 @@ BEGIN LOOP real_array := array_append(real_array, CAST(substring(binary_string, i, 1) AS REAL)); END LOOP; - RETURN real_array::vector; + RETURN real_array::public.vector; END; $$ LANGUAGE plpgsql IMMUTABLE; CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_vector(id)) WITH (M=2); diff --git a/lantern_hnsw/test/sql/hnsw_vector.sql b/lantern_hnsw/test/sql/hnsw_vector.sql index 3704dd396..1c236a127 100644 --- a/lantern_hnsw/test/sql/hnsw_vector.sql +++ b/lantern_hnsw/test/sql/hnsw_vector.sql @@ -92,7 +92,7 @@ BEGIN LOOP real_array := array_append(real_array, CAST(substring(binary_string, i, 1) AS REAL)); END LOOP; - RETURN real_array::vector; + RETURN real_array::public.vector; END; $$ LANGUAGE plpgsql IMMUTABLE; From b99790d788e1a395107cf605a0dd5c5da0552c5b Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Tue, 29 Oct 2024 23:41:29 +0400 Subject: [PATCH 06/12] If 
an error would appear before setting `buildstate->index_file_fd = -1` in `InitBuildState` we would try to close that file descriptor (which would be 0) in `BuildIndexCleanup` function because of condition `if (buildstate->index_file_fd != -1)` and trying to close fd 0 was crashing the server. Now we will check if the fd is greater than 0 before closing it. --- lantern_hnsw/src/hnsw/build.c | 2 +- lantern_hnsw/test/expected/hnsw_correct.out | 1 - lantern_hnsw/test/expected/hnsw_cost_estimate.out | 1 - lantern_hnsw/test/expected/hnsw_create.out | 1 - lantern_hnsw/test/expected/hnsw_create_expr.out | 6 ------ lantern_hnsw/test/expected/hnsw_create_unlogged.out | 1 - lantern_hnsw/test/expected/hnsw_dist_func.out | 1 - lantern_hnsw/test/expected/hnsw_extras.out | 1 - lantern_hnsw/test/expected/hnsw_index_from_file.out | 1 - lantern_hnsw/test/expected/hnsw_insert.out | 1 - lantern_hnsw/test/expected/hnsw_insert_unlogged.out | 1 - lantern_hnsw/test/expected/hnsw_operators.out | 1 - lantern_hnsw/test/expected/hnsw_select.out | 1 - lantern_hnsw/test/expected/hnsw_todo.out | 1 - lantern_hnsw/test/expected/hnsw_vector.out | 1 - lantern_hnsw/test/test_runner.sh | 1 + 16 files changed, 2 insertions(+), 20 deletions(-) diff --git a/lantern_hnsw/src/hnsw/build.c b/lantern_hnsw/src/hnsw/build.c index 7c3e75bcf..33672dc40 100644 --- a/lantern_hnsw/src/hnsw/build.c +++ b/lantern_hnsw/src/hnsw/build.c @@ -455,7 +455,7 @@ static void BuildIndexCleanup(ldb_HnswBuildState *buildstate) buildstate->external_socket->close(buildstate->external_socket); } - if(buildstate->index_file_fd != -1) { + if(buildstate->index_file_fd > 0) { // index_file_fd will only exist when we mmap the index file to memory if(!buildstate->external && buildstate->index_buffer) { int munmap_ret = munmap(buildstate->index_buffer, buildstate->index_buffer_size); diff --git a/lantern_hnsw/test/expected/hnsw_correct.out b/lantern_hnsw/test/expected/hnsw_correct.out index 9e633d497..32bf7a07b 100644 --- 
a/lantern_hnsw/test/expected/hnsw_correct.out +++ b/lantern_hnsw/test/expected/hnsw_correct.out @@ -22,7 +22,6 @@ SELECT l2sq_dist(v, '{0,0}') AS dist FROM small_world; -WARNING: this hook is experimental and can cause undefined behaviour -- Get the results with the index CREATE TEMP TABLE results_w_index AS SELECT diff --git a/lantern_hnsw/test/expected/hnsw_cost_estimate.out b/lantern_hnsw/test/expected/hnsw_cost_estimate.out index d98faeb40..b0ed80099 100644 --- a/lantern_hnsw/test/expected/hnsw_cost_estimate.out +++ b/lantern_hnsw/test/expected/hnsw_cost_estimate.out @@ -63,7 +63,6 @@ INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors SET _lantern_internal.is_test = true; -WARNING: this hook is experimental and can cause undefined behaviour SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v ''{1,2}'' LIMIT 10', 0.47); DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- diff --git a/lantern_hnsw/test/expected/hnsw_create.out b/lantern_hnsw/test/expected/hnsw_create.out index 668a19b2d..8ed1fab6e 100644 --- a/lantern_hnsw/test/expected/hnsw_create.out +++ b/lantern_hnsw/test/expected/hnsw_create.out @@ -71,7 +71,6 @@ CREATE TABLE IF NOT EXISTS sift_base10k ( \copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); -WARNING: this hook is experimental and can cause undefined behaviour INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors diff --git a/lantern_hnsw/test/expected/hnsw_create_expr.out b/lantern_hnsw/test/expected/hnsw_create_expr.out index b13ba88bf..168ccfe36 100644 --- a/lantern_hnsw/test/expected/hnsw_create_expr.out +++ b/lantern_hnsw/test/expected/hnsw_create_expr.out @@ -67,16 +67,10 @@ SET enable_seqscan = false; SET 
lantern.pgvector_compat=FALSE; -- This should success CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_real_array(id)) WITH (M=2); -WARNING: this hook is experimental and can cause undefined behaviour -WARNING: this hook is experimental and can cause undefined behaviour -WARNING: this hook is experimental and can cause undefined behaviour -WARNING: this hook is experimental and can cause undefined behaviour -WARNING: this hook is experimental and can cause undefined behaviour INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors SELECT _lantern_internal.validate_index('test_table_int_to_fixed_binary_real_array_idx', false); -WARNING: this hook is experimental and can cause undefined behaviour INFO: validate_index() start for test_table_int_to_fixed_binary_real_array_idx INFO: validate_index() done, no issues found. validate_index diff --git a/lantern_hnsw/test/expected/hnsw_create_unlogged.out b/lantern_hnsw/test/expected/hnsw_create_unlogged.out index 79bf53fe9..3c62b2788 100644 --- a/lantern_hnsw/test/expected/hnsw_create_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_create_unlogged.out @@ -51,7 +51,6 @@ CREATE UNLOGGED TABLE IF NOT EXISTS sift_base10k ( \copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); -WARNING: this hook is experimental and can cause undefined behaviour INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors diff --git a/lantern_hnsw/test/expected/hnsw_dist_func.out b/lantern_hnsw/test/expected/hnsw_dist_func.out index 35853a2ce..3414a212d 100644 --- a/lantern_hnsw/test/expected/hnsw_dist_func.out +++ b/lantern_hnsw/test/expected/hnsw_dist_func.out @@ -38,7 +38,6 @@ SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; -- Verify that the distance 
functions work (check distances) SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v '{0,1,0}'; -WARNING: this hook is experimental and can cause undefined behaviour round ------- 0.00 diff --git a/lantern_hnsw/test/expected/hnsw_extras.out b/lantern_hnsw/test/expected/hnsw_extras.out index 7abb8147c..ca1d4459b 100644 --- a/lantern_hnsw/test/expected/hnsw_extras.out +++ b/lantern_hnsw/test/expected/hnsw_extras.out @@ -51,7 +51,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <-> :'v777' LIMIT 10 SET lantern.pgvector_compat=FALSE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10; -WARNING: this hook is experimental and can cause undefined behaviour QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit diff --git a/lantern_hnsw/test/expected/hnsw_index_from_file.out b/lantern_hnsw/test/expected/hnsw_index_from_file.out index 739cf6990..4e60ad9f3 100644 --- a/lantern_hnsw/test/expected/hnsw_index_from_file.out +++ b/lantern_hnsw/test/expected/hnsw_index_from_file.out @@ -55,7 +55,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -WARNING: this hook is experimental and can cause undefined behaviour EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; QUERY PLAN 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/lantern_hnsw/test/expected/hnsw_insert.out b/lantern_hnsw/test/expected/hnsw_insert.out index dd63ec5ce..5bdee5204 100644 --- a/lantern_hnsw/test/expected/hnsw_insert.out +++ b/lantern_hnsw/test/expected/hnsw_insert.out @@ -68,7 +68,6 @@ SET enable_seqscan = false; SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); -WARNING: this hook is experimental and can cause undefined behaviour INSERT INTO small_world (v) VALUES (NULL); -- Inserting vectors of different dimension should fail \set ON_ERROR_STOP off diff --git a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out index d6b960a62..97ce44ddf 100644 --- a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out @@ -68,7 +68,6 @@ SET enable_seqscan = false; SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); -WARNING: this hook is experimental and can cause undefined behaviour INSERT INTO small_world (v) VALUES (NULL); -- Inserting vectors of different dimension should fail \set ON_ERROR_STOP off diff --git a/lantern_hnsw/test/expected/hnsw_operators.out b/lantern_hnsw/test/expected/hnsw_operators.out index b2d0260d2..acc95be8c 100644 --- a/lantern_hnsw/test/expected/hnsw_operators.out +++ b/lantern_hnsw/test/expected/hnsw_operators.out @@ -8,7 +8,6 @@ INFO: done saving 2 vectors -- should rewrite operator SET lantern.pgvector_compat=FALSE; SELECT 
* FROM op_test ORDER BY v ARRAY[1,1,1]; -WARNING: this hook is experimental and can cause undefined behaviour v --------- {1,1,1} diff --git a/lantern_hnsw/test/expected/hnsw_select.out b/lantern_hnsw/test/expected/hnsw_select.out index 1f2f7b092..09dc1717f 100644 --- a/lantern_hnsw/test/expected/hnsw_select.out +++ b/lantern_hnsw/test/expected/hnsw_select.out @@ -43,7 +43,6 @@ SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; -- Verify that basic queries still work given our query parser and planner hooks SELECT 0 + 1; -WARNING: this hook is experimental and can cause undefined behaviour ?column? ---------- 1 diff --git a/lantern_hnsw/test/expected/hnsw_todo.out b/lantern_hnsw/test/expected/hnsw_todo.out index 2d94916e1..8b701d53e 100644 --- a/lantern_hnsw/test/expected/hnsw_todo.out +++ b/lantern_hnsw/test/expected/hnsw_todo.out @@ -22,7 +22,6 @@ INFO: done init usearch index INFO: inserted 8 elements INFO: done saving 8 vectors SELECT _lantern_internal.validate_index('small_world_l2_vector_idx', false); -WARNING: this hook is experimental and can cause undefined behaviour INFO: validate_index() start for small_world_l2_vector_idx INFO: validate_index() done, no issues found. validate_index diff --git a/lantern_hnsw/test/expected/hnsw_vector.out b/lantern_hnsw/test/expected/hnsw_vector.out index 14733cc28..6fa1cc9eb 100644 --- a/lantern_hnsw/test/expected/hnsw_vector.out +++ b/lantern_hnsw/test/expected/hnsw_vector.out @@ -10,7 +10,6 @@ RESET client_min_messages; SET lantern.pgvector_compat=FALSE; -- Verify basic functionality of pgvector SELECT '[1,2,3]'::vector; -WARNING: this hook is experimental and can cause undefined behaviour vector --------- [1,2,3] diff --git a/lantern_hnsw/test/test_runner.sh b/lantern_hnsw/test/test_runner.sh index 792d3d770..d7a3648f6 100755 --- a/lantern_hnsw/test/test_runner.sh +++ b/lantern_hnsw/test/test_runner.sh @@ -46,6 +46,7 @@ function run_regression_test { -e 's! Average Peak Memory: [0-9]\{1,\}kB!!' \ -e 's! 
time=[0-9]\+\.[0-9]\+\.\.[0-9]\+\.[0-9]\+!!' | \ grep -v 'DEBUG: rehashing catalog cache id' | \ + grep -v 'WARNING: this hook is experimental and can cause undefined behaviour' | \ grep -Gv '^ Planning Time:' | \ grep -Gv '^ Execution Time:' | \ # ignore lines in explain(format json) output that differ among pg12-pg16 From 3a6d92f9cea70e3030e4d36a9526ea3b2c62687d Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Wed, 30 Oct 2024 11:11:02 +0400 Subject: [PATCH 07/12] remove version mismatch check from _PG_init, to be able to drop the extension when invalid binary would be installed --- lantern_hnsw/src/hnsw/options.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lantern_hnsw/src/hnsw/options.c b/lantern_hnsw/src/hnsw/options.c index 855f3aabd..29100d44e 100644 --- a/lantern_hnsw/src/hnsw/options.c +++ b/lantern_hnsw/src/hnsw/options.c @@ -229,8 +229,6 @@ static void ldb_wait_for_gdb(int sig) */ void _PG_init(void) { - (void)CheckExtensionVersions(); - if(process_shared_preload_libraries_in_progress) { elog(WARNING, "LanternDB HNSW index extension loaded inside shared_preload_libraries." 
From 8ab9a61ec028586eb5f05dc31d8f4da923f21be1 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Wed, 30 Oct 2024 13:58:55 +0400 Subject: [PATCH 08/12] do not test updates on pg17 with old incompatible versions --- lantern_hnsw/scripts/test_updates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lantern_hnsw/scripts/test_updates.py b/lantern_hnsw/scripts/test_updates.py index 36d3e7cf3..fba007065 100644 --- a/lantern_hnsw/scripts/test_updates.py +++ b/lantern_hnsw/scripts/test_updates.py @@ -49,7 +49,8 @@ def __repr__(self): return self.version INCOMPATIBLE_VERSIONS = { - '16': [Version('0.0.4')] + '16': [Version('0.0.4')], + '17': [Version('0.3.0'), Version('0.3.1'), Version('0.3.2'), Version('0.3.3'), Version('0.3.4'), Version('0.4.0'), Version('0.4.1')], } def shell(cmd, exit_on_error=True): From 4058c1a5dee26b7d2ceed42aaa4189bccb320960 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Wed, 30 Oct 2024 14:34:20 +0400 Subject: [PATCH 09/12] Remove footer from test output as pg17 contains `Disabled: true|false` if enable_seqscan is set to false. Modify failing tests because of operator rewrite hook to not use operator. Modify test output files accordingly. 
--- lantern_hnsw/sql/lantern.sql | 3 -- lantern_hnsw/sql/updates/0.4.1--0.4.2.sql | 3 -- lantern_hnsw/src/hnsw/utils.c | 1 + lantern_hnsw/test/expected/async_tasks.out | 19 --------- lantern_hnsw/test/expected/ext_relocation.out | 7 ---- .../test/expected/hnsw_concurrent.out | 6 --- lantern_hnsw/test/expected/hnsw_config.out | 5 --- lantern_hnsw/test/expected/hnsw_correct.out | 2 - .../test/expected/hnsw_cost_estimate.out | 17 -------- lantern_hnsw/test/expected/hnsw_create.out | 8 ---- .../test/expected/hnsw_create_expr.out | 2 - .../test/expected/hnsw_create_unlogged.out | 6 --- lantern_hnsw/test/expected/hnsw_delete.out | 4 -- lantern_hnsw/test/expected/hnsw_dist_func.out | 17 -------- lantern_hnsw/test/expected/hnsw_extras.out | 17 -------- .../test/expected/hnsw_index_from_file.out | 11 ----- lantern_hnsw/test/expected/hnsw_insert.out | 7 ---- .../test/expected/hnsw_insert_unlogged.out | 7 ---- .../test/expected/hnsw_logged_unlogged.out | 18 -------- lantern_hnsw/test/expected/hnsw_operators.out | 22 ---------- lantern_hnsw/test/expected/hnsw_pq.out | 41 ------------------ lantern_hnsw/test/expected/hnsw_pq_index.out | 19 --------- lantern_hnsw/test/expected/hnsw_select.out | 42 ------------------- lantern_hnsw/test/expected/hnsw_sq.out | 10 ----- lantern_hnsw/test/expected/hnsw_todo.out | 6 --- lantern_hnsw/test/expected/hnsw_vector.out | 19 --------- .../missing_outer_snapshot_portal.out | 2 - .../test/expected/weighted_search.out | 17 -------- lantern_hnsw/test/parallel/expected/begin.out | 3 -- lantern_hnsw/test/parallel/expected/end.out | 2 - .../test/parallel/expected/select.out | 6 --- lantern_hnsw/test/test_runner.sh | 3 ++ 32 files changed, 4 insertions(+), 348 deletions(-) diff --git a/lantern_hnsw/sql/lantern.sql b/lantern_hnsw/sql/lantern.sql index 418574fbe..14cdd1625 100644 --- a/lantern_hnsw/sql/lantern.sql +++ b/lantern_hnsw/sql/lantern.sql @@ -928,10 +928,7 @@ END $$ LANGUAGE plpgsql; CREATE FUNCTION _lantern_internal.get_vector_type_oid() 
RETURNS OID AS $$ DECLARE type_oid OID; - pg_version INT; BEGIN - pg_version := (SELECT setting FROM pg_settings WHERE name = 'server_version_num'); - type_oid := (SELECT pg_type.oid FROM pg_type JOIN pg_depend ON pg_type.oid = pg_depend.objid JOIN pg_extension ON pg_depend.refobjid = pg_extension.oid diff --git a/lantern_hnsw/sql/updates/0.4.1--0.4.2.sql b/lantern_hnsw/sql/updates/0.4.1--0.4.2.sql index c16aae550..814c13723 100644 --- a/lantern_hnsw/sql/updates/0.4.1--0.4.2.sql +++ b/lantern_hnsw/sql/updates/0.4.1--0.4.2.sql @@ -2,10 +2,7 @@ CREATE FUNCTION _lantern_internal.get_vector_type_oid() RETURNS OID AS $$ DECLARE type_oid OID; - pg_version INT; BEGIN - pg_version := (SELECT setting FROM pg_settings WHERE name = 'server_version_num'); - type_oid := (SELECT pg_type.oid FROM pg_type JOIN pg_depend ON pg_type.oid = pg_depend.objid JOIN pg_extension ON pg_depend.refobjid = pg_extension.oid diff --git a/lantern_hnsw/src/hnsw/utils.c b/lantern_hnsw/src/hnsw/utils.c index 631134278..94f67652f 100644 --- a/lantern_hnsw/src/hnsw/utils.c +++ b/lantern_hnsw/src/hnsw/utils.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/lantern_hnsw/test/expected/async_tasks.out b/lantern_hnsw/test/expected/async_tasks.out index 0b6f78da1..954cef627 100644 --- a/lantern_hnsw/test/expected/async_tasks.out +++ b/lantern_hnsw/test/expected/async_tasks.out @@ -94,20 +94,17 @@ SELECT lantern.async_task($$SELECT pg_sleep(0.1);$$, 'Lantern job name'); async_task ------------ 1 -(1 row) SELECT lantern.async_task($$SELECT pg_sleep(70);$$::text); async_task ------------ 2 -(1 row) -- will fail since the task is not valid SQL SELECT lantern.async_task($$SELECT pg_sleep(haha);$$, 'Lantern job name'); async_task ------------ 3 -(1 row) SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done, status, error_message FROM lantern.tasks; jobid | query | pg_cron_job_name | job_name | is_done | status | error_message @@ -115,13 
+112,11 @@ SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done 1 | SELECT pg_sleep(0.1); | async_task_1 | Lantern job name | f | | 2 | SELECT pg_sleep(70); | async_task_2 | | f | | 3 | SELECT pg_sleep(haha); | async_task_3 | Lantern job name | f | | -(3 rows) SELECT pg_sleep(3); pg_sleep ---------- -(1 row) SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done, status, error_message FROM lantern.tasks; jobid | query | pg_cron_job_name | job_name | is_done | status | error_message @@ -132,13 +127,11 @@ SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done | | | | | | ^ + | | | | | | 1 | SELECT pg_sleep(0.1); | async_task_1 | Lantern job name | t | succeeded | -(3 rows) SELECT lantern.cancel_all_async_tasks(); cancel_all_async_tasks ------------------------ -(1 row) -- test async tasks on index creation DROP TABLE IF EXISTS small_world; @@ -156,7 +149,6 @@ SELECT lantern.async_task($$CREATE INDEX idx ON "sift_base1k_UpperCase" USING la async_task ------------ 4 -(1 row) -- blocks DB deletions that is why it is disabled for now -- SELECT lantern.async_task($$CREATE INDEX CONCURRENTLY idx_concurrent ON "sift_base1k_UpperCase" USING lantern_hnsw (v) WITH (dim=128, M=6);$$, 'Indexing Job'); @@ -164,13 +156,11 @@ SELECT pg_sleep(5); pg_sleep ---------- -(1 row) SELECT * FROM ldb_get_indexes('sift_base1k_UpperCase'); indexname | size | indexdef | indisvalid -----------+--------+--------------------------------------------------------------------------------------------+------------ idx | 632 kB | CREATE INDEX idx ON "sift_base1k_UpperCase" USING lantern_hnsw (v) WITH (dim='128', m='6') | t -(1 row) SELECT _lantern_internal.validate_index('idx', false); INFO: validate_index() start for idx @@ -178,7 +168,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done, status, error_message FROM lantern.tasks; jobid | query | pg_cron_job_name | job_name | is_done | status | error_message @@ -190,7 +179,6 @@ SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done 1 | SELECT pg_sleep(0.1); | async_task_1 | Lantern job name | t | succeeded | 2 | SELECT pg_sleep(70); | async_task_2 | | t | canceled | Canceled by user 4 | CREATE INDEX idx ON "sift_base1k_UpperCase" USING lantern_hnsw (v) WITH (dim=128, M=6); | async_task_4 | Indexing Job | t | succeeded | -(4 rows) -- NOTE: the test finishes but the async index creation may still be in progress -- create non superuser and test the function @@ -207,14 +195,12 @@ NOTICE: Job scheduled with pg_cron name: 'async_task_5' async_task ------------ 5 -(1 row) SELECT lantern.async_task($$CREATE INDEX idx2 ON "sift_base1k_UpperCase" USING lantern_hnsw (v) WITH (dim=128, M=6);$$, 'Indexing Job'); NOTICE: Job scheduled with pg_cron name: 'async_task_6' async_task ------------ 6 -(1 row) -- this should fail since test_user does not have permission to drop the table -- sql line for do not stop on error @@ -223,7 +209,6 @@ NOTICE: Job scheduled with pg_cron name: 'async_task_7' async_task ------------ 7 -(1 row) -- lantern.tasks jobid is distinct and independent from cron.jobid, even though they may often overlap -- make sure everything works even when they are out of sync @@ -231,20 +216,17 @@ SELECT nextval('lantern.tasks_jobid_seq'); nextval --------- 8 -(1 row) SELECT lantern.async_task($$SELECT 42$$, 'Life'); NOTICE: Job scheduled with pg_cron name: 'async_task_9' async_task ------------ 9 -(1 row) SELECT pg_sleep(4); pg_sleep ---------- -(1 row) SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done, status, error_message FROM lantern.tasks ORDER BY jobid; jobid | query | pg_cron_job_name | job_name | is_done | status | 
error_message @@ -255,5 +237,4 @@ SELECT jobid, query, pg_cron_job_name, job_name, duration IS NOT NULL AS is_done 7 | DROP TABLE "sift_base1k_UpperCase"; | async_task_7 | Dropping Table Job | t | failed | ERROR: must be owner of table sift_base1k_UpperCase+ | | | | | | 9 | SELECT 42 | async_task_9 | Life | t | succeeded | -(4 rows) diff --git a/lantern_hnsw/test/expected/ext_relocation.out b/lantern_hnsw/test/expected/ext_relocation.out index c15cd8251..dccc44dd0 100644 --- a/lantern_hnsw/test/expected/ext_relocation.out +++ b/lantern_hnsw/test/expected/ext_relocation.out @@ -63,7 +63,6 @@ ORDER BY 1, 3, 2; schema1 | ldb_pqvec_send | schema1 schema1 | quantize_table | schema1 schema1 | quantize_vector | schema1 -(29 rows) -- show all the extension operators SELECT ne.nspname AS extschema, op.oprname, np.nspname AS proschema @@ -81,7 +80,6 @@ ORDER BY 1, 3; schema1 | <-> | schema1 schema1 | <=> | schema1 schema1 | <+> | schema1 -(5 rows) SET search_path TO public, schema1; -- extension function is accessible @@ -89,7 +87,6 @@ SELECT l2sq_dist(ARRAY[1.0, 2.0, 3.0], ARRAY[4.0, 5.0, 6.0]); l2sq_dist ----------- 27 -(1 row) CREATE INDEX hnsw_index ON small_world USING lantern_hnsw(v) WITH (dim=3); INFO: done init usearch index @@ -101,7 +98,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) \set ON_ERROR_STOP off -- lantern does not support relocation. @@ -127,7 +123,6 @@ ORDER BY 1, 3; schema1 | <-> | schema1 schema1 | <=> | schema1 schema1 | <+> | schema1 -(5 rows) SET search_path TO public, schema2; --extension access method is still accessible since access methods are not schema-qualified @@ -141,7 +136,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) \set ON_ERROR_STOP off -- extension function cannot be found without schema-qualification @@ -152,5 +146,4 @@ SELECT schema1.l2sq_dist(ARRAY[1.0, 2.0, 3.0], ARRAY[4.0, 5.0, 6.0]); l2sq_dist ----------- 27 -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_concurrent.out b/lantern_hnsw/test/expected/hnsw_concurrent.out index a47d80c70..ab1673048 100644 --- a/lantern_hnsw/test/expected/hnsw_concurrent.out +++ b/lantern_hnsw/test/expected/hnsw_concurrent.out @@ -15,7 +15,6 @@ SELECT id, ROUND((v <-> :'v444')::numeric, 2) FROM sift_base1k ORDER BY v <-> :' 557 | 91664.00 62 | 93497.00 58 | 93637.00 -(6 rows) CREATE INDEX to_be_reindexed ON sift_base1k USING lantern_hnsw (v) WITH (dim=128, M=8); INFO: done init usearch index @@ -25,7 +24,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid -----------------+--------+--------------------------------------------------------------------------------------------+------------ to_be_reindexed | 680 kB | CREATE INDEX to_be_reindexed ON sift_base1k USING lantern_hnsw (v) WITH (dim='128', m='8') | t -(1 row) SELECT _lantern_internal.validate_index('to_be_reindexed', false); INFO: validate_index() start for to_be_reindexed @@ -33,7 +31,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) REINDEX INDEX CONCURRENTLY to_be_reindexed; INFO: done init usearch index @@ -46,13 +43,11 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid -----------------+--------+--------------------------------------------------------------------------------------------+------------ to_be_reindexed | 680 kB | CREATE INDEX to_be_reindexed ON sift_base1k USING lantern_hnsw (v) WITH (dim='128', m='8') | t -(1 row) set enable_seqscan=FALSE; -- 6 closest vectors to the vector with ID 444. 
note all the duplicate results because of bad handling of REINDEX @@ -65,5 +60,4 @@ SELECT id, ROUND((v <-> :'v444')::numeric, 2) FROM sift_base1k ORDER BY v <-> :' 557 | 91664.00 62 | 93497.00 58 | 93637.00 -(6 rows) diff --git a/lantern_hnsw/test/expected/hnsw_config.out b/lantern_hnsw/test/expected/hnsw_config.out index fb8a80d6f..61592413b 100644 --- a/lantern_hnsw/test/expected/hnsw_config.out +++ b/lantern_hnsw/test/expected/hnsw_config.out @@ -27,14 +27,12 @@ SELECT * FROM ldb_get_indexes('small_world'); indexname | size | indexdef | indisvalid -------------------+-------+-------------------------------------------------------------------------------------+------------ small_world_v_idx | 16 kB | CREATE INDEX small_world_v_idx ON small_world USING lantern_hnsw (v) WITH (dim='3') | t -(1 row) -- Verify that lantern_hnsw.init_k exists after index creation SHOW lantern_hnsw.init_k; lantern_hnsw.init_k --------------------- 10 -(1 row) -- Modify lantern_hnsw.init_k and verify that it was modified SET lantern_hnsw.init_k = 45; @@ -42,7 +40,6 @@ SHOW lantern_hnsw.init_k; lantern_hnsw.init_k --------------------- 45 -(1 row) -- Reset all parameters and verify that lantern_hnsw.init_k was reset RESET ALL; @@ -50,7 +47,6 @@ SHOW lantern_hnsw.init_k; lantern_hnsw.init_k --------------------- 10 -(1 row) -- Validate the index data structures SELECT _lantern_internal.validate_index('small_world_v_idx', false); @@ -59,5 +55,4 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_correct.out b/lantern_hnsw/test/expected/hnsw_correct.out index 32bf7a07b..cc428ac2a 100644 --- a/lantern_hnsw/test/expected/hnsw_correct.out +++ b/lantern_hnsw/test/expected/hnsw_correct.out @@ -46,7 +46,6 @@ WHERE a.id != b.id; row_num | id_with_index | id_without_index | dist_with_index | dist_without_index ---------+---------------+------------------+-----------------+-------------------- -(0 rows) -- Validate the index data structures SELECT _lantern_internal.validate_index('small_world_v_idx', false); @@ -55,5 +54,4 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_cost_estimate.out b/lantern_hnsw/test/expected/hnsw_cost_estimate.out index b0ed80099..d026035c8 100644 --- a/lantern_hnsw/test/expected/hnsw_cost_estimate.out +++ b/lantern_hnsw/test/expected/hnsw_cost_estimate.out @@ -74,7 +74,6 @@ DEBUG: LANTERN - --------------------- is_cost_estimate_within_error ------------------------------- t -(1 row) SELECT _lantern_internal.validate_index('empty_idx', false); INFO: validate_index() start for empty_idx @@ -82,7 +81,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) DROP INDEX empty_idx; -- Case 1, more data in index. @@ -102,7 +100,6 @@ DEBUG: LANTERN - --------------------- is_cost_estimate_within_error ------------------------------- t -(1 row) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx @@ -110,7 +107,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) DROP INDEX hnsw_idx; -- Case 2, higher M. 
@@ -130,7 +126,6 @@ DEBUG: LANTERN - --------------------- is_cost_estimate_within_error ------------------------------- t -(1 row) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx @@ -138,7 +133,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) DROP INDEX hnsw_idx; -- Case 3, higher ef. @@ -158,7 +152,6 @@ DEBUG: LANTERN - --------------------- is_cost_estimate_within_error ------------------------------- t -(1 row) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx @@ -166,7 +159,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) DROP INDEX hnsw_idx; -- Goal: Test cost estimation when number of pages in index is likely less than number of blockmaps allocated @@ -191,7 +183,6 @@ SELECT COUNT(*) FROM views_vec10k WHERE views < 100; count ------- 58 -(1 row) -- Create partial lantern index with (views < 100) filter CREATE INDEX hnsw_partial_views_100 ON views_vec10k USING lantern_hnsw (vec dist_l2sq_ops) WITH (M=8, dim=6) WHERE views < 100; @@ -212,7 +203,6 @@ DEBUG: LANTERN - --------------------- Limit -> Index Scan using hnsw_partial_views_100 on views_vec10k Order By: (vec <-> '{0,1,2,3,4,5}'::real[]) -(3 rows) -- Goal: Test that the index selectivity being calculated for partial indexes is correct -- note that these boundaries are selected so that mac num_pages and cost values align @@ -226,7 +216,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) CREATE INDEX hnsw_partial_views_2000 ON views_vec10k USING lantern_hnsw (vec dist_l2sq_ops) WITH (M=9, dim=6) WHERE views < 2000; INFO: done init usearch index @@ -238,7 +227,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) CREATE INDEX hnsw_partial_views_3000 ON views_vec10k USING lantern_hnsw (vec dist_l2sq_ops) WITH (M=9, dim=6) WHERE views < 3000; INFO: done init usearch index @@ -250,7 +238,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) CREATE INDEX hnsw_partial_views_4000 ON views_vec10k USING lantern_hnsw (vec dist_l2sq_ops) WITH (M=9, dim=6) WHERE views < 4000; INFO: done init usearch index @@ -262,7 +249,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) CREATE INDEX hnsw_partial_views_6000 ON views_vec10k USING lantern_hnsw (vec dist_l2sq_ops) WITH (M=9, dim=6) WHERE views < 6000; INFO: done init usearch index @@ -274,7 +260,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) CREATE INDEX hnsw_partial_views_8000 ON views_vec10k USING lantern_hnsw (vec dist_l2sq_ops) WITH (M=9, dim=6) WHERE views < 8000; INFO: done init usearch index @@ -286,7 +271,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- Trigger each partial index by using its exact filter in a filtered query -- Each indexSelectivity value for a partial index with the filter (views < N) should be around N/20000 @@ -341,5 +325,4 @@ DEBUG: LANTERN - --------------------- Limit -> Index Scan using hnsw_partial_views_1000 on views_vec10k Order By: (vec <-> '{0,1,2,3,4,5}'::real[]) -(3 rows) diff --git a/lantern_hnsw/test/expected/hnsw_create.out b/lantern_hnsw/test/expected/hnsw_create.out index 8ed1fab6e..f63986dd2 100644 --- a/lantern_hnsw/test/expected/hnsw_create.out +++ b/lantern_hnsw/test/expected/hnsw_create.out @@ -32,7 +32,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid -------------------+--------+----------------------------------------------------------------------------------------------+------------ sift_base1k_v_idx | 680 kB | CREATE INDEX sift_base1k_v_idx ON sift_base1k USING lantern_hnsw (v) WITH (dim='128', m='8') | t -(1 row) SELECT _lantern_internal.validate_index('sift_base1k_v_idx', false); INFO: validate_index() start for sift_base1k_v_idx @@ -40,7 +39,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Validate that creating a hamming index works CREATE TABLE sift_base1k_int as SELECT id, v::INT[] FROM sift_base1k; @@ -52,7 +50,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k_int'); indexname | size | indexdef | indisvalid -----------------------+--------+------------------------------------------------------------------------------------------------------------+------------ sift_base1k_int_v_idx | 680 kB | CREATE INDEX sift_base1k_int_v_idx ON sift_base1k_int USING lantern_hnsw (v dist_hamming_ops) WITH (m='8') | t -(1 row) SELECT _lantern_internal.validate_index('sift_base1k_int_v_idx', false); INFO: validate_index() start for sift_base1k_int_v_idx @@ -60,7 +57,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- Validate that index creation works with a larger number of vectors \ir utils/sift10k_array.sql @@ -81,7 +77,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444' LIMIT 1 Limit -> Index Scan using hnsw_idx on sift_base10k Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) -(3 rows) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx @@ -89,7 +84,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) --- Validate that M values inside the allowed range [2, 128] do not throw an error CREATE INDEX ON small_world USING lantern_hnsw (v) WITH (M=2); @@ -141,7 +135,6 @@ SELECT * FROM ldb_get_indexes('small_world4'); indexname | size | indexdef | indisvalid -----------------------+-------+----------------------------------------------------------------------------------------------------------------------------+------------ small_world4_hnsw_idx | 16 kB | CREATE INDEX small_world4_hnsw_idx ON small_world4 USING lantern_hnsw (vector) WITH (m='14', ef='22', ef_construction='2') | t -(1 row) -- the index will not allow changing the dimension of a vector element \set ON_ERROR_STOP off @@ -160,7 +153,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- without the index, I can change the dimension of a vector element DROP INDEX small_world4_hnsw_idx; diff --git a/lantern_hnsw/test/expected/hnsw_create_expr.out b/lantern_hnsw/test/expected/hnsw_create_expr.out index 168ccfe36..b3e0ffb7f 100644 --- a/lantern_hnsw/test/expected/hnsw_create_expr.out +++ b/lantern_hnsw/test/expected/hnsw_create_expr.out @@ -76,7 +76,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) \set ON_ERROR_STOP off -- This should result in an error that dimensions does not match @@ -94,5 +93,4 @@ SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) '{0,0, ---- 0 1 -(2 rows) diff --git a/lantern_hnsw/test/expected/hnsw_create_unlogged.out b/lantern_hnsw/test/expected/hnsw_create_unlogged.out index 3c62b2788..b556fd46a 100644 --- a/lantern_hnsw/test/expected/hnsw_create_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_create_unlogged.out @@ -32,7 +32,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid -------------------+--------+----------------------------------------------------------------------------------------------+------------ sift_base1k_v_idx | 680 kB | CREATE INDEX sift_base1k_v_idx ON sift_base1k USING lantern_hnsw (v) WITH (dim='128', m='8') | t -(1 row) SELECT _lantern_internal.validate_index('sift_base1k_v_idx', false); INFO: validate_index() start for sift_base1k_v_idx @@ -40,7 +39,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- Validate that index creation works with a larger number of vectors \ir utils/sift10k_array_unlogged.sql @@ -61,7 +59,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444' LIMIT 1 Limit -> Index Scan using hnsw_idx on sift_base10k Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) -(3 rows) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx @@ -69,7 +66,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) --- Validate that M values inside the allowed range [2, 128] do not throw an error CREATE INDEX ON small_world USING lantern_hnsw (v) WITH (M=2); @@ -121,7 +117,6 @@ SELECT * FROM ldb_get_indexes('small_world4'); indexname | size | indexdef | indisvalid -----------------------+-------+----------------------------------------------------------------------------------------------------------------------------+------------ small_world4_hnsw_idx | 16 kB | CREATE INDEX small_world4_hnsw_idx ON small_world4 USING lantern_hnsw (vector) WITH (m='14', ef='22', ef_construction='2') | t -(1 row) -- the index will not allow changing the dimension of a vector element \set ON_ERROR_STOP off @@ -140,7 +135,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- without the index, I can change the dimension of a vector element DROP INDEX small_world4_hnsw_idx; diff --git a/lantern_hnsw/test/expected/hnsw_delete.out b/lantern_hnsw/test/expected/hnsw_delete.out index e03105fb0..5f467a86d 100644 --- a/lantern_hnsw/test/expected/hnsw_delete.out +++ b/lantern_hnsw/test/expected/hnsw_delete.out @@ -24,7 +24,6 @@ SELECT * FROM small_world; 101 | f | {1,0,1} 110 | f | {1,1,0} 111 | t | {1,1,1} -(8 rows) CREATE INDEX ON small_world USING lantern_hnsw (v) WITH (M=128) WHERE b = FALSE; INFO: done init usearch index @@ -37,14 +36,12 @@ SELECT * FROM small_world WHERE b = FALSE order by v <-> '{1,0,0}' LIMIT 3; 100 | f | {1,0,0} 110 | f | {1,1,0} 101 | f | {1,0,1} -(3 rows) DELETE FROM small_world WHERE v <> '{1,0,0}'; SELECT * FROM small_world WHERE b = FALSE order by v <-> '{1,0,0}' LIMIT 3; id | b | v -----+---+--------- 100 | f | {1,0,0} -(1 row) VACUUM small_world; WARNING: LanternDB: hnsw index deletes are currently not implemented. This is a no-op. 
No memory will be reclaimed @@ -56,5 +53,4 @@ SELECT * FROM small_world WHERE b = FALSE order by v <-> '{1,0,0}' LIMIT 3; id | b | v -----+---+--------- 100 | f | {1,0,0} -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_dist_func.out b/lantern_hnsw/test/expected/hnsw_dist_func.out index 3414a212d..04f17ae91 100644 --- a/lantern_hnsw/test/expected/hnsw_dist_func.out +++ b/lantern_hnsw/test/expected/hnsw_dist_func.out @@ -48,7 +48,6 @@ SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v 2.00 2.00 3.00 -(8 rows) SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}'; round @@ -61,7 +60,6 @@ SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v 1.00 1.00 1.00 -(8 rows) SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}'; round @@ -74,7 +72,6 @@ SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER 2.00 2.00 3.00 -(8 rows) -- Verify that the distance functions work (check IDs) SELECT ARRAY_AGG(id ORDER BY id), ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 GROUP BY 2 ORDER BY 2; @@ -84,7 +81,6 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FRO {000,011,110} | 1.00 {001,100,111} | 2.00 {101} | 3.00 -(4 rows) SELECT ARRAY_AGG(id ORDER BY id), ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos GROUP BY 2 ORDER BY 2; array_agg | round @@ -93,7 +89,6 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM {011,110} | 0.29 {111} | 0.42 {000,001,100,101} | 1.00 -(4 rows) SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham GROUP BY 2 ORDER BY 2; array_agg | round @@ -102,7 +97,6 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) {000,011,110} | 1.00 {001,100,111} | 2.00 {101} | 3.00 -(4 rows) -- Verify that the indexes is being used EXPLAIN (COSTS 
false) SELECT id FROM small_world_l2 ORDER BY v '{0,1,0}'; @@ -110,21 +104,18 @@ EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v '{0,1,0}'; --------------------------------------------------------- Index Scan using small_world_l2_v_idx on small_world_l2 Order By: (v '{0,1,0}'::real[]) -(2 rows) EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v '{0,1,0}'; QUERY PLAN ----------------------------------------------------------- Index Scan using small_world_cos_v_idx on small_world_cos Order By: (v '{0,1,0}'::real[]) -(2 rows) EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v '{0,1,0}'; QUERY PLAN ----------------------------------------------------------- Index Scan using small_world_ham_v_idx on small_world_ham Order By: (v '{0,1,0}'::integer[]) -(2 rows) \set ON_ERROR_STOP off -- Expect errors due to mismatching vector dimensions @@ -161,13 +152,11 @@ SELECT 0 + 1; ?column? ---------- 1 -(1 row) SELECT 1 FROM test1 WHERE id = 0 + 1; ?column? ---------- 1 -(1 row) \set ON_ERROR_STOP off -- Expect errors due to incorrect usage @@ -295,7 +284,6 @@ LIMIT 101 | {101,001,100,111} | {0,1,1,1} 110 | {110,010,100,111} | {0,1,1,1} 111 | {111,011,101,110} | {0,1,1,1} -(8 rows) -- Check that hamming distance query results are sorted correctly CREATE TABLE extra_small_world_ham ( @@ -314,7 +302,6 @@ SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham OR 2.00 2.00 4.00 -(4 rows) SELECT _lantern_internal.validate_index('small_world_l2_v_idx', false); INFO: validate_index() start for small_world_l2_v_idx @@ -322,7 +309,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_cos_v_idx', false); INFO: validate_index() start for small_world_cos_v_idx @@ -330,7 +316,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_ham_v_idx', false); INFO: validate_index() start for small_world_ham_v_idx @@ -338,7 +323,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('extra_small_world_ham_v_idx', false); INFO: validate_index() start for extra_small_world_ham_v_idx @@ -346,5 +330,4 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_extras.out b/lantern_hnsw/test/expected/hnsw_extras.out index ca1d4459b..d41825130 100644 --- a/lantern_hnsw/test/expected/hnsw_extras.out +++ b/lantern_hnsw/test/expected/hnsw_extras.out @@ -28,7 +28,6 @@ SELECT lantern_create_external_index('v', 'sift_base1k'); lantern_create_external_index ------------------------------- -(1 row) SELECT _lantern_internal.validate_index('sift_base1k_v_idx', false); INFO: validate_index() start for sift_base1k_v_idx @@ -36,7 +35,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -- Validate that using corresponding operator triggers index scan @@ -47,7 +45,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <-> :'v777' LIMIT 10 Limit -> Index Scan using sift_base1k_v_idx on sift_base1k Order By: (v <-> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) SET lantern.pgvector_compat=FALSE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10; @@ -56,7 +53,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10 Limit -> Index Scan using sift_base1k_v_idx on sift_base1k Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) SET lantern.pgvector_compat=TRUE; DROP INDEX sift_base1k_v_idx; @@ -65,7 +61,6 @@ SELECT lantern_create_external_index('v', 'sift_base1k', 'public', 'cos', 128, 1 lantern_create_external_index ------------------------------- -(1 row) SELECT _lantern_internal.validate_index('hnsw_cos_index', false); INFO: validate_index() start for hnsw_cos_index @@ -73,7 +68,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; @@ -82,7 +76,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10 Limit -> Index Scan using hnsw_cos_index on sift_base1k Order By: (v <=> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) SET lantern.pgvector_compat=FALSE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10; @@ -91,7 +84,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10 Limit -> Index Scan using hnsw_cos_index on sift_base1k Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) SET lantern.pgvector_compat=TRUE; -- -- Reindex external index @@ -99,7 +91,6 @@ SELECT lantern_reindex_external_index('hnsw_cos_index'); lantern_reindex_external_index -------------------------------- -(1 row) SELECT _lantern_internal.validate_index('hnsw_cos_index', false); INFO: validate_index() start for hnsw_cos_index @@ -107,7 +98,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- Validate that using corresponding operator triggers index scan SET lantern.pgvector_compat=TRUE; @@ -117,7 +107,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10 Limit -> Index Scan using hnsw_cos_index on sift_base1k Order By: (v <=> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) -- Create PQ Index SET client_min_messages=ERROR; @@ -135,13 +124,11 @@ INFO: Compressing vectors... quantize_table ---------------- -(1 row) SELECT lantern_create_external_index('v', 'sift_base1k', 'public', 'cos', 128, 10, 10, 10, true, 'hnsw_cos_index_pq'); lantern_create_external_index ------------------------------- -(1 row) SELECT _lantern_internal.validate_index('hnsw_cos_index_pq', false); INFO: validate_index() start for hnsw_cos_index_pq @@ -149,13 +136,11 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT lantern_reindex_external_index('hnsw_cos_index_pq'); lantern_reindex_external_index -------------------------------- -(1 row) SELECT _lantern_internal.validate_index('hnsw_cos_index_pq', false); INFO: validate_index() start for hnsw_cos_index_pq @@ -163,7 +148,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; @@ -172,5 +156,4 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10 Limit -> Index Scan using hnsw_cos_index_pq on sift_base1k Order By: (v <=> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) diff --git a/lantern_hnsw/test/expected/hnsw_index_from_file.out b/lantern_hnsw/test/expected/hnsw_index_from_file.out index 4e60ad9f3..c51fa83d8 100644 --- a/lantern_hnsw/test/expected/hnsw_index_from_file.out +++ b/lantern_hnsw/test/expected/hnsw_index_from_file.out @@ -44,13 +44,11 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid ---------------+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+------------ hnsw_l2_index | 776 kB | CREATE INDEX hnsw_l2_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2sq-0.3.0.usearch') | t -(1 row) SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; @@ -61,7 +59,6 @@ EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_ Limit -> Index Scan using hnsw_l2_index on sift_base1k Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; round @@ -76,7 +73,6 @@ SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v 130663.00 130863.00 132455.00 -(10 rows) -- Validate that inserting rows on index created from file works as expected INSERT INTO sift_base1k (id, v) VALUES @@ -96,7 +92,6 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v 249589.00 249647.00 249652.00 -(10 rows) -- Drop and recreate table DROP TABLE sift_base1k CASCADE; @@ -117,13 +112,11 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid ----------------+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+------------ hnsw_cos_index | 776 kB | CREATE INDEX hnsw_cos_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-cos-0.3.0.usearch') | t -(1 row) SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; @@ -132,7 +125,6 @@ EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_b Limit -> Index Scan using hnsw_cos_index on sift_base1k Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -(3 rows) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; round @@ -147,7 +139,6 @@ SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v : 0.25 0.25 0.26 -(10 rows) --- Test scenarious --- ----------------------------------------- @@ -173,7 +164,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- This should not throw error, but the first result will not be 0 as vector 777 is deleted from the table SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; @@ -189,7 +179,6 @@ SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v 130863.00 132455.00 132813.00 -(10 rows) -- Should throw error when lantern_extras is not installed \set ON_ERROR_STOP off diff --git a/lantern_hnsw/test/expected/hnsw_insert.out b/lantern_hnsw/test/expected/hnsw_insert.out index 5bdee5204..9463d8d7d 100644 --- a/lantern_hnsw/test/expected/hnsw_insert.out +++ b/lantern_hnsw/test/expected/hnsw_insert.out @@ -25,7 +25,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Insert rows with valid vector data INSERT INTO small_world (v) VALUES ('{0,0,1}'), ('{0,1,0}'); @@ -92,14 +91,12 @@ ORDER BY 2.00 3.00 6.00 -(9 rows) -- Ensure the index size remains consistent after inserts SELECT * from ldb_get_indexes('small_world'); indexname | size | indexdef | indisvalid -------------------+-------+-------------------------------------------------------------------------------------+------------ small_world_v_idx | 16 kB | CREATE INDEX small_world_v_idx ON small_world USING lantern_hnsw (v) WITH (dim='3') | t -(1 row) -- Ensure the query plan remains consistent after inserts EXPLAIN (COSTS FALSE) @@ -115,7 +112,6 @@ LIMIT 10; Limit -> Index Scan using small_world_v_idx on small_world Order By: (v '{0,0,0}'::real[]) -(3 rows) SELECT _lantern_internal.validate_index('small_world_v_idx', false); INFO: validate_index() start for small_world_v_idx @@ -123,7 +119,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- Test the index with a larger number of vectors CREATE TABLE sift_base10k ( @@ -141,7 +136,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Index Scan using hnsw_idx on sift_base10k Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) -(2 rows) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx @@ -149,5 +143,4 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out index 97ce44ddf..34c00e242 100644 --- a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out @@ -25,7 +25,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) -- Insert rows with valid vector data INSERT INTO small_world (v) VALUES ('{0,0,1}'), ('{0,1,0}'); @@ -92,14 +91,12 @@ ORDER BY 2.00 3.00 6.00 -(9 rows) -- Ensure the index size remains consistent after inserts SELECT * from ldb_get_indexes('small_world'); indexname | size | indexdef | indisvalid -------------------+-------+-------------------------------------------------------------------------------------+------------ small_world_v_idx | 16 kB | CREATE INDEX small_world_v_idx ON small_world USING lantern_hnsw (v) WITH (dim='3') | t -(1 row) -- Ensure the query plan remains consistent after inserts EXPLAIN (COSTS FALSE) @@ -115,7 +112,6 @@ LIMIT 10; Limit -> Index Scan using small_world_v_idx on small_world Order By: (v '{0,0,0}'::real[]) -(3 rows) SELECT _lantern_internal.validate_index('small_world_v_idx', false); INFO: validate_index() start for small_world_v_idx @@ -123,7 +119,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Test the index with a larger number of vectors CREATE UNLOGGED TABLE sift_base10k ( @@ -141,7 +136,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Index Scan using hnsw_idx on sift_base10k Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) -(2 
rows) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx @@ -149,5 +143,4 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_logged_unlogged.out b/lantern_hnsw/test/expected/hnsw_logged_unlogged.out index 4530f12f8..7c01989aa 100644 --- a/lantern_hnsw/test/expected/hnsw_logged_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_logged_unlogged.out @@ -28,7 +28,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Query SET enable_seqscan = false; @@ -43,7 +42,6 @@ SELECT id, l2sq_dist(vector, '{0, 0, 0, 0}'), vector FROM small_world ORDER BY 101 | 6 | {1,2,0,1} 110 | 7 | {1,2,1,1} 111 | 12 | {2,2,2,0} -(8 rows) -- Switch table to be unlogged ALTER TABLE small_world SET UNLOGGED; @@ -62,7 +60,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_idx2', false); INFO: validate_index() start for small_world_idx2 @@ -70,7 +67,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Insert INSERT INTO small_world (id, vector) VALUES ('002', '{0,3,1,1}'); @@ -87,7 +83,6 @@ SELECT id, l2sq_dist(vector, '{0, 0, 0, 0}'), vector FROM small_world ORDER BY 110 | 7 | {1,2,1,1} 002 | 11 | {0,3,1,1} 111 | 12 | {2,2,2,0} -(9 rows) -- Switch table to be logged again ALTER TABLE small_world SET LOGGED; @@ -109,7 +104,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_idx2', false); INFO: validate_index() start for small_world_idx2 @@ -117,7 +111,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_idx3', false); INFO: validate_index() start for small_world_idx3 @@ -125,7 +118,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Insert INSERT INTO small_world (id, vector) VALUES ('020', '{0,0,4,0}'); @@ -143,7 +135,6 @@ SELECT id, l2sq_dist(vector, '{0, 0, 0, 0}'), vector FROM small_world ORDER BY 002 | 11 | {0,3,1,1} 111 | 12 | {2,2,2,0} 020 | 16 | {0,0,4,0} -(10 rows) -- -------------------------- -- Start with unlogged table @@ -175,7 +166,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Query SET enable_seqscan = false; @@ -190,7 +180,6 @@ SELECT id, l2sq_dist(vector, '{0, 0, 0, 0}'), vector FROM small_world ORDER BY v 101 | 6 | {1,2,0,1} 110 | 7 | {1,2,1,1} 111 | 12 | {2,2,2,0} -(8 rows) -- Switch table to be logged ALTER TABLE small_world SET LOGGED; @@ -209,7 +198,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_idx2', false); INFO: validate_index() start for small_world_idx2 @@ -217,7 +205,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Insert INSERT INTO small_world (id, vector) VALUES ('002', '{0,3,1,1}'); @@ -234,7 +221,6 @@ SELECT id, l2sq_dist(vector, '{0, 0, 0, 0}'), vector FROM small_world ORDER BY v 110 | 7 | {1,2,1,1} 002 | 11 | {0,3,1,1} 111 | 12 | {2,2,2,0} -(9 rows) -- Switch table to be unlogged again ALTER TABLE small_world SET UNLOGGED; @@ -256,7 +242,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_idx2', false); INFO: validate_index() start for small_world_idx2 @@ -264,7 +249,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('small_world_idx3', false); INFO: validate_index() start for small_world_idx3 @@ -272,7 +256,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- Insert INSERT INTO small_world (id, vector) VALUES ('020', '{0,0,4,0}'); @@ -290,5 +273,4 @@ SELECT id, l2sq_dist(vector, '{0, 0, 0, 0}'), vector FROM small_world ORDER BY v 002 | 11 | {0,3,1,1} 111 | 12 | {2,2,2,0} 020 | 16 | {0,0,4,0} -(10 rows) diff --git a/lantern_hnsw/test/expected/hnsw_operators.out b/lantern_hnsw/test/expected/hnsw_operators.out index acc95be8c..3ac3411ac 100644 --- a/lantern_hnsw/test/expected/hnsw_operators.out +++ b/lantern_hnsw/test/expected/hnsw_operators.out @@ -12,7 +12,6 @@ SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; --------- {1,1,1} {0,0,0} -(2 rows) \set ON_ERROR_STOP off SET lantern.pgvector_compat=TRUE; @@ -25,7 +24,6 @@ SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; --------- {1,1,1} {0,0,0} -(2 rows) -- should not throw error SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; @@ -33,7 +31,6 @@ SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; --------- {1,1,1} {0,0,0} -(2 rows) -- should not throw error SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; @@ -41,7 +38,6 @@ SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; ---------- 0 3 -(2 rows) SET lantern.pgvector_compat=FALSE; SET enable_seqscan=OFF; @@ -52,56 +48,47 @@ SELECT ARRAY[0,0,0] <-> ARRAY[2,3,-4]; ?column? ---------- 29 -(1 row) -- with float arrays: SELECT ARRAY[0,0,0] <-> ARRAY[2,3,-4]::real[]; ?column? ---------- 29 -(1 row) SELECT ARRAY[0,0,0]::real[] <-> ARRAY[2,3,-4]::real[]; ?column? ---------- 29 -(1 row) SELECT '{1,0,1}' <-> '{0,1,0}'::integer[]; ?column? ---------- 3 -(1 row) SELECT '{1,0,1}' <=> '{0,1,0}'::integer[]; ?column? 
---------- 1 -(1 row) SELECT ROUND(num::NUMERIC, 2) FROM (SELECT '{1,1,1}' <=> '{0,1,0}'::INTEGER[] AS num) _sub; round ------- 0.42 -(1 row) SELECT ARRAY[.1,0,0] <=> ARRAY[0,.5,0]; ?column? ---------- 1 -(1 row) SELECT cos_dist(ARRAY[.1,0,0]::real[], ARRAY[0,.5,0]::real[]); cos_dist ---------- 1 -(1 row) SELECT ARRAY[1,0,0] <+> ARRAY[0,1,0]; ?column? ---------- 2 -(1 row) -- NOW THIS IS TRIGGERING INDEX SCAN AS WELL -- BECAUSE WE ARE REGISTERING FOR ALL OPERATOR CLASSES @@ -111,7 +98,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; --------------------------------------- Index Scan using cos_idx on op_test Order By: (v '{1,1,1}'::real[]) -(2 rows) -- should sort with index EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; @@ -119,7 +105,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; --------------------------------------- Index Scan using cos_idx on op_test Order By: (v <=> '{1,1,1}'::real[]) -(2 rows) -- should sort without index EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; @@ -128,7 +113,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1, Sort Sort Key: (((v)::integer[] <+> '{1,1,1}'::integer[])) -> Seq Scan on op_test -(3 rows) -- should not throw error \set ON_ERROR_STOP on @@ -137,7 +121,6 @@ SELECT v <=> ARRAY[1,1,1] FROM op_test ORDER BY v <=> ARRAY[1,1,1]; ---------- 0 1 -(2 rows) -- should not throw error SELECT v::INTEGER[] <+> ARRAY[1,1,1] FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; @@ -145,7 +128,6 @@ SELECT v::INTEGER[] <+> ARRAY[1,1,1] FROM op_test ORDER BY v::INTEGER[] <+> ARRA ---------- 0 3 -(2 rows) -- should not throw error SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; @@ -153,7 +135,6 @@ SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; ---------- 0 3 -(2 rows) RESET ALL; -- Set false twice to verify that no crash is happening @@ -165,7 +146,6 @@ 
SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; --------- {1,1,1} {0,0,0} -(2 rows) SET enable_seqscan=OFF; CREATE INDEX hamming_idx ON op_test USING lantern_hnsw(cast(v as INTEGER[]) dist_hamming_ops); @@ -178,7 +158,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; --------------------------------------- Index Scan using cos_idx on op_test Order By: (v <=> '{1,1,1}'::real[]) -(2 rows) -- should sort with hamming_idx index EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; @@ -186,5 +165,4 @@ EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1, ------------------------------------------------------- Index Scan using hamming_idx on op_test Order By: ((v)::integer[] <+> '{1,1,1}'::integer[]) -(2 rows) diff --git a/lantern_hnsw/test/expected/hnsw_pq.out b/lantern_hnsw/test/expected/hnsw_pq.out index 846ef26e9..7ff94a63e 100644 --- a/lantern_hnsw/test/expected/hnsw_pq.out +++ b/lantern_hnsw/test/expected/hnsw_pq.out @@ -77,19 +77,16 @@ SELECT '{84,1,4,128,255}'::pqvec; pqvec ------------------ {84,1,4,128,255} -(1 row) SELECT '{84,1,4,128,255}'::pqvec::INT[]; int4 ------------------ {84,1,4,128,255} -(1 row) SELECT '{84,1,4,128,255}'::INT[]::pqvec; pqvec ------------------ {84,1,4,128,255} -(1 row) \set ON_ERROR_STOP off -- Test PQVec type @@ -124,19 +121,16 @@ SELECT array_length(:'codebook'::REAL[][][], 1); array_length -------------- 1 -(1 row) SELECT array_length(:'codebook'::REAL[][][], 2); array_length -------------- 1 -(1 row) SELECT array_length(:'codebook'::REAL[][][], 3); array_length -------------- 128 -(1 row) -- This should create codebook[1][10][128] SELECT _lantern_internal.create_pq_codebook('sift_base1k'::regclass, 'v', 10, 1, 'l2sq', 0) as codebook \gset @@ -147,19 +141,16 @@ SELECT array_length(:'codebook'::REAL[][][], 1); array_length -------------- 1 -(1 row) SELECT array_length(:'codebook'::REAL[][][], 2); array_length -------------- 10 -(1 row) SELECT 
array_length(:'codebook'::REAL[][][], 3); array_length -------------- 128 -(1 row) -- This should create codebook[32][10][4] SELECT _lantern_internal.create_pq_codebook('sift_base1k'::regclass, 'v', 10, 32, 'l2sq', 0) as codebook \gset @@ -170,19 +161,16 @@ SELECT array_length(:'codebook'::REAL[][][], 1); array_length -------------- 32 -(1 row) SELECT array_length(:'codebook'::REAL[][][], 2); array_length -------------- 10 -(1 row) SELECT array_length(:'codebook'::REAL[][][], 3); array_length -------------- 4 -(1 row) -- This should create codebook _lantern_internal.pq_sift_base1k_v and add v_pq column in sift_base1k table with compressed vectors -- The codebook will be codebook[32][50][4], so in the table there should be 32 distinct subvector ids each with 50 centroid ids @@ -194,31 +182,26 @@ INFO: Compressing vectors... quantize_table ---------------- -(1 row) SELECT COUNT(DISTINCT subvector_id) FROM _lantern_internal.pq_sift_base1k_v; count ------- 32 -(1 row) SELECT COUNT(DISTINCT centroid_id) FROM _lantern_internal.pq_sift_base1k_v; count ------- 50 -(1 row) SELECT COUNT(*) FROM _lantern_internal.pq_sift_base1k_v; count ------- 1600 -(1 row) SELECT array_length(c, 1) FROM _lantern_internal.pq_sift_base1k_v LIMIT 1; array_length -------------- 4 -(1 row) -- Validate that table is readonly \set ON_ERROR_STOP off @@ -246,13 +229,11 @@ SELECT dequantize_vector(:'v1_pq', '_lantern_internal.pq_sift_base1k_v'::regclas ?column? 
---------- 0 -(1 row) SELECT l2sq_dist(:'decompressed_1', :'decompressed_2'); l2sq_dist ----------- 0 -(1 row) -- Test recall for quantized vs non quantized vectors ALTER TABLE sift_base1k ADD COLUMN v_pq_dec REAL[]; @@ -264,14 +245,12 @@ SELECT :'recall_diff' < 0.2 as recall_diff_meets_threshold; recall_diff_meets_threshold ----------------------------- t -(1 row) -- Verify that column triggers for insert and update are working correctly INSERT INTO sift_base1k(id, v) VALUES (1001, random_array(128, 0.0, 5.0)); SELECT id FROM sift_base1k WHERE v_pq IS NULL; id ---- -(0 rows) SELECT v_pq::TEXT as old_pq FROM sift_base1k WHERE id=1001 \gset UPDATE sift_base1k SET v=(SELECT v FROM sift_base1k WHERE id=1) WHERE id=1001; @@ -280,20 +259,17 @@ SELECT :'old_pq' <> :'new_pq' as is_updated; is_updated ------------ t -(1 row) SELECT :'new_pq' = (SELECT v_pq::TEXT FROM sift_base1k WHERE id=1) as is_updated; is_updated ------------ t -(1 row) -- Verify that compressed column size is smaller than regular integer SELECT pg_column_size(v_pq) as compressed_size, pg_column_size(v_pq::int[]) as int_size FROM sift_base1k LIMIT 1; compressed_size | int_size -----------------+---------- 37 | 152 -(1 row) -- Verify that table can have multiple quantized vectors SELECT quantize_table('sift_base1k'::regclass, 'v_pq_dec', 10, 32, 'l2sq'); @@ -304,44 +280,37 @@ INFO: Compressing vectors... 
quantize_table ---------------- -(1 row) SELECT COUNT(DISTINCT subvector_id) FROM _lantern_internal.pq_sift_base1k_v_pq_dec; count ------- 32 -(1 row) SELECT COUNT(DISTINCT centroid_id) FROM _lantern_internal.pq_sift_base1k_v_pq_dec; count ------- 10 -(1 row) SELECT COUNT(*) FROM _lantern_internal.pq_sift_base1k_v_pq_dec; count ------- 320 -(1 row) SELECT array_length(c, 1) FROM _lantern_internal.pq_sift_base1k_v_pq_dec LIMIT 1; array_length -------------- 4 -(1 row) -- Test that resources are being cleared correctly SELECT drop_quantization('sift_base1k'::regclass, 'v'); drop_quantization ------------------- -(1 row) SELECT drop_quantization('sift_base1k'::regclass, 'v_pq_dec'); drop_quantization ------------------- -(1 row) SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = 'sift_base1k'; column_name @@ -349,12 +318,10 @@ SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' id v v_pq_dec -(3 rows) SELECT table_name FROM information_schema.tables WHERE table_schema = '_lantern_internal'; table_name ------------ -(0 rows) -- Test quantization over subset of data SELECT quantize_table('sift_base1k'::regclass, 'v', 10, 32, 'l2sq', 500); @@ -365,31 +332,26 @@ INFO: Compressing vectors... quantize_table ---------------- -(1 row) SELECT COUNT(DISTINCT subvector_id) FROM _lantern_internal.pq_sift_base1k_v; count ------- 32 -(1 row) SELECT COUNT(DISTINCT centroid_id) FROM _lantern_internal.pq_sift_base1k_v; count ------- 10 -(1 row) SELECT COUNT(*) FROM _lantern_internal.pq_sift_base1k_v; count ------- 320 -(1 row) SELECT array_length(c, 1) FROM _lantern_internal.pq_sift_base1k_v LIMIT 1; array_length -------------- 4 -(1 row) -- Test quantization with mixed case and schema qualified table name SELECT id, v AS "v_New" into "sift_Base1k_NEW" FROM sift_base1k; @@ -401,7 +363,6 @@ INFO: Compressing vectors... 
quantize_table ---------------- -(1 row) SELECT array_length( dequantize_vector( @@ -415,11 +376,9 @@ SELECT array_length( array_length -------------- 128 -(1 row) SELECT drop_quantization('"sift_Base1k_NEW"'::regclass, 'v_New'); drop_quantization ------------------- -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_pq_index.out b/lantern_hnsw/test/expected/hnsw_pq_index.out index a3be20de6..651c42abf 100644 --- a/lantern_hnsw/test/expected/hnsw_pq_index.out +++ b/lantern_hnsw/test/expected/hnsw_pq_index.out @@ -103,19 +103,16 @@ INFO: Compressing vectors... quantize_table ---------------- -(1 row) SELECT COUNT(DISTINCT subvector_id) FROM _lantern_internal.pq_small_world_pq_v; count ------- 4 -(1 row) SELECT COUNT(DISTINCT centroid_id) FROM _lantern_internal.pq_small_world_pq_v; count ------- 10 -(1 row) ALTER TABLE small_world_pq ADD COLUMN v_pq_dec REAL[]; UPDATE small_world_pq SET v_pq_dec=dequantize_vector(v_pq, '_lantern_internal.pq_small_world_pq_v'); @@ -132,19 +129,16 @@ EXPLAIN (COSTS FALSE) SELECT id, v, v_pq, v_pq_dec FROM small_world_pq ORDER BY Limit -> Index Scan using hnsw_l2_index on small_world_pq Order By: (v <-> '{0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4}'::real[]) -(3 rows) SELECT id FROM small_world_pq ORDER BY v <-> :'v4' LIMIT 1; id ---- 4 -(1 row) SELECT * FROM ldb_get_indexes('small_world_pq'); indexname | size | indexdef | indisvalid ---------------+-------+---------------------------------------------------------------------------------------+------------ hnsw_l2_index | 16 kB | CREATE INDEX hnsw_l2_index ON small_world_pq USING lantern_hnsw (v) WITH (pq='false') | t -(1 row) DROP INDEX hnsw_l2_index; -- index with pq @@ -158,7 +152,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) EXPLAIN (COSTS FALSE) SELECT id, v, v_pq, v_pq_dec, (v <-> :'v4') as dist, (v_pq_dec <-> :'v4') real_dist FROM small_world_pq ORDER BY dist LIMIT 1; QUERY PLAN @@ -166,13 +159,11 @@ EXPLAIN (COSTS FALSE) SELECT id, v, v_pq, v_pq_dec, (v <-> :'v4') as dist, (v_pq Limit -> Index Scan using hnsw_pq_l2_index on small_world_pq Order By: (v <-> '{0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4}'::real[]) -(3 rows) SELECT id FROM small_world_pq ORDER BY v <-> :'v4' LIMIT 1; id ---- 4 -(1 row) ALTER TABLE small_world_pq DROP COLUMN v_pq; ALTER TABLE small_world_pq DROP COLUMN v_pq_dec; @@ -186,7 +177,6 @@ INFO: Compressing vectors... quantize_table ---------------- -(1 row) ALTER TABLE small_world_pq ADD COLUMN v_pq_dec REAL[]; -- GENERATED ALWAYS AS (dequantize_vector("v_pq", '_lanternpq_small_world_pq')) STORED; -- << cannot do because genrated columns cannot refer to other generated columns UPDATE small_world_pq SET v_pq_dec=dequantize_vector(v_pq, '_lantern_internal.pq_small_world_pq_v'); @@ -200,7 +190,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) EXPLAIN (COSTS FALSE) SELECT id, v, v_pq, v_pq_dec, (v <-> :'v4') as dist, (v_pq_dec <-> :'v4') real_dist FROM small_world_pq ORDER BY dist LIMIT 1; QUERY PLAN @@ -208,7 +197,6 @@ EXPLAIN (COSTS FALSE) SELECT id, v, v_pq, v_pq_dec, (v <-> :'v4') as dist, (v_pq Limit -> Index Scan using hnsw_pq_l2_index on small_world_pq Order By: (v <-> '{0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4}'::real[]) -(3 rows) -- add another entry with vector v4, and search for it again INSERT INTO small_world_pq(id, v) VALUES (42, :'v4'); @@ -227,7 +215,6 @@ INFO: Compressing vectors... 
quantize_table ---------------- -(1 row) ALTER TABLE small_world_pq ADD COLUMN v_pq_dec REAL[]; UPDATE small_world_pq SET v_pq_dec=dequantize_vector(v_pq, '_lantern_internal.pq_small_world_pq_v'); @@ -241,7 +228,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- we had inserted a value with id=42 and vector=:'v4' above, before making the table unlogged -- disable these since they are flaky, depending on the the quality of the codebook @@ -261,7 +247,6 @@ INFO: Compressing vectors... quantize_table ---------------- -(1 row) SELECT v as v1 FROM sift_base1k WHERE id=1 \gset SELECT v_pq as v1_pq FROM sift_base1k WHERE id=1 \gset @@ -296,7 +281,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT calculate_table_recall('sift_base1k', 'sift_query1k', 'sift_truth1k', 'v', 10, 100) as recall_pq_index \gset SELECT (:'recall_pq'::float - :'recall_pq_index'::float)::float as recall_diff \gset @@ -306,7 +290,6 @@ SELECT :recall_diff >= 0 AND :recall_diff <= 0.1 as recall_within_range; recall_within_range --------------------- t -(1 row) -- inserts SELECT v as v2 FROM sift_base1k WHERE id=2 \gset @@ -318,14 +301,12 @@ SELECT SUM(id1002::int) = 1 as contains_id_1002 FROM (SELECT id = 1002 as id1002 contains_id_1002 ------------------ t -(1 row) -- the top two results must be the vectors corresponding to v2 SELECT ARRAY_AGG(id ORDER BY id) FROM (SELECT id FROM sift_base1k ORDER BY v <-> :'v2' LIMIT 2) b; array_agg ----------- {2,1001} -(1 row) -- since codebook are generated each time and are non deterministic, we cannot print them in regression tests. -- run something like the following to view the results diff --git a/lantern_hnsw/test/expected/hnsw_select.out b/lantern_hnsw/test/expected/hnsw_select.out index 09dc1717f..2701ce4cf 100644 --- a/lantern_hnsw/test/expected/hnsw_select.out +++ b/lantern_hnsw/test/expected/hnsw_select.out @@ -46,13 +46,11 @@ SELECT 0 + 1; ?column? 
---------- 1 -(1 row) SELECT 1 FROM test1 WHERE id = 0 + 1; ?column? ---------- 1 -(1 row) -- Verify that the index is being used SET _lantern_internal.is_test = true; @@ -69,7 +67,6 @@ DEBUG: LANTERN - --------------------- Limit -> Index Scan using small_world_v_idx on small_world Order By: (v '{1,0,0}'::real[]) -(3 rows) -- Verify that this does not use the index EXPLAIN (COSTS FALSE) SELECT 1 FROM small_world WHERE v = '{0,0,0}'; @@ -77,7 +74,6 @@ EXPLAIN (COSTS FALSE) SELECT 1 FROM small_world WHERE v = '{0,0,0}'; ----------------------------------- Seq Scan on small_world Filter: (v = '{0,0,0}'::real[]) -(2 rows) -- Ensure we can query an index for more elements than the value of init_k WITH neighbors AS ( @@ -94,7 +90,6 @@ DEBUG: LANTERN querying index for 10 elements count ------- 3 -(1 row) WITH neighbors AS ( SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 @@ -110,7 +105,6 @@ DEBUG: LANTERN querying index for 10 elements count ------- 8 -(1 row) -- Change default k and make sure the number of usearch_searchs makes sense SET lantern_hnsw.init_k = 4; @@ -128,7 +122,6 @@ DEBUG: LANTERN querying index for 4 elements count ------- 3 -(1 row) WITH neighbors AS ( SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 @@ -146,7 +139,6 @@ DEBUG: LANTERN - querying index for 8 elements count ------- 8 -(1 row) RESET client_min_messages; SET _lantern_internal.is_test = false; @@ -155,56 +147,48 @@ SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER B has_index_scan ---------------- t -(1 row) -- Verify that the index is not being used when there is no order by SELECT NOT has_index_scan('EXPLAIN SELECT COUNT(*) FROM small_world'); ?column? 
---------- t -(1 row) -- Verify swapping order doesn't change anything and still uses index SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v'); has_index_scan ---------------- t -(1 row) -- Verify group by works and uses index SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); has_index_scan ---------------- t -(1 row) -- Validate distinct works and uses index SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); has_index_scan ---------------- t -(1 row) -- Validate join lateral works and uses index SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v t1.v LIMIT 1) t1_results ON TRUE'); has_index_scan ---------------- t -(1 row) -- Validate union works and uses index SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); has_index_scan ---------------- t -(1 row) -- Validate CTEs work and still use index SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); has_index_scan ---------------- t -(1 row) -- Validate is replaced with the matching function when an index is present set enable_seqscan = true; @@ -215,7 +199,6 @@ EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v '{1,1,1}'; Sort Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) -> Seq Scan on small_world -(3 rows) SELECT * from small_world ORDER BY v '{1,1,1}'; id | b | v @@ -228,7 +211,6 @@ SELECT * from small_world ORDER BY v '{1,1,1}'; 001 | t | {0,0,1} 010 | f | {0,1,0} 000 | t | {0,0,0} -(8 rows) begin; INSERT INTO test2 (v) VALUES ('{1,4}'); @@ -243,7 +225,6 @@ EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v '{1,4}'; Sort Sort Key: (cos_dist(v, '{1,4}'::real[])) -> Seq Scan on test2 -(3 
rows) -- Some additional cases that trigger operator rewriting -- SampleScan @@ -254,7 +235,6 @@ EXPLAIN (COSTS false) SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) -> Sample Scan on small_world Sampling: bernoulli ('20'::real) -(4 rows) -- can't compare direct equality here because it's random SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v ''{1,1,1}'' ASC', @@ -262,7 +242,6 @@ SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (2 results_match --------------- t -(1 row) -- SetOpt/HashSetOp EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v '{1,0,1}' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v '{1,1,1}' ASC LIMIT 5); @@ -279,14 +258,12 @@ EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v '{1,0,1}' ASC ) -> Sort Sort Key: (l2sq_dist(small_world_1.v, '{1,1,1}'::real[])) -> Seq Scan on small_world small_world_1 -(11 rows) SELECT results_match('(SELECT * FROM small_world ORDER BY v ''{1,0,1}'' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v ''{1,1,1}'' ASC LIMIT 5)', '(SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,0,1}'') ASC ) EXCEPT (SELECT * FROM small_world ORDER by l2sq_dist(v, ''{1,1,1}'') ASC LIMIT 5)'); results_match --------------- t -(1 row) -- HashAggregate EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v '{1,1,1}'; @@ -297,14 +274,12 @@ EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v -> HashAggregate Group Key: v -> Seq Scan on small_world -(5 rows) SELECT results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v ''{1,1,1}''', 'SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- t -(1 row) -- GroupBy this EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v '{1,1,1}'; @@ -315,14 +290,12 @@ EXPLAIN (COSTS false) SELECT * FROM 
small_world GROUP BY id, v, b ORDER BY v -> HashAggregate Group Key: id, v, b -> Seq Scan on small_world -(5 rows) SELECT results_match('SELECT * FROM small_world GROUP BY id, v, b ORDER BY v ''{1,1,1}''', 'SELECT * FROM small_world GROUP BY id, v, b ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- t -(1 row) -- HashJoin/Hash CREATE TABLE small_world_2 AS (SELECT * FROM small_world); @@ -336,14 +309,12 @@ EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORD -> Seq Scan on small_world_2 -> Hash -> Seq Scan on small_world -(7 rows) SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v ''{1,1,1}''', 'SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- t -(1 row) -- MixedAggregate (this doesn't require additional logic, but I include it here as an example of generating the path) EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v '{1,1,1}'; @@ -355,14 +326,12 @@ EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v Seq Scan on small_world -(6 rows) SELECT results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v ''{1,1,1}''', 'SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- t -(1 row) -- WindowAgg EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v '{1,1,1}'; @@ -372,14 +341,12 @@ EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) -> WindowAgg -> Seq Scan on small_world -(4 rows) SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v ''{1,1,1}''', 'SELECT v, EVERY(b) OVER () FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- t -(1 row) -- LockRows EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v '{1,1,1}' ASC FOR UPDATE; @@ -389,14 
+356,12 @@ EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v '{1,1,1}' ASC FOR -> Sort Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) -> Seq Scan on small_world -(4 rows) SELECT results_match('SELECT * FROM small_world ORDER BY v ''{1,1,1}'' ASC FOR UPDATE', 'SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC FOR UPDATE'); results_match --------------- t -(1 row) rollback; set enable_indexscan = true; @@ -434,7 +399,6 @@ SELECT fill_same(); fill_same ----------- -(1 row) CREATE INDEX hnsw_l2_index_repeat ON small_world_repeat USING lantern_hnsw(v); INFO: done init usearch index @@ -454,7 +418,6 @@ explain (costs false) select id, ARRAY_AGG(dist) as dists, count(id) as cnt from -> Limit -> Index Scan using hnsw_l2_index_repeat on small_world_repeat Order By: (v <-> '{0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4}'::real[]) -(8 rows) select case when s.cnt > 1 then 'incorrect' else 'correct' end from ( select id, ARRAY_AGG(dist) as dists, count(id) as cnt from (select id, (v <-> ARRAY[0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4]) as dist FROM small_world_repeat order by dist LIMIT 200) b group by id order by cnt DESC, dists, id limit 10 @@ -471,7 +434,6 @@ explain (costs false) select id, ARRAY_AGG(dist) as dists, count(id) as cnt from correct correct correct -(10 rows) set lantern_hnsw.init_k=200; select id, ARRAY_AGG(dist) as dists, count(id) as cnt from (select id, (v <-> ARRAY[0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4]) as dist FROM small_world_repeat order by dist LIMIT 200) b group by id order by cnt DESC, dists, id limit 10; @@ -487,7 +449,6 @@ set lantern_hnsw.init_k=200; 1007 | {0} | 1 1008 | {0} | 1 1009 | {0} | 1 -(10 rows) -- todo:: Verify joins work and still use index -- todo:: Verify incremental sorts work @@ -498,7 +459,6 @@ INFO: validate_index() done, no issues found. 
validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('sift_base1k_v_idx', false); INFO: validate_index() start for sift_base1k_v_idx @@ -506,7 +466,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) SELECT _lantern_internal.validate_index('test1_v_idx', false); INFO: validate_index() start for test1_v_idx @@ -514,5 +473,4 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) diff --git a/lantern_hnsw/test/expected/hnsw_sq.out b/lantern_hnsw/test/expected/hnsw_sq.out index f7ea4198d..67cc68d44 100644 --- a/lantern_hnsw/test/expected/hnsw_sq.out +++ b/lantern_hnsw/test/expected/hnsw_sq.out @@ -47,7 +47,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); -----------+--------+---------------------------------------------------------------------------------------------------+------------ ind16 | 400 kB | CREATE INDEX ind16 ON sift_base1k USING lantern_hnsw (v) WITH (dim='128', m='8', quant_bits='16') | t ind32 | 680 kB | CREATE INDEX ind32 ON sift_base1k USING lantern_hnsw (v) WITH (dim='128', m='8', quant_bits='32') | t -(2 rows) SELECT v as v42 from sift_base1k WHERE id = 42 \gset BEGIN; @@ -59,7 +58,6 @@ EXPLAIN (COSTS FALSE) SELECT id, ROUND((v <-> :'v42')::numeric, 1) as dist FROM Limit -> Index Scan using ind32 on sift_base1k Order By: (v <-> '{1,0,0,0,0,0,21,35,1,0,0,0,0,77,51,42,66,2,0,0,0,86,140,71,52,1,0,0,0,0,23,70,2,0,0,0,0,64,73,50,11,0,0,0,0,140,97,18,140,64,0,0,0,99,51,65,78,11,0,0,0,0,41,76,0,0,0,0,0,124,82,2,48,1,0,0,0,118,31,5,140,21,0,0,0,4,12,78,12,0,0,0,0,0,58,117,1,0,0,0,2,25,7,2,46,2,0,0,1,12,4,8,140,9,0,0,0,1,8,16,3,0,0,0,0,0,21,34}'::real[]) -(3 rows) SELECT id, ROUND((v <-> :'v42')::numeric, 1) as dist, l2sq_dist(v, :'v42') FROM sift_base1k ORDER BY v <-> :'v42' LIMIT 10; id | dist | l2sq_dist @@ -74,7 +72,6 @@ EXPLAIN (COSTS FALSE) SELECT id, ROUND((v <-> :'v42')::numeric, 1) as dist FROM 340 | 87261.0 | 87261 331 | 87796.0 | 87796 682 | 
94988.0 | 94988 -(10 rows) ROLLBACK; DROP INDEX ind32, ind16; @@ -94,7 +91,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid -----------+--------+-------------------------------------------------------------------------------------------------------------+------------ ind8 | 272 kB | CREATE INDEX ind8 ON sift_base1k USING lantern_hnsw (v_transformed) WITH (dim='128', m='8', quant_bits='8') | t -(1 row) EXPLAIN SELECT id, ROUND((v_transformed <-> :'v_transformed')::numeric, 1) as dist FROM sift_base1k ORDER BY v_transformed <-> :'v_transformed' LIMIT 10; QUERY PLAN @@ -102,7 +98,6 @@ EXPLAIN SELECT id, ROUND((v_transformed <-> :'v_transformed')::numeric, 1) as di Limit (cost=0.00..9.26 rows=10 width=40) -> Index Scan using ind8 on sift_base1k (cost=0.00..926.29 rows=1000 width=40) Order By: (v_transformed <-> '{-0.49,-0.5,-0.5,-0.5,-0.5,-0.5,-0.29,-0.15,-0.49,-0.5,-0.5,-0.5,-0.5,0.27,0.01,-0.08,0.16,-0.48,-0.5,-0.5,-0.5,0.36,0.9,0.21,0.02,-0.49,-0.5,-0.5,-0.5,-0.5,-0.27,0.2,-0.48,-0.5,-0.5,-0.5,-0.5,0.14,0.23,0,-0.39,-0.5,-0.5,-0.5,-0.5,0.9,0.47,-0.32,0.9,0.14,-0.5,-0.5,-0.5,0.49,0.01,0.15,0.28,-0.39,-0.5,-0.5,-0.5,-0.5,-0.09,0.26,-0.5,-0.5,-0.5,-0.5,-0.5,0.74,0.32,-0.48,-0.02,-0.49,-0.5,-0.5,-0.5,0.68,-0.19,-0.45,0.9,-0.29,-0.5,-0.5,-0.5,-0.46,-0.38,0.28,-0.38,-0.5,-0.5,-0.5,-0.5,-0.5,0.08,0.67,-0.49,-0.5,-0.5,-0.5,-0.48,-0.25,-0.43,-0.48,-0.04,-0.48,-0.5,-0.5,-0.49,-0.38,-0.46,-0.42,0.9,-0.41,-0.5,-0.5,-0.5,-0.49,-0.42,-0.34,-0.47,-0.5,-0.5,-0.5,-0.5,-0.5,-0.29,-0.16}'::real[]) -(3 rows) SELECT id, ROUND((v_transformed <-> :'v_transformed')::numeric, 1) as dist FROM sift_base1k ORDER BY v_transformed <-> :'v_transformed' LIMIT 10; id | dist @@ -117,13 +112,11 @@ EXPLAIN SELECT id, ROUND((v_transformed <-> :'v_transformed')::numeric, 1) as di 340 | 8.7 331 | 8.8 682 | 9.5 -(10 rows) DROP INDEX ind8; SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid 
-----------+------+----------+------------ -(0 rows) SELECT v_transformed as v_transformed42 from sift_base1k WHERE id = 42 \gset CREATE INDEX ind1 ON sift_base1k USING lantern_hnsw (v_transformed) WITH (dim=128, M=8, quant_bits=1); @@ -134,7 +127,6 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); indexname | size | indexdef | indisvalid -----------+--------+-------------------------------------------------------------------------------------------------------------+------------ ind1 | 160 kB | CREATE INDEX ind1 ON sift_base1k USING lantern_hnsw (v_transformed) WITH (dim='128', m='8', quant_bits='1') | t -(1 row) EXPLAIN SELECT id, ROUND((v_transformed <-> :'v_transformed42')::numeric, 1) as dist FROM sift_base1k ORDER BY v_transformed <-> :'v_transformed42' LIMIT 4; QUERY PLAN @@ -142,7 +134,6 @@ EXPLAIN SELECT id, ROUND((v_transformed <-> :'v_transformed42')::numeric, 1) as Limit (cost=0.00..3.64 rows=4 width=40) -> Index Scan using ind1 on sift_base1k (cost=0.00..910.50 rows=1000 width=40) Order By: (v_transformed <-> '{-0.49,-0.5,-0.5,-0.5,-0.5,-0.5,-0.29,-0.15,-0.49,-0.5,-0.5,-0.5,-0.5,0.27,0.01,-0.08,0.16,-0.48,-0.5,-0.5,-0.5,0.36,0.9,0.21,0.02,-0.49,-0.5,-0.5,-0.5,-0.5,-0.27,0.2,-0.48,-0.5,-0.5,-0.5,-0.5,0.14,0.23,0,-0.39,-0.5,-0.5,-0.5,-0.5,0.9,0.47,-0.32,0.9,0.14,-0.5,-0.5,-0.5,0.49,0.01,0.15,0.28,-0.39,-0.5,-0.5,-0.5,-0.5,-0.09,0.26,-0.5,-0.5,-0.5,-0.5,-0.5,0.74,0.32,-0.48,-0.02,-0.49,-0.5,-0.5,-0.5,0.68,-0.19,-0.45,0.9,-0.29,-0.5,-0.5,-0.5,-0.46,-0.38,0.28,-0.38,-0.5,-0.5,-0.5,-0.5,-0.5,0.08,0.67,-0.49,-0.5,-0.5,-0.5,-0.48,-0.25,-0.43,-0.48,-0.04,-0.48,-0.5,-0.5,-0.49,-0.38,-0.46,-0.42,0.9,-0.41,-0.5,-0.5,-0.5,-0.49,-0.42,-0.34,-0.47,-0.5,-0.5,-0.5,-0.5,-0.5,-0.29,-0.16}'::real[]) -(3 rows) SELECT id, ROUND((v_transformed <-> :'v_transformed42')::numeric, 1) as dist FROM sift_base1k ORDER BY v_transformed <-> :'v_transformed42' LIMIT 4; id | dist @@ -151,6 +142,5 @@ EXPLAIN SELECT id, ROUND((v_transformed <-> :'v_transformed42')::numeric, 1) as 36 | 1.1 
886 | 7.2 340 | 8.7 -(4 rows) -- test on 2000+ dim vectors diff --git a/lantern_hnsw/test/expected/hnsw_todo.out b/lantern_hnsw/test/expected/hnsw_todo.out index 8b701d53e..6cecd9231 100644 --- a/lantern_hnsw/test/expected/hnsw_todo.out +++ b/lantern_hnsw/test/expected/hnsw_todo.out @@ -27,7 +27,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- this should be supported CREATE INDEX ON small_world_l2 USING lantern_hnsw (vector_int dist_l2sq_int_ops); @@ -46,7 +45,6 @@ ORDER BY vector_int array[0,1,0] LIMIT 7; -> Sort Sort Key: (l2sq_dist(vector_int, '{0,1,0}'::integer[])) -> Seq Scan on small_world_l2 -(5 rows) --- Test scenarious --- ----------------------------------------- @@ -77,14 +75,12 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- The 1001 and 1002 vectors will be ignored in search, so the first row will not be 0 in result SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 1; round ----------- 249249.00 -(1 row) -- Case: -- Index is created externally @@ -108,7 +104,6 @@ INFO: validate_index() done, no issues found. validate_index ---------------- -(1 row) -- The first row will not be 0 now as the vector under id=777 was updated to 1,1,1,1... but it was indexed with different vector -- So the usearch index can not find 1,1,1,1,1.. 
vector in the index and wrong results will be returned @@ -117,7 +112,6 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v round ----------- 249249.00 -(1 row) ---- Query on expression based index is failing to check correct operator usage -------- CREATE OR REPLACE FUNCTION int_to_fixed_binary_real_array(n INT) RETURNS REAL[] AS $$ diff --git a/lantern_hnsw/test/expected/hnsw_vector.out b/lantern_hnsw/test/expected/hnsw_vector.out index 6fa1cc9eb..fd78e445a 100644 --- a/lantern_hnsw/test/expected/hnsw_vector.out +++ b/lantern_hnsw/test/expected/hnsw_vector.out @@ -13,7 +13,6 @@ SELECT '[1,2,3]'::vector; vector --------- [1,2,3] -(1 row) -- Test index creation x2 on empty table and subsequent inserts CREATE TABLE items (id SERIAL PRIMARY KEY, trait_ai VECTOR(3)); @@ -34,7 +33,6 @@ SELECT * FROM items ORDER BY trait_ai '[0,0,0]' LIMIT 3; 1 | [1,2,3] 2 | [4,5,6] 3 | [6,7,8] -(3 rows) SELECT * FROM ldb_get_indexes('items'); indexname | size | indexdef | indisvalid @@ -42,7 +40,6 @@ SELECT * FROM ldb_get_indexes('items'); items_pkey | 16 kB | CREATE UNIQUE INDEX items_pkey ON items USING btree (id) | t items_trait_ai_idx | 16 kB | CREATE INDEX items_trait_ai_idx ON items USING lantern_hnsw (trait_ai) WITH (dim='3', m='2') | t items_trait_ai_idx1 | 16 kB | CREATE INDEX items_trait_ai_idx1 ON items USING lantern_hnsw (trait_ai) WITH (dim='3', m='4') | t -(3 rows) -- Test index creation on table with existing data \ir utils/small_world_vector.sql @@ -69,7 +66,6 @@ SELECT * FROM ldb_get_indexes('small_world'); indexname | size | indexdef | indisvalid -------------------+-------+---------------------------------------------------------------------------------------------------------------------------+------------ small_world_v_idx | 16 kB | CREATE INDEX small_world_v_idx ON small_world USING lantern_hnsw (v) WITH (dim='3', m='5', ef='20', ef_construction='20') | t -(1 row) INSERT INTO small_world (v) VALUES ('[99,99,2]'); INSERT INTO small_world 
(v) VALUES (NULL); @@ -85,7 +81,6 @@ FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; 2.00 2.00 2.00 -(7 rows) EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; @@ -94,7 +89,6 @@ FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; Limit -> Index Scan using small_world_v_idx on small_world Order By: (v '[0,1,0]'::vector) -(3 rows) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; @@ -107,7 +101,6 @@ FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; 2.00 2.00 2.00 -(7 rows) EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; @@ -116,7 +109,6 @@ FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; Limit -> Index Scan using small_world_v_idx on small_world Order By: (v '[0,1,0]'::vector) -(3 rows) -- Verify that index creation on a large vector produces an error CREATE TABLE large_vector (v VECTOR(2001)); @@ -141,7 +133,6 @@ EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v :'v4444' LIMIT 1 Limit -> Index Scan using hnsw_idx on sift_base10k Order By: (v '[55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26]'::vector) -(3 rows) -- Ensure we can query an index for more elements than the value of init_k SET lantern_hnsw.init_k = 4; @@ -151,7 +142,6 @@ WITH neighbors AS ( count ------- 3 -(1 row) WITH neighbors AS ( SELECT * FROM small_world order by v '[1,0,0]' LIMIT 15 @@ -159,7 +149,6 @@ WITH neighbors AS ( count ------- 9 -(1 row) RESET client_min_messages; \set ON_ERROR_STOP off @@ -204,7 +193,6 @@ EXPLAIN 
(COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0] -------------------------------------------- Index Scan using l2_idx on small_world_arr Order By: (v '{0,0,0}'::real[]) -(2 rows) SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; id @@ -212,7 +200,6 @@ SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; 1 2 3 -(3 rows) DROP INDEX l2_idx; CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=2); @@ -225,7 +212,6 @@ SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; 1 2 3 -(3 rows) DROP INDEX cos_idx; CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=3); @@ -238,7 +224,6 @@ SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; 1 2 3 -(3 rows) -- Test pgvector in lantern.pgvector_compat=TRUE mode DROP TABLE small_world; @@ -280,7 +265,6 @@ FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; 2.00 2.00 2.00 -(7 rows) EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; @@ -289,7 +273,6 @@ FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; Limit -> Index Scan using l2_idx on small_world Order By: (v <-> '[0,1,0]'::vector) -(3 rows) -- cosine index CREATE INDEX cos_idx ON small_world USING lantern_hnsw (v dist_vec_cos_ops); @@ -307,7 +290,6 @@ FROM small_world ORDER BY v <=> '[0,1,0]'::VECTOR LIMIT 7; 1.00 1.00 1.00 -(7 rows) EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist FROM small_world ORDER BY v <=> '[0,1,0]'::VECTOR LIMIT 7; @@ -316,5 +298,4 @@ FROM small_world ORDER BY v <=> '[0,1,0]'::VECTOR LIMIT 7; Limit -> Index Scan using cos_idx on small_world Order By: (v <=> '[0,1,0]'::vector) -(3 rows) diff --git a/lantern_hnsw/test/expected/missing_outer_snapshot_portal.out b/lantern_hnsw/test/expected/missing_outer_snapshot_portal.out index f43e8c2ec..5cf949713 100644 --- a/lantern_hnsw/test/expected/missing_outer_snapshot_portal.out +++ 
b/lantern_hnsw/test/expected/missing_outer_snapshot_portal.out @@ -26,11 +26,9 @@ EXPLAIN (COSTS false) SELECT COUNT(*) FROM ourtable; Workers Planned: 4 -> Partial Aggregate -> Parallel Seq Scan on ourtable -(5 rows) SELECT COUNT(*) FROM ourtable; count ------- 10000 -(1 row) diff --git a/lantern_hnsw/test/expected/weighted_search.out b/lantern_hnsw/test/expected/weighted_search.out index a58c7142d..b9d9f6551 100644 --- a/lantern_hnsw/test/expected/weighted_search.out +++ b/lantern_hnsw/test/expected/weighted_search.out @@ -29,7 +29,6 @@ SELECT id, ROUND((v <-> :'v4')::numeric, 2) as dist FROM sift_Base1k ORDER BY v 183 | 259.18 254 | 263.45 116 | 264.64 -(10 rows) SELECT id, ROUND((v <-> :'v44')::numeric, 2) as dist FROM sift_Base1k ORDER BY v <-> :'v44' LIMIT 10; id | dist @@ -44,14 +43,12 @@ SELECT id, ROUND((v <-> :'v44')::numeric, 2) as dist FROM sift_Base1k ORDER BY v 950 | 338.17 744 | 343.25 539 | 344.02 -(10 rows) -- Make sure the function does not modify the global hnsw.ef_search SHOW hnsw.ef_search; hnsw.ef_search ---------------- 40 -(1 row) SELECT id, ROUND((v <-> :'v4')::numeric, 2) as v4_dist, ROUND((v <-> :'v44')::numeric, 2) v44_dist FROM lantern.weighted_vector_search(CAST(NULL as "sift_base1k"), max_dist => 750., debug_output => true, exact => false, @@ -170,7 +167,6 @@ WARNING: weighted vector search explain: [ 44 | 634.43 | 0.00 2 | 122.45 | 611.24 15 | 141.39 | 607.78 -(4 rows) SELECT id, ROUND((v <-> :'v4')::numeric, 2) as v4_dist, ROUND((v <-> :'v44')::numeric, 2) v44_dist FROM lantern.weighted_vector_search(CAST(NULL as "sift_base1k"), max_dist => 750., debug_output => true, exact => true, @@ -205,14 +201,12 @@ WARNING: weighted vector search explain(exact=true): [ 44 | 634.43 | 0.00 2 | 122.45 | 611.24 15 | 141.39 | 607.78 -(4 rows) -- Make sure the function does not modify the global hnsw.ef_search SHOW hnsw.ef_search; hnsw.ef_search ---------------- 40 -(1 row) SELECT *, 0.03 * v4_dist + 0.45 * v44_dist + 0.52 * v444_dist as 
weighted_dist FROM (SELECT id, ROUND((v <-> :'v4')::numeric, 2) as v4_dist, ROUND((v <-> :'v44')::numeric, 2) v44_dist, ROUND((v <-> :'v444')::numeric, 2) v444_dist @@ -361,7 +355,6 @@ WARNING: weighted vector search explain: [ 830 | 437.72 | 586.69 | 214.16 | 388.5053 77 | 615.89 | 218.82 | 595.58 | 426.6473 76 | 316.78 | 569.97 | 313.14 | 428.8227 -(5 rows) SELECT *, 0.03 * v4_dist + 0.45 * v44_dist + 0.52 * v444_dist as weighted_dist FROM (SELECT id, ROUND((v <-> :'v4')::numeric, 2) as v4_dist, ROUND((v <-> :'v44')::numeric, 2) v44_dist, ROUND((v <-> :'v444')::numeric, 2) v444_dist @@ -400,7 +393,6 @@ WARNING: weighted vector search explain(exact=true): [ 830 | 437.72 | 586.69 | 214.16 | 388.5053 77 | 615.89 | 218.82 | 595.58 | 426.6473 76 | 316.78 | 569.97 | 313.14 | 428.8227 -(5 rows) -- when max_dist is not specified, number of returned values dicreases with smaller ef SELECT count(*) @@ -433,7 +425,6 @@ WARNING: weighted vector search explain(exact=true): [ count ------- 1000 -(1 row) SELECT count(*) FROM lantern.weighted_vector_search(CAST(NULL as "sift_base1k"), exact => false, ef => 100, -- default @@ -444,7 +435,6 @@ SELECT count(*) count ------- 272 -(1 row) SELECT count(*) FROM lantern.weighted_vector_search(CAST(NULL as "sift_base1k"), exact => false, ef => 10, @@ -455,7 +445,6 @@ SELECT count(*) count ------- 30 -(1 row) SELECT count(*) FROM lantern.weighted_vector_search(CAST(NULL as "sift_base1k"), exact => false, ef => 5, @@ -466,7 +455,6 @@ SELECT count(*) count ------- 15 -(1 row) CREATE INDEX ON sift_base1k USING hnsw (v vector_cosine_ops) WITH (M=5, ef_construction=128); SELECT count(*) @@ -478,7 +466,6 @@ SELECT count(*) count ------- 15 -(1 row) -- test the API-shortcut helper (should be same as the one above) SELECT count(*) @@ -490,7 +477,6 @@ SELECT count(*) count ------- 15 -(1 row) -- Make sure API still works when the table stores real[] for vectors ALTER TABLE sift_base1k ADD COLUMN v_real real[]; @@ -509,7 +495,6 @@ SELECT count(*) 
count ------- 15 -(1 row) SELECT count(*) FROM lantern.weighted_vector_search_cos(CAST(NULL as "sift_base1k"), exact => false, ef => 5, @@ -520,7 +505,6 @@ SELECT count(*) count ------- 15 -(1 row) -- create non superuser and test the function SET client_min_messages = WARNING; @@ -540,5 +524,4 @@ SELECT count(*) count ------- 15 -(1 row) diff --git a/lantern_hnsw/test/parallel/expected/begin.out b/lantern_hnsw/test/parallel/expected/begin.out index 87c9640f4..560fc4c67 100644 --- a/lantern_hnsw/test/parallel/expected/begin.out +++ b/lantern_hnsw/test/parallel/expected/begin.out @@ -4,7 +4,6 @@ SELECT pg_reload_conf(); pg_reload_conf ---------------- t -(1 row) \ir utils/sift10k_array.sql CREATE TABLE IF NOT EXISTS sift_base10k ( @@ -34,7 +33,6 @@ SELECT id, ROUND((v <-> :'v444')::numeric, 2) FROM sift_base10k ORDER BY v <-> : 1336 | 73429.00 2654 | 78240.00 7642 | 78451.00 -(6 rows) CREATE INDEX to_be_reindexed ON sift_base10k USING lantern_hnsw (v) WITH (M=7, ef=20, ef_construction=20); INFO: done init usearch index @@ -49,7 +47,6 @@ SELECT id, ROUND((v <-> :'v444')::numeric, 2) FROM sift_base10k ORDER BY v <-> : 1336 | 73429.00 2654 | 78240.00 9185 | 78983.00 -(6 rows) REINDEX INDEX CONCURRENTLY to_be_reindexed; INFO: done init usearch index diff --git a/lantern_hnsw/test/parallel/expected/end.out b/lantern_hnsw/test/parallel/expected/end.out index 59130eafc..85a959b71 100644 --- a/lantern_hnsw/test/parallel/expected/end.out +++ b/lantern_hnsw/test/parallel/expected/end.out @@ -3,11 +3,9 @@ SELECT COUNT(*) FROM sift_base10k; count ------- 10030 -(1 row) SELECT * from sift_base10k WHERE id=4444; id | v ------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 4444 | 
{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26} -(1 row) diff --git a/lantern_hnsw/test/parallel/expected/select.out b/lantern_hnsw/test/parallel/expected/select.out index c807d51e5..26d898359 100644 --- a/lantern_hnsw/test/parallel/expected/select.out +++ b/lantern_hnsw/test/parallel/expected/select.out @@ -10,7 +10,6 @@ EXPLAIN (COSTS false) SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC Limit -> Index Scan using to_be_reindexed on sift_base10k Order By: (v <-> '{21,24,5,0,0,26,22,6,16,16,10,9,0,18,114,19,13,13,9,1,2,53,111,19,39,32,5,0,4,9,10,13,6,10,8,0,2,130,77,4,2,0,0,0,3,130,130,11,130,0,0,0,0,37,130,84,130,5,0,1,17,11,4,28,17,39,3,3,30,77,28,3,20,0,0,1,49,125,13,7,130,6,0,0,0,5,11,61,130,2,0,1,12,84,48,73,1,12,2,0,31,57,9,2,16,12,1,0,32,36,0,1,63,6,3,1,0,0,24,51,9,0,0,0,0,44,88,48}'::real[]) -(3 rows) -- Do the queries -- Make sure the new delete hook works to fix concurrent builds in 0.2.5->0.2.6 @@ -27,29 +26,24 @@ SELECT id, ROUND((v <-> :'v444')::numeric, 2) FROM sift_base10k ORDER BY v <-> : 1336 | 73429.00 2654 | 78240.00 9185 | 78983.00 -(6 rows) SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; id ------ 1111 -(1 row) SELECT id FROM sift_base10k ORDER BY v <-> :'v2222' ASC LIMIT 1; id ------ 2222 -(1 row) SELECT id FROM sift_base10k ORDER BY v <-> :'v3333' ASC LIMIT 1; id ------ 3333 -(1 row) SELECT id FROM sift_base10k ORDER BY v <-> :'v4444' ASC LIMIT 1; id ------ 4444 -(1 row) diff --git a/lantern_hnsw/test/test_runner.sh b/lantern_hnsw/test/test_runner.sh index d7a3648f6..46473c1de 100755 --- a/lantern_hnsw/test/test_runner.sh +++ b/lantern_hnsw/test/test_runner.sh @@ -37,6 +37,7 @@ function run_regression_test { # Exclude 
debug/inconsistent output from psql # So tests will always have the same output psql -U ${DB_USER} \ + -P footer=off \ -v ON_ERROR_STOP=1 \ -v VERBOSITY=terse \ -v ECHO=all \ @@ -47,6 +48,8 @@ function run_regression_test { -e 's! time=[0-9]\+\.[0-9]\+\.\.[0-9]\+\.[0-9]\+!!' | \ grep -v 'DEBUG: rehashing catalog cache id' | \ grep -v 'WARNING: this hook is experimental and can cause undefined behaviour' | \ + grep -Ev '^[[:space:]]*Disabled:' | \ + grep -Gv '"Disabled": \(true\|false\),' | \ grep -Gv '^ Planning Time:' | \ grep -Gv '^ Execution Time:' | \ # ignore lines in explain(format json) output that differ among pg12-pg16 From 18f83e160c7cc6fd3edac17333b8f7b404a635bd Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Fri, 1 Nov 2024 00:44:41 +0400 Subject: [PATCH 10/12] disable plan caching when operator rewrite hooks are enabled --- lantern_hnsw/src/hooks/post_parse.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lantern_hnsw/src/hooks/post_parse.c b/lantern_hnsw/src/hooks/post_parse.c index 4e1b55f52..428769f9b 100644 --- a/lantern_hnsw/src/hooks/post_parse.c +++ b/lantern_hnsw/src/hooks/post_parse.c @@ -179,6 +179,12 @@ void post_parse_analyze_hook_with_operator_check(ParseState *pstate, return; } + const char *current_mode = GetConfigOption("plan_cache_mode", false, false); + + if(strcmp(current_mode, "force_custom_plan") != 0) { + SetConfigOption("plan_cache_mode", "force_custom_plan", PGC_SUSET, PGC_S_SESSION); + } + if(!oidList) { elog(WARNING, "this hook is experimental and can cause undefined behaviour"); MemoryContext oldCtx = MemoryContextSwitchTo(CacheMemoryContext); From bd85111258eb7ab239b8ac32d1e09bf90f6b2b94 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Fri, 1 Nov 2024 12:03:21 +0400 Subject: [PATCH 11/12] Deprecate operator, remove `post_parse_analyze_hook` and `ExecutorStart_hook` hooks and related codes. 
Update tests to not use operator --- README.md | 14 +- ci/scripts/run-tests-linux.sh | 11 - lantern_hnsw/scripts/integration_tests.py | 14 +- lantern_hnsw/src/hnsw.c | 4 +- lantern_hnsw/src/hnsw/options.c | 40 --- lantern_hnsw/src/hnsw/options.h | 1 - lantern_hnsw/src/hooks/executor_start.c | 109 ------- lantern_hnsw/src/hooks/executor_start.h | 12 - lantern_hnsw/src/hooks/op_rewrite.c | 284 ------------------ lantern_hnsw/src/hooks/op_rewrite.h | 15 - lantern_hnsw/src/hooks/plan_tree_walker.c | 158 ---------- lantern_hnsw/src/hooks/plan_tree_walker.h | 20 -- lantern_hnsw/src/hooks/post_parse.c | 203 ------------- lantern_hnsw/src/hooks/post_parse.h | 19 -- lantern_hnsw/src/hooks/utils.c | 27 -- lantern_hnsw/src/hooks/utils.h | 12 - lantern_hnsw/test/expected/hnsw_correct.out | 3 +- .../test/expected/hnsw_cost_estimate.out | 5 +- lantern_hnsw/test/expected/hnsw_create.out | 5 +- .../test/expected/hnsw_create_expr.out | 3 +- .../test/expected/hnsw_create_unlogged.out | 5 +- lantern_hnsw/test/expected/hnsw_dist_func.out | 122 +------- lantern_hnsw/test/expected/hnsw_ef_search.out | 21 +- lantern_hnsw/test/expected/hnsw_extras.out | 22 -- .../test/expected/hnsw_index_from_file.out | 50 +-- lantern_hnsw/test/expected/hnsw_insert.out | 11 +- .../test/expected/hnsw_insert_unlogged.out | 11 +- lantern_hnsw/test/expected/hnsw_operators.out | 36 +-- lantern_hnsw/test/expected/hnsw_select.out | 201 +------------ lantern_hnsw/test/expected/hnsw_todo.out | 43 +-- lantern_hnsw/test/expected/hnsw_vector.out | 40 ++- lantern_hnsw/test/sql/hnsw_correct.sql | 3 +- lantern_hnsw/test/sql/hnsw_cost_estimate.sql | 6 +- lantern_hnsw/test/sql/hnsw_create.sql | 3 +- lantern_hnsw/test/sql/hnsw_create_expr.sql | 4 +- .../test/sql/hnsw_create_unlogged.sql | 3 +- lantern_hnsw/test/sql/hnsw_dist_func.sql | 88 +----- lantern_hnsw/test/sql/hnsw_ef_search.sql | 21 +- lantern_hnsw/test/sql/hnsw_extras.sql | 12 - .../test/sql/hnsw_index_from_file.sql | 18 +- lantern_hnsw/test/sql/hnsw_insert.sql | 
7 +- .../test/sql/hnsw_insert_unlogged.sql | 7 +- lantern_hnsw/test/sql/hnsw_operators.sql | 22 +- lantern_hnsw/test/sql/hnsw_select.sql | 74 +---- lantern_hnsw/test/sql/hnsw_todo.sql | 39 +-- lantern_hnsw/test/sql/hnsw_vector.sql | 32 +- lantern_hnsw/test/test_runner.sh | 1 - 47 files changed, 157 insertions(+), 1704 deletions(-) delete mode 100644 lantern_hnsw/src/hooks/executor_start.c delete mode 100644 lantern_hnsw/src/hooks/executor_start.h delete mode 100644 lantern_hnsw/src/hooks/op_rewrite.c delete mode 100644 lantern_hnsw/src/hooks/op_rewrite.h delete mode 100644 lantern_hnsw/src/hooks/plan_tree_walker.c delete mode 100644 lantern_hnsw/src/hooks/plan_tree_walker.h delete mode 100644 lantern_hnsw/src/hooks/post_parse.c delete mode 100644 lantern_hnsw/src/hooks/post_parse.h delete mode 100644 lantern_hnsw/src/hooks/utils.c delete mode 100644 lantern_hnsw/src/hooks/utils.h diff --git a/README.md b/README.md index a6ea99e82..f0e091784 100644 --- a/README.md +++ b/README.md @@ -106,19 +106,9 @@ FROM small_world ORDER BY vector <-> ARRAY[0,0,0] LIMIT 1; ### A note on operators and operator classes -Lantern supports several distance functions in the index and it has 2 modes for operators: +Lantern supports several distance functions in the index -1. `lantern.pgvector_compat=TRUE` (default) - In this mode there are 3 operators available `<->` (l2sq), `<=>` (cosine), `<+>` (hamming). - - Note that in this mode, you need to use right operator in order to trigger an index scan. - -2. `lantern.pgvector_compat=FALSE` - In this mode you only need to specify the distance function used for a column at index creation time. Lantern will automatically infer the distance function to use for search so you always use `` operator in search queries. - - Note that in this mode, the operator `` is intended exclusively for use with index lookups. If you expect to not use the index in a query, use the distance function directly (e.g. 
`l2sq_dist(v1, v2)`) - -> To switch between modes set `lantern.pgvector_compat` variable to `TRUE` or `FALSE`. +There are 3 operators available `<->` (l2sq), `<=>` (cosine), `<+>` (hamming). There are four defined operator classes that can be employed during index creation: diff --git a/ci/scripts/run-tests-linux.sh b/ci/scripts/run-tests-linux.sh index 0c5d9e3f9..26acff89f 100755 --- a/ci/scripts/run-tests-linux.sh +++ b/ci/scripts/run-tests-linux.sh @@ -20,17 +20,6 @@ function run_pgvector_tests(){ pushd /tmp/pgvector # Add lantern to load-extension in pgregress sed -i '/REGRESS_OPTS \=/ s/$/ --load-extension lantern/' Makefile - - # Set pgvector_compat flag in test files - for file in ./test/sql/*; do - echo 'SET lantern.pgvector_compat=TRUE;' | cat - $file > temp && mv temp $file - done - - # Set pgvector_compat flag in result files - for file in ./test/expected/*.out; do - echo 'SET lantern.pgvector_compat=TRUE;' | cat - $file > temp && mv temp $file - done - # Run tests make installcheck popd diff --git a/lantern_hnsw/scripts/integration_tests.py b/lantern_hnsw/scripts/integration_tests.py index 35fab0031..5d4911c36 100644 --- a/lantern_hnsw/scripts/integration_tests.py +++ b/lantern_hnsw/scripts/integration_tests.py @@ -46,7 +46,6 @@ def primary(): node.init() node.append_conf("enable_seqscan = off") node.append_conf("maintenance_work_mem = '1GB'") - node.append_conf("lantern.pgvector_compat=FALSE") node.append_conf("checkpoint_timeout = '100min'") node.append_conf("min_wal_size = '1GB'") node.append_conf("checkpoint_completion_target = '0.9'") @@ -158,7 +157,6 @@ def generic_vector_query( dist_with_function = f"{distance_metric}_dist(v, ({query_vector}))" dist_with_concrete_op = f"v {DIST_OPS[distance_metric]} ({query_vector})" - dist_with_generic_op = f"v ({query_vector})" query_generator = ( lambda order_by: f""" @@ -173,8 +171,6 @@ def generic_vector_query( return query_generator(dist_with_function) elif kind == "concrete": return 
query_generator(dist_with_concrete_op) - elif kind == "generic": - return query_generator(dist_with_generic_op) @pytest.mark.parametrize("distance_metric", ["l2sq", "cos"], scope="session") @@ -197,9 +193,6 @@ def test_selects(db, setup_copy_table_with_index, distance_metric, quant_bits, r concrete_op_query = generic_vector_query( table_name, distance_metric, "concrete", query_vector_id=q_vec_id ) - generic_op_query = generic_vector_query( - table_name, distance_metric, "generic", query_vector_id=q_vec_id - ) exact_explain_query = f"EXPLAIN {exact_query}" exact_plan = primary.execute("testdb", exact_explain_query) @@ -214,7 +207,7 @@ def test_selects(db, setup_copy_table_with_index, distance_metric, quant_bits, r q_vec_id == exact_res[0][0] ), "First result in exact query result should be the query vector" - for query in [generic_op_query, concrete_op_query]: + for query in [concrete_op_query]: explain_query = f"EXPLAIN {query}" plan = primary.execute("testdb", explain_query) assert f"Index Scan using idx_{table_name}" in str( @@ -348,9 +341,6 @@ def test_inserts(setup_copy_table_with_index, distance_metric, quant_bits, reque concrete_op_query = generic_vector_query( table_name, distance_metric, "concrete", query_vector_id=q_vec_id ) - generic_op_query = generic_vector_query( - table_name, distance_metric, "generic", query_vector_id=q_vec_id - ) exact_explain_query = f"EXPLAIN {exact_query}" for db in [primary, replica]: @@ -367,7 +357,7 @@ def test_inserts(setup_copy_table_with_index, distance_metric, quant_bits, reque exact_res[0][0] in inserted_vector_orig_ids[q_vec_id] ), "First result in exact query result should be the query vector" - for query in [generic_op_query, concrete_op_query]: + for query in [concrete_op_query]: explain_query = f"EXPLAIN {query}" plan = db.execute("testdb", explain_query) assert f"Index Scan using idx_{table_name}" in str( diff --git a/lantern_hnsw/src/hnsw.c b/lantern_hnsw/src/hnsw.c index b47d68676..846b80009 100644 --- 
a/lantern_hnsw/src/hnsw.c +++ b/lantern_hnsw/src/hnsw.c @@ -347,9 +347,7 @@ static float8 vector_dist(Vector *a, Vector *b, usearch_metric_kind_t metric_kin PGDLLEXPORT PG_FUNCTION_INFO_V1(ldb_generic_dist); Datum ldb_generic_dist(PG_FUNCTION_ARGS) { - if(ldb_pgvector_compat) { - elog(ERROR, "Operator can only be used when lantern.pgvector_compat=FALSE"); - } + elog(ERROR, "Operator is deprecated. Please explicitly use the operator that matches your distance function."); PG_RETURN_NULL(); } diff --git a/lantern_hnsw/src/hnsw/options.c b/lantern_hnsw/src/hnsw/options.c index 29100d44e..be4d346ad 100644 --- a/lantern_hnsw/src/hnsw/options.c +++ b/lantern_hnsw/src/hnsw/options.c @@ -15,9 +15,6 @@ #include // RelationData #include -#include "../hooks/executor_start.h" -#include "../hooks/post_parse.h" - // We import this header file // to access the op class support function pointers #include "../hnsw.h" @@ -54,10 +51,6 @@ int ldb_external_index_port; char *ldb_external_index_host; bool ldb_external_index_secure; -// if this variable is set to true -// our operator rewriting hooks will be disabled -bool ldb_pgvector_compat; - // this variable is only set during testing and controls whether // certain elog() calls are made // see ldb_dlog() definition and callsites for details @@ -366,17 +359,6 @@ void _PG_init(void) NULL, NULL); - DefineCustomBoolVariable("lantern.pgvector_compat", - "Whether or not the operator <-> should automatically detect the right distance function", - "set this to 1 to disable operator rewriting hooks", - &ldb_pgvector_compat, - true, - PGC_USERSET, - 0, - NULL, - NULL, - NULL); - DefineCustomIntVariable("lantern.external_index_port", "Port for external indexing", "Change this value if you run lantern daemon on different port", @@ -417,30 +399,8 @@ void _PG_init(void) MarkGUCPrefixReserved("_lantern_internal"); #endif - original_post_parse_analyze_hook = post_parse_analyze_hook; - original_ExecutorStart_hook = ExecutorStart_hook; - - 
post_parse_analyze_hook = post_parse_analyze_hook_with_operator_check; - ExecutorStart_hook = ExecutorStart_hook_with_operator_check; - #ifndef NDEBUG signal(SIGSEGV, ldb_wait_for_gdb); signal(SIGABRT, ldb_wait_for_gdb); #endif } - -// Called with extension unload. -void _PG_fini(void) -{ - // Return back the original hook value. - // This check is because there might be case if while we stop the hooks (in pgvector_compat mode) - // Another extension will be loaded and it will overwrite the hooks - // And when lantern extension will be unloaded it will set the hooks to original values - // Overwriting the current changed hooks set by another extension - if(ExecutorStart_hook == ExecutorStart_hook_with_operator_check) { - ExecutorStart_hook = original_ExecutorStart_hook; - } - if(post_parse_analyze_hook == post_parse_analyze_hook_with_operator_check) { - post_parse_analyze_hook = original_post_parse_analyze_hook; - } -} diff --git a/lantern_hnsw/src/hnsw/options.h b/lantern_hnsw/src/hnsw/options.h index edc3613e6..bdd4fc197 100644 --- a/lantern_hnsw/src/hnsw/options.h +++ b/lantern_hnsw/src/hnsw/options.h @@ -78,7 +78,6 @@ bytea* ldb_amoptions(Datum reloptions, bool validate); extern int ldb_hnsw_init_k; extern int ldb_hnsw_ef_search; extern bool ldb_is_test; -extern bool ldb_pgvector_compat; extern int ldb_external_index_port; extern char* ldb_external_index_host; extern bool ldb_external_index_secure; diff --git a/lantern_hnsw/src/hooks/executor_start.c b/lantern_hnsw/src/hooks/executor_start.c deleted file mode 100644 index 53778a92f..000000000 --- a/lantern_hnsw/src/hooks/executor_start.c +++ /dev/null @@ -1,109 +0,0 @@ -#include - -#include "executor_start.h" - -#include -#include -#include -#include -#include -#include - -#include "../hnsw/options.h" -#include "../hnsw/utils.h" -#include "op_rewrite.h" -#include "plan_tree_walker.h" -#include "utils.h" - -ExecutorStart_hook_type original_ExecutorStart_hook = NULL; - -typedef struct -{ - List *oidList; - bool 
isIndexScan; -} OperatorUsedCorrectlyContext; - -static bool operator_used_incorrectly_walker(Node *node, void *context) -{ - OperatorUsedCorrectlyContext *context_typed = (OperatorUsedCorrectlyContext *)context; - if(node == NULL) return false; - if(IsA(node, IndexScan)) { - context_typed->isIndexScan = true; - bool status = plan_tree_walker((Plan *)node, operator_used_incorrectly_walker, context); - context_typed->isIndexScan = false; - return status; - } - if(IsA(node, OpExpr)) { - OpExpr *opExpr = (OpExpr *)node; - if(list_member_oid(context_typed->oidList, opExpr->opno) && !context_typed->isIndexScan) { - return true; - } - } - if(IsA(node, List)) { - List *list = (List *)node; - ListCell *lc; - foreach(lc, list) { - if(operator_used_incorrectly_walker(lfirst(lc), context)) return true; - } - return false; - } - - if(is_plan_node(node)) { - return plan_tree_walker((Plan *)node, operator_used_incorrectly_walker, (void *)context); - } else { - return expression_tree_walker(node, operator_used_incorrectly_walker, (void *)context); - } - return false; -} - -static void validate_operator_usage(Plan *plan, List *oidList) -{ - OperatorUsedCorrectlyContext context; - context.oidList = oidList; - context.isIndexScan = false; - if(operator_used_incorrectly_walker((Node *)plan, (void *)&context)) { - elog(ERROR, "Operator can only be used inside of an index"); - } -} - -void ExecutorStart_hook_with_operator_check(QueryDesc *queryDesc, int eflags) -{ - if(ldb_pgvector_compat) { - if(original_ExecutorStart_hook) { - original_ExecutorStart_hook(queryDesc, eflags); - } else { - standard_ExecutorStart(queryDesc, eflags); - } - return; - } - - if(creating_extension) { - // this is true in only CREATE EXTENSION and ALTER EXTENSION UPDATE commands - // these statements are guaranteed to not use our operators and state necessary - // to run our hooks is not ready anyway so it would be wrong to run this - elog(DEBUG2, "Skipping executor start hook for CREATE EXTENSION ... 
statement"); - standard_ExecutorStart(queryDesc, eflags); - return; - } - - List *oidList = ldb_get_operator_oids(); - if(oidList != NULL) { - // oidList will be NULL if LanternDB extension is not fully initialized - // e.g. in statements executed as a result of CREATE EXTENSION ... statement - ldb_rewrite_ops(queryDesc->plannedstmt->planTree, oidList, queryDesc->plannedstmt->rtable); - validate_operator_usage(queryDesc->plannedstmt->planTree, oidList); - ListCell *lc; - foreach(lc, queryDesc->plannedstmt->subplans) { - Plan *subplan = (Plan *)lfirst(lc); - ldb_rewrite_ops(subplan, oidList, queryDesc->plannedstmt->rtable); - validate_operator_usage(subplan, oidList); - } - list_free(oidList); - } - - if(original_ExecutorStart_hook) { - original_ExecutorStart_hook(queryDesc, eflags); - } else { - standard_ExecutorStart(queryDesc, eflags); - } -} diff --git a/lantern_hnsw/src/hooks/executor_start.h b/lantern_hnsw/src/hooks/executor_start.h deleted file mode 100644 index 73ee5879b..000000000 --- a/lantern_hnsw/src/hooks/executor_start.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef LDB_HOOKS_EXECUTOR_START_H -#define LDB_HOOKS_EXECUTOR_START_H - -#include - -#include - -extern ExecutorStart_hook_type original_ExecutorStart_hook; - -void ExecutorStart_hook_with_operator_check(QueryDesc *queryDesc, int eflags); - -#endif // LDB_HOOKS_EXECUTOR_START_H \ No newline at end of file diff --git a/lantern_hnsw/src/hooks/op_rewrite.c b/lantern_hnsw/src/hooks/op_rewrite.c deleted file mode 100644 index 3ad91c1bf..000000000 --- a/lantern_hnsw/src/hooks/op_rewrite.c +++ /dev/null @@ -1,284 +0,0 @@ -#include - -#include "op_rewrite.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "plan_tree_walker.h" -#include "utils.h" - -#if PG_VERSION_NUM < 120000 -#include -#include -#else -#include -#endif - -static Node *operator_rewriting_mutator(Node *node, void *ctx); - -void 
base_plan_mutator(Plan *plan, void *context) -{ - plan->lefttree = (Plan *)operator_rewriting_mutator((Node *)plan->lefttree, context); - plan->righttree = (Plan *)operator_rewriting_mutator((Node *)plan->righttree, context); - plan->initPlan = (List *)operator_rewriting_mutator((Node *)plan->initPlan, context); - // checking qual and target list at the end covers some edge cases, if you modify this leave them here - plan->qual = (List *)operator_rewriting_mutator((Node *)plan->qual, context); - plan->targetlist = (List *)operator_rewriting_mutator((Node *)plan->targetlist, context); -} - -// recursively descend the plan tree searching for expressions with the <-> operator that are part of a non-index scan -// src/include/nodes/plannodes.h and src/include/nodes/nodes.h contain relevant definitions -Node *plan_tree_mutator(Plan *plan, void *context) -{ - check_stack_depth(); - - switch(nodeTag(plan)) { - case T_SubqueryScan: - { - SubqueryScan *subqueryscan = (SubqueryScan *)plan; - base_plan_mutator(&(subqueryscan->scan.plan), context); - subqueryscan->subplan = (Plan *)operator_rewriting_mutator((Node *)subqueryscan->subplan, context); - return (Node *)subqueryscan; - } - case T_CteScan: - { - CteScan *ctescan = (CteScan *)plan; - base_plan_mutator(&(ctescan->scan.plan), context); - return (Node *)ctescan; - } -#if PG_VERSION_NUM < 160000 - case T_Join: - { - Join *join = (Join *)plan; - base_plan_mutator(&(join->plan), context); - join->joinqual = (List *)operator_rewriting_mutator((Node *)join->joinqual, context); - return (Node *)join; - } -#endif - case T_NestLoop: - { - NestLoop *nestloop = (NestLoop *)plan; - base_plan_mutator((Plan *)&(nestloop->join), context); - return (Node *)nestloop; - } - case T_Result: - { - Result *result = (Result *)plan; - base_plan_mutator(&(result->plan), context); - result->resconstantqual = operator_rewriting_mutator((Node *)result->resconstantqual, context); - return (Node *)result; - } - case T_Limit: - { - Limit *limit = 
(Limit *)plan; - base_plan_mutator(&(limit->plan), context); - limit->limitOffset = operator_rewriting_mutator((Node *)limit->limitOffset, context); - limit->limitCount = operator_rewriting_mutator((Node *)limit->limitCount, context); - return (Node *)limit; - } - case T_Append: - { - Append *append = (Append *)plan; - base_plan_mutator(&(append->plan), context); - append->appendplans = (List *)operator_rewriting_mutator((Node *)append->appendplans, context); - return (Node *)append; - } - // case T_IncrementalSort: // We will eventually support this - case T_Agg: - case T_Group: - case T_Sort: - case T_Unique: - case T_SetOp: - case T_Hash: - case T_HashJoin: - case T_WindowAgg: - case T_LockRows: - { - base_plan_mutator(plan, context); - return (Node *)plan; - } - case T_ModifyTable: // No order by when modifying a table (update/delete etc) - case T_BitmapAnd: // We do not provide a bitmap index - case T_BitmapOr: - case T_BitmapHeapScan: - case T_BitmapIndexScan: - case T_FunctionScan: // SELECT * FROM fn(x, y, z) - case T_ValuesScan: // VALUES (1), (2) - case T_Material: // https://stackoverflow.com/questions/31410030/ -#if PG_VERSION_NUM >= 140000 - case T_Memoize: // memoized inner loop must have an index to be memoized -#endif - case T_WorkTableScan: // temporary table, shouldn't have index - case T_ProjectSet: // "execute set returning functions" feels safe to exclude - case T_TableFuncScan: // scan of a function that returns a table, shouldn't have an index - case T_ForeignScan: // if the relation is foreign we can't determine if it has an index - default: - break; - } - return (Node *)plan; -} - -// To write syscache calls look for the 'static const struct cachedesc cacheinfo[]' in utils/cache/syscache.c -// These describe the different caches that will be initialized into SysCache and the keys they support in searches -// The anums tell you the table and the column that the key will be compared to this is afaict the only way to match -// them to SQL for 
example pg_am.oid -> Anum_pg_am_oid the keys must be in order but they need not all be included the -// comment next to the top label is the name of the #defined cacheid that you should use as your first argument you can -// destructure the tuple int a From_(table_name) with GETSTRUCT to pull individual rows out -static Oid get_func_id_from_index(Relation index) -{ - Oid hnswamoid = get_index_am_oid("lantern_hnsw", false); - if(index->rd_rel->relam != hnswamoid) return InvalidOid; - - // indclass is inaccessible on the form data - // https://www.postgresql.org/docs/current/system-catalog-declarations.html - bool isNull; - Oid idxopclassoid; - Datum classDatum = SysCacheGetAttr(INDEXRELID, index->rd_indextuple, Anum_pg_index_indclass, &isNull); - if(!isNull) { - oidvector *indclass = (oidvector *)DatumGetPointer(classDatum); - assert(indclass->dim1 == 1); - idxopclassoid = indclass->values[ 0 ]; - } else { - index_close(index, AccessShareLock); - elog(ERROR, "Failed to retrieve indclass oid from index class"); - } - - // SELECT * FROM pg_opclass WHERE opcmethod=hnswamoid AND opcname=dist_cos_ops - HeapTuple opclassTuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(idxopclassoid)); - if(!HeapTupleIsValid(opclassTuple)) { - index_close(index, AccessShareLock); - elog(ERROR, "Failed to find operator class for key column"); - } - - Oid opclassOid = ((Form_pg_opclass)GETSTRUCT(opclassTuple))->opcfamily; - ReleaseSysCache(opclassTuple); - - // SELECT * FROM pg_amproc WHERE amprocfamily=opclassOid - // SearchSysCache1 is what we want and in fact it runs fine against release builds. However debug builds assert that - // AMPROCNUM takes only 1 arg which isn't true and so they fail. We therefore have to use SearchSysCacheList1 since - // it doesn't enforce this invariant. 
Ideally we would call SearchCatCache1 directly but postgres doesn't expose - // necessary constants - CatCList *opList = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opclassOid)); - HeapTuple opTuple = &opList->members[ 0 ]->tuple; - if(!HeapTupleIsValid(opTuple)) { - index_close(index, AccessShareLock); - elog(ERROR, "Failed to find the function for operator class"); - } - Oid functionId = ((Form_pg_amproc)GETSTRUCT(opTuple))->amproc; - ReleaseCatCacheList(opList); - - return functionId; -} - -static Node *operator_rewriting_mutator(Node *node, void *ctx) -{ - OpRewriterContext *context = (OpRewriterContext *)ctx; - - if(node == NULL) return node; - - if(IsA(node, OpExpr)) { - OpExpr *opExpr = (OpExpr *)node; - if(list_member_oid(context->ldb_ops, opExpr->opno)) { - if(context->indices == NULL) { - return node; - } else { - ListCell *lc; - foreach(lc, context->indices) { - uintptr_t intermediate = (uintptr_t)lfirst(lc); - Oid indexid = (Oid)intermediate; - Relation index = index_open(indexid, AccessShareLock); - Oid indexfunc = get_func_id_from_index(index); - if(OidIsValid(indexfunc)) { - MemoryContext old = MemoryContextSwitchTo(PortalContext); - FuncExpr *fnExpr = makeNode(FuncExpr); - fnExpr->funcresulttype = opExpr->opresulttype; - fnExpr->funcretset = opExpr->opretset; - fnExpr->funccollid = opExpr->opcollid; - fnExpr->inputcollid = opExpr->inputcollid; - fnExpr->args = opExpr->args; - fnExpr->location = opExpr->location; - // operators can't take variadic arguments - fnExpr->funcvariadic = false; - // print it as a function - fnExpr->funcformat = COERCE_EXPLICIT_CALL; - fnExpr->funcid = indexfunc; - MemoryContextSwitchTo(old); - - index_close(index, AccessShareLock); - - return (Node *)fnExpr; - } - index_close(index, AccessShareLock); - } - return node; - } - } - } - - if(IsA(node, IndexScan) || IsA(node, IndexOnlyScan)) { - return node; - } - if(IsA(node, SeqScan) || IsA(node, SampleScan)) { - Scan *scan = (Scan *)node; - Plan *scanPlan = &scan->plan; 
- Oid rtrelid = scan->scanrelid; - RangeTblEntry *rte = rt_fetch(rtrelid, context->rtable); - Oid relid = rte->relid; - Relation rel = relation_open(relid, AccessShareLock); - if(rel->rd_indexvalid) { - context->indices = RelationGetIndexList(rel); - } - relation_close(rel, AccessShareLock); - - base_plan_mutator(scanPlan, context); - return (Node *)scan; - } - - if(IsA(node, List)) { - MemoryContext old = MemoryContextSwitchTo(PortalContext); - List *list = (List *)node; - List *ret = NIL; - ListCell *lc; - foreach(lc, list) { - ret = lappend(ret, operator_rewriting_mutator((Node *)lfirst(lc), ctx)); - } - MemoryContextSwitchTo(old); - return (Node *)ret; - } - - if(is_plan_node(node)) { - return (Node *)plan_tree_mutator((Plan *)node, ctx); - } else { - return expression_tree_mutator(node, operator_rewriting_mutator, ctx); - } -} - -bool ldb_rewrite_ops(Plan *plan, List *oidList, List *rtable) -{ - Node *node = (Node *)plan; - - OpRewriterContext context; - context.ldb_ops = oidList; - context.indices = NULL; - context.rtable = rtable; - - if(IsA(node, IndexScan) || IsA(node, IndexOnlyScan)) { - return false; - } - - operator_rewriting_mutator(node, (void *)&context); - return true; -} diff --git a/lantern_hnsw/src/hooks/op_rewrite.h b/lantern_hnsw/src/hooks/op_rewrite.h deleted file mode 100644 index 8db3a04e7..000000000 --- a/lantern_hnsw/src/hooks/op_rewrite.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LDB_HOOKS_OP_REWRITE_H -#define LDB_HOOKS_OP_REWRITE_H - -#include -#include - -typedef struct OpRewriterContext -{ - List *ldb_ops; - List *indices; - List *rtable; -} OpRewriterContext; - -bool ldb_rewrite_ops(Plan *plan, List *oidList, List *rtable); -#endif diff --git a/lantern_hnsw/src/hooks/plan_tree_walker.c b/lantern_hnsw/src/hooks/plan_tree_walker.c deleted file mode 100644 index fabafc624..000000000 --- a/lantern_hnsw/src/hooks/plan_tree_walker.c +++ /dev/null @@ -1,158 +0,0 @@ -#include - -#include "plan_tree_walker.h" - -#include -#include -#include 
-#include -#include - -bool base_plan_walker(Plan *plan, bool (*walker_func)(Node *plan, void *context), void *context) -{ - /* - If there is a need to debug this function, follow the steps below: - 0. Add the following as the default branch in plan_tree_walker - default: - { - ldb_dlog("plan_tree_walker: unsupported plan node type: %d", nodeTag(plan)); - return false; - } - This will print all nodes that are not explicitly handled by the walker. - Currently there are several such nodes which probably means there are more - latent issues here. - 1. Attach gdb to the postgres process - 2. Set a breakpoint at the function entry - 3. navitate through relevant paths via gdb - 4. debug print Plan* nodes via - p (char*) nodeToString(plan); - - Note: for non-trivial Plan* nodes you may need to run: - set print elements 0 - in gdb to make sure the node string is not truncated. - */ - if(walker_func((Node *)plan->targetlist, context)) return true; - if(walker_func((Node *)plan->qual, context)) return true; - if(walker_func((Node *)plan->lefttree, context)) return true; - if(walker_func((Node *)plan->righttree, context)) return true; - if(walker_func((Node *)plan->initPlan, context)) return true; - return false; -} - -bool plan_tree_walker(Plan *plan, bool (*walker_func)(Node *plan, void *context), void *context) -{ - check_stack_depth(); - - switch(nodeTag(plan)) { - case T_SeqScan: - { - SeqScan *seqscan = (SeqScan *)plan; -#if PG_VERSION_NUM >= 150000 - Plan seqscanplan = seqscan->scan.plan; -#else - Plan seqscanplan = seqscan->plan; -#endif - if(base_plan_walker(&seqscanplan, walker_func, context)) return true; - break; - } - case T_IndexScan: - { - IndexScan *indexscan = (IndexScan *)plan; - if(base_plan_walker(&(indexscan->scan.plan), walker_func, context)) return true; - if(walker_func((Node *)indexscan->indexqual, context)) return true; - if(walker_func((Node *)indexscan->indexorderby, context)) return true; - break; - } - case T_IndexOnlyScan: - { - IndexOnlyScan 
*indexonlyscan = (IndexOnlyScan *)plan; - if(base_plan_walker(&(indexonlyscan->scan.plan), walker_func, context)) return true; - if(walker_func((Node *)indexonlyscan->indexqual, context)) return true; - if(walker_func((Node *)indexonlyscan->indexorderby, context)) return true; - break; - } - case T_SubqueryScan: - { - SubqueryScan *subqueryscan = (SubqueryScan *)plan; - if(base_plan_walker(&(subqueryscan->scan.plan), walker_func, context)) return true; - if(walker_func((Node *)subqueryscan->subplan, context)) return true; - break; - } - case T_CteScan: - { - CteScan *ctescan = (CteScan *)plan; - if(base_plan_walker(&(ctescan->scan.plan), walker_func, context)) return true; - break; - } -#if PG_VERSION_NUM < 160000 - case T_Join: - { - Join *join = (Join *)plan; - if(base_plan_walker(&(join->plan), walker_func, context)) return true; - if(walker_func((Node *)join->joinqual, context)) return true; - break; - } -#endif - case T_Agg: - { - Agg *agg = (Agg *)plan; - if(base_plan_walker(&(agg->plan), walker_func, context)) return true; - break; - } - case T_Group: - { - Group *group = (Group *)plan; - if(base_plan_walker(&(group->plan), walker_func, context)) return true; - break; - } - case T_Sort: - { - Sort *sort = (Sort *)plan; - if(base_plan_walker(&(sort->plan), walker_func, context)) return true; - break; - } - case T_Unique: - { - Unique *unique = (Unique *)plan; - if(base_plan_walker(&(unique->plan), walker_func, context)) return true; - break; - } - case T_NestLoop: - { - NestLoop *nestloop = (NestLoop *)plan; - if(base_plan_walker((Plan *)&(nestloop->join), walker_func, context)) return true; - break; - } - case T_Result: - { - Result *result = (Result *)plan; - if(base_plan_walker(&(result->plan), walker_func, context)) return true; - if(walker_func((Node *)result->resconstantqual, context)) return true; - break; - } - case T_Limit: - { - Limit *limit = (Limit *)plan; - if(base_plan_walker(&(limit->plan), walker_func, context)) return true; - 
if(walker_func((Node *)limit->limitOffset, context)) return true; - if(walker_func((Node *)limit->limitCount, context)) return true; - break; - } - case T_Append: - { - Append *append = (Append *)plan; - if(base_plan_walker(&(append->plan), walker_func, context)) return true; - if(walker_func((Node *)append->appendplans, context)) return true; - break; - } - case T_Material: - { - Material *material = (Material *)plan; - if(base_plan_walker(&(material->plan), walker_func, context)) return true; - break; - } - default: - return false; - } - return false; -} diff --git a/lantern_hnsw/src/hooks/plan_tree_walker.h b/lantern_hnsw/src/hooks/plan_tree_walker.h deleted file mode 100644 index 03f885c43..000000000 --- a/lantern_hnsw/src/hooks/plan_tree_walker.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef LDB_HOOKS_PLAN_TREE_WALKER_H -#define LDB_HOOKS_PLAN_TREE_WALKER_H - -#include - -#include -#include - -static inline bool is_plan_node(Node *node) -{ -#if PG_VERSION_NUM >= 160000 - return nodeTag(node) >= T_Result && nodeTag(node) <= T_PlanInvalItem; -#else - return nodeTag(node) >= T_Plan && nodeTag(node) < T_PlanState; -#endif -} - -bool plan_tree_walker(Plan *plan, bool (*walker_func)(Node *node, void *context), void *context); - -#endif // LDB_HOOKS_PLAN_TREE_WALKER_H diff --git a/lantern_hnsw/src/hooks/post_parse.c b/lantern_hnsw/src/hooks/post_parse.c deleted file mode 100644 index 428769f9b..000000000 --- a/lantern_hnsw/src/hooks/post_parse.c +++ /dev/null @@ -1,203 +0,0 @@ -#include - -#include "post_parse.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../hnsw/options.h" -#include "utils.h" - -post_parse_analyze_hook_type original_post_parse_analyze_hook = NULL; - -List *oidList; - -typedef struct -{ - List *oidList; -} OperatorUsedContext; - -static bool operator_used_walker(Node *node, OperatorUsedContext *context) -{ - if(node == NULL) return false; - if(IsA(node, Query)) return query_tree_walker((Query 
*)node, operator_used_walker, (void *)context, 0); - if(IsA(node, OpExpr)) { - OpExpr *opExpr = (OpExpr *)node; - if(list_member_oid(context->oidList, opExpr->opno)) { - return true; - } - } - return expression_tree_walker(node, operator_used_walker, (void *)context); -} - -static bool is_operator_used(Node *node, List *oidList) -{ - OperatorUsedContext context; - context.oidList = oidList; - return operator_used_walker(node, &context); -} - -typedef struct -{ - List *sortGroupRefs; -} SortGroupRefContext; - -static bool sort_group_ref_walker(Node *node, SortGroupRefContext *context) -{ - if(node == NULL) return false; - if(IsA(node, Query)) { - Query *query = (Query *)node; - ListCell *lc; - foreach(lc, query->sortClause) { - SortGroupClause *sortGroupClause = (SortGroupClause *)lfirst(lc); - context->sortGroupRefs = lappend_int(context->sortGroupRefs, sortGroupClause->tleSortGroupRef); - } - return query_tree_walker((Query *)node, sort_group_ref_walker, (void *)context, 0); - } - return expression_tree_walker(node, sort_group_ref_walker, (void *)context); -} - -static List *get_sort_group_refs(Node *node) -{ - SortGroupRefContext context; - context.sortGroupRefs = NIL; - sort_group_ref_walker(node, &context); - return context.sortGroupRefs; -} - -typedef struct -{ - List *oidList; - List *sortGroupRefs; - bool usedCorrectly; -} OperatorUsedCorrectlyContext; - -static bool is_var_or_func_of_vars(Node *node) -{ - if(IsA(node, Var)) { - return true; - } else if(IsA(node, FuncExpr)) { - List *args = ((FuncExpr *)node)->args; - ListCell *cell; - foreach(cell, args) { - if(is_var_or_func_of_vars(lfirst(cell))) { - return true; - } - } - } - return false; -} - -static bool operator_used_incorrectly_walker(Node *node, OperatorUsedCorrectlyContext *context) -{ - if(node == NULL) return false; - if(IsA(node, Query)) return query_tree_walker((Query *)node, operator_used_incorrectly_walker, (void *)context, 0); - if(IsA(node, TargetEntry)) { - TargetEntry *te = (TargetEntry 
*)node; - if(te->resjunk && list_member_int(context->sortGroupRefs, te->ressortgroupref)) { - if(IsA(te->expr, OpExpr)) { - OpExpr *opExpr = (OpExpr *)te->expr; - if(list_member_oid(context->oidList, opExpr->opno)) { - Node *arg1 = (Node *)linitial(opExpr->args); - Node *arg2 = (Node *)lsecond(opExpr->args); - bool isVar1 = IsA(arg1, Var); - bool isVar2 = IsA(arg2, Var); - /* There is a case when operator is used with index - * that was created via expression (CREATE INDEX ON t USING hnsw (func(id)) WITH (M=2)) - * in this case the query may look like this - * SELECT id FROM test ORDER BY func(id) <-> ARRAY[0,0,0] LIMIT 2 - * or like this - * SELECT id FROM test ORDER BY func(id) <-> func(n) LIMIT 2 - * we should check if IsA(arg1, FuncExpr) || IsA(arg2, FuncExpr) - * if true we may go and check the oid of function result to see if it is an array type - * we also can check that the argument of FuncExpr is at least one of the arg1 and arg2 - * will contain column of the table (e.g iterate over list and check IsA(arg, Var)) - * so the function will not be called with constant arguments on both sides - */ - if(isVar1 && isVar2) { - return false; - } else if(isVar1 && !isVar2) { - return operator_used_incorrectly_walker(arg2, context); - } else if(!isVar1 && isVar2) { - return operator_used_incorrectly_walker(arg1, context); - } else { - bool isFuncOfVars1 = is_var_or_func_of_vars(arg1); - bool isFuncOfVars2 = is_var_or_func_of_vars(arg2); - if(!isFuncOfVars1 && !isFuncOfVars2) { - return true; - } else { - return operator_used_incorrectly_walker(arg1, context) - || operator_used_incorrectly_walker(arg2, context); - } - } - } - } - } - } - if(IsA(node, OpExpr)) { - OpExpr *opExpr = (OpExpr *)node; - if(list_member_oid(context->oidList, opExpr->opno)) { - return true; - } - } - - return expression_tree_walker(node, operator_used_incorrectly_walker, (void *)context); -} - -static bool is_operator_used_incorrectly(Node *node, List *oidList, List *sortGroupRefs) -{ - 
OperatorUsedCorrectlyContext context; - context.oidList = oidList; - context.sortGroupRefs = sortGroupRefs; - return operator_used_incorrectly_walker(node, &context); -} - -void post_parse_analyze_hook_with_operator_check(ParseState *pstate, - Query *query -#if PG_VERSION_NUM >= 140000 - , - JumbleState *jstate -#endif -) -{ - if(original_post_parse_analyze_hook) { -#if PG_VERSION_NUM >= 140000 - original_post_parse_analyze_hook(pstate, query, jstate); -#else - original_post_parse_analyze_hook(pstate, query); -#endif - } - - if(ldb_pgvector_compat || creating_extension) { - return; - } - - const char *current_mode = GetConfigOption("plan_cache_mode", false, false); - - if(strcmp(current_mode, "force_custom_plan") != 0) { - SetConfigOption("plan_cache_mode", "force_custom_plan", PGC_SUSET, PGC_S_SESSION); - } - - if(!oidList) { - elog(WARNING, "this hook is experimental and can cause undefined behaviour"); - MemoryContext oldCtx = MemoryContextSwitchTo(CacheMemoryContext); - oidList = ldb_get_operator_oids(); - MemoryContextSwitchTo(oldCtx); - } - - Node *query_as_node = (Node *)query; - if(is_operator_used(query_as_node, oidList)) { - List *sort_group_refs = get_sort_group_refs(query_as_node); - if(is_operator_used_incorrectly(query_as_node, oidList, sort_group_refs)) { - elog(ERROR, "Operator is invalid outside of ORDER BY context"); - } - list_free(sort_group_refs); - } -} diff --git a/lantern_hnsw/src/hooks/post_parse.h b/lantern_hnsw/src/hooks/post_parse.h deleted file mode 100644 index 67ecfb45d..000000000 --- a/lantern_hnsw/src/hooks/post_parse.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef LDB_HOOKS_POST_PARSE_H -#define LDB_HOOKS_POST_PARSE_H - -#include - -#include -#include - -extern post_parse_analyze_hook_type original_post_parse_analyze_hook; - -void post_parse_analyze_hook_with_operator_check(ParseState *pstate, - Query *query -#if PG_VERSION_NUM >= 140000 - , - JumbleState *jstate -#endif -); - -#endif // LDB_HOOKS_POST_PARSE_H \ No newline at end of file 
diff --git a/lantern_hnsw/src/hooks/utils.c b/lantern_hnsw/src/hooks/utils.c deleted file mode 100644 index c46b98dc0..000000000 --- a/lantern_hnsw/src/hooks/utils.c +++ /dev/null @@ -1,27 +0,0 @@ -#include - -#include -#include -#include -#include - -List *ldb_get_operator_oids() -{ - List *oidList = NIL; - - List *nameList = lappend(NIL, makeString("")); - - Oid intOperator = LookupOperName(NULL, nameList, INT4ARRAYOID, INT4ARRAYOID, true, -1); - Oid floatOperator = LookupOperName(NULL, nameList, FLOAT4ARRAYOID, FLOAT4ARRAYOID, true, -1); - - if(OidIsValid(intOperator)) { - oidList = lappend_oid(oidList, intOperator); - } - if(OidIsValid(floatOperator)) { - oidList = lappend_oid(oidList, floatOperator); - } - - list_free(nameList); - - return oidList; -} diff --git a/lantern_hnsw/src/hooks/utils.h b/lantern_hnsw/src/hooks/utils.h deleted file mode 100644 index be89baaf4..000000000 --- a/lantern_hnsw/src/hooks/utils.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef LDB_HOOKS_UTILS_H -#define LDB_HOOKS_UTILS_H - -#include - -#include - -List *ldb_get_operator_oids(); - -List *ldb_get_operator_class_oids(Oid amId); - -#endif // LDB_HOOKS_UTILS_H diff --git a/lantern_hnsw/test/expected/hnsw_correct.out b/lantern_hnsw/test/expected/hnsw_correct.out index cc428ac2a..e969ac258 100644 --- a/lantern_hnsw/test/expected/hnsw_correct.out +++ b/lantern_hnsw/test/expected/hnsw_correct.out @@ -13,7 +13,6 @@ INFO: done init usearch index INFO: inserted 4 elements INFO: done saving 4 vectors SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; -- Get the results without the index CREATE TEMP TABLE results_wo_index AS SELECT @@ -25,7 +24,7 @@ FROM -- Get the results with the index CREATE TEMP TABLE results_w_index AS SELECT - ROW_NUMBER() OVER (ORDER BY v '{0,0}') AS row_num, + ROW_NUMBER() OVER (ORDER BY v <-> '{0,0}') AS row_num, id, l2sq_dist(v, '{0,0}') AS dist FROM diff --git a/lantern_hnsw/test/expected/hnsw_cost_estimate.out 
b/lantern_hnsw/test/expected/hnsw_cost_estimate.out index d026035c8..cd763cabb 100644 --- a/lantern_hnsw/test/expected/hnsw_cost_estimate.out +++ b/lantern_hnsw/test/expected/hnsw_cost_estimate.out @@ -50,11 +50,10 @@ BEGIN RETURN is_within_error(get_cost_estimate(explain_query), expected_cost, error_margin); END; $$ LANGUAGE plpgsql; -SET lantern.pgvector_compat=FALSE; -- Goal: make sure query cost estimate is accurate -- when index is created with varying costruction parameters. SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v ''%s'' LIMIT 10' +\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v <-> ''%s'' LIMIT 10' \set enable_seqscan = off; -- Case 0, sanity check. No data. CREATE TABLE empty_table(id SERIAL PRIMARY KEY, v REAL[2]); @@ -63,7 +62,7 @@ INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors SET _lantern_internal.is_test = true; -SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v ''{1,2}'' LIMIT 10', 0.47); +SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v <-> ''{1,2}'' LIMIT 10', 0.47); DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- DEBUG: LANTERN - Total cost: 4.225000 diff --git a/lantern_hnsw/test/expected/hnsw_create.out b/lantern_hnsw/test/expected/hnsw_create.out index f63986dd2..9c6ea0fed 100644 --- a/lantern_hnsw/test/expected/hnsw_create.out +++ b/lantern_hnsw/test/expected/hnsw_create.out @@ -65,18 +65,17 @@ CREATE TABLE IF NOT EXISTS sift_base10k ( v REAL[128] ); \copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; -SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors SELECT 
v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444' LIMIT 10; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_idx on sift_base10k - Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) + Order By: (v <-> '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx diff --git a/lantern_hnsw/test/expected/hnsw_create_expr.out b/lantern_hnsw/test/expected/hnsw_create_expr.out index b3e0ffb7f..33e7b2c7e 100644 --- a/lantern_hnsw/test/expected/hnsw_create_expr.out +++ b/lantern_hnsw/test/expected/hnsw_create_expr.out @@ -64,7 +64,6 @@ CREATE TABLE test_table (id INTEGER); INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; SET enable_seqscan = false; -SET lantern.pgvector_compat=FALSE; -- This should success CREATE INDEX ON 
test_table USING lantern_hnsw (int_to_fixed_binary_real_array(id)) WITH (M=2); INFO: done init usearch index @@ -88,7 +87,7 @@ ERROR: data type text has no default operator class for access method "lantern_ -- This should result in error about multicolumn expressions support CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_real_array(id), int_to_dynamic_binary_real_array(id)) WITH (M=2); ERROR: access method "lantern_hnsw" does not support multicolumn indexes -SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) '{0,0,0}'::REAL[] LIMIT 2; +SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <-> '{0,0,0}'::REAL[] LIMIT 2; id ---- 0 diff --git a/lantern_hnsw/test/expected/hnsw_create_unlogged.out b/lantern_hnsw/test/expected/hnsw_create_unlogged.out index b556fd46a..d9f3c3ed2 100644 --- a/lantern_hnsw/test/expected/hnsw_create_unlogged.out +++ b/lantern_hnsw/test/expected/hnsw_create_unlogged.out @@ -47,18 +47,17 @@ CREATE UNLOGGED TABLE IF NOT EXISTS sift_base10k ( v REAL[128] ); \copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; -SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444' LIMIT 10; QUERY PLAN 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_idx on sift_base10k - Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) + Order By: (v <-> '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx diff --git a/lantern_hnsw/test/expected/hnsw_dist_func.out b/lantern_hnsw/test/expected/hnsw_dist_func.out index 04f17ae91..ebc373238 100644 --- a/lantern_hnsw/test/expected/hnsw_dist_func.out +++ b/lantern_hnsw/test/expected/hnsw_dist_func.out @@ -35,9 +35,8 @@ INSERT INTO small_world_l2 SELECT id, v FROM small_world; INSERT INTO small_world_cos SELECT id, v FROM small_world; INSERT INTO small_world_ham SELECT id, ARRAY[CAST(v[1] AS INTEGER), CAST(v[2] AS INTEGER), CAST(v[3] AS INTEGER)] FROM small_world; SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; -- Verify that the distance functions work (check distances) -SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY 
v '{0,1,0}'; +SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; round ------- 0.00 @@ -49,7 +48,7 @@ SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v 2.00 3.00 -SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}'; +SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v <=> '{0,1,0}'; round ------- 0.00 @@ -61,7 +60,7 @@ SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v 1.00 1.00 -SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}'; +SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v <+> '{0,1,0}'; round ------- 0.00 @@ -99,31 +98,31 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) {101} | 3.00 -- Verify that the indexes is being used -EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; QUERY PLAN --------------------------------------------------------- Index Scan using small_world_l2_v_idx on small_world_l2 - Order By: (v '{0,1,0}'::real[]) + Order By: (v <-> '{0,1,0}'::real[]) -EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v <=> '{0,1,0}'; QUERY PLAN ----------------------------------------------------------- Index Scan using small_world_cos_v_idx on small_world_cos - Order By: (v '{0,1,0}'::real[]) + Order By: (v <=> '{0,1,0}'::real[]) -EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v <+> '{0,1,0}'; QUERY PLAN ----------------------------------------------------------- Index Scan using small_world_ham_v_idx on small_world_ham - Order By: (v '{0,1,0}'::integer[]) + Order By: (v <+> '{0,1,0}'::integer[]) 
\set ON_ERROR_STOP off -- Expect errors due to mismatching vector dimensions -SELECT 1 FROM small_world_l2 ORDER BY v '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_l2 ORDER BY v <-> '{0,1,0,1}' LIMIT 1; ERROR: Expected real array with dimension 3, got 4 -SELECT 1 FROM small_world_cos ORDER BY v '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_cos ORDER BY v <=> '{0,1,0,1}' LIMIT 1; ERROR: Expected real array with dimension 3, got 4 -SELECT 1 FROM small_world_ham ORDER BY v '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_ham ORDER BY v <+> '{0,1,0,1}' LIMIT 1; ERROR: Expected int array with dimension 3, got 4 SELECT l2sq_dist('{1,1}'::REAL[], '{0,1,0}'::REAL[]); ERROR: expected equally sized arrays but got arrays with dimensions 2 and 3 @@ -134,13 +133,6 @@ SELECT cos_dist('{1,1}', '{0,1,0}'); ERROR: expected equally sized arrays but got arrays with dimensions 2 and 3 SELECT hamming_dist('{1,1}', '{0,1,0}'); ERROR: expected equally sized arrays but got arrays with dimensions 2 and 3 --- Expect errors due to improper use of the operator outside of its supported context -SELECT ARRAY[1,2,3] ARRAY[3,2,1]; -ERROR: Operator is invalid outside of ORDER BY context -SELECT ROUND((v ARRAY[0,1,0])::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}' LIMIT 7; -ERROR: Operator is invalid outside of ORDER BY context -SELECT ROUND((v ARRAY[0,1,0])::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}' LIMIT 7; -ERROR: Operator is invalid outside of ORDER BY context \set ON_ERROR_STOP on -- More robust distance operator tests CREATE TABLE test1 (id SERIAL, v REAL[]); @@ -158,92 +150,6 @@ SELECT 1 FROM test1 WHERE id = 0 + 1; ---------- 1 -\set ON_ERROR_STOP off --- Expect errors due to incorrect usage -INSERT INTO test1 (v) VALUES (ARRAY['{1,2}'::REAL[] '{4,2}'::REAL[], 0]); -ERROR: Operator is invalid outside of ORDER BY context -SELECT v '{1,2}' FROM test1 ORDER BY v '{1,3}'; -ERROR: Operator is invalid outside of ORDER BY context -SELECT v '{1,2}' FROM test1; -ERROR: 
Operator is invalid outside of ORDER BY context -WITH temp AS (SELECT v '{1,2}' FROM test1) SELECT 1 FROM temp; -ERROR: Operator is invalid outside of ORDER BY context -SELECT t.res FROM (SELECT v '{1,2}' AS res FROM test1) t; -ERROR: Operator is invalid outside of ORDER BY context -SELECT (SELECT v '{1,2}' FROM test1 LIMIT 1) FROM test1; -ERROR: Operator is invalid outside of ORDER BY context -SELECT COALESCE(v '{1,2}', 0) FROM test1; -ERROR: Operator is invalid outside of ORDER BY context -SELECT EXISTS (SELECT v '{1,2}' FROM test1); -ERROR: Operator is invalid outside of ORDER BY context -SELECT test1.v test2.v FROM test1 JOIN test2 USING (id); -ERROR: Operator is invalid outside of ORDER BY context -SELECT v '{1,2}' FROM test1 UNION SELECT v '{1,3}' FROM test1; -ERROR: Operator is invalid outside of ORDER BY context -(SELECT v '{1,2}' FROM test1 WHERE id < 5) UNION (SELECT v '{1,3}' FROM test1 WHERE id >= 5); -ERROR: Operator is invalid outside of ORDER BY context -SELECT MAX(v '{1,2}') FROM test1; -ERROR: Operator is invalid outside of ORDER BY context -SELECT * FROM test1 JOIN test2 ON test1.v test2.v < 0.5; -ERROR: Operator is invalid outside of ORDER BY context -SELECT test1.v FROM test1 JOIN test2 ON test1.v '{1,2}' = test2.v '{1,3}'; -ERROR: Operator is invalid outside of ORDER BY context -SELECT (v '{1,2}') + (v '{1,3}') FROM test1; -ERROR: Operator is invalid outside of ORDER BY context -SELECT CASE WHEN v '{1,2}' > 1 THEN 'High' ELSE 'Low' END FROM test1; -ERROR: Operator is invalid outside of ORDER BY context -INSERT INTO test1 (v) VALUES ('{2,3}') RETURNING v '{1,2}'; -ERROR: Operator is invalid outside of ORDER BY context -SELECT 1 FROM test1 GROUP BY v '{1,3}'; -ERROR: Operator is invalid outside of ORDER BY context -SELECT 1 FROM test1 ORDER BY (('{1,2}'::real[] '{3,4}'::real[]) - 0); -ERROR: Operator is invalid outside of ORDER BY context -SELECT 1 FROM test1 ORDER BY '{1,2}'::REAL[] '{3,4}'::REAL[]; -ERROR: Operator is invalid outside of ORDER 
BY context -SELECT 1 FROM test1 ORDER BY v ARRAY[(SELECT '{1,4}'::REAL[] '{4,2}'::REAL[]), 3]; -ERROR: Operator is invalid outside of ORDER BY context --- Expect errors due to index not existing -SELECT id FROM test1 ORDER BY v '{1,2}'; -ERROR: Operator can only be used inside of an index -SELECT 1 FROM test1 ORDER BY v (SELECT '{1,3}'::real[]); -ERROR: Operator can only be used inside of an index -SELECT t2_results.id FROM test1 t1 JOIN LATERAL (SELECT t2.id FROM test2 t2 ORDER BY t1.v t2.v LIMIT 1) t2_results ON TRUE; -ERROR: Operator can only be used inside of an index -WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT DISTINCT id FROM t; -ERROR: Operator can only be used inside of an index -WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1; -ERROR: Operator can only be used inside of an index -WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}') SELECT id FROM t UNION SELECT id FROM t; -ERROR: Operator can only be used inside of an index --- issue #227 -SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v '{1,2}') as forall) haha on TRUE; -ERROR: Operator can only be used inside of an index --- more complex setup of the above -SELECT forall.id, nearest_per_id.* FROM -(SELECT * FROM - test2) AS forall - JOIN LATERAL ( - SELECT - ARRAY_AGG(id ORDER BY id) AS near_ids, - ARRAY_AGG(dist ORDER BY id) AS near_dists - FROM - ( - SELECT - id, - l2sq_dist(v, forall.v) as dist - FROM - test2 - ORDER BY - v forall.v - LIMIT - 5 - ) as __unused_name - ) nearest_per_id on TRUE -ORDER BY - forall.id -LIMIT - 9; -ERROR: Operator can only be used inside of an index \set ON_ERROR_STOP on -- cross-lateral joins work as expected when appropriate index exists -- nearest element for each vector @@ -265,7 +171,7 @@ SELECT forall.id, nearest_per_id.* FROM FROM small_world_l2 ORDER BY - v forall.v + v <-> forall.v LIMIT 4 ) as __unused_name @@ -295,7 +201,7 @@ CREATE INDEX ON 
extra_small_world_ham USING lantern_hnsw (v dist_hamming_ops) WI INFO: done init usearch index INFO: inserted 4 elements INFO: done saving 4 vectors -SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v '{0,0}'; +SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v <+> '{0,0}'; round ------- 0.00 diff --git a/lantern_hnsw/test/expected/hnsw_ef_search.out b/lantern_hnsw/test/expected/hnsw_ef_search.out index aba0026c1..6294135da 100644 --- a/lantern_hnsw/test/expected/hnsw_ef_search.out +++ b/lantern_hnsw/test/expected/hnsw_ef_search.out @@ -31,11 +31,10 @@ ERROR: 401 is outside the valid range for parameter "lantern_hnsw.ef" (0 .. 400 -- Repeat the same query while varying ef parameter -- NOTE: it is not entirely known if the results of these are deterministic SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -- Queries below have the same result SET lantern_hnsw.ef = 1; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -51,7 +50,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 2; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -67,7 +66,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 4; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -83,7 +82,7 @@ SELECT ROUND(l2sq_dist(v, 
:'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 8; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -99,7 +98,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 16; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -116,7 +115,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v -- Queries below have the same result, which is different from above SET lantern_hnsw.ef = 32; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -132,7 +131,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 64; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -148,7 +147,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 128; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -164,7 +163,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 256; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v 
:'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -180,7 +179,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v (10 rows) SET lantern_hnsw.ef = 400; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 diff --git a/lantern_hnsw/test/expected/hnsw_extras.out b/lantern_hnsw/test/expected/hnsw_extras.out index d41825130..b3ef3e461 100644 --- a/lantern_hnsw/test/expected/hnsw_extras.out +++ b/lantern_hnsw/test/expected/hnsw_extras.out @@ -38,7 +38,6 @@ INFO: validate_index() done, no issues found. SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -- Validate that using corresponding operator triggers index scan -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <-> :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -46,15 +45,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <-> :'v777' LIMIT 10 -> Index Scan using sift_base1k_v_idx on sift_base1k Order By: (v <-> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -SET lantern.pgvector_compat=FALSE; -EXPLAIN (COSTS FALSE) SELECT id FROM 
sift_base1k order by v :'v777' LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit - -> Index Scan using sift_base1k_v_idx on sift_base1k - Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) - -SET lantern.pgvector_compat=TRUE; DROP INDEX sift_base1k_v_idx; -- Create with params SELECT lantern_create_external_index('v', 'sift_base1k', 'public', 'cos', 128, 10, 10, 10, false, 'hnsw_cos_index'); @@ -69,7 +59,6 @@ INFO: validate_index() done, no issues found. 
---------------- -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -77,15 +66,6 @@ EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10 -> Index Scan using hnsw_cos_index on sift_base1k Order By: (v <=> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -SET lantern.pgvector_compat=FALSE; -EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit - -> Index Scan using hnsw_cos_index on sift_base1k - Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) - -SET lantern.pgvector_compat=TRUE; -- -- Reindex external index SELECT 
lantern_reindex_external_index('hnsw_cos_index'); lantern_reindex_external_index @@ -100,7 +80,6 @@ INFO: validate_index() done, no issues found. -- Validate that using corresponding operator triggers index scan -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -149,7 +128,6 @@ INFO: validate_index() done, no issues found. ---------------- -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/lantern_hnsw/test/expected/hnsw_index_from_file.out b/lantern_hnsw/test/expected/hnsw_index_from_file.out index c51fa83d8..156ad45fd 100644 --- a/lantern_hnsw/test/expected/hnsw_index_from_file.out +++ b/lantern_hnsw/test/expected/hnsw_index_from_file.out @@ -51,16 +51,15 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); hnsw_l2_index | 776 kB | CREATE INDEX hnsw_l2_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2sq-0.3.0.usearch') | t SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k 
order by v :'v777' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_l2_index on sift_base1k - Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) + Order By: (v <-> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; round ----------- 0.00 @@ -79,7 +78,7 @@ INSERT INTO sift_base1k (id, v) VALUES (1001, array_fill(1, ARRAY[128])), (1002, array_fill(2, ARRAY[128])); SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; round ----------- 0.00 @@ -101,45 +100,6 @@ CREATE TABLE IF NOT EXISTS sift_base1k ( v REAL[] ); COPY 
sift_base1k (v) FROM '/tmp/lantern/vector_datasets/sift_base1k_arrays.csv' WITH csv; --- Validate that creating an index from file works with cosine distance function -CREATE INDEX hnsw_cos_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-cos-0.3.0.usearch'); -INFO: done init usearch index -INFO: done loading usearch index -INFO: done saving 1000 vectors -SELECT _lantern_internal.validate_index('hnsw_cos_index', false); -INFO: validate_index() start for hnsw_cos_index -INFO: validate_index() done, no issues found. - validate_index ----------------- - - -SELECT * FROM ldb_get_indexes('sift_base1k'); - indexname | size | indexdef | indisvalid -----------------+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------+------------ - hnsw_cos_index | 776 kB | CREATE INDEX hnsw_cos_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-cos-0.3.0.usearch') | t - -SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit - -> Index Scan using hnsw_cos_index on sift_base1k - Order By: (v 
'{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) - -SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; - round -------- - 0.00 - 0.19 - 0.21 - 0.22 - 0.23 - 0.25 - 0.25 - 0.25 - 0.25 - 0.26 - --- Test scenarious --- ----------------------------------------- -- Case: @@ -166,7 +126,7 @@ INFO: validate_index() done, no issues found. -- This should not throw error, but the first result will not be 0 as vector 777 is deleted from the table -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; round ----------- 98486.00 diff --git a/lantern_hnsw/test/expected/hnsw_insert.out b/lantern_hnsw/test/expected/hnsw_insert.out index 9463d8d7d..e48d9b3b7 100644 --- a/lantern_hnsw/test/expected/hnsw_insert.out +++ b/lantern_hnsw/test/expected/hnsw_insert.out @@ -64,7 +64,6 @@ INFO: done init usearch index INFO: inserted 8 elements INFO: done saving 8 vectors SET enable_seqscan = false; -SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); INSERT INTO small_world (v) VALUES (NULL); @@ -79,7 +78,7 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}'; + v <-> '{0,0,0}'; round ------- 0.00 @@ -105,13 +104,13 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}' + v <-> '{0,0,0}' LIMIT 10; QUERY PLAN --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v '{0,0,0}'::real[]) + Order By: (v <-> '{0,0,0}'::real[]) SELECT 
_lantern_internal.validate_index('small_world_v_idx', false); INFO: validate_index() start for small_world_v_idx @@ -131,11 +130,11 @@ INFO: inserted 0 elements INFO: done saving 0 vectors \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' WITH CSV; SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444'; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Index Scan using hnsw_idx on sift_base10k - Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) + Order By: (v <-> '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx diff --git a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out index 34c00e242..0a7f7a0f4 100644 --- a/lantern_hnsw/test/expected/hnsw_insert_unlogged.out +++ 
b/lantern_hnsw/test/expected/hnsw_insert_unlogged.out @@ -64,7 +64,6 @@ INFO: done init usearch index INFO: inserted 8 elements INFO: done saving 8 vectors SET enable_seqscan = false; -SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); INSERT INTO small_world (v) VALUES (NULL); @@ -79,7 +78,7 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}'; + v <-> '{0,0,0}'; round ------- 0.00 @@ -105,13 +104,13 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}' + v <-> '{0,0,0}' LIMIT 10; QUERY PLAN --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v '{0,0,0}'::real[]) + Order By: (v <-> '{0,0,0}'::real[]) SELECT _lantern_internal.validate_index('small_world_v_idx', false); INFO: validate_index() start for small_world_v_idx @@ -131,11 +130,11 @@ INFO: inserted 0 elements INFO: done saving 0 vectors \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' WITH CSV; SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444'; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Index Scan using hnsw_idx on sift_base10k - Order By: (v 
'{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) + Order By: (v <-> '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) SELECT _lantern_internal.validate_index('hnsw_idx', false); INFO: validate_index() start for hnsw_idx diff --git a/lantern_hnsw/test/expected/hnsw_operators.out b/lantern_hnsw/test/expected/hnsw_operators.out index 3ac3411ac..949c73df1 100644 --- a/lantern_hnsw/test/expected/hnsw_operators.out +++ b/lantern_hnsw/test/expected/hnsw_operators.out @@ -1,23 +1,13 @@ --- Validate that lantern.pgvector_compat disables the operator rewriting hooks +\set ON_ERROR_STOP off CREATE TABLE op_test (v REAL[]); INSERT INTO op_test (v) VALUES (ARRAY[0,0,0]), (ARRAY[1,1,1]); CREATE INDEX cos_idx ON op_test USING lantern_hnsw(v dist_cos_ops); INFO: done init usearch index INFO: inserted 2 elements INFO: done saving 2 vectors --- should rewrite operator -SET lantern.pgvector_compat=FALSE; -SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; - v ---------- - {1,1,1} - {0,0,0} - -\set ON_ERROR_STOP off -SET lantern.pgvector_compat=TRUE; --- should throw error -SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; -ERROR: Operator can only be used when lantern.pgvector_compat=FALSE +-- Expect deprecation error due to use of the operator +SELECT ARRAY[1,2,3] ARRAY[3,2,1]; +ERROR: Operator is deprecated. Please explicitly use the operator that matches your distance function. 
-- should not throw error SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; v @@ -39,7 +29,6 @@ SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; 0 3 -SET lantern.pgvector_compat=FALSE; SET enable_seqscan=OFF; \set ON_ERROR_STOP on -- one-off vector distance calculations should work with relevant operator @@ -90,15 +79,6 @@ SELECT ARRAY[1,0,0] <+> ARRAY[0,1,0]; ---------- 2 --- NOW THIS IS TRIGGERING INDEX SCAN AS WELL --- BECAUSE WE ARE REGISTERING FOR ALL OPERATOR CLASSES --- IDEALLY THIS SHOULD NOT TRIGGER INDEX SCAN WHEN lantern.pgvector_compat=TRUE -EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; - QUERY PLAN ---------------------------------------- - Index Scan using cos_idx on op_test - Order By: (v '{1,1,1}'::real[]) - -- should sort with index EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; QUERY PLAN @@ -138,15 +118,7 @@ SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; RESET ALL; -- Set false twice to verify that no crash is happening -SET lantern.pgvector_compat=FALSE; \set ON_ERROR_STOP off --- should rewrite operator -SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; - v ---------- - {1,1,1} - {0,0,0} - SET enable_seqscan=OFF; CREATE INDEX hamming_idx ON op_test USING lantern_hnsw(cast(v as INTEGER[]) dist_hamming_ops); INFO: done init usearch index diff --git a/lantern_hnsw/test/expected/hnsw_select.out b/lantern_hnsw/test/expected/hnsw_select.out index 2701ce4cf..279e1e2de 100644 --- a/lantern_hnsw/test/expected/hnsw_select.out +++ b/lantern_hnsw/test/expected/hnsw_select.out @@ -40,7 +40,6 @@ INFO: done init usearch index INFO: inserted 1 elements INFO: done saving 1 vectors SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; -- Verify that basic queries still work given our query parser and planner hooks SELECT 0 + 1; ?column? 
@@ -54,7 +53,7 @@ SELECT 1 FROM test1 WHERE id = 0 + 1; -- Verify that the index is being used SET _lantern_internal.is_test = true; -EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v '{1,0,0}' LIMIT 1; +EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 1; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- DEBUG: LANTERN - Total cost: 4.015000 @@ -66,7 +65,7 @@ DEBUG: LANTERN - --------------------- --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v '{1,0,0}'::real[]) + Order By: (v <-> '{1,0,0}'::real[]) -- Verify that this does not use the index EXPLAIN (COSTS FALSE) SELECT 1 FROM small_world WHERE v = '{0,0,0}'; @@ -77,7 +76,7 @@ EXPLAIN (COSTS FALSE) SELECT 1 FROM small_world WHERE v = '{0,0,0}'; -- Ensure we can query an index for more elements than the value of init_k WITH neighbors AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 + SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 ) SELECT COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -92,7 +91,7 @@ DEBUG: LANTERN querying index for 10 elements 3 WITH neighbors AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -109,7 +108,7 @@ DEBUG: LANTERN querying index for 10 elements -- Change default k and make sure the number of usearch_searchs makes sense SET lantern_hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 + SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 ) SELECT COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -124,7 +123,7 @@ DEBUG: LANTERN querying index for 4 elements 3 WITH neighbors 
AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -143,7 +142,7 @@ DEBUG: LANTERN - querying index for 8 elements RESET client_min_messages; SET _lantern_internal.is_test = false; -- Verify where condition works properly and still uses index -SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER BY v ''{0,0,0}'''); +SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER BY v <-> ''{0,0,0}'''); has_index_scan ---------------- t @@ -155,215 +154,41 @@ SELECT NOT has_index_scan('EXPLAIN SELECT COUNT(*) FROM small_world'); t -- Verify swapping order doesn't change anything and still uses index -SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v'); +SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v'); has_index_scan ---------------- t -- Verify group by works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); has_index_scan ---------------- t -- Validate distinct works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); has_index_scan ---------------- t -- Validate join lateral works and uses index -SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v t1.v LIMIT 1) t1_results ON TRUE'); +SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM 
test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v <-> t1.v LIMIT 1) t1_results ON TRUE'); has_index_scan ---------------- t -- Validate union works and uses index -SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); +SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); has_index_scan ---------------- t -- Validate CTEs work and still use index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); has_index_scan ---------------- t --- Validate is replaced with the matching function when an index is present -set enable_seqscan = true; -set enable_indexscan = false; -EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v '{1,1,1}'; - QUERY PLAN ------------------------------------------------ - Sort - Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) - -> Seq Scan on small_world - -SELECT * from small_world ORDER BY v '{1,1,1}'; - id | b | v ------+---+--------- - 111 | t | {1,1,1} - 101 | f | {1,0,1} - 110 | f | {1,1,0} - 011 | t | {0,1,1} - 100 | f | {1,0,0} - 001 | t | {0,0,1} - 010 | f | {0,1,0} - 000 | t | {0,0,0} - -begin; -INSERT INTO test2 (v) VALUES ('{1,4}'); -INSERT INTO test2 (v) VALUES ('{2,4}'); -CREATE INDEX test2_cos ON test2 USING lantern_hnsw(v dist_cos_ops); -INFO: done init usearch index -INFO: inserted 3 elements -INFO: done saving 3 vectors -EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v '{1,4}'; - QUERY PLAN --------------------------------------------- - Sort - Sort Key: (cos_dist(v, '{1,4}'::real[])) - -> Seq Scan on test2 - --- Some additional cases that trigger operator rewriting --- SampleScan -EXPLAIN (COSTS false) SELECT * FROM small_world 
TABLESAMPLE BERNOULLI (20) ORDER BY v '{1,1,1}' ASC; - QUERY PLAN ------------------------------------------------ - Sort - Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) - -> Sample Scan on small_world - Sampling: bernoulli ('20'::real) - --- can't compare direct equality here because it's random -SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v ''{1,1,1}'' ASC', - 'EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC'); - results_match ---------------- - t - --- SetOpt/HashSetOp -EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v '{1,0,1}' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v '{1,1,1}' ASC LIMIT 5); - QUERY PLAN -------------------------------------------------------------------------------------- - HashSetOp Except - -> Append - -> Subquery Scan on "*SELECT* 1" - -> Sort - Sort Key: (l2sq_dist(small_world.v, '{1,0,1}'::real[])) - -> Seq Scan on small_world - -> Subquery Scan on "*SELECT* 2" - -> Limit - -> Sort - Sort Key: (l2sq_dist(small_world_1.v, '{1,1,1}'::real[])) - -> Seq Scan on small_world small_world_1 - -SELECT results_match('(SELECT * FROM small_world ORDER BY v ''{1,0,1}'' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v ''{1,1,1}'' ASC LIMIT 5)', - '(SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,0,1}'') ASC ) EXCEPT (SELECT * FROM small_world ORDER by l2sq_dist(v, ''{1,1,1}'') ASC LIMIT 5)'); - results_match ---------------- - t - --- HashAggregate -EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v '{1,1,1}'; - QUERY PLAN ------------------------------------------------ - Sort - Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) - -> HashAggregate - Group Key: v - -> Seq Scan on small_world - -SELECT results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v ''{1,1,1}''', - 'SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY l2sq_dist(v, ''{1,1,1}'')'); - results_match ---------------- - 
t - --- GroupBy this -EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v '{1,1,1}'; - QUERY PLAN ------------------------------------------------ - Sort - Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) - -> HashAggregate - Group Key: id, v, b - -> Seq Scan on small_world - -SELECT results_match('SELECT * FROM small_world GROUP BY id, v, b ORDER BY v ''{1,1,1}''', - 'SELECT * FROM small_world GROUP BY id, v, b ORDER BY l2sq_dist(v, ''{1,1,1}'')'); - results_match ---------------- - t - --- HashJoin/Hash -CREATE TABLE small_world_2 AS (SELECT * FROM small_world); -EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v '{1,1,1}'; - QUERY PLAN ------------------------------------------------------------ - Sort - Sort Key: (l2sq_dist(small_world.v, '{1,1,1}'::real[])) - -> Hash Join - Hash Cond: (small_world_2.v = small_world.v) - -> Seq Scan on small_world_2 - -> Hash - -> Seq Scan on small_world - -SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v ''{1,1,1}''', - 'SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); - results_match ---------------- - t - --- MixedAggregate (this doesn't require additional logic, but I include it here as an example of generating the path) -EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v '{1,1,1}'; - QUERY PLAN ------------------------------------------------ - Sort - Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) - -> MixedAggregate - Hash Key: v - Group Key: () - -> Seq Scan on small_world - -SELECT results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v ''{1,1,1}''', - 'SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); - results_match ---------------- - t - --- WindowAgg -EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v '{1,1,1}'; - QUERY PLAN ------------------------------------------------ - Sort - 
Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) - -> WindowAgg - -> Seq Scan on small_world - -SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v ''{1,1,1}''', - 'SELECT v, EVERY(b) OVER () FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'')'); - results_match ---------------- - t - --- LockRows -EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v '{1,1,1}' ASC FOR UPDATE; - QUERY PLAN ------------------------------------------------------ - LockRows - -> Sort - Sort Key: (l2sq_dist(v, '{1,1,1}'::real[])) - -> Seq Scan on small_world - -SELECT results_match('SELECT * FROM small_world ORDER BY v ''{1,1,1}'' ASC FOR UPDATE', - 'SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC FOR UPDATE'); - results_match ---------------- - t - -rollback; set enable_indexscan = true; set enable_seqscan = false; -- test pagination in face of duplicates diff --git a/lantern_hnsw/test/expected/hnsw_todo.out b/lantern_hnsw/test/expected/hnsw_todo.out index 6cecd9231..6e7248d5b 100644 --- a/lantern_hnsw/test/expected/hnsw_todo.out +++ b/lantern_hnsw/test/expected/hnsw_todo.out @@ -15,7 +15,6 @@ INSERT INTO small_world_l2 (id, vector) VALUES ('110', '{1,1,0}'), ('111', '{1,1,1}'); SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; \set ON_ERROR_STOP off CREATE INDEX ON small_world_l2 USING lantern_hnsw (vector dist_l2sq_ops); INFO: done init usearch index @@ -33,19 +32,6 @@ CREATE INDEX ON small_world_l2 USING lantern_hnsw (vector_int dist_l2sq_int_ops) ERROR: operator class "dist_l2sq_int_ops" does not exist for access method "lantern_hnsw" SELECT _lantern_internal.validate_index('small_world_l2_vector_int_idx', false); ERROR: relation "small_world_l2_vector_int_idx" does not exist at character 41 --- this should use index -EXPLAIN (COSTS FALSE) -SELECT id, ROUND(l2sq_dist(vector_int, array[0,1,0])::numeric, 2) as dist -FROM small_world_l2 -ORDER BY vector_int array[0,1,0] LIMIT 7; - QUERY PLAN 
------------------------------------------------------------------------ - Limit - -> Result - -> Sort - Sort Key: (l2sq_dist(vector_int, '{0,1,0}'::integer[])) - -> Seq Scan on small_world_l2 - --- Test scenarious --- ----------------------------------------- -- Case: @@ -77,7 +63,7 @@ INFO: validate_index() done, no issues found. -- The 1001 and 1002 vectors will be ignored in search, so the first row will not be 0 in result -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 1; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; round ----------- 249249.00 @@ -108,36 +94,11 @@ INFO: validate_index() done, no issues found. -- The first row will not be 0 now as the vector under id=777 was updated to 1,1,1,1... but it was indexed with different vector -- So the usearch index can not find 1,1,1,1,1.. vector in the index and wrong results will be returned -- This is an expected behaviour for now -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 1; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; round ----------- 249249.00 ----- Query on expression based index is failing to check correct operator usage -------- -CREATE OR REPLACE FUNCTION int_to_fixed_binary_real_array(n INT) RETURNS REAL[] AS $$ -DECLARE - binary_string TEXT; - real_array REAL[] := '{}'; - i INT; -BEGIN - binary_string := lpad(CAST(n::BIT(3) AS TEXT), 3, '0'); - FOR i IN 1..length(binary_string) - LOOP - real_array := array_append(real_array, CAST(substring(binary_string, i, 1) AS REAL)); - END LOOP; - RETURN real_array; -END; -$$ LANGUAGE plpgsql IMMUTABLE; CREATE TABLE test_table (id INTEGER); INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; --- This currently results in an error about using the operator outside of index --- This case should be fixed -SELECT id FROM test_table ORDER BY 
int_to_fixed_binary_real_array(id) '{0,0,0}'::REAL[] LIMIT 2; -ERROR: Operator can only be used inside of an index --- =========== THIS CAUSES SERVER CRASH =============== - --- create extension lantern_extras; --- select v as v777 from sift_base1k where id = 777 \gset --- set lantern.pgvector_compat=false; --- select lantern_create_external_index('v', 'sift_base1k', 'public', 'cos', 128, 10, 10, 10, 'hnsw_cos_index'); --- ===================================================== - diff --git a/lantern_hnsw/test/expected/hnsw_vector.out b/lantern_hnsw/test/expected/hnsw_vector.out index fd78e445a..d62a20341 100644 --- a/lantern_hnsw/test/expected/hnsw_vector.out +++ b/lantern_hnsw/test/expected/hnsw_vector.out @@ -7,7 +7,6 @@ DROP EXTENSION IF EXISTS lantern; CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION lantern; RESET client_min_messages; -SET lantern.pgvector_compat=FALSE; -- Verify basic functionality of pgvector SELECT '[1,2,3]'::vector; vector @@ -27,7 +26,7 @@ INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors INSERT INTO items (trait_ai) VALUES ('[10,10,10]'), (NULL); -SELECT * FROM items ORDER BY trait_ai '[0,0,0]' LIMIT 3; +SELECT * FROM items ORDER BY trait_ai <-> '[0,0,0]' LIMIT 3; id | trait_ai ----+---------- 1 | [1,2,3] @@ -71,7 +70,7 @@ INSERT INTO small_world (v) VALUES ('[99,99,2]'); INSERT INTO small_world (v) VALUES (NULL); -- Distance functions SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; dist ------ 0.00 @@ -83,15 +82,15 @@ FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; 2.00 EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; QUERY PLAN --------------------------------------------------------- Limit -> Index Scan 
using small_world_v_idx on small_world - Order By: (v '[0,1,0]'::vector) + Order By: (v <-> '[0,1,0]'::vector) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; dist ------ 0.00 @@ -103,12 +102,12 @@ FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; 2.00 EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; QUERY PLAN --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v '[0,1,0]'::vector) + Order By: (v <-> '[0,1,0]'::vector) -- Verify that index creation on a large vector produces an error CREATE TABLE large_vector (v VECTOR(2001)); @@ -127,24 +126,24 @@ INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v <-> :'v4444' LIMIT 10; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_idx on sift_base10k - Order By: (v 
'[55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26]'::vector) + Order By: (v <-> '[55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26]'::vector) -- Ensure we can query an index for more elements than the value of init_k SET lantern_hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM small_world order by v '[1,0,0]' LIMIT 3 + SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 3 ) SELECT COUNT(*) from neighbors; count ------- 3 WITH neighbors AS ( - SELECT * FROM small_world order by v '[1,0,0]' LIMIT 15 + SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 15 ) SELECT COUNT(*) from neighbors; count ------- @@ -152,11 +151,8 @@ WITH neighbors AS ( RESET client_min_messages; \set ON_ERROR_STOP off --- Expect error due to improper use of the operator outside of its supported context -SELECT ARRAY[1,2,3] ARRAY[3,2,1]; -ERROR: Operator is invalid outside of ORDER BY context -- Expect error due to mismatching vector dimensions -SELECT 1 FROM small_world ORDER BY v '[0,1,0,1]' LIMIT 1; +SELECT 1 FROM small_world ORDER BY v <-> '[0,1,0,1]' LIMIT 1; ERROR: Expected vector with dimension 3, got 4 SELECT l2sq_dist('[1,1]'::vector, '[0,1,0]'::vector); ERROR: expected equally sized vectors but got vectors with dimensions 2 and 3 @@ -188,13 +184,13 @@ CREATE INDEX l2_idx ON small_world_arr USING lantern_hnsw(v) WITH (dim=3, m=2); INFO: done init usearch index INFO: inserted 3 elements INFO: 
done saving 3 vectors -EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; +EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; QUERY PLAN -------------------------------------------- Index Scan using l2_idx on small_world_arr - Order By: (v '{0,0,0}'::real[]) + Order By: (v <-> '{0,0,0}'::real[]) -SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; id ---- 1 @@ -206,7 +202,7 @@ CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=2); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors -SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v <=> ARRAY[0,0,0]; id ---- 1 @@ -218,14 +214,13 @@ CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=3); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors -SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v::INT[] <+> ARRAY[0,0,0]; id ---- 1 2 3 --- Test pgvector in lantern.pgvector_compat=TRUE mode DROP TABLE small_world; \ir utils/small_world_vector.sql CREATE TABLE small_world ( @@ -243,7 +238,6 @@ INSERT INTO small_world (id, b, v) VALUES ('110', FALSE, '[1,1,0]'), ('111', TRUE, '[1,1,1]'); -- Distance functions -SET lantern.pgvector_compat=TRUE; SET enable_seqscan=OFF; -- Note: -- For l2sqs and cosine distances in SELECT statement diff --git a/lantern_hnsw/test/sql/hnsw_correct.sql b/lantern_hnsw/test/sql/hnsw_correct.sql index 53a99e22f..b2298f19e 100644 --- a/lantern_hnsw/test/sql/hnsw_correct.sql +++ b/lantern_hnsw/test/sql/hnsw_correct.sql @@ -12,7 +12,6 @@ INSERT INTO small_world (v) VALUES ('{0,0}'), ('{1,1}'), ('{2,2}'), ('{3,3}'); -- Create index CREATE INDEX ON small_world USING lantern_hnsw (v dist_l2sq_ops) WITH (dim=2, M=4); SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; -- Get the 
results without the index @@ -27,7 +26,7 @@ FROM -- Get the results with the index CREATE TEMP TABLE results_w_index AS SELECT - ROW_NUMBER() OVER (ORDER BY v '{0,0}') AS row_num, + ROW_NUMBER() OVER (ORDER BY v <-> '{0,0}') AS row_num, id, l2sq_dist(v, '{0,0}') AS dist FROM diff --git a/lantern_hnsw/test/sql/hnsw_cost_estimate.sql b/lantern_hnsw/test/sql/hnsw_cost_estimate.sql index c9c0d85f2..d11d66b4b 100644 --- a/lantern_hnsw/test/sql/hnsw_cost_estimate.sql +++ b/lantern_hnsw/test/sql/hnsw_cost_estimate.sql @@ -50,19 +50,17 @@ BEGIN END; $$ LANGUAGE plpgsql; -SET lantern.pgvector_compat=FALSE; - -- Goal: make sure query cost estimate is accurate -- when index is created with varying costruction parameters. SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v ''%s'' LIMIT 10' +\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v <-> ''%s'' LIMIT 10' \set enable_seqscan = off; -- Case 0, sanity check. No data. 
CREATE TABLE empty_table(id SERIAL PRIMARY KEY, v REAL[2]); CREATE INDEX empty_idx ON empty_table USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=2, dim=2); SET _lantern_internal.is_test = true; -SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v ''{1,2}'' LIMIT 10', 0.47); +SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v <-> ''{1,2}'' LIMIT 10', 0.47); SELECT _lantern_internal.validate_index('empty_idx', false); DROP INDEX empty_idx; diff --git a/lantern_hnsw/test/sql/hnsw_create.sql b/lantern_hnsw/test/sql/hnsw_create.sql index a4bd6d5ea..40f26fecc 100644 --- a/lantern_hnsw/test/sql/hnsw_create.sql +++ b/lantern_hnsw/test/sql/hnsw_create.sql @@ -19,11 +19,10 @@ SELECT _lantern_internal.validate_index('sift_base1k_int_v_idx', false); -- Validate that index creation works with a larger number of vectors \ir utils/sift10k_array.sql -SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444' LIMIT 10; SELECT _lantern_internal.validate_index('hnsw_idx', false); --- Validate that M values inside the allowed range [2, 128] do not throw an error diff --git a/lantern_hnsw/test/sql/hnsw_create_expr.sql b/lantern_hnsw/test/sql/hnsw_create_expr.sql index c51bf4015..20b408124 100644 --- a/lantern_hnsw/test/sql/hnsw_create_expr.sql +++ b/lantern_hnsw/test/sql/hnsw_create_expr.sql @@ -69,8 +69,6 @@ INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; SET enable_seqscan = false; -SET lantern.pgvector_compat=FALSE; - -- This should success CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_real_array(id)) WITH (M=2); @@ -86,4 +84,4 @@ CREATE INDEX ON 
test_table USING lantern_hnsw (int_to_string(id)) WITH (M=2); -- This should result in error about multicolumn expressions support CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_real_array(id), int_to_dynamic_binary_real_array(id)) WITH (M=2); -SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) '{0,0,0}'::REAL[] LIMIT 2; +SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <-> '{0,0,0}'::REAL[] LIMIT 2; diff --git a/lantern_hnsw/test/sql/hnsw_create_unlogged.sql b/lantern_hnsw/test/sql/hnsw_create_unlogged.sql index 0e56c9ff6..09b1f763e 100644 --- a/lantern_hnsw/test/sql/hnsw_create_unlogged.sql +++ b/lantern_hnsw/test/sql/hnsw_create_unlogged.sql @@ -13,11 +13,10 @@ SELECT _lantern_internal.validate_index('sift_base1k_v_idx', false); -- Validate that index creation works with a larger number of vectors \ir utils/sift10k_array_unlogged.sql -SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444' LIMIT 10; SELECT _lantern_internal.validate_index('hnsw_idx', false); --- Validate that M values inside the allowed range [2, 128] do not throw an error diff --git a/lantern_hnsw/test/sql/hnsw_dist_func.sql b/lantern_hnsw/test/sql/hnsw_dist_func.sql index d47981915..8d8474345 100644 --- a/lantern_hnsw/test/sql/hnsw_dist_func.sql +++ b/lantern_hnsw/test/sql/hnsw_dist_func.sql @@ -17,12 +17,11 @@ INSERT INTO small_world_cos SELECT id, v FROM small_world; INSERT INTO small_world_ham SELECT id, ARRAY[CAST(v[1] AS INTEGER), CAST(v[2] AS INTEGER), CAST(v[3] AS INTEGER)] FROM small_world; SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; -- Verify that the distance functions work (check distances) -SELECT 
ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v '{0,1,0}'; -SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}'; -SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}'; +SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; +SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v <=> '{0,1,0}'; +SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v <+> '{0,1,0}'; -- Verify that the distance functions work (check IDs) SELECT ARRAY_AGG(id ORDER BY id), ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 GROUP BY 2 ORDER BY 2; @@ -30,26 +29,22 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham GROUP BY 2 ORDER BY 2; -- Verify that the indexes is being used -EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v '{0,1,0}'; -EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v '{0,1,0}'; -EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v <=> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v <+> '{0,1,0}'; \set ON_ERROR_STOP off -- Expect errors due to mismatching vector dimensions -SELECT 1 FROM small_world_l2 ORDER BY v '{0,1,0,1}' LIMIT 1; -SELECT 1 FROM small_world_cos ORDER BY v '{0,1,0,1}' LIMIT 1; -SELECT 1 FROM small_world_ham ORDER BY v '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_l2 ORDER BY v <-> '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_cos ORDER BY v <=> '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_ham ORDER BY v <+> '{0,1,0,1}' LIMIT 1; SELECT l2sq_dist('{1,1}'::REAL[], '{0,1,0}'::REAL[]); 
SELECT cos_dist('{1,1}'::real[], '{0,1,0}'::real[]); -- the one below is umbiguous if pgvector's vector type is present SELECT cos_dist('{1,1}', '{0,1,0}'); SELECT hamming_dist('{1,1}', '{0,1,0}'); --- Expect errors due to improper use of the operator outside of its supported context -SELECT ARRAY[1,2,3] ARRAY[3,2,1]; -SELECT ROUND((v ARRAY[0,1,0])::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}' LIMIT 7; -SELECT ROUND((v ARRAY[0,1,0])::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}' LIMIT 7; \set ON_ERROR_STOP on @@ -63,67 +58,6 @@ INSERT INTO test2 (v) VALUES ('{5,4}'); SELECT 0 + 1; SELECT 1 FROM test1 WHERE id = 0 + 1; -\set ON_ERROR_STOP off - --- Expect errors due to incorrect usage -INSERT INTO test1 (v) VALUES (ARRAY['{1,2}'::REAL[] '{4,2}'::REAL[], 0]); -SELECT v '{1,2}' FROM test1 ORDER BY v '{1,3}'; -SELECT v '{1,2}' FROM test1; -WITH temp AS (SELECT v '{1,2}' FROM test1) SELECT 1 FROM temp; -SELECT t.res FROM (SELECT v '{1,2}' AS res FROM test1) t; -SELECT (SELECT v '{1,2}' FROM test1 LIMIT 1) FROM test1; -SELECT COALESCE(v '{1,2}', 0) FROM test1; -SELECT EXISTS (SELECT v '{1,2}' FROM test1); -SELECT test1.v test2.v FROM test1 JOIN test2 USING (id); -SELECT v '{1,2}' FROM test1 UNION SELECT v '{1,3}' FROM test1; -(SELECT v '{1,2}' FROM test1 WHERE id < 5) UNION (SELECT v '{1,3}' FROM test1 WHERE id >= 5); -SELECT MAX(v '{1,2}') FROM test1; -SELECT * FROM test1 JOIN test2 ON test1.v test2.v < 0.5; -SELECT test1.v FROM test1 JOIN test2 ON test1.v '{1,2}' = test2.v '{1,3}'; -SELECT (v '{1,2}') + (v '{1,3}') FROM test1; -SELECT CASE WHEN v '{1,2}' > 1 THEN 'High' ELSE 'Low' END FROM test1; -INSERT INTO test1 (v) VALUES ('{2,3}') RETURNING v '{1,2}'; -SELECT 1 FROM test1 GROUP BY v '{1,3}'; -SELECT 1 FROM test1 ORDER BY (('{1,2}'::real[] '{3,4}'::real[]) - 0); -SELECT 1 FROM test1 ORDER BY '{1,2}'::REAL[] '{3,4}'::REAL[]; -SELECT 1 FROM test1 ORDER BY v ARRAY[(SELECT '{1,4}'::REAL[] '{4,2}'::REAL[]), 3]; - --- Expect errors due to index not 
existing -SELECT id FROM test1 ORDER BY v '{1,2}'; -SELECT 1 FROM test1 ORDER BY v (SELECT '{1,3}'::real[]); -SELECT t2_results.id FROM test1 t1 JOIN LATERAL (SELECT t2.id FROM test2 t2 ORDER BY t1.v t2.v LIMIT 1) t2_results ON TRUE; -WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT DISTINCT id FROM t; -WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1; -WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}') SELECT id FROM t UNION SELECT id FROM t; - --- issue #227 -SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v '{1,2}') as forall) haha on TRUE; --- more complex setup of the above -SELECT forall.id, nearest_per_id.* FROM -(SELECT * FROM - test2) AS forall - JOIN LATERAL ( - SELECT - ARRAY_AGG(id ORDER BY id) AS near_ids, - ARRAY_AGG(dist ORDER BY id) AS near_dists - FROM - ( - SELECT - id, - l2sq_dist(v, forall.v) as dist - FROM - test2 - ORDER BY - v forall.v - LIMIT - 5 - ) as __unused_name - ) nearest_per_id on TRUE -ORDER BY - forall.id -LIMIT - 9; - \set ON_ERROR_STOP on -- cross-lateral joins work as expected when appropriate index exists -- nearest element for each vector @@ -145,7 +79,7 @@ SELECT forall.id, nearest_per_id.* FROM FROM small_world_l2 ORDER BY - v forall.v + v <-> forall.v LIMIT 4 ) as __unused_name @@ -162,7 +96,7 @@ CREATE TABLE extra_small_world_ham ( ); INSERT INTO extra_small_world_ham (v) VALUES ('{0,0}'), ('{1,1}'), ('{2,2}'), ('{3,3}'); CREATE INDEX ON extra_small_world_ham USING lantern_hnsw (v dist_hamming_ops) WITH (dim=2); -SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v '{0,0}'; +SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v <+> '{0,0}'; SELECT _lantern_internal.validate_index('small_world_l2_v_idx', false); SELECT _lantern_internal.validate_index('small_world_cos_v_idx', false); diff --git a/lantern_hnsw/test/sql/hnsw_ef_search.sql 
b/lantern_hnsw/test/sql/hnsw_ef_search.sql index 57b563037..d5196f11d 100644 --- a/lantern_hnsw/test/sql/hnsw_ef_search.sql +++ b/lantern_hnsw/test/sql/hnsw_ef_search.sql @@ -21,37 +21,36 @@ SET lantern_hnsw.ef = 401; -- Repeat the same query while varying ef parameter -- NOTE: it is not entirely known if the results of these are deterministic SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -- Queries below have the same result SET lantern_hnsw.ef = 1; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 2; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 4; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 8; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 16; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; -- Queries below have the same result, which is different from above SET lantern_hnsw.ef = 32; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 64; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM 
sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 128; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 256; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; SET lantern_hnsw.ef = 400; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; diff --git a/lantern_hnsw/test/sql/hnsw_extras.sql b/lantern_hnsw/test/sql/hnsw_extras.sql index 62e510e64..6a52a70f3 100644 --- a/lantern_hnsw/test/sql/hnsw_extras.sql +++ b/lantern_hnsw/test/sql/hnsw_extras.sql @@ -23,30 +23,19 @@ SELECT _lantern_internal.validate_index('sift_base1k_v_idx', false); SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -- Validate that using corresponding operator triggers index scan -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <-> :'v777' LIMIT 10; - -SET lantern.pgvector_compat=FALSE; -EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10; -SET lantern.pgvector_compat=TRUE; DROP INDEX sift_base1k_v_idx; -- Create with params SELECT lantern_create_external_index('v', 'sift_base1k', 'public', 'cos', 128, 10, 10, 10, false, 'hnsw_cos_index'); SELECT _lantern_internal.validate_index('hnsw_cos_index', false); -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; -SET lantern.pgvector_compat=FALSE; -EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v :'v777' LIMIT 10; -SET 
lantern.pgvector_compat=TRUE; - -- -- Reindex external index SELECT lantern_reindex_external_index('hnsw_cos_index'); SELECT _lantern_internal.validate_index('hnsw_cos_index', false); -- Validate that using corresponding operator triggers index scan -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; -- Create PQ Index @@ -61,5 +50,4 @@ SELECT lantern_create_external_index('v', 'sift_base1k', 'public', 'cos', 128, 1 SELECT _lantern_internal.validate_index('hnsw_cos_index_pq', false); SELECT lantern_reindex_external_index('hnsw_cos_index_pq'); SELECT _lantern_internal.validate_index('hnsw_cos_index_pq', false); -SET lantern.pgvector_compat=TRUE; EXPLAIN (COSTS FALSE) SELECT id FROM sift_base1k order by v <=> :'v777' LIMIT 10; diff --git a/lantern_hnsw/test/sql/hnsw_index_from_file.sql b/lantern_hnsw/test/sql/hnsw_index_from_file.sql index 16c0e129c..7f2924164 100644 --- a/lantern_hnsw/test/sql/hnsw_index_from_file.sql +++ b/lantern_hnsw/test/sql/hnsw_index_from_file.sql @@ -26,31 +26,21 @@ SELECT _lantern_internal.validate_index('hnsw_l2_index', false); SELECT * FROM ldb_get_indexes('sift_base1k'); SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; -- Validate that inserting rows on index created from file works as expected INSERT INTO sift_base1k (id, v) VALUES (1001, array_fill(1, ARRAY[128])), (1002, array_fill(2, ARRAY[128])); SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -SELECT ROUND(l2sq_dist(v, 
:'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; -- Drop and recreate table DROP TABLE sift_base1k CASCADE; \ir utils/sift1k_array.sql --- Validate that creating an index from file works with cosine distance function -CREATE INDEX hnsw_cos_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-cos-0.3.0.usearch'); -SELECT _lantern_internal.validate_index('hnsw_cos_index', false); -SELECT * FROM ldb_get_indexes('sift_base1k'); - -SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; -SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; - --- Test scenarious --- ----------------------------------------- @@ -64,7 +54,7 @@ DELETE FROM sift_base1k WHERE id=777; CREATE INDEX hnsw_l2_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2sq-0.3.0.usearch'); SELECT _lantern_internal.validate_index('hnsw_l2_index', false); -- This should not throw error, but the first result will not be 0 as vector 777 is deleted from the table -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; -- Should throw error when lantern_extras is not installed \set ON_ERROR_STOP off diff --git a/lantern_hnsw/test/sql/hnsw_insert.sql b/lantern_hnsw/test/sql/hnsw_insert.sql index 99bcf0bee..2e014c119 100644 --- a/lantern_hnsw/test/sql/hnsw_insert.sql +++ b/lantern_hnsw/test/sql/hnsw_insert.sql @@ -47,7 +47,6 @@ set work_mem = '10MB'; CREATE INDEX ON small_world USING lantern_hnsw (v) WITH (dim=3); SET enable_seqscan = false; -SET lantern.pgvector_compat = false; -- Inserting 
vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); @@ -64,7 +63,7 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}'; + v <-> '{0,0,0}'; -- Ensure the index size remains consistent after inserts SELECT * from ldb_get_indexes('small_world'); @@ -76,7 +75,7 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}' + v <-> '{0,0,0}' LIMIT 10; SELECT _lantern_internal.validate_index('small_world_v_idx', false); @@ -89,6 +88,6 @@ CREATE TABLE sift_base10k ( CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' WITH CSV; SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444'; SELECT _lantern_internal.validate_index('hnsw_idx', false); diff --git a/lantern_hnsw/test/sql/hnsw_insert_unlogged.sql b/lantern_hnsw/test/sql/hnsw_insert_unlogged.sql index ec7dff67b..f682297bc 100644 --- a/lantern_hnsw/test/sql/hnsw_insert_unlogged.sql +++ b/lantern_hnsw/test/sql/hnsw_insert_unlogged.sql @@ -47,7 +47,6 @@ set work_mem = '10MB'; CREATE INDEX ON small_world USING lantern_hnsw (v) WITH (dim=3); SET enable_seqscan = false; -SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); @@ -64,7 +63,7 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}'; + v <-> '{0,0,0}'; -- Ensure the index size remains consistent after inserts SELECT * from ldb_get_indexes('small_world'); @@ -76,7 +75,7 @@ SELECT FROM small_world ORDER BY - v '{0,0,0}' + v <-> '{0,0,0}' LIMIT 10; SELECT _lantern_internal.validate_index('small_world_v_idx', false); @@ -89,6 +88,6 @@ CREATE UNLOGGED TABLE sift_base10k ( CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v dist_l2sq_ops) WITH 
(M=2, ef_construction=10, ef=4, dim=128); \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' WITH CSV; SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444'; SELECT _lantern_internal.validate_index('hnsw_idx', false); diff --git a/lantern_hnsw/test/sql/hnsw_operators.sql b/lantern_hnsw/test/sql/hnsw_operators.sql index 8134d3b0f..7f4150a17 100644 --- a/lantern_hnsw/test/sql/hnsw_operators.sql +++ b/lantern_hnsw/test/sql/hnsw_operators.sql @@ -1,15 +1,11 @@ --- Validate that lantern.pgvector_compat disables the operator rewriting hooks +\set ON_ERROR_STOP off CREATE TABLE op_test (v REAL[]); INSERT INTO op_test (v) VALUES (ARRAY[0,0,0]), (ARRAY[1,1,1]); CREATE INDEX cos_idx ON op_test USING lantern_hnsw(v dist_cos_ops); --- should rewrite operator -SET lantern.pgvector_compat=FALSE; -SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; -\set ON_ERROR_STOP off -SET lantern.pgvector_compat=TRUE; --- should throw error -SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; +-- Expect deprecation error due to use of the operator +SELECT ARRAY[1,2,3] ARRAY[3,2,1]; + -- should not throw error SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; @@ -19,7 +15,6 @@ SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; -- should not throw error SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; -SET lantern.pgvector_compat=FALSE; SET enable_seqscan=OFF; \set ON_ERROR_STOP on @@ -36,11 +31,6 @@ SELECT ARRAY[.1,0,0] <=> ARRAY[0,.5,0]; SELECT cos_dist(ARRAY[.1,0,0]::real[], ARRAY[0,.5,0]::real[]); SELECT ARRAY[1,0,0] <+> ARRAY[0,1,0]; --- NOW THIS IS TRIGGERING INDEX SCAN AS WELL --- BECAUSE WE ARE REGISTERING FOR ALL OPERATOR CLASSES --- IDEALLY THIS SHOULD NOT TRIGGER INDEX SCAN WHEN lantern.pgvector_compat=TRUE -EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; - -- 
should sort with index EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; @@ -60,11 +50,7 @@ SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; RESET ALL; -- Set false twice to verify that no crash is happening -SET lantern.pgvector_compat=FALSE; \set ON_ERROR_STOP off --- should rewrite operator -SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; - SET enable_seqscan=OFF; CREATE INDEX hamming_idx ON op_test USING lantern_hnsw(cast(v as INTEGER[]) dist_hamming_ops); diff --git a/lantern_hnsw/test/sql/hnsw_select.sql b/lantern_hnsw/test/sql/hnsw_select.sql index 4f5a3be58..fbbe023ba 100644 --- a/lantern_hnsw/test/sql/hnsw_select.sql +++ b/lantern_hnsw/test/sql/hnsw_select.sql @@ -16,7 +16,6 @@ INSERT INTO test2 (v) VALUES ('{5,4}'); CREATE INDEX ON test1 USING lantern_hnsw (v); SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; -- Verify that basic queries still work given our query parser and planner hooks SELECT 0 + 1; @@ -24,101 +23,54 @@ SELECT 1 FROM test1 WHERE id = 0 + 1; -- Verify that the index is being used SET _lantern_internal.is_test = true; -EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v '{1,0,0}' LIMIT 1; +EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 1; -- Verify that this does not use the index EXPLAIN (COSTS FALSE) SELECT 1 FROM small_world WHERE v = '{0,0,0}'; -- Ensure we can query an index for more elements than the value of init_k WITH neighbors AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 + SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 ) SELECT COUNT(*) from neighbors; WITH neighbors AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; -- Change default k and make sure the number of usearch_searchs makes sense SET lantern_hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 + 
SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 ) SELECT COUNT(*) from neighbors; WITH neighbors AS ( - SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; RESET client_min_messages; SET _lantern_internal.is_test = false; -- Verify where condition works properly and still uses index -SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER BY v ''{0,0,0}'''); +SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER BY v <-> ''{0,0,0}'''); -- Verify that the index is not being used when there is no order by SELECT NOT has_index_scan('EXPLAIN SELECT COUNT(*) FROM small_world'); -- Verify swapping order doesn't change anything and still uses index -SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v'); +SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v'); -- Verify group by works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); -- Validate distinct works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); -- Validate join lateral works and uses index -SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v t1.v LIMIT 1) t1_results ON TRUE'); +SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v <-> t1.v LIMIT 1) t1_results ON TRUE'); -- Validate union works and 
uses index -SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); +SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); -- Validate CTEs work and still use index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); - --- Validate is replaced with the matching function when an index is present -set enable_seqscan = true; -set enable_indexscan = false; -EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v '{1,1,1}'; -SELECT * from small_world ORDER BY v '{1,1,1}'; -begin; -INSERT INTO test2 (v) VALUES ('{1,4}'); -INSERT INTO test2 (v) VALUES ('{2,4}'); -CREATE INDEX test2_cos ON test2 USING lantern_hnsw(v dist_cos_ops); -EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v '{1,4}'; --- Some additional cases that trigger operator rewriting --- SampleScan -EXPLAIN (COSTS false) SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v '{1,1,1}' ASC; --- can't compare direct equality here because it's random -SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v ''{1,1,1}'' ASC', - 'EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC'); --- SetOpt/HashSetOp -EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v '{1,0,1}' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v '{1,1,1}' ASC LIMIT 5); -SELECT results_match('(SELECT * FROM small_world ORDER BY v ''{1,0,1}'' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v ''{1,1,1}'' ASC LIMIT 5)', - '(SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,0,1}'') ASC ) EXCEPT (SELECT * FROM small_world ORDER by l2sq_dist(v, ''{1,1,1}'') ASC LIMIT 5)'); --- HashAggregate -EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v '{1,1,1}'; -SELECT 
results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v ''{1,1,1}''', - 'SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY l2sq_dist(v, ''{1,1,1}'')'); --- GroupBy this -EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v '{1,1,1}'; -SELECT results_match('SELECT * FROM small_world GROUP BY id, v, b ORDER BY v ''{1,1,1}''', - 'SELECT * FROM small_world GROUP BY id, v, b ORDER BY l2sq_dist(v, ''{1,1,1}'')'); --- HashJoin/Hash -CREATE TABLE small_world_2 AS (SELECT * FROM small_world); -EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v '{1,1,1}'; -SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v ''{1,1,1}''', - 'SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); --- MixedAggregate (this doesn't require additional logic, but I include it here as an example of generating the path) -EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v '{1,1,1}'; -SELECT results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v ''{1,1,1}''', - 'SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); --- WindowAgg -EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v '{1,1,1}'; -SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v ''{1,1,1}''', - 'SELECT v, EVERY(b) OVER () FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'')'); --- LockRows -EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v '{1,1,1}' ASC FOR UPDATE; -SELECT results_match('SELECT * FROM small_world ORDER BY v ''{1,1,1}'' ASC FOR UPDATE', - 'SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC FOR UPDATE'); - -rollback; +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); + set enable_indexscan = true; set enable_seqscan = false; diff --git 
a/lantern_hnsw/test/sql/hnsw_todo.sql b/lantern_hnsw/test/sql/hnsw_todo.sql index fcf84324f..d1076c04f 100644 --- a/lantern_hnsw/test/sql/hnsw_todo.sql +++ b/lantern_hnsw/test/sql/hnsw_todo.sql @@ -18,8 +18,6 @@ INSERT INTO small_world_l2 (id, vector) VALUES ('111', '{1,1,1}'); SET enable_seqscan=FALSE; -SET lantern.pgvector_compat=FALSE; - \set ON_ERROR_STOP off CREATE INDEX ON small_world_l2 USING lantern_hnsw (vector dist_l2sq_ops); @@ -29,12 +27,6 @@ SELECT _lantern_internal.validate_index('small_world_l2_vector_idx', false); CREATE INDEX ON small_world_l2 USING lantern_hnsw (vector_int dist_l2sq_int_ops); SELECT _lantern_internal.validate_index('small_world_l2_vector_int_idx', false); --- this should use index -EXPLAIN (COSTS FALSE) -SELECT id, ROUND(l2sq_dist(vector_int, array[0,1,0])::numeric, 2) as dist -FROM small_world_l2 -ORDER BY vector_int array[0,1,0] LIMIT 7; - --- Test scenarious --- ----------------------------------------- -- Case: @@ -53,7 +45,7 @@ SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset CREATE INDEX hnsw_l2_index ON sift_base1k USING lantern_hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2sq-0.3.0.usearch'); SELECT _lantern_internal.validate_index('hnsw_l2_index', false); -- The 1001 and 1002 vectors will be ignored in search, so the first row will not be 0 in result -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 1; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; -- Case: -- Index is created externally @@ -67,36 +59,9 @@ SELECT _lantern_internal.validate_index('hnsw_l2_index', false); -- The first row will not be 0 now as the vector under id=777 was updated to 1,1,1,1... but it was indexed with different vector -- So the usearch index can not find 1,1,1,1,1.. 
vector in the index and wrong results will be returned -- This is an expected behaviour for now -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 1; - ----- Query on expression based index is failing to check correct operator usage -------- -CREATE OR REPLACE FUNCTION int_to_fixed_binary_real_array(n INT) RETURNS REAL[] AS $$ -DECLARE - binary_string TEXT; - real_array REAL[] := '{}'; - i INT; -BEGIN - binary_string := lpad(CAST(n::BIT(3) AS TEXT), 3, '0'); - FOR i IN 1..length(binary_string) - LOOP - real_array := array_append(real_array, CAST(substring(binary_string, i, 1) AS REAL)); - END LOOP; - RETURN real_array; -END; -$$ LANGUAGE plpgsql IMMUTABLE; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; CREATE TABLE test_table (id INTEGER); INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; --- This currently results in an error about using the operator outside of index --- This case should be fixed -SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) '{0,0,0}'::REAL[] LIMIT 2; - --- =========== THIS CAUSES SERVER CRASH =============== - --- create extension lantern_extras; --- select v as v777 from sift_base1k where id = 777 \gset --- set lantern.pgvector_compat=false; --- select lantern_create_external_index('v', 'sift_base1k', 'public', 'cos', 128, 10, 10, 10, 'hnsw_cos_index'); --- ===================================================== - - diff --git a/lantern_hnsw/test/sql/hnsw_vector.sql b/lantern_hnsw/test/sql/hnsw_vector.sql index 1c236a127..bffc928ee 100644 --- a/lantern_hnsw/test/sql/hnsw_vector.sql +++ b/lantern_hnsw/test/sql/hnsw_vector.sql @@ -8,7 +8,6 @@ DROP EXTENSION IF EXISTS lantern; CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION lantern; RESET client_min_messages; -SET lantern.pgvector_compat=FALSE; -- Verify basic functionality of pgvector SELECT '[1,2,3]'::vector; @@ -20,7 +19,7 @@ CREATE INDEX ON items 
USING lantern_hnsw (trait_ai dist_vec_l2sq_ops) WITH (dim= INSERT INTO items (trait_ai) VALUES ('[6,7,8]'); CREATE INDEX ON items USING lantern_hnsw (trait_ai dist_vec_l2sq_ops) WITH (dim=3, M=4); INSERT INTO items (trait_ai) VALUES ('[10,10,10]'), (NULL); -SELECT * FROM items ORDER BY trait_ai '[0,0,0]' LIMIT 3; +SELECT * FROM items ORDER BY trait_ai <-> '[0,0,0]' LIMIT 3; SELECT * FROM ldb_get_indexes('items'); -- Test index creation on table with existing data @@ -33,14 +32,14 @@ INSERT INTO small_world (v) VALUES (NULL); -- Distance functions SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; -- Verify that index creation on a large vector produces an error CREATE TABLE large_vector (v VECTOR(2001)); @@ -56,25 +55,22 @@ CREATE TABLE sift_base10k ( \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base.csv' WITH CSV; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v); SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v <-> :'v4444' LIMIT 10; -- Ensure we can query an index for more elements than the value of init_k SET lantern_hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM 
small_world order by v '[1,0,0]' LIMIT 3 + SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 3 ) SELECT COUNT(*) from neighbors; WITH neighbors AS ( - SELECT * FROM small_world order by v '[1,0,0]' LIMIT 15 + SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 15 ) SELECT COUNT(*) from neighbors; RESET client_min_messages; \set ON_ERROR_STOP off --- Expect error due to improper use of the operator outside of its supported context -SELECT ARRAY[1,2,3] ARRAY[3,2,1]; - -- Expect error due to mismatching vector dimensions -SELECT 1 FROM small_world ORDER BY v '[0,1,0,1]' LIMIT 1; +SELECT 1 FROM small_world ORDER BY v <-> '[0,1,0,1]' LIMIT 1; SELECT l2sq_dist('[1,1]'::vector, '[0,1,0]'::vector); -- Test creating index with expression @@ -102,21 +98,19 @@ CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_vector(id)) W CREATE TABLE small_world_arr (id SERIAL PRIMARY KEY, v REAL[]); INSERT INTO small_world_arr (v) VALUES ('{0,0,0}'), ('{0,0,1}'), ('{0,0,2}'); CREATE INDEX l2_idx ON small_world_arr USING lantern_hnsw(v) WITH (dim=3, m=2); -EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; -SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; +EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; DROP INDEX l2_idx; CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=2); -SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v <=> ARRAY[0,0,0]; DROP INDEX cos_idx; CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=3); -SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v::INT[] <+> ARRAY[0,0,0]; --- Test pgvector in lantern.pgvector_compat=TRUE mode DROP TABLE small_world; \ir utils/small_world_vector.sql -- Distance functions -SET lantern.pgvector_compat=TRUE; SET enable_seqscan=OFF; -- Note: diff --git 
a/lantern_hnsw/test/test_runner.sh b/lantern_hnsw/test/test_runner.sh index 46473c1de..5f62c0c09 100755 --- a/lantern_hnsw/test/test_runner.sh +++ b/lantern_hnsw/test/test_runner.sh @@ -47,7 +47,6 @@ function run_regression_test { -e 's! Average Peak Memory: [0-9]\{1,\}kB!!' \ -e 's! time=[0-9]\+\.[0-9]\+\.\.[0-9]\+\.[0-9]\+!!' | \ grep -v 'DEBUG: rehashing catalog cache id' | \ - grep -v 'WARNING: this hook is experimental and can cause undefined behaviour' | \ grep -Ev '^[[:space:]]*Disabled:' | \ grep -Gv '"Disabled": \(true\|false\),' | \ grep -Gv '^ Planning Time:' | \ From 5cd1429b1829d30336e2ee364aca12f60da2ce19 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Sun, 3 Nov 2024 15:40:09 +0400 Subject: [PATCH 12/12] fix hnsw_vector test --- lantern_hnsw/test/expected/hnsw_vector.out | 12 +++++++----- lantern_hnsw/test/sql/hnsw_vector.sql | 8 +++++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/lantern_hnsw/test/expected/hnsw_vector.out b/lantern_hnsw/test/expected/hnsw_vector.out index d62a20341..45fdb879d 100644 --- a/lantern_hnsw/test/expected/hnsw_vector.out +++ b/lantern_hnsw/test/expected/hnsw_vector.out @@ -198,7 +198,7 @@ SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; 3 DROP INDEX l2_idx; -CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=2); +CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v dist_cos_ops) WITH (m=2); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors @@ -206,20 +206,22 @@ SELECT id FROM small_world_arr ORDER BY v <=> ARRAY[0,0,0]; id ---- 1 - 2 3 + 2 DROP INDEX cos_idx; -CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=3); +ALTER TABLE small_world_arr ADD COLUMN v_int INT[]; +UPDATE small_world_arr SET v_int=v::INT[]; +CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v_int dist_hamming_ops) WITH (m=3); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors -SELECT id FROM 
small_world_arr ORDER BY v::INT[] <+> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v_int <+> ARRAY[0,0,0]; id ---- 1 - 2 3 + 2 DROP TABLE small_world; \ir utils/small_world_vector.sql diff --git a/lantern_hnsw/test/sql/hnsw_vector.sql b/lantern_hnsw/test/sql/hnsw_vector.sql index bffc928ee..c0ab3c106 100644 --- a/lantern_hnsw/test/sql/hnsw_vector.sql +++ b/lantern_hnsw/test/sql/hnsw_vector.sql @@ -101,11 +101,13 @@ CREATE INDEX l2_idx ON small_world_arr USING lantern_hnsw(v) WITH (dim=3, m=2); EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; DROP INDEX l2_idx; -CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=2); +CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v dist_cos_ops) WITH (m=2); SELECT id FROM small_world_arr ORDER BY v <=> ARRAY[0,0,0]; DROP INDEX cos_idx; -CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=3); -SELECT id FROM small_world_arr ORDER BY v::INT[] <+> ARRAY[0,0,0]; +ALTER TABLE small_world_arr ADD COLUMN v_int INT[]; +UPDATE small_world_arr SET v_int=v::INT[]; +CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v_int dist_hamming_ops) WITH (m=3); +SELECT id FROM small_world_arr ORDER BY v_int <+> ARRAY[0,0,0]; DROP TABLE small_world; \ir utils/small_world_vector.sql