Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pg17 support #348

Merged
merged 12 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubicloud-standard-4-arm, macos-13]
postgres: [12, 13, 14, 15, 16]
postgres: [12, 13, 14, 15, 16, 17]
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -53,7 +53,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubicloud-standard-4-arm, macos-13]
postgres: [12, 13, 14, 15, 16]
postgres: [12, 13, 14, 15, 16, 17]
steps:
- uses: actions/checkout@v4
with:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/publish-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jobs:
fail-fast: false
matrix:
include:
- postgres: 17
- postgres: 16
- postgres: 15
- postgres: 14
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/sanitizer-build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-22.04"]
pg: ["12.16", "13.12", "14.9", "15.4", "16.0"]
pg: ["12.16", "13.12", "14.9", "15.4", "16.0", "17.0"]
steps:
- name: Enable UBSan if this is a release
if: ${{ github.event_name == 'release' }}
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubicloud-standard-4-arm, macos-13]
postgres: [12, 13, 14, 15, 16]
postgres: [12, 13, 14, 15, 16, 17]
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -148,7 +148,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubicloud-standard-4-arm]
postgres: [15]
postgres: [15, 17]
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -177,6 +177,7 @@ jobs:
run: |
cargo install cargo-pgrx --version 0.12.7
cargo pgrx init "--pg$PG_VERSION" /usr/bin/pg_config
sed -i -e "s/default = .*/default=[\"pg${PG_VERSION}\"]/" lantern_extras/Cargo.toml
RUSTFLAGS="--cfg profile=\"ci-build\"" cargo pgrx install --sudo --pg-config /usr/bin/pg_config --package lantern_extras
env:
PG_VERSION: ${{ matrix.postgres }}
Expand All @@ -186,8 +187,10 @@ jobs:
PG_VERSION: ${{ matrix.postgres }}
- name: Setup permissions
run: |
sudo chmod 777 -R /usr/lib/postgresql/15/lib/
sudo chmod 777 -R /usr/share/postgresql/15/extension/
sudo chmod 777 -R "/usr/lib/postgresql/${PG_VERSION}/lib/"
sudo chmod 777 -R "/usr/share/postgresql/${PG_VERSION}/extension/"
env:
PG_VERSION: ${{ matrix.postgres }}
- name: Run tests
run: cargo llvm-cov --workspace --lcov --output-path lantern-extras-lcov.info
env:
Expand All @@ -196,7 +199,7 @@ jobs:
DB_URL: "postgres://[email protected]:5432/postgres"
- name: Upload lantern_extras coverage
uses: actions/upload-artifact@v4
if: ${{ startsWith(matrix.os, 'ubuntu') }}
if: ${{ startsWith(matrix.os, 'ubuntu') && matrix.postgres == 15}}
with:
name: lantern-extras-lcov.info
path: ./lantern-extras-lcov.info
Expand Down
14 changes: 2 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,9 @@ FROM small_world ORDER BY vector <-> ARRAY[0,0,0] LIMIT 1;

### A note on operators and operator classes

Lantern supports several distance functions in the index and it has 2 modes for operators:
Lantern supports several distance functions in the index.

1. `lantern.pgvector_compat=TRUE` (default)
In this mode there are 3 operators available `<->` (l2sq), `<=>` (cosine), `<+>` (hamming).

Note that in this mode, you need to use the right operator in order to trigger an index scan.

2. `lantern.pgvector_compat=FALSE`
In this mode you only need to specify the distance function used for a column at index creation time. Lantern will automatically infer the distance function to use for search so you always use `<?>` operator in search queries.

Note that in this mode, the operator `<?>` is intended exclusively for use with index lookups. If you expect to not use the index in a query, use the distance function directly (e.g. `l2sq_dist(v1, v2)`)

> To switch between modes set `lantern.pgvector_compat` variable to `TRUE` or `FALSE`.
There are 3 operators available: `<->` (l2sq), `<=>` (cosine), `<+>` (hamming).

There are four defined operator classes that can be employed during index creation:

Expand Down
11 changes: 0 additions & 11 deletions ci/scripts/run-tests-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,6 @@ function run_pgvector_tests(){
pushd /tmp/pgvector
# Add lantern to load-extension in pgregress
sed -i '/REGRESS_OPTS \=/ s/$/ --load-extension lantern/' Makefile

# Set pgvector_compat flag in test files
for file in ./test/sql/*; do
echo 'SET lantern.pgvector_compat=TRUE;' | cat - $file > temp && mv temp $file
done

# Set pgvector_compat flag in result files
for file in ./test/expected/*.out; do
echo 'SET lantern.pgvector_compat=TRUE;' | cat - $file > temp && mv temp $file
done

# Run tests
make installcheck
popd
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ function setup_environment() {
export GITHUB_OUTPUT=${GITHUB_OUTPUT:-/dev/null}
export PGVECTOR_VERSION=0.7.4-lanterncloud
#fix pg_cron at the latest commit of the time
export PG_CRON_COMMIT_SHA=7e91e72b1bebc5869bb900d9253cc9e92518b33f
export PG_CRON_COMMIT_SHA=9490f9cc9803f75105f2f7d89839a998f011f8d8
}

function setup_rust() {
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG VERSION=15
ARG PGVECTOR_VERSION=0.5.1
#fix pg_cron at the latest commit of the time
ARG PG_CRON_COMMIT_SHA=7e91e72b1bebc5869bb900d9253cc9e92518b33f
ARG PG_CRON_COMMIT_SHA=9490f9cc9803f75105f2f7d89839a998f011f8d8

# If you want to build the base image for different versions use Dockerfile.pg
# To use GDB inside container run docker like this:
Expand Down
5 changes: 3 additions & 2 deletions lantern_extras/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
[package]
name = "lantern_extras"
version = "0.4.1"
version = "0.4.2"
edition = "2021"

[lib]
crate-type = ["cdylib", "lib"]
doctest = false

[features]
default = ["pg15"]
default = ["pg17"]
pg12 = ["pgrx/pg12", "pgrx-tests/pg12"]
pg13 = ["pgrx/pg13", "pgrx-tests/pg13"]
pg14 = ["pgrx/pg14", "pgrx-tests/pg14"]
pg15 = ["pgrx/pg15", "pgrx-tests/pg15"]
pg16 = ["pgrx/pg16", "pgrx-tests/pg16"]
pg17 = ["pgrx/pg17", "pgrx-tests/pg17"]
pg_test = []

[dependencies]
Expand Down
2 changes: 1 addition & 1 deletion lantern_extras/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ pub mod pg_test {

pub fn postgresql_conf_options() -> Vec<&'static str> {
vec![
"shared_preload_libraries='lantern_extras.so'",
"shared_preload_libraries='lantern_extras'",
"lantern_extras.daemon_databases='pgrx_tests'",
"lantern_extras.enable_daemon=true",
]
Expand Down
3 changes: 2 additions & 1 deletion lantern_hnsw/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.3)
include(CheckSymbolExists)

set(LANTERN_VERSION 0.4.1)
set(LANTERN_VERSION 0.4.2)

project(
LanternDB
Expand Down Expand Up @@ -267,6 +267,7 @@ set (_update_files
sql/updates/0.3.3--0.3.4.sql
sql/updates/0.3.4--0.4.0.sql
sql/updates/0.4.0--0.4.1.sql
sql/updates/0.4.1--0.4.2.sql
)

# Generate version information for the binary
Expand Down
14 changes: 2 additions & 12 deletions lantern_hnsw/scripts/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def primary():
node.init()
node.append_conf("enable_seqscan = off")
node.append_conf("maintenance_work_mem = '1GB'")
node.append_conf("lantern.pgvector_compat=FALSE")
node.append_conf("checkpoint_timeout = '100min'")
node.append_conf("min_wal_size = '1GB'")
node.append_conf("checkpoint_completion_target = '0.9'")
Expand Down Expand Up @@ -158,7 +157,6 @@ def generic_vector_query(

dist_with_function = f"{distance_metric}_dist(v, ({query_vector}))"
dist_with_concrete_op = f"v {DIST_OPS[distance_metric]} ({query_vector})"
dist_with_generic_op = f"v <?> ({query_vector})"

query_generator = (
lambda order_by: f"""
Expand All @@ -173,8 +171,6 @@ def generic_vector_query(
return query_generator(dist_with_function)
elif kind == "concrete":
return query_generator(dist_with_concrete_op)
elif kind == "generic":
return query_generator(dist_with_generic_op)


@pytest.mark.parametrize("distance_metric", ["l2sq", "cos"], scope="session")
Expand All @@ -197,9 +193,6 @@ def test_selects(db, setup_copy_table_with_index, distance_metric, quant_bits, r
concrete_op_query = generic_vector_query(
table_name, distance_metric, "concrete", query_vector_id=q_vec_id
)
generic_op_query = generic_vector_query(
table_name, distance_metric, "generic", query_vector_id=q_vec_id
)

exact_explain_query = f"EXPLAIN {exact_query}"
exact_plan = primary.execute("testdb", exact_explain_query)
Expand All @@ -214,7 +207,7 @@ def test_selects(db, setup_copy_table_with_index, distance_metric, quant_bits, r
q_vec_id == exact_res[0][0]
), "First result in exact query result should be the query vector"

for query in [generic_op_query, concrete_op_query]:
for query in [concrete_op_query]:
explain_query = f"EXPLAIN {query}"
plan = primary.execute("testdb", explain_query)
assert f"Index Scan using idx_{table_name}" in str(
Expand Down Expand Up @@ -348,9 +341,6 @@ def test_inserts(setup_copy_table_with_index, distance_metric, quant_bits, reque
concrete_op_query = generic_vector_query(
table_name, distance_metric, "concrete", query_vector_id=q_vec_id
)
generic_op_query = generic_vector_query(
table_name, distance_metric, "generic", query_vector_id=q_vec_id
)

exact_explain_query = f"EXPLAIN {exact_query}"
for db in [primary, replica]:
Expand All @@ -367,7 +357,7 @@ def test_inserts(setup_copy_table_with_index, distance_metric, quant_bits, reque
exact_res[0][0] in inserted_vector_orig_ids[q_vec_id]
), "First result in exact query result should be the query vector"

for query in [generic_op_query, concrete_op_query]:
for query in [concrete_op_query]:
explain_query = f"EXPLAIN {query}"
plan = db.execute("testdb", explain_query)
assert f"Index Scan using idx_{table_name}" in str(
Expand Down
3 changes: 2 additions & 1 deletion lantern_hnsw/scripts/test_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def __repr__(self):
return self.version

INCOMPATIBLE_VERSIONS = {
'16': [Version('0.0.4')]
'16': [Version('0.0.4')],
'17': [Version('0.3.0'), Version('0.3.1'), Version('0.3.2'), Version('0.3.3'), Version('0.3.4'), Version('0.4.0'), Version('0.4.1')],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we no longer have servers at an older 0.3.* version that we need to upgrade from, you can also move old update scripts to old_updates

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there are still servers with 0.3.x versions, will move the update files to old_updates once all servers are upgraded

}

def shell(cmd, exit_on_error=True):
Expand Down
13 changes: 13 additions & 0 deletions lantern_hnsw/sql/lantern.sql
Original file line number Diff line number Diff line change
Expand Up @@ -924,3 +924,16 @@ BEGIN
RETURN jsonb_pretty(_lantern_internal.mask_order_by_in_plan(explain_output));
END $$ LANGUAGE plpgsql;

-- Get vector type oid
--
-- Looks up the OID of the pg_type row named 'vector' that is linked, through a
-- pg_depend row, to the pg_extension row named 'vector'. At most one row is
-- considered (LIMIT 1).
--
-- Returns: the matching type OID, or 0 when no such type exists (the COALESCE
-- turns the NULL of an empty lookup into 0).
--
-- NOTE(review): 0 presumably stands in for InvalidOid on the C caller side -- confirm.
-- NOTE(review): the pg_depend join matches on objid/refobjid only; it does not
-- filter on classid, refclassid or deptype -- confirm this is selective enough.
-- NOTE(review): the same definition also appears in the 0.4.1--0.4.2 update
-- script; the two bodies presumably need to stay in sync -- confirm.
CREATE FUNCTION _lantern_internal.get_vector_type_oid() RETURNS OID AS $$
DECLARE
type_oid OID;
BEGIN
type_oid := (SELECT pg_type.oid FROM pg_type
JOIN pg_depend ON pg_type.oid = pg_depend.objid
JOIN pg_extension ON pg_depend.refobjid = pg_extension.oid
WHERE typname='vector' AND extname='vector'
LIMIT 1);
RETURN COALESCE(type_oid, 0);
END;
$$ LANGUAGE plpgsql;
13 changes: 13 additions & 0 deletions lantern_hnsw/sql/updates/0.4.1--0.4.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Get vector type oid
--
-- Upgrade-path definition of the helper: looks up the OID of the pg_type row
-- named 'vector' that is linked, through a pg_depend row, to the pg_extension
-- row named 'vector'. At most one row is considered (LIMIT 1).
--
-- Returns: the matching type OID, or 0 when no such type exists (the COALESCE
-- turns the NULL of an empty lookup into 0).
--
-- NOTE(review): 0 presumably stands in for InvalidOid on the C caller side -- confirm.
-- NOTE(review): the pg_depend join matches on objid/refobjid only; it does not
-- filter on classid, refclassid or deptype -- confirm this is selective enough.
-- NOTE(review): this body presumably has to stay identical to the definition
-- in the main install script (lantern.sql) -- confirm.
CREATE FUNCTION _lantern_internal.get_vector_type_oid() RETURNS OID AS $$
DECLARE
type_oid OID;
BEGIN
type_oid := (SELECT pg_type.oid FROM pg_type
JOIN pg_depend ON pg_type.oid = pg_depend.objid
JOIN pg_extension ON pg_depend.refobjid = pg_extension.oid
WHERE typname='vector' AND extname='vector'
LIMIT 1);
RETURN COALESCE(type_oid, 0);
END;
$$ LANGUAGE plpgsql;
6 changes: 2 additions & 4 deletions lantern_hnsw/src/hnsw.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,7 @@ static float8 vector_dist(Vector *a, Vector *b, usearch_metric_kind_t metric_kin
PGDLLEXPORT PG_FUNCTION_INFO_V1(ldb_generic_dist);
Datum ldb_generic_dist(PG_FUNCTION_ARGS)
{
if(ldb_pgvector_compat) {
elog(ERROR, "Operator can only be used when lantern.pgvector_compat=FALSE");
}
elog(ERROR, "Operator <?> is deprecated. Please explicitly use the operator that matches your distance function.");
PG_RETURN_NULL();
}

Expand Down Expand Up @@ -452,7 +450,7 @@ HnswColumnType GetColumnTypeFromOid(Oid oid)

if(oid == FLOAT4ARRAYOID) {
return REAL_ARRAY;
} else if(oid == TypenameGetTypid("vector")) {
} else if(oid == TypenameGetVectorTypid()) {
return VECTOR;
} else if(oid == INT4ARRAYOID) {
return INT_ARRAY;
Expand Down
2 changes: 1 addition & 1 deletion lantern_hnsw/src/hnsw/build.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ static void BuildIndexCleanup(ldb_HnswBuildState *buildstate)
buildstate->external_socket->close(buildstate->external_socket);
}

if(buildstate->index_file_fd != -1) {
if(buildstate->index_file_fd > 0) {
// index_file_fd will only exist when we mmap the index file to memory
if(!buildstate->external && buildstate->index_buffer) {
int munmap_ret = munmap(buildstate->index_buffer, buildstate->index_buffer_size);
Expand Down
Loading
Loading