diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 11547b42a..61a6e167e 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -43,6 +43,16 @@ jobs:
         env:
           PG_VERSION: ${{ matrix.postgres }}
         if: ${{ startsWith(matrix.os, 'ubuntu') }}
+      - name: Run update tests linux
+        id: update-test-linux
+        run: |
+          # Start postgres (RUN_TESTS=0 skips the regular test run), then run the update tests
+          sudo pip install GitPython && \
+          sudo su postgres -c "PG_VERSION=$PG_VERSION RUN_TESTS=0 ./ci/scripts/run-tests-linux.sh" && \
+          sudo su -c "PG_VERSION=$PG_VERSION python3 ./scripts/test_updates.py -U postgres"
+        env:
+          PG_VERSION: ${{ matrix.postgres }}
+        if: ${{ startsWith(matrix.os, 'ubuntu') }}
       - name: Run tests mac
         id: test-mac
         run: ./ci/scripts/run-tests-mac.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 938075f64..4553ad203 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -146,8 +146,7 @@ endif()
 set(_script_file "lantern--${LANTERNDB_VERSION}.sql")
 
 set (_update_files
-  sql/updates/0.0.1--0.0.2.sql
-  sql/updates/0.0.2--0.0.3.sql)
+  sql/updates/0.0.4--latest.sql)
 
 add_custom_command(
   OUTPUT ${CMAKE_BINARY_DIR}/${_script_file}
diff --git a/Dockerfile.dev b/Dockerfile.dev
index f675ff9d6..ceae49f31 100644
--- a/Dockerfile.dev
+++ b/Dockerfile.dev
@@ -24,7 +24,7 @@ RUN apt update \
     git-all \
     tmux \
     clang-format \
-    && pip install libtmux --break-system-packages && \
+    && pip install GitPython libtmux --break-system-packages && \
     wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \
     tar xzf pgvector.tar.gz && \
     cd pgvector-${PGVECTOR_VERSION} && \
diff --git a/ci/scripts/run-tests-linux.sh b/ci/scripts/run-tests-linux.sh
index de8fb3e48..d46a0de22 100755
--- a/ci/scripts/run-tests-linux.sh
+++ b/ci/scripts/run-tests-linux.sh
@@ -4,10 +4,11 @@ set -e
 WORKDIR=/tmp/lantern
 GITHUB_OUTPUT=${GITHUB_OUTPUT:-/dev/null}
 PG_VERSION=${PG_VERSION:-15}
+RUN_TESTS=${RUN_TESTS:-1}
 
 export PGDATA=/etc/postgresql/$PG_VERSION/main
 
-wait_for_pg(){
+function wait_for_pg(){
  tries=0
  until pg_isready -U postgres 2>/dev/null; do
    if [ $tries -eq 10 ];
@@ -21,12 +22,36 @@ wait_for_pg(){
  done
 }
 
-# Set port
-echo "port = 5432" >> ${PGDATA}/postgresql.conf
-# Run postgres database
-GCOV_PREFIX=$WORKDIR/build/CMakeFiles/lantern.dir/ GCOV_PREFIX_STRIP=5 POSTGRES_HOST_AUTH_METHOD=trust /usr/lib/postgresql/$PG_VERSION/bin/postgres 1>/tmp/pg-out.log 2>/tmp/pg-error.log &
+function run_db_tests(){
+  if [[ "$RUN_TESTS" == "1" ]]
+  then
+    cd $WORKDIR/build && \
+    make test && \
+    killall postgres && \
+    gcovr -r $WORKDIR/src/ --object-directory $WORKDIR/build/ --xml /tmp/coverage.xml
+  fi
+}
+
+function start_pg() {
+  pg_response=$(pg_isready -U postgres 2>&1) || true
+
+  if [[ $pg_response == *"accepting"* ]]; then
+    echo "Postgres already running"
+  elif [[ $pg_response == *"rejecting"* ]]; then
+    echo "Postgres process is being killed retrying..."
+    sleep 1
+    start_pg
+  else
+    # Enable auth without password
+    echo "local all all trust" > $PGDATA/pg_hba.conf
+    echo "host all all 127.0.0.1/32 trust" >> $PGDATA/pg_hba.conf
+    echo "host all all ::1/128 trust" >> $PGDATA/pg_hba.conf
+
+    # Set port
+    echo "port = 5432" >> ${PGDATA}/postgresql.conf
+    # Run postgres database
+    GCOV_PREFIX=$WORKDIR/build/CMakeFiles/lantern.dir/ GCOV_PREFIX_STRIP=5 POSTGRES_HOST_AUTH_METHOD=trust /usr/lib/postgresql/$PG_VERSION/bin/postgres 1>/tmp/pg-out.log 2>/tmp/pg-error.log &
+  fi
+}
 # Wait for start and run tests
-wait_for_pg && cd $WORKDIR/build && make test && \
-make test-parallel && \
-killall postgres && \
-gcovr -r $WORKDIR/src/ --object-directory $WORKDIR/build/ --xml /tmp/coverage.xml
+start_pg && wait_for_pg && run_db_tests
diff --git a/scripts/run_all_tests.sh b/scripts/run_all_tests.sh
index 7496442b3..3bb9301d3 100755
--- a/scripts/run_all_tests.sh
+++ b/scripts/run_all_tests.sh
@@ -6,9 +6,15 @@ PSQL=psql
 TMP_ROOT=/tmp/lantern
 TMP_OUTDIR=$TMP_ROOT/tmp_output
 FILTER="${FILTER:-}"
+EXCLUDE="${EXCLUDE:-}"
 # $USER is not set in docker containers, so use whoami
 DEFAULT_USER=$(whoami)
 
+if [[ -n "$FILTER" && -n "$EXCLUDE" ]]; then
+    echo "-FILTER and -EXCLUDE cannot be used together, please use only one"
+    exit 1
+fi
+
 # typically default user is root in a docker container
 # and in those cases postgres is the user with appropriate permissions
 # to the database
@@ -78,6 +84,23 @@ while [[ "$#" -gt 0 ]]; do
     shift
 done
 
+FIRST_TEST=1
+# Append test name $1 to schedule file $2 (parallel mode keeps regular tests on one line)
+function print_test {
+    if [ "$PARALLEL" -eq 1 ]; then
+        if [[ "$1" == "begin" || "$1" == "end" ]]; then
+            echo -e "\ntest: $1" >> "$2"
+        elif [ "$FIRST_TEST" -eq 1 ]; then
+            echo -n "test: $1" >> "$2"
+            FIRST_TEST=0
+        else
+            echo -n " $1" >> "$2"
+        fi
+    else
+        echo "test: $1" >> "$2"
+    fi
+}
+
 # Generate schedule.txt
 rm -rf $TMP_OUTDIR/schedule.txt
 if [ "$PARALLEL" -eq 1 ]; then
@@ -85,21 +108,25 @@ if [ "$PARALLEL" -eq 1 ]; then
 else
     SCHEDULE='schedule.txt'
 fi
-if [ -n "$FILTER" ]; then
+if [[ -n "$FILTER" || -n "$EXCLUDE" ]]; then
     if [ "$PARALLEL" -eq 1 ]; then
         TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test:|test_begin:|test_end://' | tr " " "\n" | sed -e '/^$/d')
     else
-	if [[ "$pgvector_installed" == "1" ]]; then
-	TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d')
-	else
-	TEST_FILES=$(cat $SCHEDULE | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d')
-	fi
+        if [[ "$pgvector_installed" == "1" ]]; then
+            TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d')
+        else
+            TEST_FILES=$(cat $SCHEDULE | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d')
+        fi
     fi
 
     while IFS= read -r f; do
-        if [[ $f == *"$FILTER"* ]]; then
-            echo "HERE $f"
-            echo "test: $f" >> $TMP_OUTDIR/schedule.txt
+        [ -z "$f" ] && continue
+        if [ -n "$FILTER" ]; then
+            if [[ "$f" == *"$FILTER"* ]]; then
+                print_test "$f" "$TMP_OUTDIR/schedule.txt"
+            fi
+        elif [[ "$f" != *"$EXCLUDE"* ]]; then
+            print_test "$f" "$TMP_OUTDIR/schedule.txt"
         fi
     done <<< "$TEST_FILES"
 
diff --git a/scripts/test_updates.py b/scripts/test_updates.py
new file mode 100644
index 000000000..46ed6328a
--- /dev/null
+++ b/scripts/test_updates.py
@@ -0,0 +1,94 @@
+import argparse
+import subprocess
+import getpass
+import git
+import os
+import sys
+
+
+INCOMPATIBLE_VERSIONS = {
+    '16': ['0.0.4']
+}
+
+def run_step(cmd: str):
+    # Run cmd through the shell. A non-zero exit status is reported on stderr
+    # (instead of being silently dropped) but does not abort the script.
+    res = subprocess.run(cmd, shell=True)
+    if res.returncode != 0:
+        print(f"WARNING: exit code {res.returncode} from: {cmd}", file=sys.stderr)
+    return res
+
+def update_from_tag(from_version: str, to_version: str):
+    # Build and install the extension at tag v<from_version>, create it in a fresh
+    # database, then return to the original commit, rebuild, and run the test
+    # suites with UPDATE_EXTENSION=1 so they exercise the from -> to update.
+    from_tag = "v" + from_version
+    repo = git.Repo(search_parent_directories=True)
+    sha_before = repo.head.object.hexsha
+    repo.remotes[0].fetch()
+    repo.git.checkout(from_tag)
+    sha_after = repo.head.object.hexsha
+    print("sha_after", sha_after)
+
+    # run "mkdir build && cd build && cmake .. && make -j4 && make install"
+    res = run_step(f"mkdir -p {args.builddir} ; cd {args.builddir} && git submodule update && cmake .. && make -j4 && make install")
+    if res.returncode != 0:
+        print("Error building from tag " + from_tag, file=sys.stderr)
+        # do not leave the working tree parked on the old tag
+        repo.git.checkout(sha_before)
+        sys.exit(1)
+
+    run_step(f"psql postgres -U {args.user} -c 'DROP DATABASE IF EXISTS {args.db};'")
+    run_step(f"psql postgres -U {args.user} -c 'CREATE DATABASE {args.db};'")
+    run_step(f"psql postgres -U {args.user} -c 'DROP EXTENSION IF EXISTS lantern CASCADE; CREATE EXTENSION lantern;' -d {args.db};")
+    # todo:: run init() portion of parallel tests
+
+    repo.git.checkout(sha_before)
+    run_step(f"cd {args.builddir} ; git submodule update && cmake .. && make -j4 && make install && make test")
+    run_step(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test")
+    run_step(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-parallel FILTER=begin")
+    run_step(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test-parallel EXCLUDE=begin")
+    #todo:: run query and check portion of parallel tests
+
+def incompatible_version(pg_version, version_tag):
+    if not pg_version or pg_version not in INCOMPATIBLE_VERSIONS:
+        return False
+    return version_tag in INCOMPATIBLE_VERSIONS[pg_version]
+
+if __name__ == "__main__":
+
+    default_user = getpass.getuser()
+
+    # collect the tag from command line to upgrade from
+
+    parser = argparse.ArgumentParser(description='Update from tag')
+    parser.add_argument('-from_tag', '--from_tag', metavar='from_tag', type=str,
+                        help='Tag to update from', required=False)
+    parser.add_argument('-to_tag','--to_tag', metavar='to_tag', type=str,
+                        help='Tag to update to', required=False)
+    parser.add_argument("-db", "--db", default="update_db", type=str, help="Database name used for updates")
+    parser.add_argument("-U", "--user", default=default_user, help="Database user")
+    parser.add_argument("-builddir", "--builddir", default="build_updates", help="Build directory used for the update tests")
+
+    args = parser.parse_args()
+
+    from_tag = args.from_tag
+    to_tag = args.to_tag
+    if from_tag and to_tag:
+        update_from_tag(from_tag, to_tag)
+        # an explicit from/to pair was tested; stop here instead of falling through
+        sys.exit(0)
+
+    if from_tag or to_tag:
+        print("Must specify both or neither from_tag and to_tag")
+        sys.exit(1)
+
+    # test updates from all tags
+    from_tags = [update_fname.split("--")[0] for update_fname in os.listdir("sql/updates")]
+    latest_version = "0.0.5"
+
+    pg_version = os.environ.get('PG_VERSION')
+    for from_tag in from_tags:
+        if incompatible_version(pg_version, from_tag):
+            continue
+        update_from_tag(from_tag, latest_version)
diff --git a/sql/updates/0.0.1--0.0.2.sql b/sql/updates/0.0.1--0.0.2.sql
deleted file mode 100644
index 1c414ebce..000000000
--- a/sql/updates/0.0.1--0.0.2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
--- here for reference of directory file structure
-DROP OPERATOR CLASS IF EXISTS vector_l2_ops using embedding CASCADE;
-DROP ACCESS METHOD IF EXISTS embedding;
-DROP FUNCTION IF EXISTS hnsw_handler;
-
-CREATE FUNCTION hnsw_handler(internal) RETURNS index_am_handler
-	AS 'MODULE_PATHNAME' LANGUAGE C;
-
-CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnsw_handler;
-
-COMMENT ON ACCESS METHOD hnsw IS 'LanternDB vector index access method. Can be configured to use various strategies such hs hnsw, graph-based, disk-optimized etc.';
-
--- taken from pgvector so our index can work with pgvector types
-CREATE OPERATOR CLASS vector_l2_ops
-	DEFAULT FOR TYPE vector USING hnsw AS
-	OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
-	FUNCTION 1 vector_l2_squared_distance(vector, vector),
-	FUNCTION 3 l2_distance(vector, vector);
\ No newline at end of file
diff --git a/sql/updates/0.0.2--0.0.3.sql b/sql/updates/0.0.2--0.0.3.sql
deleted file mode 100644
index a728ce22e..000000000
--- a/sql/updates/0.0.2--0.0.3.sql
+++ /dev/null
@@ -1,17 +0,0 @@
--- functions
-CREATE FUNCTION l2sq_dist(real[], real[]) RETURNS real
-	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-
--- operators
-
-CREATE OPERATOR <-> (
-	LEFTARG = real[], RIGHTARG = real[], PROCEDURE = l2sq_dist,
-	COMMUTATOR = '<->'
-);
-
--- operator classes
-CREATE OPERATOR CLASS ann_l2_ops
-	DEFAULT FOR TYPE real[] USING hnsw AS
-	OPERATOR 1 <-> (real[], real[]) FOR ORDER BY float_ops,
-	FUNCTION 1 l2sq_dist(real[], real[]);
-
diff --git a/sql/updates/0.0.4--latest.sql b/sql/updates/0.0.4--latest.sql
new file mode 100644
index 000000000..b343dc034
--- /dev/null
+++ b/sql/updates/0.0.4--latest.sql
@@ -0,0 +1,31 @@
+
+DO $BODY$
+DECLARE
+	pgvector_exists boolean;
+BEGIN
+	-- replace it with the overloaded version
+	-- Check if the vector type from pgvector exists
+	SELECT EXISTS (
+		SELECT 1
+		FROM pg_type
+		WHERE typname = 'vector'
+	) INTO pgvector_exists;
+
+	IF pgvector_exists THEN
+		CREATE FUNCTION l2sq_dist(vector, vector) RETURNS float8
+			AS 'MODULE_PATHNAME', 'vector_l2sq_dist' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+		-- change the operator class to use the new function
+		DROP OPERATOR CLASS IF EXISTS dist_vec_l2sq_ops USING lantern_hnsw;
+		CREATE OPERATOR CLASS dist_vec_l2sq_ops
+			DEFAULT FOR TYPE vector USING lantern_hnsw AS
+			OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
+			FUNCTION 1 l2sq_dist(vector, vector);
+
+		-- drop the old implementation
+		DROP FUNCTION IF EXISTS vector_l2sq_dist(vector, vector);
+
+	END IF;
+END;
+$BODY$
+LANGUAGE plpgsql;
\ No newline at end of file
diff --git a/sql/updates/0.0.4-latest.sql b/sql/updates/0.0.4-latest.sql
deleted file mode 100644
index 4654ea412..000000000
--- a/sql/updates/0.0.4-latest.sql
+++ /dev/null
@@ -1,53 +0,0 @@
--- drop the old implementation
-DROP FUNCTION IF EXISTS vector_l2sq_dist(vector, vector);
-
--- replace is with overloaded version
--- Check if the vector type from pgvector exists
-SELECT EXISTS (
-    SELECT 1
-    FROM pg_type
-    WHERE typname = 'vector'
-) INTO pgvector_exists;
-
-CREATE OR REPLACE FUNCTION _create_ldb_operator_classes(access_method_name TEXT) RETURNS BOOLEAN AS $$
-DECLARE
-    dist_l2sq_ops TEXT;
-    dist_cos_ops TEXT;
-    dist_hamming_ops TEXT;
-BEGIN
-    -- Construct the SQL statement to create the operator classes dynamically.
-    dist_l2sq_ops := '
-        CREATE OPERATOR CLASS dist_l2sq_ops
-        DEFAULT FOR TYPE real[] USING ' || access_method_name || ' AS
-        OPERATOR 1 <-> (real[], real[]) FOR ORDER BY float_ops,
-        FUNCTION 1 l2sq_dist(real[], real[]);
-    ';
-
-    dist_cos_ops := '
-        CREATE OPERATOR CLASS dist_cos_ops
-        FOR TYPE real[] USING ' || access_method_name || ' AS
-        OPERATOR 1 <-> (real[], real[]) FOR ORDER BY float_ops,
-        FUNCTION 1 cos_dist(real[], real[]);
-    ';
-
-    dist_hamming_ops := '
-        CREATE OPERATOR CLASS dist_hamming_ops
-        FOR TYPE integer[] USING ' || access_method_name || ' AS
-        OPERATOR 1 <-> (integer[], integer[]) FOR ORDER BY float_ops,
-        FUNCTION 1 hamming_dist(integer[], integer[]);
-    ';
-
-    -- Execute the dynamic SQL statement.
-    EXECUTE dist_l2sq_ops;
-    EXECUTE dist_cos_ops;
-    EXECUTE dist_hamming_ops;
-
-    RETURN TRUE;
-END;
-$$ LANGUAGE plpgsql VOLATILE;
-
-IF pgvector_exists THEN
-    CREATE FUNCTION l2sq_dist(vector, vector) RETURNS float8
-        AS 'MODULE_PATHNAME', 'vector_l2sq_dist' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-    PERFORM _create_ldb_operator_classes('lantern_hnsw');
-END IF
diff --git a/test/test_runner.sh b/test/test_runner.sh
index dd888165c..e34317ebd 100755
--- a/test/test_runner.sh
+++ b/test/test_runner.sh
@@ -27,42 +27,56 @@ function drop_db {
 EOF
 }
 
-# If these aren't parallel tests always drop the db after the test
-# if they are though we only want to drop after end which is where we check invariants
-# this allows the parallel tests to be run against the same db
-if [ "$PARALLEL" -eq 0 ]; then
-    trap drop_db EXIT
-elif [[ "$TESTFILE_NAME" =~ ^end ]]; then
-    trap drop_db EXIT
-fi
+function run_regression_test {
+    # Exclude debug/inconsistent output from psql
+    # So tests will always have the same output
+    psql -U ${DB_USER} \
+         -v ON_ERROR_STOP=1 \
+         -v VERBOSITY=terse \
+         -v ECHO=all \
+         "$@" -d ${TEST_CASE_DB} 2>&1 | \
+         sed -e 's! Memory: [0-9]\{1,\}kB!!' \
+             -e 's! Memory Usage: [0-9]\{1,\}kB!!' \
+             -e 's! Average Peak Memory: [0-9]\{1,\}kB!!' \
+             -e 's! time=[0-9]\+\.[0-9]\+\.\.[0-9]\+\.[0-9]\+!!' | \
+         grep -v 'DEBUG: rehashing catalog cache id' | \
+         grep -Gv '^ Planning Time:' | \
+         grep -Gv '^ Execution Time:' | \
+         # Only print debug messages followed by LANTERN
+         perl -nle'print if !m{DEBUG:(?!.*LANTERN)}'
+}
 
-# Change directory to sql directory so sql imports will work correctly
+
+# Change directory to sql so sql imports will work correctly
 cd sql/
 
 # install lantern extension
-# if tests are parallel we only do this for the begin tests as we won't be dropping the database until the end
-# begin will handle initialization specific to the tests but expects the database already exists
-if [ "$PARALLEL" -eq 0 ] || ( [[ "$TESTFILE_NAME" =~ ^begin ]] && [ "$PARALLEL" -eq 1 ] ); then
-    psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "DROP DATABASE IF EXISTS ${TEST_CASE_DB};" 2>/dev/null
-    psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "CREATE DATABASE ${TEST_CASE_DB};" 2>/dev/null
-    psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; CREATE EXTENSION lantern;" 2>/dev/null
-    psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -f utils/common.sql 2>/dev/null
+if [[ "$PARALLEL" -eq 0 || "$TESTFILE_NAME" == "begin" ]]; then
+    psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "DROP DATABASE IF EXISTS ${TEST_CASE_DB};" 2>/dev/null
+    psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "CREATE DATABASE ${TEST_CASE_DB};" 2>/dev/null
 fi
 
+if [ -n "$UPDATE_EXTENSION" ]
+then
+    if [ -z "$UPDATE_FROM" ] || [ -z "$UPDATE_TO" ]
+    then
+        echo "ERROR: UPDATE_FROM and UPDATE_TO environment variables must be set before test_runner.sh whenever UPDATE_EXTENSION is set"
+        exit 1
+    fi
 
-# Exclude debug/inconsistent output from psql
-# So tests will always have the same output
-psql -U ${DB_USER} \
-     -v ON_ERROR_STOP=1 \
-     -v VERBOSITY=terse \
-     -v ECHO=all \
-     "$@" -d ${TEST_CASE_DB} 2>&1 | \
-     sed -e 's! Memory: [0-9]\{1,\}kB!!' \
-         -e 's! Memory Usage: [0-9]\{1,\}kB!!' \
-         -e 's! Average Peak Memory: [0-9]\{1,\}kB!!' \
-         -e 's! time=[0-9]\+\.[0-9]\+\.\.[0-9]\+\.[0-9]\+!!' | \
-     grep -v 'DEBUG: rehashing catalog cache id' | \
-     grep -Gv '^ Planning Time:' | \
-     grep -Gv '^ Execution Time:' | \
-     # Only print debug messages followed by LANTERN
-     perl -nle'print if !m{DEBUG:(?!.*LANTERN)}'
+    # install the old version of the extension and sanity-check that all tests pass
+    psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; CREATE EXTENSION IF NOT EXISTS lantern VERSION '$UPDATE_FROM';" 2>/dev/null
+    psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -f utils/common.sql 2>/dev/null
+    run_regression_test "$@"
+    # upgrade to the new version of the extension and make sure that all existing tests still pass
+    # todo:: this approach currently is broken for pgvector-compat related upgrade scripts as that regression test drops
+    # and recreates the extension so whatever we do here is ignored
+    psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; ALTER EXTENSION lantern UPDATE TO '$UPDATE_TO';" 2>/dev/null
+    run_regression_test "$@"
+else
+
+    psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; CREATE EXTENSION lantern;" 2>/dev/null
+    psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -f utils/common.sql 2>/dev/null
+
+    run_regression_test "$@"
+fi