Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add database version updates #193

Merged
merged 6 commits into from
Oct 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ jobs:
env:
PG_VERSION: ${{ matrix.postgres }}
if: ${{ startsWith(matrix.os, 'ubuntu') }}
- name: Run update tests linux
  id: update-test-linux
  # Installs GitPython (imported by scripts/test_updates.py), starts postgres
  # through the linux test script with RUN_TESTS=0, then runs the extension
  # update tests against it as database user "postgres".
  # The explanatory comment lives here instead of inside the `&&` chain, where
  # it only worked because the shell happened to treat it as a comment after
  # the line continuation.
  run: |
    sudo pip install GitPython && \
    sudo su postgres -c "PG_VERSION=$PG_VERSION RUN_TESTS=0 ./ci/scripts/run-tests-linux.sh" && \
    sudo su -c "PG_VERSION=$PG_VERSION python3 ./scripts/test_updates.py -U postgres"
  env:
    PG_VERSION: ${{ matrix.postgres }}
  if: ${{ startsWith(matrix.os, 'ubuntu') }}
- name: Run tests mac
id: test-mac
run: ./ci/scripts/run-tests-mac.sh
Expand Down
3 changes: 1 addition & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,7 @@ endif()

set(_script_file "lantern--${LANTERNDB_VERSION}.sql")
set (_update_files
sql/updates/0.0.1--0.0.2.sql
sql/updates/0.0.2--0.0.3.sql)
sql/updates/0.0.4--latest.sql)

add_custom_command(
OUTPUT ${CMAKE_BINARY_DIR}/${_script_file}
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ RUN apt update \
git-all \
tmux \
clang-format \
&& pip install libtmux --break-system-packages && \
&& pip install GitPython libtmux --break-system-packages && \
wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \
tar xzf pgvector.tar.gz && \
cd pgvector-${PGVECTOR_VERSION} && \
Expand Down
45 changes: 36 additions & 9 deletions ci/scripts/run-tests-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ set -e
WORKDIR=/tmp/lantern
GITHUB_OUTPUT=${GITHUB_OUTPUT:-/dev/null}
PG_VERSION=${PG_VERSION:-15}
RUN_TESTS=${RUN_TESTS:-1}

export PGDATA=/etc/postgresql/$PG_VERSION/main

wait_for_pg(){
function wait_for_pg(){
tries=0
until pg_isready -U postgres 2>/dev/null; do
if [ $tries -eq 10 ];
Expand All @@ -21,12 +22,38 @@ wait_for_pg(){
done
}

# Set port
echo "port = 5432" >> ${PGDATA}/postgresql.conf
# Run postgres database
GCOV_PREFIX=$WORKDIR/build/CMakeFiles/lantern.dir/ GCOV_PREFIX_STRIP=5 POSTGRES_HOST_AUTH_METHOD=trust /usr/lib/postgresql/$PG_VERSION/bin/postgres 1>/tmp/pg-out.log 2>/tmp/pg-error.log &
#######################################
# Run the test suite and generate the coverage report.
# Globals:   RUN_TESTS (read) - the suite runs only when this is exactly "1"
#            WORKDIR (read)   - project root; the build tree is $WORKDIR/build
# Outputs:   gcovr writes its XML report to /tmp/coverage.xml
# Returns:   0 when skipped or when every step succeeds, otherwise the status
#            of the first failing step
# Note:      changes the caller's working directory to $WORKDIR/build and
#            kills the postgres processes once `make test` has passed.
#######################################
function run_db_tests(){
  if [[ "$RUN_TESTS" != "1" ]]; then
    return 0
  fi

  # Paths are quoted so a WORKDIR containing whitespace does not get split.
  cd "$WORKDIR/build" \
    && make test \
    && killall postgres \
    && gcovr -r "$WORKDIR/src/" --object-directory "$WORKDIR/build/" --xml /tmp/coverage.xml
}

#######################################
# Start a postgres server in the background unless one already accepts
# connections.
# Globals:   PGDATA (read)     - directory holding postgresql.conf / pg_hba.conf
#            PG_VERSION (read) - selects /usr/lib/postgresql/<version>/bin/postgres
#            WORKDIR (read)    - used to build GCOV_PREFIX for the server process
# Outputs:   status messages on stdout; server stdout/stderr go to
#            /tmp/pg-out.log and /tmp/pg-error.log
# Returns:   0 (the server is launched in the background and not waited for)
#######################################
function start_pg() {
  local pg_response

  # While pg_isready reports "rejecting", poll once per second until the
  # state changes. This is a loop rather than the previous self-recursion.
  while true; do
    # pg_isready exits non-zero unless the server accepts connections; the
    # `|| true` keeps that from aborting the script when `set -e` is active.
    pg_response=$(pg_isready -U postgres 2>&1) || true

    if [[ $pg_response == *"accepting"* ]]; then
      echo "Postgres already running"
      return 0
    fi
    if [[ $pg_response != *"rejecting"* ]]; then
      break
    fi
    echo "Postgres process is being killed retrying..."
    sleep 1
  done

  # Set port (appended once; the setting used to be written twice per start)
  echo "port = 5432" >> "${PGDATA}/postgresql.conf"

  # Enable auth without password; pg_hba.conf is overwritten, not appended to
  echo "local all all trust" > "$PGDATA/pg_hba.conf"
  echo "host all all 127.0.0.1/32 trust" >> "$PGDATA/pg_hba.conf"
  echo "host all all ::1/128 trust" >> "$PGDATA/pg_hba.conf"

  # Run postgres database
  GCOV_PREFIX="$WORKDIR/build/CMakeFiles/lantern.dir/" GCOV_PREFIX_STRIP=5 POSTGRES_HOST_AUTH_METHOD=trust "/usr/lib/postgresql/$PG_VERSION/bin/postgres" 1>/tmp/pg-out.log 2>/tmp/pg-error.log &
}
# Wait for start and run tests
wait_for_pg && cd $WORKDIR/build && make test && \
make test-parallel && \
killall postgres && \
gcovr -r $WORKDIR/src/ --object-directory $WORKDIR/build/ --xml /tmp/coverage.xml
start_pg && wait_for_pg && run_db_tests
50 changes: 41 additions & 9 deletions scripts/run_all_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@ PSQL=psql
TMP_ROOT=/tmp/lantern
TMP_OUTDIR=$TMP_ROOT/tmp_output
FILTER="${FILTER:-}"
EXCLUDE="${EXCLUDE:-}"
# $USER is not set in docker containers, so use whoami
DEFAULT_USER=$(whoami)

if [[ -n "$FILTER" && -n "$EXCLUDE" ]]; then
echo "-FILTER and -EXCLUDE cannot be used together, please use only one"
exit 1
fi

# typically default user is root in a docker container
# and in those cases postgres is the user with appropriate permissions
# to the database
Expand Down Expand Up @@ -78,28 +84,54 @@ while [[ "$#" -gt 0 ]]; do
shift
done

FIRST_TEST=1
#######################################
# Append one test name to a schedule file.
# In parallel mode the names "begin" and "end" each get a "test:" line of
# their own; all other names are collected on a single shared "test:" line.
# In sequential mode every name gets its own "test:" line.
# Globals:   PARALLEL (read)        - 1 selects the parallel layout
#            FIRST_TEST (read/write) - 1 until the shared line has been opened
# Arguments: $1 - test name
#            $2 - schedule file to append to
#            (any further arguments are ignored)
#######################################
function print_test {
  local test_name=$1
  local schedule_file=$2

  if [[ "$PARALLEL" -eq 1 ]]; then
    case "$test_name" in
      begin|end)
        # The leading newline terminates the shared line, which is written
        # without a trailing newline.
        printf '\ntest: %s\n' "$test_name" >> "$schedule_file"
        ;;
      *)
        if [[ "$FIRST_TEST" -eq 1 ]]; then
          printf 'test: %s' "$test_name" >> "$schedule_file"
          FIRST_TEST=0
        else
          printf ' %s' "$test_name" >> "$schedule_file"
        fi
        ;;
    esac
  else
    printf 'test: %s\n' "$test_name" >> "$schedule_file"
  fi
}

# Generate schedule.txt
rm -rf $TMP_OUTDIR/schedule.txt
if [ "$PARALLEL" -eq 1 ]; then
SCHEDULE='parallel_schedule.txt'
else
SCHEDULE='schedule.txt'
fi
if [ -n "$FILTER" ]; then
if [[ -n "$FILTER" || -n "$EXCLUDE" ]]; then
if [ "$PARALLEL" -eq 1 ]; then
TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test:|test_begin:|test_end://' | tr " " "\n" | sed -e '/^$/d')
else
if [[ "$pgvector_installed" == "1" ]]; then
TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d')
else
TEST_FILES=$(cat $SCHEDULE | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d')
fi
if [[ "$pgvector_installed" == "1" ]]; then
TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d')
else
TEST_FILES=$(cat $SCHEDULE | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d')
fi
fi

while IFS= read -r f; do
if [[ $f == *"$FILTER"* ]]; then
echo "HERE $f"
echo "test: $f" >> $TMP_OUTDIR/schedule.txt
if [ -n "$FILTER" ]; then
if [[ $f == *"$FILTER"* ]]; then
print_test $f $TMP_OUTDIR/schedule.txt $FIRST_TEST
fi
elif [ -n "$EXCLUDE" ]; then
if [[ $f == *"$EXCLUDE"* ]]; then
continue
fi
print_test $f $TMP_OUTDIR/schedule.txt $FIRST_TEST
fi
done <<< "$TEST_FILES"

Expand Down
86 changes: 86 additions & 0 deletions scripts/test_updates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import argparse
import subprocess
import getpass
import git
import os


# Update sources that are skipped for a given PG_VERSION environment value.
# Keys are PG_VERSION strings; values are lists of version tags (without the
# leading "v") that incompatible_version() reports as incompatible, so the
# update test from that tag is not run.
INCOMPATIBLE_VERSIONS = {
'16': ['0.0.4']
}

def _run_or_exit(cmd: str, what: str) -> None:
    """Run `cmd` through the shell; print a message and exit with status 1 if it fails."""
    res = subprocess.run(cmd, shell=True)
    if res.returncode != 0:
        # Output is not captured, so the command has already printed its own
        # stdout/stderr; only report which step failed.
        print(f"Error {what} (exit status {res.returncode}): {cmd}")
        raise SystemExit(1)


def update_from_tag(from_version: str, to_version: str):
    """Build and install the extension at tag v<from_version>, then run the update tests.

    Steps:
      1. Check out tag "v" + from_version, build and install it in args.builddir.
      2. Recreate database args.db and create the lantern extension in it.
      3. Check out the original commit again, rebuild, install and run `make test`.
      4. Run `make test` and `make test-parallel` with UPDATE_EXTENSION=1 and the
         UPDATE_FROM / UPDATE_TO variables set.

    Reads the module-level `args` (builddir, user, db). Any failing command
    terminates the script with exit status 1 so that CI notices the failure;
    the originally checked-out commit is restored even when an early step fails.

    Args:
        from_version: version to update from, without the leading "v".
        to_version: version passed as UPDATE_TO to the test runs.
    """
    from_tag = "v" + from_version
    repo = git.Repo(search_parent_directories=True)
    sha_before = repo.head.object.hexsha
    repo.remotes[0].fetch()
    repo.git.checkout(from_tag)
    try:
        sha_after = repo.head.object.hexsha
        print("sha_after", sha_after)

        # Build and install the old version.
        _run_or_exit(
            f"mkdir -p {args.builddir} && cd {args.builddir} && git submodule update && cmake .. && make -j4 && make install",
            "building from tag")

        _run_or_exit(f"psql postgres -U {args.user} -c 'DROP DATABASE IF EXISTS {args.db};'",
                     "dropping the update database")
        _run_or_exit(f"psql postgres -U {args.user} -c 'CREATE DATABASE {args.db};'",
                     "creating the update database")
        # Connect to the update database only; the database name used to be
        # given twice (positional "postgres" plus -d).
        _run_or_exit(
            f"psql -U {args.user} -d {args.db} -c 'DROP EXTENSION IF EXISTS lantern CASCADE; CREATE EXTENSION lantern;'",
            "creating the extension from the old version")
        # todo:: run init() portion of parallel tests
    finally:
        # Always return to the commit under test, also when a step above exits.
        repo.git.checkout(sha_before)

    # `&&` after cd: never build or test from the wrong directory.
    _run_or_exit(
        f"cd {args.builddir} && git submodule update && cmake .. && make -j4 && make install && make test",
        "building and testing the current version")
    _run_or_exit(
        f"cd {args.builddir} && UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test",
        "running the update tests")
    # NOTE(review): UPDATE_TO equals from_version for the "begin" step, presumably
    # so that setup runs on the old version before the update - confirm.
    _run_or_exit(
        f"cd {args.builddir} && UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-parallel FILTER=begin",
        "running the parallel begin step")
    _run_or_exit(
        f"cd {args.builddir} && UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test-parallel EXCLUDE=begin",
        "running the parallel update tests")
    # todo:: run query and check portion of parallel tests

def incompatible_version(pg_version, version_tag):
    """Return True when `version_tag` is listed in INCOMPATIBLE_VERSIONS for `pg_version`.

    A missing or empty `pg_version`, or one without an entry in the table,
    yields False.
    """
    if not pg_version:
        return False
    skipped_tags = INCOMPATIBLE_VERSIONS.get(pg_version, ())
    return version_tag in skipped_tags

if __name__ == "__main__":
    # Command-line entry point.
    #   --from_tag and --to_tag both given: test that single update and stop.
    #   only one of them given:             usage error, exit status 1.
    #   neither given:                      test updates from every version that
    #                                       has a file in sql/updates.

    default_user = getpass.getuser()

    # collect the tag from command line to upgrade from

    parser = argparse.ArgumentParser(description='Update from tag')
    parser.add_argument('-from_tag', '--from_tag', metavar='from_tag', type=str,
                        help='Tag to update from', required=False)
    parser.add_argument('-to_tag', '--to_tag', metavar='to_tag', type=str,
                        help='Tag to update to', required=False)
    parser.add_argument("-db", "--db", default="update_db", type=str, help="Database name used for updates")
    parser.add_argument("-U", "--user", default=default_user, help="Database user")
    parser.add_argument("-builddir", "--builddir", default="build_updates",
                        help="Build directory used for the update builds")

    # `args` must stay a module-level name: update_from_tag() reads it.
    args = parser.parse_args()

    from_tag = args.from_tag
    to_tag = args.to_tag
    if from_tag and to_tag:
        # A successful explicit run ends here; it used to fall through to the
        # usage check below and exit with status 1.
        update_from_tag(from_tag, to_tag)
    elif from_tag or to_tag:
        print("Must specify both or neither from_tag and to_tag")
        exit(1)
    else:
        # test updates from all tags
        # Update files are named "<from>--<to>.sql". Entries without "--" are
        # ignored, duplicates are collapsed, and the order is made deterministic
        # (plain string sort, not a version sort).
        from_tags = sorted({
            update_fname.split("--")[0]
            for update_fname in os.listdir("sql/updates")
            if "--" in update_fname
        })
        latest_version = "0.0.5"

        pg_version = os.environ.get('PG_VERSION')
        for from_tag in from_tags:
            if incompatible_version(pg_version, from_tag):
                continue
            update_from_tag(from_tag, latest_version)






18 changes: 0 additions & 18 deletions sql/updates/0.0.1--0.0.2.sql

This file was deleted.

17 changes: 0 additions & 17 deletions sql/updates/0.0.2--0.0.3.sql

This file was deleted.

31 changes: 31 additions & 0 deletions sql/updates/0.0.4--latest.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

-- Update script: when a type named 'vector' exists, create
-- l2sq_dist(vector, vector), recreate the dist_vec_l2sq_ops operator class of
-- lantern_hnsw with that function as support function 1, and drop the old
-- vector_l2sq_dist(vector, vector) function. Does nothing otherwise.
DO $BODY$
DECLARE
pgvector_exists boolean;
BEGIN
-- replace it with the overloaded version
-- Check if the vector type from pgvector exists
-- NOTE(review): this matches any pg_type row named 'vector'; it does not check
-- which schema or extension the type belongs to.
SELECT EXISTS (
SELECT 1
FROM pg_type
WHERE typname = 'vector'
) INTO pgvector_exists;

IF pgvector_exists THEN
-- the new SQL-level name still binds to the C symbol 'vector_l2sq_dist'
CREATE FUNCTION l2sq_dist(vector, vector) RETURNS float8
AS 'MODULE_PATHNAME', 'vector_l2sq_dist' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

-- change the operator class to use the new function
DROP OPERATOR CLASS dist_vec_l2sq_ops USING lantern_hnsw;
CREATE OPERATOR CLASS dist_vec_l2sq_ops
DEFAULT FOR TYPE vector USING lantern_hnsw AS
OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
FUNCTION 1 l2sq_dist(vector, vector);

-- drop the old implementation
DROP FUNCTION IF EXISTS vector_l2sq_dist(vector, vector);

END IF;
END;
$BODY$
LANGUAGE plpgsql;
53 changes: 0 additions & 53 deletions sql/updates/0.0.4-latest.sql

This file was deleted.

Loading