From 63583da3e616a161d619237ba1138a3c16da9182 Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Tue, 5 Dec 2023 12:02:32 +0800 Subject: [PATCH] feat: source config_hdfs.sh automatically before starting onebox (#1708) When running Pegasus to use the feature of backing up data to HDFS (restoring data from HDFS is the same), it is necessary to set the CLASSPATH environment variables; currently this is done by scripts/config_hdfs.sh. However, we have to source this script manually before starting onebox. This patch sources the script in ./run.sh automatically, updates config_hdfs.sh to download the hadoop package if it does not exist, and removes the explicit calling of scripts/config_hdfs.sh in CI. --- .github/workflows/lint_and_test_cpp.yaml | 4 ---- run.sh | 3 ++- scripts/config_hdfs.sh | 16 +++++++++++++--- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/lint_and_test_cpp.yaml b/.github/workflows/lint_and_test_cpp.yaml index 1f5af4ee2d..d780f683e8 100644 --- a/.github/workflows/lint_and_test_cpp.yaml +++ b/.github/workflows/lint_and_test_cpp.yaml @@ -252,7 +252,6 @@ jobs: run: | export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server ulimit -s unlimited - . ./scripts/config_hdfs.sh ./run.sh test --onebox_opts "$ONEBOX_OPTS" --test_opts "$TEST_OPTS" -m ${{ matrix.test_module }} build_ASAN: @@ -392,7 +391,6 @@ jobs: run: | export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server ulimit -s unlimited - . ./scripts/config_hdfs.sh ./run.sh test --onebox_opts "$ONEBOX_OPTS" --test_opts "$TEST_OPTS" -m ${{ matrix.test_module }} # TODO(yingchun): Build and test UBSAN version would cost a very long time, we will run these tests @@ -534,7 +532,6 @@ jobs: # run: | # export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server # ulimit -s unlimited -# . 
./scripts/config_hdfs.sh # ./run.sh test --onebox_opts "$ONEBOX_OPTS" --test_opts "$TEST_OPTS" -m ${{ matrix.test_module }} build_with_jemalloc: @@ -647,7 +644,6 @@ jobs: run: | export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server ulimit -s unlimited - . ./scripts/config_hdfs.sh ./run.sh test --onebox_opts "$ONEBOX_OPTS" --test_opts "$TEST_OPTS" -m ${{ matrix.test_module }} build_pegasus_on_macos: diff --git a/run.sh b/run.sh index 3c86f669ca..dd81061022 100755 --- a/run.sh +++ b/run.sh @@ -784,7 +784,7 @@ function run_start_onebox() exit 1 fi - echo "HDFS_SERVICE_ARGS $HDFS_SERVICE_ARGS" + source "${ROOT}"/scripts/config_hdfs.sh if [ $USE_PRODUCT_CONFIG == "true" ]; then [ -z "${CONFIG_FILE}" ] && CONFIG_FILE=${ROOT}/src/server/config.ini [ ! -f "${CONFIG_FILE}" ] && { echo "${CONFIG_FILE} is not exist"; exit 1; } @@ -1028,6 +1028,7 @@ function run_start_onebox_instance() esac shift done + source "${ROOT}"/scripts/config_hdfs.sh if [ $META_ID = "0" -a $REPLICA_ID = "0" -a $COLLECTOR_ID = "0" ]; then echo "ERROR: no meta_id or replica_id or collector set" exit 1 diff --git a/scripts/config_hdfs.sh b/scripts/config_hdfs.sh index caa1cb342e..0ad1260c61 100755 --- a/scripts/config_hdfs.sh +++ b/scripts/config_hdfs.sh @@ -19,6 +19,8 @@ set -e +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$(dirname "${SCRIPT_DIR}") # This file should be sourced to set up LD_LIBRARY_PATH and CLASSPATH to # run Pegasus binaries which use libhdfs in the context of a dev environment. @@ -39,11 +41,19 @@ fi JAVA_JVM_LIBRARY_DIR=$(dirname $(find "${JAVA_HOME}/" -name libjvm.so | head -1)) export LD_LIBRARY_PATH=${JAVA_JVM_LIBRARY_DIR}:$LD_LIBRARY_PATH +if [ ! -d "$HADOOP_HOME" ]; then + PEGASUS_HADOOP_HOME=${ROOT}/hadoop-bin + if [ ! -d "$PEGASUS_HADOOP_HOME" ]; then + "${SCRIPT_DIR}"/download_hadoop.sh "${PEGASUS_HADOOP_HOME}" + fi + + # Set the HADOOP_HOME to the pegasus's hadoop directory. 
+ export HADOOP_HOME="${PEGASUS_HADOOP_HOME}" + echo "set HADOOP_HOME to ${PEGASUS_HADOOP_HOME}" +fi + # Set CLASSPATH to all the Hadoop jars needed to run Hadoop itself as well as # the right configuration directory containing core-site.xml or hdfs-site.xml. -PEGASUS_HADOOP_HOME=`pwd`/hadoop-bin -# Prefer the HADOOP_HOME set in the environment, but use the pegasus's hadoop dir otherwise. -export HADOOP_HOME="${HADOOP_HOME:-${PEGASUS_HADOOP_HOME}}" if [ ! -d "$HADOOP_HOME/etc/hadoop" ] || [ ! -d "$HADOOP_HOME/share/hadoop" ]; then echo "HADOOP_HOME must be set to the location of your Hadoop jars and core-site.xml." return 1