diff --git a/Dockerfile b/Dockerfile index c70dd81b..8f2ecb3d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,9 @@ RUN mkdir -p /assets/ && cd /assets && \ curl -OL https://downloads.datastax.com/enterprise/cqlsh-astra.tar.gz && \ tar -xzf ./cqlsh-astra.tar.gz && \ rm ./cqlsh-astra.tar.gz && \ - curl -OL https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3-scala2.13.tgz && \ - tar -xzf ./spark-3.4.2-bin-hadoop3-scala2.13.tgz && \ - rm ./spark-3.4.2-bin-hadoop3-scala2.13.tgz + curl -OL https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz && \ + tar -xzf ./spark-3.5.1-bin-hadoop3-scala2.13.tgz && \ + rm ./spark-3.5.1-bin-hadoop3-scala2.13.tgz RUN apt-get update && apt-get install -y openssh-server vim python3 --no-install-recommends && \ rm -rf /var/lib/apt/lists/* && \ @@ -46,7 +46,7 @@ RUN chmod +x ./get-latest-maven-version.sh && \ rm -rf "$USER_HOME_DIR/.m2" # Add all migration tools to path -ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-3.4.2-bin-hadoop3-scala2.13/bin/" +ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-3.5.1-bin-hadoop3-scala2.13/bin/" EXPOSE 22 diff --git a/README.md b/README.md index b997ef81..28cd4ce0 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Migrate and Validate Tables between Origin and Target Cassandra Clusters. -> :warning: Please note this job has been tested with spark version [3.4.2](https://archive.apache.org/dist/spark/spark-3.4.2/) +> :warning: Please note this job has been tested with spark version [3.5.1](https://archive.apache.org/dist/spark/spark-3.5.1/) ## Install as a Container - Get the latest image that includes all dependencies from [DockerHub](https://hub.docker.com/r/datastax/cassandra-data-migrator) @@ -18,10 +18,10 @@ Migrate and Validate Tables between Origin and Target Cassandra Clusters. ### Prerequisite - Install **Java11** (minimum) as Spark binaries are compiled with it. -- Install Spark version [`3.4.2`](https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3-scala2.13.tgz) on a single VM (no cluster necessary) where you want to run this job. Spark can be installed by running the following: - +- Install Spark version [`3.5.1`](https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz) on a single VM (no cluster necessary) where you want to run this job. Spark can be installed by running the following: - ``` -wget https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3-scala2.13.tgz -tar -xvzf spark-3.4.2-bin-hadoop3-scala2.13.tgz +wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz +tar -xvzf spark-3.5.1-bin-hadoop3-scala2.13.tgz ``` > :warning: If the above Spark and Scala version is not properly installed, you'll then see a similar exception like below when running the CDM jobs, diff --git a/RELEASE.md b/RELEASE.md index e8965c57..1b50d14a 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,4 +1,7 @@ # Release Notes +## [4.1.13] - 2024-02-27 +- Upgraded to use Spark `3.5.1`. + ## [4.1.12] - 2024-01-22 - Upgraded to use Spark `3.4.2`. - Added Java `11` as the minimally required pre-requisite to run CDM jobs. diff --git a/pom.xml b/pom.xml index 4fd43d89..59c3fd6e 100644 --- a/pom.xml +++ b/pom.xml @@ -10,9 +10,9 @@ UTF-8 2.13.12 2.13 - 3.4.1 - 3.4.1 - 5.0-alpha1 + 3.5.1 + 3.5.0 + 5.0-beta1 5.9.1 4.11.0 4.17.0 @@ -198,7 +198,7 @@ net.alchim31.maven scala-maven-plugin - 4.8.0 + 4.8.1 process-sources diff --git a/rat-excludes.txt b/rat-excludes.txt index 90a59e8c..90265407 100644 --- a/rat-excludes.txt +++ b/rat-excludes.txt @@ -6,6 +6,7 @@ .github/workflows/maven.yml .github/workflows/snyk-cli-scan.yml .github/workflows/snyk-pr-cleanup.yml +.github/workflows/dependabot.yml README.md rat-excludes.txt pom.xml @@ -19,7 +20,9 @@ Dockerfile .snyk .snyk.ignore.example PERF/* +PERF/*/*/output/* SIT/* +SIT/*/*/output/* scripts/* test-backup/feature/* src/resources/partitions.csv @@ -81,6 +84,7 @@ SIT/smoke/04_counters/cdm.validateData.assert SIT/smoke/04_counters/cdm.fixForce.assert SIT/smoke/05_reserved_keyword/cdm.txt SIT/smoke/05_reserved_keyword/expected.out +SIT/smoke_inflight/06_vector/cdm.sh PERF/logs/scenario_20230523_162859_122.log PERF/logs/scenario_20230523_162126_056.log PERF/logs/scenario_20230523_162204_904.log diff --git a/src/resources/migrate_data.sh b/src/resources/migrate_data.sh index 3147cba6..e285cbf6 100644 --- a/src/resources/migrate_data.sh +++ b/src/resources/migrate_data.sh @@ -35,7 +35,7 @@ ########################################################################################################################### # Path to spark-submit -SPARK_SUBMIT=/home/ubuntu/spark-3.4.2-bin-hadoop3-scala2.13/bin/spark-submit +SPARK_SUBMIT=/home/ubuntu/spark-3.5.1-bin-hadoop3-scala2.13/bin/spark-submit # Path to spark configuration for the table PROPS_FILE=/home/ubuntu/sparkConf.properties