From 02e2aca75b0d0d029bbb2161b0e782cb1901dbf4 Mon Sep 17 00:00:00 2001 From: Pravin Bhat Date: Thu, 3 Oct 2024 08:53:21 -0400 Subject: [PATCH 1/3] Implemented capability to separate diff logs via log4j2 --- README.md | 15 ++++- RELEASE.md | 3 +- pom.xml | 12 ++++ .../cdm/properties/PropertyHelper.java | 3 - src/resources/log4j.properties | 22 -------- src/resources/log4j.xml | 16 ------ src/resources/log4j2.properties | 55 +++++++++++++++++++ 7 files changed, 82 insertions(+), 44 deletions(-) delete mode 100644 src/resources/log4j.properties delete mode 100644 src/resources/log4j.xml create mode 100644 src/resources/log4j2.properties diff --git a/README.md b/README.md index 3a209e1c..c0aa0a15 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Note: --class com.datastax.cdm.job.DiffData cassandra-data-migrator-4.x.x.jar &> logfile_name_$(date +%Y%m%d_%H_%M).txt ``` -- Validation job will report differences as “ERRORS” in the log file as shown below +- Validation job will report differences as “ERRORS” in the log file as shown below. ``` 23/04/06 08:43:06 ERROR DiffJobSession: Mismatch row found for key: [key3] Mismatch: Target Index: 1 Origin: valueC Target: value999) @@ -79,6 +79,17 @@ Note: - Please grep for all `ERROR` from the output log files to get the list of missing and mismatched records. - Note that it lists differences by primary-key values. +- If you would like to redirect such logs into a separate file, you could use the `log4j2.properties` file [provided here](./src/resources/log4j2.properties) as shown below + +``` +./spark-submit --properties-file cdm.properties \ +--conf spark.cdm.schema.origin.keyspaceTable="<keyspacename>.<tablename>" 
\ +--conf "spark.executor.extraJavaOptions='-Dlog4j.configurationFile=log4j2.properties'" \ +--conf "spark.driver.extraJavaOptions='-Dlog4j.configurationFile=log4j2.properties'" \ +--master "local[*]" --driver-memory 25G --executor-memory 25G \ +--class com.datastax.cdm.job.DiffData cassandra-data-migrator-4.x.x.jar &> logfile_name_$(date +%Y%m%d_%H_%M).txt +``` + - The Validation job can also be run in an AutoCorrect mode. This mode can - Add any missing records from origin to target - Update any mismatched records between origin and target (makes target same as origin). @@ -102,7 +113,7 @@ Note: ``` # Perform large-field Guardrail violation checks -- The tool can be used to identify large fields from a table that may break you cluster guardrails (e.g. AstraDB has a 10MB limit for a single large field) `--class com.datastax.cdm.job.GuardrailCheck` as shown below +- The tool can be used to identify large fields from a table that may break your cluster guardrails (e.g. AstraDB has a 10MB limit for a single large field); use the class option `--class com.datastax.cdm.job.GuardrailCheck` as shown below ``` ./spark-submit --properties-file cdm.properties \ diff --git a/RELEASE.md b/RELEASE.md index e3b604e4..f4ba4470 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,5 +1,6 @@ # Release Notes -## [4.4.2] - 2024-10-TBD +## [4.4.2] - 2024-10-03 +- Upgraded to use log4j 2.x and included a template properties file that will help separate general logs from CDM class specific logs including a separate log for rows identified by `DiffData` (Validation) errors. - Upgraded to use Spark `3.5.3`. 
## [4.4.1] - 2024-09-20 diff --git a/pom.xml b/pom.xml index 25aa2302..2f8bf1e1 100644 --- a/pom.xml +++ b/pom.xml @@ -45,6 +45,18 @@ log4j log4j + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-core + + + org.apache.logging.log4j + log4j-1.2-api + org.apache.logging.log4j log4j-slf4j-impl diff --git a/src/main/java/com/datastax/cdm/properties/PropertyHelper.java b/src/main/java/com/datastax/cdm/properties/PropertyHelper.java index e93d8aa6..9f8f03fc 100644 --- a/src/main/java/com/datastax/cdm/properties/PropertyHelper.java +++ b/src/main/java/com/datastax/cdm/properties/PropertyHelper.java @@ -173,8 +173,6 @@ public List getNumberList(String propertyName) { @Override public List getIntegerList(String propertyName) { - List intList = new ArrayList<>(); - Integer i; if (null == propertyName || PropertyType.NUMBER_LIST != getType(propertyName) || null == getNumberList(propertyName)) return null; @@ -188,7 +186,6 @@ public Boolean getBoolean(String propertyName) { @Override public String getAsString(String propertyName) { - String rtn; if (null == propertyName) return null; PropertyType t = getType(propertyName); diff --git a/src/resources/log4j.properties b/src/resources/log4j.properties deleted file mode 100644 index 3d2af4ff..00000000 --- a/src/resources/log4j.properties +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright DataStax, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# Root logger option -log4j.rootLogger=INFO, stdout -# Direct log messages to stdout -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.Target=System.out -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p [THREAD ID=%t] %c{1}:%L - %m%n diff --git a/src/resources/log4j.xml b/src/resources/log4j.xml deleted file mode 100644 index 393d8b7f..00000000 --- a/src/resources/log4j.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/resources/log4j2.properties b/src/resources/log4j2.properties new file mode 100644 index 00000000..84aee1fd --- /dev/null +++ b/src/resources/log4j2.properties @@ -0,0 +1,55 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +appender.0.type = Console +appender.0.name = CONSOLE +appender.0.layout.type = PatternLayout +appender.0.layout.pattern = %d %-5p [%t] %c{1}:%L - %m%n + +appender.1.type = RollingFile +appender.1.name = MAIN +appender.1.fileName = cdm_logs/cdm.log +appender.1.filePattern = cdm_logs/cdm.%d{yyyy-MM-dd-HHmm}.%i.log +appender.1.layout.type = PatternLayout +appender.1.layout.pattern = %d %-5p [%t] %c{1}:%L - %m%n +appender.1.policy.type = Policies +appender.1.policy.0.type = OnStartupTriggeringPolicy +appender.1.policy.1.type = SizeBasedTriggeringPolicy +appender.1.policy.1.size = 10m + +appender.2.type = RollingFile +appender.2.name = DIFF +appender.2.fileName = cdm_logs/cdm_diff.log +appender.2.filePattern = cdm_logs/cdm_diff.%d{yyyy-MM-dd-HHmm}.%i.log +appender.2.layout.type = PatternLayout +appender.2.layout.pattern = %d %-5p [%t] %c{1}:%L - %m%n +appender.2.policy.type = Policies +appender.2.policy.0.type = OnStartupTriggeringPolicy +appender.2.policy.1.type = SizeBasedTriggeringPolicy +appender.2.policy.1.size = 10m + +rootLogger.level = INFO +rootLogger.appenderRef.0.ref = CONSOLE +rootLogger.appenderRef.0.level = INFO + +logger.0.name = com.datastax.cdm +logger.0.level = INFO +logger.0.additivity = false +logger.0.appenderRef.0.ref = MAIN + +logger.1.name = com.datastax.cdm.job.DiffJobSession +logger.1.level = ERROR +logger.1.additivity = false +logger.1.appenderRef.0.ref = DIFF From ae1be3f4468d9a93bcc63e2ec709202d8fc33e8d Mon Sep 17 00:00:00 2001 From: Pravin Bhat Date: Thu, 3 Oct 2024 14:21:18 -0400 Subject: [PATCH 2/3] Minor changes to log config --- RELEASE.md | 2 +- src/resources/log4j2.properties | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index f4ba4470..44700488 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,5 +1,5 @@ # Release Notes -## [4.4.2] - 2024-10-03 +## [4.5.0] - 2024-10-03 - Upgraded to use log4j 2.x and included a template properties file that will help separate general logs from CDM 
class specific logs including a separate log for rows identified by `DiffData` (Validation) errors. - Upgraded to use Spark `3.5.3`. diff --git a/src/resources/log4j2.properties b/src/resources/log4j2.properties index 84aee1fd..67cffaea 100644 --- a/src/resources/log4j2.properties +++ b/src/resources/log4j2.properties @@ -28,6 +28,8 @@ appender.1.policy.type = Policies appender.1.policy.0.type = OnStartupTriggeringPolicy appender.1.policy.1.type = SizeBasedTriggeringPolicy appender.1.policy.1.size = 10m +appender.1.strategy.type = DefaultRolloverStrategy +appender.1.strategy.max = 100 appender.2.type = RollingFile appender.2.name = DIFF @@ -39,6 +41,8 @@ appender.2.policy.type = Policies appender.2.policy.0.type = OnStartupTriggeringPolicy appender.2.policy.1.type = SizeBasedTriggeringPolicy appender.2.policy.1.size = 10m +appender.2.strategy.type = DefaultRolloverStrategy +appender.2.strategy.max = 100 rootLogger.level = INFO rootLogger.appenderRef.0.ref = CONSOLE From ae57f2c3acaac45cfb94cc1b5160018e5c0b04a0 Mon Sep 17 00:00:00 2001 From: Pravin Bhat Date: Fri, 4 Oct 2024 06:17:00 -0400 Subject: [PATCH 3/3] Log4j2 fixes --- SIT/cdm.sh | 8 ++++---- src/resources/log4j2_docker.properties | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 src/resources/log4j2_docker.properties diff --git a/SIT/cdm.sh b/SIT/cdm.sh index f4e22a8f..1e5c45a6 100755 --- a/SIT/cdm.sh +++ b/SIT/cdm.sh @@ -91,14 +91,14 @@ if [ $argErrors -ne 0 ]; then _usage fi -if [ ! -f /local/log4j.xml ]; then - cd /local && jar xvf /local/cassandra-data-migrator.jar log4j.xml && cd - +if [ ! 
-f /local/log4j2_docker.properties ]; then + cd /local && jar xvf /local/cassandra-data-migrator.jar log4j2_docker.properties && cd - fi spark-submit --properties-file "${PROPERTIES}" \ --master "local[*]" \ - --conf "spark.driver.extraJavaOptions=-Dlog4j.configurationFile=file:///local/log4j.xml -Dcom.datastax.cdm.log.level=DEBUG" \ - --conf "spark.executor.extraJavaOptions=-Dlog4j.configurationFile=file:///local/log4j.xml -Dcom.datastax.cdm.log.level=DEBUG" \ + --conf "spark.driver.extraJavaOptions=-Dlog4j.configurationFile=file:///local/log4j2_docker.properties -Dcom.datastax.cdm.log.level=DEBUG" \ + --conf "spark.executor.extraJavaOptions=-Dlog4j.configurationFile=file:///local/log4j2_docker.properties -Dcom.datastax.cdm.log.level=DEBUG" \ --class ${CLASS} \ /local/cassandra-data-migrator.jar diff --git a/src/resources/log4j2_docker.properties b/src/resources/log4j2_docker.properties new file mode 100644 index 00000000..e03ea87d --- /dev/null +++ b/src/resources/log4j2_docker.properties @@ -0,0 +1,23 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +appender.0.type = Console +appender.0.name = CONSOLE +appender.0.layout.type = PatternLayout +appender.0.layout.pattern = %d %-5p [%t] %c{1}:%L - %m%n + +rootLogger.level = INFO +rootLogger.appenderRef.0.ref = CONSOLE +rootLogger.appenderRef.0.level = INFO