From a51c331e6da5a0437b2555a1eda94f7bce62ea54 Mon Sep 17 00:00:00 2001 From: Ranga Reddy Date: Thu, 26 Sep 2024 22:14:59 +0530 Subject: [PATCH] [529] Build xtable with scala version(s) --- Dockerfile | 2 +- README.md | 8 +- demo/start_demo.sh | 2 +- pom.xml | 77 +++++++++++++++---- website/docs/biglake-metastore.md | 4 +- website/docs/fabric.md | 2 +- website/docs/glue-catalog.md | 4 +- website/docs/hms.md | 4 +- website/docs/how-to.md | 4 +- website/docs/unity-catalog.md | 4 +- xtable-core/pom.xml | 18 +++-- .../xtable-hudi-support-extensions/pom.xml | 14 ++-- xtable-utilities/pom.xml | 13 ++-- 13 files changed, 107 insertions(+), 49 deletions(-) diff --git a/Dockerfile b/Dockerfile index c749651ac..970332940 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ WORKDIR /build COPY ./ ./ RUN --mount=type=cache,target=/root/.m2 \ MAVEN_OPTS=-Dorg.slf4j.simpleLogger.defaultLogLevel=warn mvn -B package -DskipTests -RUN mv xtable-utilities/target/xtable-utilities-$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)-bundled.jar target/app.jar +RUN mv xtable-utilities/target/xtable-utilities_2.12-$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)-bundled.jar target/app.jar FROM eclipse-temurin:17-jre-jammy AS final diff --git a/README.md b/README.md index 9eee56f8c..8d31df474 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,10 @@ future. by something like `mvn test -Dtest=TestDeltaSync -pl xtable-core`. 4. Similarly, use `mvn clean verify` or `mvn verify` to run integration tests. +**Note:** When using Maven version 3.9 or above, Maven automatically caches the build. To ignore build caching, you can +add the `-Dmaven.build.cache.enabled=false` parameter. For example, `mvn clean package -DskipTests -Dmaven.build.cache.enabled=false` + + # Style guide 1. We use [Maven Spotless plugin](https://github.com/diffplug/spotless/tree/main/plugin-maven) and [Google java format](https://github.com/google/google-java-format) for code style. @@ -46,7 +50,7 @@ future. # Running the bundled jar 1. Get a pre-built bundled jar or create the jar with `mvn install -DskipTests` -2. create a yaml file that follows the format below: +2. Create a yaml file that follows the format below: ```yaml sourceFormat: HUDI targetFormats: @@ -110,7 +114,7 @@ catalogOptions: # all other options are passed through in a map key1: value1 key2: value2 ``` -5. run with `java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml [--hadoopConfig hdfs-site.xml] [--convertersConfig converters.yaml] [--icebergCatalogConfig catalog.yaml]` +5. Run with `java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml [--hadoopConfig hdfs-site.xml] [--convertersConfig converters.yaml] [--icebergCatalogConfig catalog.yaml]` The bundled jar includes hadoop dependencies for AWS, Azure, and GCP. Sample hadoop configurations for configuring the converters can be found in the [xtable-hadoop-defaults.xml](https://github.com/apache/incubator-xtable/blob/main/utilities/src/main/resources/xtable-hadoop-defaults.xml) file. The custom hadoop configurations can be passed in with the `--hadoopConfig [custom-hadoop-config-file]` option. diff --git a/demo/start_demo.sh b/demo/start_demo.sh index e2c6d4dc9..3e65d0c8f 100755 --- a/demo/start_demo.sh +++ b/demo/start_demo.sh @@ -25,7 +25,7 @@ mvn install -am -pl xtable-core -DskipTests -T 2 mkdir -p demo/jars cp xtable-hudi-support/xtable-hudi-support-utils/target/xtable-hudi-support-utils-0.2.0-SNAPSHOT.jar demo/jars cp xtable-api/target/xtable-api-0.2.0-SNAPSHOT.jar demo/jars -cp xtable-core/target/xtable-core-0.2.0-SNAPSHOT.jar demo/jars +cp xtable-core/target/xtable-core_2.12-0.2.0-SNAPSHOT.jar demo/jars cd demo docker-compose up diff --git a/pom.xml b/pom.xml index b10bf31cc..9ab04aeda 100644 --- a/pom.xml +++ b/pom.xml @@ -48,12 +48,13 @@ xtable-api + xtable-hudi-support xtable-core xtable-utilities - xtable-hudi-support + 0.2.0-SNAPSHOT 8 1.11.3 2.22.0 @@ -68,8 +69,10 @@ 3.1.1 2.5.3 1.12.2 - 2.12.15 - 2.12 + 2.12.20 + 2.13.14 + ${scala12.version} + 2.12 3.4.2 3.4 1.4.2 @@ -84,7 +87,8 @@ ${project.build.directory}/delombok 1.7 1.7 - + 2.8.1 + false ${skipTests} @@ -125,8 +129,8 @@ org.scala-lang.modules - scala-collection-compat_${scala.version.prefix} - 2.8.1 + scala-collection-compat_${scala.binary.version} + ${scala-collection-compat.version} @@ -229,7 +233,7 @@ org.apache.hudi - hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix} + hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} ${hudi.version} test @@ -265,7 +269,7 @@ org.apache.iceberg - iceberg-spark-runtime-${spark.version.prefix}_${scala.version.prefix} + iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version} ${iceberg.version} test @@ -273,12 +277,12 @@ io.delta - delta-core_${scala.version.prefix} + delta-core_${scala.binary.version} ${delta.version} io.delta - delta-standalone_${scala.version.prefix} + delta-standalone_${scala.binary.version} ${delta.standalone.version} test @@ -286,7 +290,7 @@ org.apache.spark - spark-core_${scala.version.prefix} + spark-core_${scala.binary.version} ${spark.version} @@ -306,7 +310,7 @@ org.apache.spark - spark-sql_${scala.version.prefix} + spark-sql_${scala.binary.version} ${spark.version} provided @@ -464,7 +468,7 @@ com.fasterxml.jackson.module - jackson-module-scala_${scala.version.prefix} + jackson-module-scala_${scala.binary.version} ${jackson.version} @@ -867,6 +871,53 @@ + + + scala-2.12 + + true + + + ${scala12.version} + 2.12 + + + + + + + + + + scala-2.13 + + false + + + ${scala13.version} + 2.13 + + + + + + net.alchim31.maven + scala-maven-plugin + + + -unchecked + -deprecation + -feature + -explaintypes + -target:jvm-1.8 + + + + + + + + release diff --git a/website/docs/biglake-metastore.md b/website/docs/biglake-metastore.md index 4ee4c2c2b..b292ea694 100644 --- a/website/docs/biglake-metastore.md +++ b/website/docs/biglake-metastore.md @@ -25,7 +25,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account_key.json ``` 5. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the - `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) + `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) 6. Download the [BigLake Iceberg JAR](gs://spark-lib/biglake/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar) locally. Apache XTable™ (Incubating) requires the JAR to be present in the classpath. @@ -117,7 +117,7 @@ catalogOptions: From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command. ```shell md title="shell" -java -cp xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/downloaded/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar org.apache.xtable.utilities.RunSync --datasetConfig my_config.yaml --icebergCatalogConfig catalog.yaml +java -cp xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar:/path/to/downloaded/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar org.apache.xtable.utilities.RunSync --datasetConfig my_config.yaml --icebergCatalogConfig catalog.yaml ``` :::tip Note: diff --git a/website/docs/fabric.md b/website/docs/fabric.md index 9bae2d9b7..10572cbb4 100644 --- a/website/docs/fabric.md +++ b/website/docs/fabric.md @@ -98,7 +98,7 @@ An example hadoop configuration for authenticating to ADLS storage account is as ``` ```shell md title="shell" -java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml --hadoopConfig hadoop.xml +java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml --hadoopConfig hadoop.xml ``` Running the above command will translate the table `people` in Iceberg or Hudi format to Delta Lake format. To validate diff --git a/website/docs/glue-catalog.md b/website/docs/glue-catalog.md index 6d1388c96..113178206 100644 --- a/website/docs/glue-catalog.md +++ b/website/docs/glue-catalog.md @@ -19,7 +19,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin also set up access credentials by following the steps [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html) 3. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the - `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) + `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) ## Steps ### Running sync @@ -84,7 +84,7 @@ Replace with appropriate values for `sourceFormat`, `tableBasePath` and `tableNa From your terminal under the cloned xtable directory, run the sync process using the below command. ```shell md title="shell" - java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml + java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml ``` :::tip Note: diff --git a/website/docs/hms.md b/website/docs/hms.md index 7a4696e89..98682df57 100644 --- a/website/docs/hms.md +++ b/website/docs/hms.md @@ -17,7 +17,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin or a distributed system like Amazon EMR, Google Cloud's Dataproc, Azure HDInsight etc. This is a required step to register the table in HMS using a Spark client. 3. Clone the XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the - `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) + `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) 4. This guide also assumes that you have configured the Hive Metastore locally or on EMR/Dataproc/HDInsight and is already running. @@ -88,7 +88,7 @@ datasets: From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command. ```shell md title="shell" -java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml +java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml ``` :::tip Note: diff --git a/website/docs/how-to.md b/website/docs/how-to.md index ea18a6633..a60f223f1 100644 --- a/website/docs/how-to.md +++ b/website/docs/how-to.md @@ -24,7 +24,7 @@ history to enable proper point in time queries. 1. A compute instance where you can run Apache Spark. This can be your local machine, docker, or a distributed service like Amazon EMR, Google Cloud's Dataproc, Azure HDInsight etc 2. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the - `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) + `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) 3. Optional: Setup access to write to and/or read from distributed storage services like: * Amazon S3 by following the steps [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) to install AWSCLIv2 @@ -351,7 +351,7 @@ Authentication for GCP requires service account credentials to be exported. i.e. In your terminal under the cloned Apache XTable™ (Incubating) directory, run the below command. ```shell md title="shell" -java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml +java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml ``` **Optional:** diff --git a/website/docs/unity-catalog.md b/website/docs/unity-catalog.md index b2fb83fee..cc5ccb0d4 100644 --- a/website/docs/unity-catalog.md +++ b/website/docs/unity-catalog.md @@ -17,7 +17,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin 3. Create a Unity Catalog metastore in Databricks as outlined [here](https://docs.gcp.databricks.com/data-governance/unity-catalog/create-metastore.html#create-a-unity-catalog-metastore). 4. Create an external location in Databricks as outlined [here](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-location.html). 5. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the - `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) + `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup) ## Pre-requisites (for open-source Unity Catalog) 1. Source table(s) (Hudi/Iceberg) already written to external storage locations like S3/GCS/ADLS or local. @@ -48,7 +48,7 @@ datasets: From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command. ```shell md title="shell" -java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml +java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml ``` :::tip Note: diff --git a/xtable-core/pom.xml b/xtable-core/pom.xml index b5059186a..37e012d91 100644 --- a/xtable-core/pom.xml +++ b/xtable-core/pom.xml @@ -25,17 +25,19 @@ 0.2.0-SNAPSHOT - xtable-core + xtable-core_${scala.binary.version} XTable Project Core org.apache.xtable xtable-api + ${project.version} org.apache.xtable xtable-hudi-support-utils + ${project.version} com.fasterxml.jackson.core @@ -47,7 +49,7 @@ com.fasterxml.jackson.module - jackson-module-scala_${scala.version.prefix} + jackson-module-scala_${scala.binary.version} com.google.guava @@ -69,7 +71,7 @@ org.apache.hudi - hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix} + hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} test @@ -94,11 +96,11 @@ io.delta - delta-core_${scala.version.prefix} + delta-core_${scala.binary.version} io.delta - delta-standalone_${scala.version.prefix} + delta-standalone_${scala.binary.version} @@ -120,16 +122,16 @@ org.apache.iceberg - iceberg-spark-runtime-${spark.version.prefix}_${scala.version.prefix} + iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version} test org.apache.spark - spark-core_${scala.version.prefix} + spark-core_${scala.binary.version} org.apache.spark - spark-sql_${scala.version.prefix} + spark-sql_${scala.binary.version} diff --git a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml index 1795111c6..66f4aa4ae 100644 --- a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml +++ b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml @@ -19,23 +19,27 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 + org.apache.xtable xtable-hudi-support 0.2.0-SNAPSHOT - xtable-hudi-support-extensions + xtable-hudi-support-extensions_${scala.binary.version} XTable Project Hudi Support Extensions org.apache.xtable xtable-hudi-support-utils + ${project.version} + org.apache.xtable - xtable-core + xtable-core_${scala.binary.version} + ${project.version} @@ -111,7 +115,7 @@ org.apache.hudi - hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix} + hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} test @@ -125,11 +129,11 @@ org.apache.spark - spark-core_${scala.version.prefix} + spark-core_${scala.binary.version} org.apache.spark - spark-sql_${scala.version.prefix} + spark-sql_${scala.binary.version} diff --git a/xtable-utilities/pom.xml b/xtable-utilities/pom.xml index 175b47578..245b047d3 100644 --- a/xtable-utilities/pom.xml +++ b/xtable-utilities/pom.xml @@ -25,17 +25,14 @@ 0.2.0-SNAPSHOT - xtable-utilities + xtable-utilities_${scala.binary.version} XTable Project Utilities org.apache.xtable - xtable-api - - - org.apache.xtable - xtable-core + xtable-core_${scala.binary.version} + ${project.version} @@ -74,12 +71,12 @@ org.apache.spark - spark-core_${scala.version.prefix} + spark-core_${scala.binary.version} runtime org.apache.spark - spark-sql_${scala.version.prefix} + spark-sql_${scala.binary.version} runtime