diff --git a/Dockerfile b/Dockerfile
index c749651ac..970332940 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,7 +23,7 @@ WORKDIR /build
COPY ./ ./
RUN --mount=type=cache,target=/root/.m2 \
MAVEN_OPTS=-Dorg.slf4j.simpleLogger.defaultLogLevel=warn mvn -B package -DskipTests
-RUN mv xtable-utilities/target/xtable-utilities-$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)-bundled.jar target/app.jar
+RUN mv xtable-utilities/target/xtable-utilities_2.12-$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)-bundled.jar target/app.jar
FROM eclipse-temurin:17-jre-jammy AS final
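The updated `mv` keeps the version lookup dynamic and hardcodes only the new `_2.12` Scala suffix. As a sketch of what that `RUN` line resolves to (assuming the version is still `0.2.0-SNAPSHOT` and the default Scala 2.12 build):

```shell
# Resolve the project version exactly as the Dockerfile does.
VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
# With the default scala-2.12 profile this is equivalent to:
#   mv xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar target/app.jar
mv "xtable-utilities/target/xtable-utilities_2.12-${VERSION}-bundled.jar" target/app.jar
```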
diff --git a/README.md b/README.md
index 9eee56f8c..8d31df474 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,10 @@ future.
by something like `mvn test -Dtest=TestDeltaSync -pl xtable-core`.
4. Similarly, use `mvn clean verify` or `mvn verify` to run integration tests.
+**Note:** When using Maven version 3.9 or above, the build is cached automatically. To bypass the build cache, add the
+`-Dmaven.build.cache.enabled=false` parameter, e.g. `mvn clean package -DskipTests -Dmaven.build.cache.enabled=false`.
+
+
# Style guide
1. We use [Maven Spotless plugin](https://github.com/diffplug/spotless/tree/main/plugin-maven) and
[Google java format](https://github.com/google/google-java-format) for code style.
@@ -46,7 +50,7 @@ future.
# Running the bundled jar
1. Get a pre-built bundled jar or create the jar with `mvn install -DskipTests`
-2. create a yaml file that follows the format below:
+2. Create a yaml file that follows the format below:
```yaml
sourceFormat: HUDI
targetFormats:
@@ -110,7 +114,7 @@ catalogOptions: # all other options are passed through in a map
key1: value1
key2: value2
```
-5. run with `java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml [--hadoopConfig hdfs-site.xml] [--convertersConfig converters.yaml] [--icebergCatalogConfig catalog.yaml]`
+5. Run with `java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml [--hadoopConfig hdfs-site.xml] [--convertersConfig converters.yaml] [--icebergCatalogConfig catalog.yaml]`
The bundled jar includes hadoop dependencies for AWS, Azure, and GCP. Sample hadoop configurations for configuring the converters
can be found in the [xtable-hadoop-defaults.xml](https://github.com/apache/incubator-xtable/blob/main/utilities/src/main/resources/xtable-hadoop-defaults.xml) file.
The custom hadoop configurations can be passed in with the `--hadoopConfig [custom-hadoop-config-file]` option.
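Spelled out as a single sketch, using the placeholder file names already shown above (`my_config.yaml`, `hdfs-site.xml`):

```shell
# Run the bundled jar, overriding the bundled Hadoop defaults with a site-specific file.
java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar \
  --datasetConfig my_config.yaml \
  --hadoopConfig hdfs-site.xml
```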
diff --git a/demo/start_demo.sh b/demo/start_demo.sh
index e2c6d4dc9..3e65d0c8f 100755
--- a/demo/start_demo.sh
+++ b/demo/start_demo.sh
@@ -25,7 +25,7 @@ mvn install -am -pl xtable-core -DskipTests -T 2
mkdir -p demo/jars
cp xtable-hudi-support/xtable-hudi-support-utils/target/xtable-hudi-support-utils-0.2.0-SNAPSHOT.jar demo/jars
cp xtable-api/target/xtable-api-0.2.0-SNAPSHOT.jar demo/jars
-cp xtable-core/target/xtable-core-0.2.0-SNAPSHOT.jar demo/jars
+cp xtable-core/target/xtable-core_2.12-0.2.0-SNAPSHOT.jar demo/jars
cd demo
docker-compose up
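Note that only `xtable-core` gains the `_2.12` suffix here; `xtable-api` and `xtable-hudi-support-utils` keep their plain names, presumably because they are not compiled against Scala-versioned dependencies. What the demo now expects under `demo/jars` (a sketch for the default 2.12 build):

```shell
ls demo/jars
# xtable-hudi-support-utils-0.2.0-SNAPSHOT.jar   (no Scala suffix)
# xtable-api-0.2.0-SNAPSHOT.jar                  (no Scala suffix)
# xtable-core_2.12-0.2.0-SNAPSHOT.jar            (suffixed with the Scala binary version)
```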
diff --git a/pom.xml b/pom.xml
index b10bf31cc..9ab04aeda 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,12 +48,13 @@
     <module>xtable-api</module>
+    <module>xtable-hudi-support</module>
     <module>xtable-core</module>
     <module>xtable-utilities</module>
-    <module>xtable-hudi-support</module>
   </modules>

   <properties>
+    0.2.0-SNAPSHOT
     8
     <avro.version>1.11.3</avro.version>
     <log4j.version>2.22.0</log4j.version>
@@ -68,8 +69,10 @@
     3.1.1
     2.5.3
     1.12.2
-    <scala.version>2.12.15</scala.version>
-    <scala.version.prefix>2.12</scala.version.prefix>
+    <scala12.version>2.12.20</scala12.version>
+    <scala13.version>2.13.14</scala13.version>
+    <scala.version>${scala12.version}</scala.version>
+    <scala.binary.version>2.12</scala.binary.version>
     <spark.version>3.4.2</spark.version>
     <spark.version.prefix>3.4</spark.version.prefix>
     <iceberg.version>1.4.2</iceberg.version>
@@ -84,7 +87,8 @@
     ${project.build.directory}/delombok
     1.7
     1.7
-
+    <scala-collection-compat.version>2.8.1</scala-collection-compat.version>
+
     false
     ${skipTests}
@@ -125,8 +129,8 @@
       <dependency>
         <groupId>org.scala-lang.modules</groupId>
-        <artifactId>scala-collection-compat_${scala.version.prefix}</artifactId>
-        <version>2.8.1</version>
+        <artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
+        <version>${scala-collection-compat.version}</version>
       </dependency>
@@ -229,7 +233,7 @@
       <dependency>
         <groupId>org.apache.hudi</groupId>
-        <artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix}</artifactId>
+        <artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
         <version>${hudi.version}</version>
         <scope>test</scope>
       </dependency>
@@ -265,7 +269,7 @@
       <dependency>
         <groupId>org.apache.iceberg</groupId>
-        <artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.version.prefix}</artifactId>
+        <artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}</artifactId>
         <version>${iceberg.version}</version>
         <scope>test</scope>
       </dependency>
@@ -273,12 +277,12 @@
       <dependency>
         <groupId>io.delta</groupId>
-        <artifactId>delta-core_${scala.version.prefix}</artifactId>
+        <artifactId>delta-core_${scala.binary.version}</artifactId>
         <version>${delta.version}</version>
       </dependency>
       <dependency>
         <groupId>io.delta</groupId>
-        <artifactId>delta-standalone_${scala.version.prefix}</artifactId>
+        <artifactId>delta-standalone_${scala.binary.version}</artifactId>
         <version>${delta.standalone.version}</version>
         <scope>test</scope>
       </dependency>
@@ -286,7 +290,7 @@
       <dependency>
         <groupId>org.apache.spark</groupId>
-        <artifactId>spark-core_${scala.version.prefix}</artifactId>
+        <artifactId>spark-core_${scala.binary.version}</artifactId>
         <version>${spark.version}</version>
@@ -306,7 +310,7 @@
       <dependency>
         <groupId>org.apache.spark</groupId>
-        <artifactId>spark-sql_${scala.version.prefix}</artifactId>
+        <artifactId>spark-sql_${scala.binary.version}</artifactId>
         <version>${spark.version}</version>
         <scope>provided</scope>
       </dependency>
@@ -464,7 +468,7 @@
       <dependency>
         <groupId>com.fasterxml.jackson.module</groupId>
-        <artifactId>jackson-module-scala_${scala.version.prefix}</artifactId>
+        <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
         <version>${jackson.version}</version>
       </dependency>
@@ -867,6 +871,53 @@
+
+    <profile>
+      <id>scala-2.12</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <properties>
+        <scala.version>${scala12.version}</scala.version>
+        <scala.binary.version>2.12</scala.binary.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>scala-2.13</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <properties>
+        <scala.version>${scala13.version}</scala.version>
+        <scala.binary.version>2.13</scala.binary.version>
+      </properties>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>net.alchim31.maven</groupId>
+            <artifactId>scala-maven-plugin</artifactId>
+            <configuration>
+              <args>
+                <arg>-unchecked</arg>
+                <arg>-deprecation</arg>
+                <arg>-feature</arg>
+                <arg>-explaintypes</arg>
+                <arg>-target:jvm-1.8</arg>
+              </args>
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+
     <profile>
       <id>release</id>
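These profiles are what drive the `_2.12`/`_2.13` artifact names: `scala-2.12` is active by default, and `scala-2.13` switches `scala.binary.version`, and with it every `*_${scala.binary.version}` artifactId. Usage sketch, assuming the profile ids above:

```shell
# Default build: the scala-2.12 profile is active, producing *_2.12 artifacts.
mvn clean package -DskipTests

# Cross-build for Scala 2.13; artifact names like
# xtable-utilities_2.13-0.2.0-SNAPSHOT-bundled.jar follow from the suffixed artifactIds.
mvn clean package -DskipTests -Pscala-2.13
```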
diff --git a/website/docs/biglake-metastore.md b/website/docs/biglake-metastore.md
index 4ee4c2c2b..b292ea694 100644
--- a/website/docs/biglake-metastore.md
+++ b/website/docs/biglake-metastore.md
@@ -25,7 +25,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account_key.json
```
5. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
- `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
+ `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
6. Download the [BigLake Iceberg JAR](gs://spark-lib/biglake/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar) locally.
Apache XTable™ (Incubating) requires the JAR to be present in the classpath.
@@ -117,7 +117,7 @@ catalogOptions:
From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command.
```shell md title="shell"
-java -cp xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/downloaded/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar org.apache.xtable.utilities.RunSync --datasetConfig my_config.yaml --icebergCatalogConfig catalog.yaml
+java -cp xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar:/path/to/downloaded/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar org.apache.xtable.utilities.RunSync --datasetConfig my_config.yaml --icebergCatalogConfig catalog.yaml
```
:::tip Note:
diff --git a/website/docs/fabric.md b/website/docs/fabric.md
index 9bae2d9b7..10572cbb4 100644
--- a/website/docs/fabric.md
+++ b/website/docs/fabric.md
@@ -98,7 +98,7 @@ An example hadoop configuration for authenticating to ADLS storage account is as
```
```shell md title="shell"
-java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml --hadoopConfig hadoop.xml
+java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml --hadoopConfig hadoop.xml
```
Running the above command will translate the table `people` in Iceberg or Hudi format to Delta Lake format. To validate
diff --git a/website/docs/glue-catalog.md b/website/docs/glue-catalog.md
index 6d1388c96..113178206 100644
--- a/website/docs/glue-catalog.md
+++ b/website/docs/glue-catalog.md
@@ -19,7 +19,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
also set up access credentials by following the steps
[here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html)
3. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
- `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
+ `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
## Steps
### Running sync
@@ -84,7 +84,7 @@ Replace with appropriate values for `sourceFormat`, `tableBasePath` and `tableNa
From your terminal under the cloned xtable directory, run the sync process using the below command.
```shell md title="shell"
- java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
+ java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```
:::tip Note:
diff --git a/website/docs/hms.md b/website/docs/hms.md
index 7a4696e89..98682df57 100644
--- a/website/docs/hms.md
+++ b/website/docs/hms.md
@@ -17,7 +17,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
or a distributed system like Amazon EMR, Google Cloud's Dataproc, Azure HDInsight etc.
This is a required step to register the table in HMS using a Spark client.
3. Clone the XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
- `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
+ `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
4. This guide also assumes that you have configured the Hive Metastore locally or on EMR/Dataproc/HDInsight
and is already running.
@@ -88,7 +88,7 @@ datasets:
From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command.
```shell md title="shell"
-java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
+java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```
:::tip Note:
diff --git a/website/docs/how-to.md b/website/docs/how-to.md
index ea18a6633..a60f223f1 100644
--- a/website/docs/how-to.md
+++ b/website/docs/how-to.md
@@ -24,7 +24,7 @@ history to enable proper point in time queries.
1. A compute instance where you can run Apache Spark. This can be your local machine, docker,
or a distributed service like Amazon EMR, Google Cloud's Dataproc, Azure HDInsight etc
2. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
- `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
+ `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
3. Optional: Setup access to write to and/or read from distributed storage services like:
* Amazon S3 by following the steps
[here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) to install AWSCLIv2
@@ -351,7 +351,7 @@ Authentication for GCP requires service account credentials to be exported. i.e.
In your terminal under the cloned Apache XTable™ (Incubating) directory, run the below command.
```shell md title="shell"
-java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
+java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```
**Optional:**
diff --git a/website/docs/unity-catalog.md b/website/docs/unity-catalog.md
index b2fb83fee..cc5ccb0d4 100644
--- a/website/docs/unity-catalog.md
+++ b/website/docs/unity-catalog.md
@@ -17,7 +17,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
3. Create a Unity Catalog metastore in Databricks as outlined [here](https://docs.gcp.databricks.com/data-governance/unity-catalog/create-metastore.html#create-a-unity-catalog-metastore).
4. Create an external location in Databricks as outlined [here](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-location.html).
5. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
- `xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
+ `xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
## Pre-requisites (for open-source Unity Catalog)
1. Source table(s) (Hudi/Iceberg) already written to external storage locations like S3/GCS/ADLS or local.
@@ -48,7 +48,7 @@ datasets:
From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command.
```shell md title="shell"
-java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
+java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```
:::tip Note:
diff --git a/xtable-core/pom.xml b/xtable-core/pom.xml
index b5059186a..37e012d91 100644
--- a/xtable-core/pom.xml
+++ b/xtable-core/pom.xml
@@ -25,17 +25,19 @@
     <version>0.2.0-SNAPSHOT</version>
   </parent>

-  <artifactId>xtable-core</artifactId>
+  <artifactId>xtable-core_${scala.binary.version}</artifactId>
   <name>XTable Project Core</name>

   <dependencies>
     <dependency>
       <groupId>org.apache.xtable</groupId>
       <artifactId>xtable-api</artifactId>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.xtable</groupId>
       <artifactId>xtable-hudi-support-utils</artifactId>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
@@ -47,7 +49,7 @@
     <dependency>
       <groupId>com.fasterxml.jackson.module</groupId>
-      <artifactId>jackson-module-scala_${scala.version.prefix}</artifactId>
+      <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
     </dependency>
     <dependency>
       <groupId>com.google.guava</groupId>
@@ -69,7 +71,7 @@
     <dependency>
       <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix}</artifactId>
+      <artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
@@ -94,11 +96,11 @@
     <dependency>
       <groupId>io.delta</groupId>
-      <artifactId>delta-core_${scala.version.prefix}</artifactId>
+      <artifactId>delta-core_${scala.binary.version}</artifactId>
     </dependency>
     <dependency>
       <groupId>io.delta</groupId>
-      <artifactId>delta-standalone_${scala.version.prefix}</artifactId>
+      <artifactId>delta-standalone_${scala.binary.version}</artifactId>
     </dependency>
@@ -120,16 +122,16 @@
     <dependency>
       <groupId>org.apache.iceberg</groupId>
-      <artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.version.prefix}</artifactId>
+      <artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala.version.prefix}</artifactId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-sql_${scala.version.prefix}</artifactId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
     </dependency>
diff --git a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml
index 1795111c6..66f4aa4ae 100644
--- a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml
+++ b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml
@@ -19,23 +19,27 @@
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
+
   <parent>
     <groupId>org.apache.xtable</groupId>
     <artifactId>xtable-hudi-support</artifactId>
     <version>0.2.0-SNAPSHOT</version>
   </parent>

-  <artifactId>xtable-hudi-support-extensions</artifactId>
+  <artifactId>xtable-hudi-support-extensions_${scala.binary.version}</artifactId>
   <name>XTable Project Hudi Support Extensions</name>

   <dependencies>
     <dependency>
       <groupId>org.apache.xtable</groupId>
       <artifactId>xtable-hudi-support-utils</artifactId>
+      <version>${project.version}</version>
     </dependency>
+
     <dependency>
       <groupId>org.apache.xtable</groupId>
-      <artifactId>xtable-core</artifactId>
+      <artifactId>xtable-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
     </dependency>
@@ -111,7 +115,7 @@
     <dependency>
       <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix}</artifactId>
+      <artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
@@ -125,11 +129,11 @@
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala.version.prefix}</artifactId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-sql_${scala.version.prefix}</artifactId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
     </dependency>
diff --git a/xtable-utilities/pom.xml b/xtable-utilities/pom.xml
index 175b47578..245b047d3 100644
--- a/xtable-utilities/pom.xml
+++ b/xtable-utilities/pom.xml
@@ -25,17 +25,14 @@
     <version>0.2.0-SNAPSHOT</version>
   </parent>

-  <artifactId>xtable-utilities</artifactId>
+  <artifactId>xtable-utilities_${scala.binary.version}</artifactId>
   <name>XTable Project Utilities</name>

   <dependencies>
     <dependency>
       <groupId>org.apache.xtable</groupId>
-      <artifactId>xtable-api</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.xtable</groupId>
-      <artifactId>xtable-core</artifactId>
+      <artifactId>xtable-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
     </dependency>
@@ -74,12 +71,12 @@
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala.version.prefix}</artifactId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
       <scope>runtime</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-sql_${scala.version.prefix}</artifactId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
       <scope>runtime</scope>
     </dependency>
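With `xtable-api` dropped as a direct dependency, `xtable-utilities` now reaches it transitively through `xtable-core_${scala.binary.version}`. A quick way to confirm that after this change (a sketch using the standard maven-dependency-plugin goal):

```shell
# List org.apache.xtable artifacts in the utilities module's resolved dependency tree.
mvn -pl xtable-utilities dependency:tree -Dincludes=org.apache.xtable
```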