Skip to content

Commit

Permalink
Maintenance
Browse files (browse the repository at this point in the history)
* Use mamba instead of conda install
* Upgrade packages
  • Loading branch information
aliavni committed Jun 16, 2024
1 parent 72f4469 commit 34e702f
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 21 deletions.
14 changes: 6 additions & 8 deletions docker/airflow/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
-c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.1/constraints-3.11.txt
apache-airflow-providers-amazon==8.20.0
apache-airflow-providers-apache-spark==4.7.2
apache-airflow-providers-slack==8.6.2
plyvel==1.5.1

# duckdb==0.10.2
# polars==0.20.26
# pyspark==3.5.1
# apache-airflow-providers-slack
# deltalake
# delta-spark
delta-spark==3.2.0
deltalake==0.17.3
duckdb==0.10.2
polars==0.20.31
pyspark==3.5.1
3 changes: 2 additions & 1 deletion docker/jupyter/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ COPY ./requirements.txt /tmp/requirements.txt

RUN export DOCKER_BUILDKIT=1

RUN --mount=type=cache,target=/opt/conda/pkgs conda install --quiet --yes --file /tmp/requirements.txt
RUN conda install -y -c conda-forge mamba
RUN mamba install -y --file /tmp/requirements.txt
14 changes: 7 additions & 7 deletions docker/jupyter/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
delta-spark==3.0.0
deltalake==0.17.4
delta-spark
deltalake
# duckdb_engine==0.12.0
grpcio==1.62.1
grpcio-status==1.62.1
jupyterlab==4.1.6
grpcio
grpcio-status
jupyterlab==4.2.2
kafka-python==2.0.2
pandas==2.2.2
polars==0.20.26
protobuf==4.25.3
polars==0.20.31
protobuf
pyspark==3.5.1
41 changes: 36 additions & 5 deletions notebooks/spark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "f88878dc",
"metadata": {},
"outputs": [],
Expand All @@ -20,10 +20,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "c15aad9d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24/06/14 18:21:26 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
"Setting default log level to \"WARN\".\n",
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n"
]
}
],
"source": [
"# Add postgres jar\n",
"spark = (\n",
Expand All @@ -46,10 +56,31 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "4640f260-f0a9-40e9-855c-4ffa7a744dff",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
" |-- table_catalog: string (nullable = true)\n",
" |-- table_schema: string (nullable = true)\n",
" |-- table_name: string (nullable = true)\n",
" |-- table_type: string (nullable = true)\n",
" |-- self_referencing_column_name: string (nullable = true)\n",
" |-- reference_generation: string (nullable = true)\n",
" |-- user_defined_type_catalog: string (nullable = true)\n",
" |-- user_defined_type_schema: string (nullable = true)\n",
" |-- user_defined_type_name: string (nullable = true)\n",
" |-- is_insertable_into: string (nullable = true)\n",
" |-- is_typed: string (nullable = true)\n",
" |-- commit_action: string (nullable = true)\n",
"\n"
]
}
],
"source": [
"df = (\n",
" spark.read.format(\"jdbc\")\n",
Expand Down

0 comments on commit 34e702f

Please sign in to comment.