chore: python version to 3.11 (while supporting 3.10) (#31503)
mistercrunch authored Jan 14, 2025
1 parent 8550113 commit 274aa14
Showing 31 changed files with 60 additions and 47 deletions.
8 changes: 5 additions & 3 deletions .github/actions/setup-backend/action.yml
@@ -26,11 +26,12 @@ runs:
shell: bash
run: |
  if [ "${{ inputs.python-version }}" = "current" ]; then
-   echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV
- elif [ "${{ inputs.python-version }}" = "next" ]; then
    echo "PYTHON_VERSION=3.11" >> $GITHUB_ENV
+ elif [ "${{ inputs.python-version }}" = "next" ]; then
+   # currently disabled in GHA matrixes because of library compatibility issues
+   echo "PYTHON_VERSION=3.12" >> $GITHUB_ENV
  elif [ "${{ inputs.python-version }}" = "previous" ]; then
-   echo "PYTHON_VERSION=3.9" >> $GITHUB_ENV
+   echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV
  else
    echo "PYTHON_VERSION=${{ inputs.python-version }}" >> $GITHUB_ENV
  fi
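For reference, here is the alias-to-interpreter mapping the action now resolves, re-expressed as a small Python sketch (illustrative only; this code does not exist in the repository):

```python
# Illustrative re-expression of the bash alias resolution above; not repo code.
ALIAS_TO_PYTHON = {
    "previous": "3.10",
    "current": "3.11",
    "next": "3.12",  # still disabled in the GHA matrices due to library compatibility
}

def resolve_python_version(alias: str) -> str:
    """Map a matrix alias to a concrete version, passing literals through unchanged."""
    return ALIAS_TO_PYTHON.get(alias, alias)

assert resolve_python_version("current") == "3.11"
assert resolve_python_version("3.10.6") == "3.10.6"
```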
@@ -43,6 +44,7 @@ runs:
run: |
if [ "${{ inputs.install-superset }}" = "true" ]; then
sudo apt-get update && sudo apt-get -y install libldap2-dev libsasl2-dev
pip install --upgrade pip setuptools wheel uv
if [ "${{ inputs.requirements-type }}" = "dev" ]; then
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
- python-version: ["current", "next", "previous"]
+ python-version: ["current", "previous"]
steps:
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/superset-python-integrationtest.yml
@@ -77,7 +77,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
- python-version: ["current", "next", "previous"]
+ python-version: ["current", "previous"]
env:
PYTHONPATH: ${{ github.workspace }}
SUPERSET_CONFIG: tests.integration_tests.superset_test_config
2 changes: 1 addition & 1 deletion .github/workflows/superset-python-unittest.yml
@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
- python-version: ["current", "next"]
+ python-version: ["previous", "current"]
env:
PYTHONPATH: ${{ github.workspace }}
steps:
2 changes: 1 addition & 1 deletion Dockerfile
@@ -18,7 +18,7 @@
######################################################################
# Node stage to deal with static asset construction
######################################################################
- ARG PY_VER=3.10-slim-bookworm
+ ARG PY_VER=3.11-slim-bookworm

# If BUILDPLATFORM is null, set it to 'amd64' (or leave as is otherwise).
ARG BUILDPLATFORM=${BUILDPLATFORM:-amd64}
1 change: 1 addition & 0 deletions UPDATING.md
@@ -32,6 +32,7 @@ assists people when migrating to a new version.
- [31262](https://github.com/apache/superset/pull/31262) NOTE: deprecated `pylint` in favor of `ruff` as our only python linter. This only affects development workflows (positively), not the release itself. Ruff should cover the most important rules and be much faster, but some linting rules that were enforced before may not be enforced in exactly the same way.
- [31173](https://github.com/apache/superset/pull/31173) Modified `fetch_csrf_token` to align with HTTP standards, particularly regarding how cookies are handled. If you encounter any issues related to CSRF functionality, please report them as a new issue and reference this PR for context.
- [31385](https://github.com/apache/superset/pull/31385) Significant docker refactor, reducing access levels for the `superset` user, streamlining layer building, ...
+ - [31503](https://github.com/apache/superset/pull/31503) Deprecating Python 3.9.x support; 3.11 is now the recommended version and 3.10 remains supported over the Superset 5.0 lifecycle.

### Potential Downtime

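The entry above raises the floor to Python 3.10, with 3.11 recommended. A hypothetical deployment-side guard that enforces the same floor might look like this (a sketch, not part of this PR):

```python
# Hypothetical guard enforcing the new floor announced above; not part of this PR.
import sys

MIN_PYTHON = (3, 10)
RECOMMENDED_PYTHON = (3, 11)

if sys.version_info < MIN_PYTHON:
    raise RuntimeError(
        f"Superset 5.x requires Python {'.'.join(map(str, MIN_PYTHON))}+; "
        f"found {sys.version.split()[0]}"
    )
if sys.version_info < RECOMMENDED_PYTHON:
    print("Python 3.10 is supported, but 3.11 is the recommended version.")
```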
9 changes: 4 additions & 5 deletions pyproject.toml
@@ -24,13 +24,12 @@ name = "apache-superset"
description = "A modern, enterprise-ready business intelligence web application"
readme = "README.md"
dynamic = ["version", "scripts", "entry-points"]
- requires-python = ">=3.9"
+ requires-python = ">=3.10"
license = { file="LICENSE.txt" }
authors = [
{ name = "Apache Software Foundation", email = "[email protected]" },
]
classifiers = [
- "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
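The tightened `requires-python` floor means installers will refuse to resolve apache-superset on 3.9. A quick way to see what the new metadata implies, using the `packaging` library that is already a dependency below (the versions checked are just examples):

```python
# Sketch: how an installer evaluates the new requires-python floor.
from packaging.specifiers import SpecifierSet

requires_python = SpecifierSet(">=3.10")

for interpreter in ("3.9.18", "3.10.14", "3.11.7"):
    verdict = "allowed" if interpreter in requires_python else "rejected by the installer"
    print(interpreter, verdict)
```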
@@ -67,7 +66,7 @@ dependencies = [
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
"nh3>=0.2.11, <0.3",
- "numpy==1.23.5",
+ "numpy>1.23.5, <2",
"packaging",
# --------------------------
# pandas and related (wanting pandas[performance] without numba as it's 100+MB and not needed)
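The hard `numpy==1.23.5` pin is relaxed to any 1.x release newer than 1.23.5; the compiled requirements files further down resolve it to 1.26.4. A small containment check of the new specifier (illustrative only):

```python
# Illustrative check of the relaxed numpy range; versions are examples.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

numpy_spec = SpecifierSet(">1.23.5,<2")

print(Version("1.23.5") in numpy_spec)  # False: the old pin is now an open lower bound
print(Version("1.26.4") in numpy_spec)  # True: what requirements/base.txt resolves to
print(Version("2.0.0") in numpy_spec)   # False: numpy 2.x stays excluded
```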
@@ -275,8 +274,8 @@ exclude = [
line-length = 88
indent-width = 4

- # Assume Python 3.9
- target-version = "py39"
+ # Assume Python 3.10
+ target-version = "py310"

[tool.ruff.lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
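With `target-version = "py310"`, ruff can assume 3.10-only syntax is always available. A couple of constructs the linter and its pyupgrade-style fixes can now rely on (illustrative, not code from the repository):

```python
# Illustrative Python 3.10+ constructs a py310 lint target can assume:
# PEP 604 unions (int | None) and structural pattern matching.
def describe(status: int | None) -> str:
    match status:
        case None:
            return "unknown"
        case 200 | 201:
            return "ok"
        case _:
            return f"status {status}"

print(describe(None), describe(200), describe(418))
```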
5 changes: 0 additions & 5 deletions requirements/base.in
@@ -23,8 +23,3 @@ numexpr>=2.9.0
# 5.0.0 has a sensitive deprecation used in other libs
# -> https://github.com/aio-libs/async-timeout/blob/master/CHANGES.rst#500-2024-10-31
async_timeout>=4.0.0,<5.0.0
-
- # playwright requires greenlet==3.0.3
- # submitted a PR to relax deps in 11/2024
- # https://github.com/microsoft/playwright-python/pull/2669
- greenlet==3.0.3
3 changes: 1 addition & 2 deletions requirements/base.txt
@@ -153,7 +153,6 @@ google-auth==2.36.0
# via shillelagh
greenlet==3.0.3
# via
- # -r requirements/base.in
# apache-superset (pyproject.toml)
# shillelagh
# sqlalchemy
@@ -230,7 +229,7 @@ nh3==0.2.19
# via apache-superset (pyproject.toml)
numexpr==2.10.2
# via -r requirements/base.in
- numpy==1.23.5
+ numpy==1.26.4
# via
# apache-superset (pyproject.toml)
# bottleneck
2 changes: 1 addition & 1 deletion requirements/development.txt
@@ -463,7 +463,7 @@ nh3==0.2.19
# apache-superset
nodeenv==1.8.0
# via pre-commit
- numpy==1.23.5
+ numpy==1.26.4
# via
# -c requirements/base.txt
# apache-superset
2 changes: 1 addition & 1 deletion superset/commands/dashboard/export.py
@@ -83,7 +83,7 @@ def append_charts(position: dict[str, Any], charts: set[Slice]) -> dict[str, Any
"parents": ["ROOT_ID", "GRID_ID"],
}

- for chart_hash, chart in zip(chart_hashes, charts):
+ for chart_hash, chart in zip(chart_hashes, charts, strict=False):
position[chart_hash] = {
"children": [],
"id": chart_hash,
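Most of the Python edits in this commit follow a single pattern: `zip()` gained a `strict` keyword in Python 3.10, and with the linter now targeting 3.10 bare `zip()` calls get flagged (most likely by ruff's flake8-bugbear rule B905), so each call site spells out `strict=False` to keep the existing truncate-to-shortest behavior. A minimal, self-contained illustration of the difference:

```python
# zip(strict=...) semantics in Python 3.10+; the data here is made up.
names = ["a", "b", "c"]
values = [1, 2]

# strict=False (the behavior preserved throughout this commit) silently truncates.
print(list(zip(names, values, strict=False)))  # [('a', 1), ('b', 2)]

# strict=True raises when the iterables have different lengths.
try:
    list(zip(names, values, strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is shorter than argument 1
```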
1 change: 1 addition & 0 deletions superset/connectors/sqla/models.py
@@ -1907,6 +1907,7 @@ def query_datasources_by_permissions( # pylint: disable=invalid-name
for method, perms in zip(
(SqlaTable.perm, SqlaTable.schema_perm, SqlaTable.catalog_perm),
(permissions, schema_perms, catalog_perms),
+ strict=False,
)
if perms
]
2 changes: 1 addition & 1 deletion superset/db_engine_specs/hive.py
@@ -440,7 +440,7 @@ def where_latest_partition(
# table is not partitioned
return None
if values is not None and columns is not None:
- for col_name, value in zip(col_names, values):
+ for col_name, value in zip(col_names, values, strict=False):
for clm in columns:
if clm.get("name") == col_name:
query = query.where(Column(col_name) == value)
4 changes: 3 additions & 1 deletion superset/db_engine_specs/ocient.py
@@ -348,7 +348,9 @@ def identity(x: Any) -> Any:
rows = [
tuple(
sanitize_func(val)
- for sanitize_func, val in zip(sanitization_functions, row)
+ for sanitize_func, val in zip(
+     sanitization_functions, row, strict=False
+ )
)
for row in rows
]
6 changes: 3 additions & 3 deletions superset/db_engine_specs/presto.py
@@ -545,7 +545,7 @@ def where_latest_partition(
column.get("column_name"): column.get("type") for column in columns or []
}

- for col_name, value in zip(col_names, values):
+ for col_name, value in zip(col_names, values, strict=False):
col_type = column_type_by_name.get(col_name)

if isinstance(col_type, str):
@@ -1240,7 +1240,7 @@ def expand_data( # pylint: disable=too-many-locals # noqa: C901
if isinstance(values, str):
values = cast(Optional[list[Any]], destringify(values))
row[name] = values
- for value, col in zip(values or [], expanded):
+ for value, col in zip(values or [], expanded, strict=False):
row[col["column_name"]] = value

data = [
@@ -1271,7 +1271,7 @@ def get_extra_table_metadata(

metadata["partitions"] = {
"cols": sorted(indexes[0].get("column_names", [])),
"latest": dict(zip(col_names, latest_parts)),
"latest": dict(zip(col_names, latest_parts, strict=False)),
"partitionQuery": cls._partition_query(
table=table,
indexes=indexes,
2 changes: 1 addition & 1 deletion superset/db_engine_specs/redshift.py
@@ -131,7 +131,7 @@ def df_to_sql(
# uses the max size for redshift nvarchar(65335)
# the default object and string types create a varchar(256)
col_name: NVARCHAR(length=65535)
- for col_name, type in zip(df.columns, df.dtypes)
+ for col_name, type in zip(df.columns, df.dtypes, strict=False)
if isinstance(type, pd.StringDtype)
}

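The Redshift hunk builds the `dtype` mapping passed to pandas `to_sql()`, widening string columns to the maximum `NVARCHAR`. A standalone sketch of the same pattern; the sample data and the generic SQLAlchemy import are assumptions, not code from the repository:

```python
# Sketch of the dtype-mapping pattern above: wide NVARCHAR for pandas string columns.
import pandas as pd
from sqlalchemy.types import NVARCHAR  # generic SQLAlchemy type, used here for illustration

df = pd.DataFrame(
    {"city": pd.array(["NYC", "SF"], dtype="string"), "population": [8.4, 0.8]}
)

to_sql_dtype = {
    col_name: NVARCHAR(length=65535)
    for col_name, dtype in zip(df.columns, df.dtypes, strict=False)
    if isinstance(dtype, pd.StringDtype)
}
print(to_sql_dtype)  # roughly: {'city': NVARCHAR(length=65535)}
```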
2 changes: 1 addition & 1 deletion superset/db_engine_specs/trino.py
@@ -111,7 +111,7 @@ def get_extra_table_metadata(
}
)
),
- "latest": dict(zip(col_names, latest_parts)),
+ "latest": dict(zip(col_names, latest_parts, strict=False)),
"partitionQuery": cls._partition_query(
table=table,
indexes=indexes,
2 changes: 1 addition & 1 deletion superset/extensions/metadb.py
@@ -412,7 +412,7 @@ def get_data(
connection = engine.connect()
rows = connection.execute(query)
for i, row in enumerate(rows):
- data = dict(zip(self.columns, row))
+ data = dict(zip(self.columns, row, strict=False))
data["rowid"] = data[self._rowid] if self._rowid else i
yield data

2 changes: 1 addition & 1 deletion superset/models/helpers.py
@@ -1976,7 +1976,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma

self.make_orderby_compatible(select_exprs, orderby_exprs)

- for col, (orig_col, ascending) in zip(orderby_exprs, orderby): # noqa: B007
+ for col, (orig_col, ascending) in zip(orderby_exprs, orderby, strict=False): # noqa: B007
if not db_engine_spec.allows_alias_in_orderby and isinstance(col, Label):
# if engine does not allow using SELECT alias in ORDER BY
# revert to the underlying column
4 changes: 3 additions & 1 deletion superset/result_set.py
@@ -123,7 +123,9 @@ def __init__( # pylint: disable=too-many-locals # noqa: C901
# fix cursor descriptor with the deduped names
deduped_cursor_desc = [
tuple([column_name, *list(description)[1:]]) # noqa: C409
- for column_name, description in zip(column_names, cursor_description)
+ for column_name, description in zip(
+     column_names, cursor_description, strict=False
+ )
]

# generate numpy structured array dtype
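The result-set hunk rebuilds the DB-API cursor description after column names have been de-duplicated. A self-contained sketch of that idea; the `dedup()` helper below is hypothetical and may not match Superset's actual implementation:

```python
# Sketch of rebuilding a cursor description with de-duplicated column names.
# `dedup` is a hypothetical helper, not necessarily Superset's.
def dedup(names: list[str]) -> list[str]:
    seen: dict[str, int] = {}
    out: list[str] = []
    for name in names:
        if name in seen:
            seen[name] += 1
            out.append(f"{name}__{seen[name]}")
        else:
            seen[name] = 0
            out.append(name)
    return out

cursor_description = [("id", "int"), ("name", "text"), ("name", "text")]
column_names = dedup([col[0] for col in cursor_description])

deduped_cursor_desc = [
    (column_name, *description[1:])
    for column_name, description in zip(column_names, cursor_description, strict=False)
]
print(deduped_cursor_desc)  # [('id', 'int'), ('name', 'text'), ('name__1', 'text')]
```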
2 changes: 1 addition & 1 deletion superset/utils/excel.py
@@ -56,7 +56,7 @@ def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
def apply_column_types(
df: pd.DataFrame, column_types: list[GenericDataType]
) -> pd.DataFrame:
- for column, column_type in zip(df.columns, column_types):
+ for column, column_type in zip(df.columns, column_types, strict=False):
if column_type == GenericDataType.NUMERIC:
try:
df[column] = pd.to_numeric(df[column])
7 changes: 5 additions & 2 deletions superset/utils/mock_data.py
@@ -221,8 +221,11 @@ def get_column_objects(columns: list[ColumnInfo]) -> list[Column]:
def generate_data(columns: list[ColumnInfo], num_rows: int) -> list[dict[str, Any]]:
keys = [column["name"] for column in columns]
return [
- dict(zip(keys, row))
- for row in zip(*[generate_column_data(column, num_rows) for column in columns])
+ dict(zip(keys, row, strict=False))
+ for row in zip(
+     *[generate_column_data(column, num_rows) for column in columns],
+     strict=False,
+ )
]


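`generate_data()` is doing a transpose: each inner iterable holds one column's generated values, `zip(*columns)` turns them into rows, and `dict(zip(keys, row))` labels each row. A toy version with made-up data:

```python
# Toy version of the column-to-row transpose in generate_data(); values are made up.
keys = ["id", "name"]
columns = [
    [1, 2, 3],        # values generated for the "id" column
    ["a", "b", "c"],  # values generated for the "name" column
]

rows = [dict(zip(keys, row, strict=False)) for row in zip(*columns, strict=False)]
print(rows)  # [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}, {'id': 3, 'name': 'c'}]
```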
2 changes: 1 addition & 1 deletion superset/utils/pandas_postprocessing/compare.py
@@ -59,7 +59,7 @@ def compare( # pylint: disable=too-many-arguments
if len(source_columns) == 0:
return df

- for s_col, c_col in zip(source_columns, compare_columns):
+ for s_col, c_col in zip(source_columns, compare_columns, strict=False):
s_df = df.loc[:, [s_col]]
s_df.rename(columns={s_col: "__intermediate"}, inplace=True)
c_df = df.loc[:, [c_col]]
4 changes: 2 additions & 2 deletions superset/utils/pandas_postprocessing/geography.py
@@ -40,7 +40,7 @@ def geohash_decode(
try:
lonlat_df = DataFrame()
lonlat_df["latitude"], lonlat_df["longitude"] = zip(
- *df[geohash].apply(geohash_lib.decode)
+ *df[geohash].apply(geohash_lib.decode), strict=False
)
return _append_columns(
df, lonlat_df, {"latitude": latitude, "longitude": longitude}
@@ -109,7 +109,7 @@ def _parse_location(location: str) -> tuple[float, float, float]:
geodetic_df["latitude"],
geodetic_df["longitude"],
geodetic_df["altitude"],
- ) = zip(*df[geodetic].apply(_parse_location))
+ ) = zip(*df[geodetic].apply(_parse_location), strict=False)
columns = {"latitude": latitude, "longitude": longitude}
if altitude:
columns["altitude"] = altitude
2 changes: 1 addition & 1 deletion superset/utils/pandas_postprocessing/histogram.py
@@ -71,7 +71,7 @@ def hist_values(series: Series) -> np.ndarray:

if len(groupby) == 0:
# without grouping
- hist_dict = dict(zip(bin_edges_str, hist_values(df[column])))
+ hist_dict = dict(zip(bin_edges_str, hist_values(df[column]), strict=False))
histogram_df = DataFrame(hist_dict, index=[0])
else:
# with grouping
2 changes: 2 additions & 0 deletions superset/viz.py
@@ -1483,6 +1483,7 @@ def get_data(self, df: pd.DataFrame) -> VizData:
df[self.form_data.get("all_columns_y")],
metric_col,
point_radius_col,
+ strict=False,
)
],
}
@@ -1606,6 +1607,7 @@ def process_spatial_data_obj(self, key: str, df: pd.DataFrame) -> pd.DataFrame:
zip(
pd.to_numeric(df[spatial.get("lonCol")], errors="coerce"),
pd.to_numeric(df[spatial.get("latCol")], errors="coerce"),
+ strict=False,
)
)
elif spatial.get("type") == "delimited":
4 changes: 3 additions & 1 deletion tests/integration_tests/core_tests.py
@@ -679,7 +679,9 @@ def test_explore_json_dist_bar_order(self):
count_ds = series["values"]
if series["key"] == "COUNT(name)":
count_name = series["values"]
- for expected, actual_ds, actual_name in zip(resp["data"], count_ds, count_name):
+ for expected, actual_ds, actual_name in zip(
+     resp["data"], count_ds, count_name, strict=False
+ ):
assert expected["count_name"] == actual_name["y"]
assert expected["count_ds"] == actual_ds["y"]

6 changes: 4 additions & 2 deletions tests/integration_tests/db_engine_specs/presto_tests.py
@@ -87,7 +87,7 @@ def verify_presto_column(self, column, expected_results):
inspector.bind.execute.return_value.fetchall = mock.Mock(return_value=[row])
results = PrestoEngineSpec.get_columns(inspector, Table("", ""))
assert len(expected_results) == len(results)
- for expected_result, result in zip(expected_results, results):
+ for expected_result, result in zip(expected_results, results, strict=False):
assert expected_result[0] == result["column_name"]
assert expected_result[1] == str(result["type"])

@@ -191,7 +191,9 @@ def test_presto_get_fields(self):
"label": 'column."quoted.nested obj"',
},
]
- for actual_result, expected_result in zip(actual_results, expected_results):
+ for actual_result, expected_result in zip(
+     actual_results, expected_results, strict=False
+ ):
assert actual_result.element.name == expected_result["column_name"]
assert actual_result.name == expected_result["label"]

5 changes: 3 additions & 2 deletions tests/integration_tests/dict_import_export_tests.py
@@ -80,15 +80,16 @@ def create_table(
"id": id,
"params": json.dumps(params),
"columns": [
- {"column_name": c, "uuid": u} for c, u in zip(cols_names, cols_uuids)
+ {"column_name": c, "uuid": u}
+ for c, u in zip(cols_names, cols_uuids, strict=False)
],
"metrics": [{"metric_name": c, "expression": ""} for c in metric_names],
}

table = SqlaTable(
id=id, schema=schema, table_name=name, params=json.dumps(params)
)
- for col_name, uuid in zip(cols_names, cols_uuids):
+ for col_name, uuid in zip(cols_names, cols_uuids, strict=False):
table.columns.append(TableColumn(column_name=col_name, uuid=uuid))
for metric_name in metric_names:
table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
4 changes: 2 additions & 2 deletions tests/integration_tests/import_export_tests.py
@@ -153,7 +153,7 @@ def assert_dash_equals(
assert len(expected_dash.slices) == len(actual_dash.slices)
expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "")
actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "")
- for e_slc, a_slc in zip(expected_slices, actual_slices):
+ for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False):
self.assert_slice_equals(e_slc, a_slc)
if check_position:
assert expected_dash.position_json == actual_dash.position_json
@@ -212,7 +212,7 @@ def assert_only_exported_slc_fields(self, expected_dash, actual_dash):
"""
expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "")
actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "")
- for e_slc, a_slc in zip(expected_slices, actual_slices):
+ for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False):
params = a_slc.params_dict
assert e_slc.datasource.name == params["datasource_name"]
assert e_slc.datasource.schema == params["schema"]