From d4a1826cff21f49d30094d6ce82b4f6e3d62d91e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Wed, 29 May 2024 20:03:53 +0200 Subject: [PATCH 1/7] Ignore deprecation of multi_class argument --- pyproject.toml | 2 ++ tests/test_stacking.py | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 12ea8913..96ff06e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -138,6 +138,8 @@ filterwarnings = [ "ignore:np\\.find_common_type is deprecated. Please use `np\\.result_type` or `np\\.promote_types`:DeprecationWarning", # deprecated since NumPy 2.0 "ignore:`trapz` is deprecated\\. Use `trapezoid` instead.*:DeprecationWarning", + # deprecated since scikit-learn 1.5 + "ignore:'multi_class' was deprecated in version 1\\.5 and will be removed in 1\\.7.*:FutureWarning", ] [tool.coverage.run] diff --git a/tests/test_stacking.py b/tests/test_stacking.py index 8410e5f5..31b8deed 100644 --- a/tests/test_stacking.py +++ b/tests/test_stacking.py @@ -42,13 +42,13 @@ def dummy_data(): @pytest.fixture() def iris_data_with_estimator(): - def _make_estimator(**params): + def _make_estimator(): data = load_iris() x = data["data"] y = data["target"] meta = Stacking( - LogisticRegression(**params), + LogisticRegression(solver="lbfgs", multi_class="multinomial"), [ ("tree", DecisionTreeClassifier(max_depth=1, random_state=0)), ("svm", SVC(probability=True, gamma="auto", random_state=0)), @@ -104,7 +104,7 @@ def test_names_not_unique(dummy_data): @staticmethod def test_fit(iris_data_with_estimator): - x, y, meta = iris_data_with_estimator(solver="liblinear", multi_class="ovr") + x, y, meta = iris_data_with_estimator() assert 2 == len(meta) meta.fit(x, y) @@ -115,7 +115,7 @@ def test_fit(iris_data_with_estimator): @staticmethod def test_fit_sample_weights(iris_data_with_estimator): - x, y, meta = iris_data_with_estimator(solver="liblinear", multi_class="ovr") + x, y, meta = iris_data_with_estimator() sample_weight = np.random.RandomState(0).uniform(size=x.shape[0]) meta.fit(x, y, tree__sample_weight=sample_weight, svm__sample_weight=sample_weight) @@ -147,7 +147,7 @@ def test_set_params(): @staticmethod def test_predict(iris_data_with_estimator): - x, y, meta = iris_data_with_estimator(multi_class="multinomial", solver="lbfgs") + x, y, meta = iris_data_with_estimator() assert 2 == len(meta) meta.fit(x, y) p = meta.predict(x) @@ -158,7 +158,7 @@ def test_predict(iris_data_with_estimator): @staticmethod @pytest.mark.parametrize("method", ["predict_proba", "predict_log_proba"]) def test_predict_proba(iris_data_with_estimator, method): - x, y, meta = iris_data_with_estimator(multi_class="multinomial", solver="lbfgs") + x, y, meta = iris_data_with_estimator() meta.fit(x, y) p = getattr(meta, method)(x) From 85286a62df7bc47ed4bcacd72638805359153150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Wed, 29 May 2024 20:04:37 +0200 Subject: [PATCH 2/7] Ensure array dtype is always float64 --- sksurv/kernels/clinical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sksurv/kernels/clinical.py b/sksurv/kernels/clinical.py index 34653b3a..3ff6d892 100644 --- a/sksurv/kernels/clinical.py +++ b/sksurv/kernels/clinical.py @@ -174,7 +174,7 @@ def _prepare_by_column_dtype(self, X): nominal_columns = [] numeric_ranges = [] - fit_data = np.empty_like(X) + fit_data = np.empty(X.shape, dtype=np.float64) for i, dt in enumerate(X.dtypes): col = X.iloc[:, i] From 31ea7fccd0f95cb921e283fa0cc9a4181ca333a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Wed, 29 May 2024 20:07:21 +0200 Subject: [PATCH 3/7] Update message when inputs differ in size --- sksurv/kernels/clinical.py | 5 ++++- tests/test_clinical_kernel.py | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sksurv/kernels/clinical.py b/sksurv/kernels/clinical.py index 3ff6d892..0301f29f 100644 --- a/sksurv/kernels/clinical.py +++ b/sksurv/kernels/clinical.py @@ -310,7 +310,10 @@ def pairwise_kernel(self, X, Y): """ check_is_fitted(self, "X_fit_") if X.shape[0] != Y.shape[0]: - raise ValueError("X and Y have different number of features") + raise ValueError( + f"Incompatible dimension for X and Y matrices: X.shape[0] == {X.shape[0]} " + f"while Y.shape[0] == {Y.shape[0]}" + ) val = pairwise_continuous_ordinal_kernel( X[self._numeric_columns], Y[self._numeric_columns], self._numeric_ranges diff --git a/tests/test_clinical_kernel.py b/tests/test_clinical_kernel.py index 46403f8d..47c38431 100644 --- a/tests/test_clinical_kernel.py +++ b/tests/test_clinical_kernel.py @@ -228,7 +228,10 @@ def test_pairwise_x_and_y_error_shape(make_data): t = ClinicalKernelTransform() t.fit(data) - with pytest.raises(ValueError, match="X and Y have different number of features"): + with pytest.raises( + ValueError, + match=r"Incompatible dimension for X and Y matrices: X\.shape\[0\] == 4 while Y\.shape\[0\] == 2", + ): t.pairwise_kernel(data.iloc[0, :], data.iloc[1, :2]) @staticmethod @@ -269,9 +272,9 @@ def test_pairwise_feature_mismatch(make_data): with pytest.raises( ValueError, - match=r"Incompatible dimension for X and Y matrices: X.shape\[1\] == 4 while Y.shape\[1\] == 17", + match=r"Incompatible dimension for X and Y matrices: X\.shape\[[0-1]\] == 4 while Y\.shape\[[0-1]\] == 17", ): - pairwise_kernels(t.X_fit_, np.zeros((2, 17), dtype=float), metric=t.pairwise_kernel, n_jobs=1) + pairwise_kernels(t.X_fit_, np.zeros((5, 17), dtype=float), metric=t.pairwise_kernel, n_jobs=1) @staticmethod def test_prepare(make_data): From 024f3bcd65d746018aa5bb0bdc7f213b1eed4a02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Wed, 29 May 2024 21:38:39 +0200 Subject: [PATCH 4/7] Allow scikit-learn 1.5 --- README.rst | 2 +- ci/appveyor/py311.ps1 | 2 +- ci/appveyor/py312.ps1 | 2 +- ci/deps/py311.sh | 2 +- ci/deps/py312.sh | 2 +- doc/install.rst | 2 +- pyproject.toml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 40bd8a30..57100925 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ Requirements - numpy - osqp - pandas 1.0.5 or later -- scikit-learn 1.4 +- scikit-learn 1.4 or 1.5 - scipy - C/C++ compiler diff --git a/ci/appveyor/py311.ps1 b/ci/appveyor/py311.ps1 index c19644ac..1e0a8bbd 100644 --- a/ci/appveyor/py311.ps1 +++ b/ci/appveyor/py311.ps1 @@ -1,4 +1,4 @@ $env:CI_PYTHON_VERSION="3.11.*" $env:CI_PANDAS_VERSION="2.0.*" $env:CI_NUMPY_VERSION="1.25.*" -$env:CI_SKLEARN_VERSION="1.4.*" +$env:CI_SKLEARN_VERSION="1.5.*" diff --git a/ci/appveyor/py312.ps1 b/ci/appveyor/py312.ps1 index d103667b..cb0adce3 100644 --- a/ci/appveyor/py312.ps1 +++ b/ci/appveyor/py312.ps1 @@ -1,4 +1,4 @@ $env:CI_PYTHON_VERSION="3.12.*" $env:CI_PANDAS_VERSION="2.2.*" $env:CI_NUMPY_VERSION="1.26.*" -$env:CI_SKLEARN_VERSION="1.4.*" +$env:CI_SKLEARN_VERSION="1.5.*" diff --git a/ci/deps/py311.sh b/ci/deps/py311.sh index b9a271da..23a2f714 100644 --- a/ci/deps/py311.sh +++ b/ci/deps/py311.sh @@ -2,5 +2,5 @@ export CI_PYTHON_VERSION='3.11.*' export CI_PANDAS_VERSION='2.0.*' export CI_NUMPY_VERSION='1.25.*' -export CI_SKLEARN_VERSION='1.4.*' +export CI_SKLEARN_VERSION='1.5.*' export CI_NO_SLOW=true diff --git a/ci/deps/py312.sh b/ci/deps/py312.sh index fbcfdb0c..ed163c07 100644 --- a/ci/deps/py312.sh +++ b/ci/deps/py312.sh @@ -2,5 +2,5 @@ export CI_PYTHON_VERSION='3.12.*' export CI_PANDAS_VERSION='2.2.*' export CI_NUMPY_VERSION='1.26.*' -export CI_SKLEARN_VERSION='1.4.*' +export CI_SKLEARN_VERSION='1.5.*' export CI_NO_SLOW=false diff --git a/doc/install.rst b/doc/install.rst index fdc7fca7..e79a66f0 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -91,6 +91,6 @@ The current minimum dependencies to run scikit-survival are: - numpy - osqp - pandas 1.0.5 or later -- scikit-learn 1.4 +- scikit-learn 1.4 or 1.5 - scipy - C/C++ compiler diff --git a/pyproject.toml b/pyproject.toml index 96ff06e3..b68a7ada 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ dependencies = [ "osqp !=0.6.0,!=0.6.1", "pandas >=1.0.5", "scipy >=1.3.2", - "scikit-learn >=1.4.0,<1.5", + "scikit-learn >=1.4.0,<1.6", ] dynamic = ["version"] From cbeb475b975d76a69b910bc2604f63d79767d7d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Sun, 2 Jun 2024 14:13:23 +0200 Subject: [PATCH 5/7] Raise numpy version to 2.0.0rc2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b68a7ada..48c02c49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ # same as scikit-learn "Cython>=3.0.8", # building against numpy 2.x is compatible with numpy 1.x - "numpy>=2.0.0rc1", + "numpy>=2.0.0rc2", # scikit-learn requirements "scikit-learn~=1.4.0; python_version<='3.12'", From 01c3a8e4ea1f7a7b9a28cc06083c616447223ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Sun, 2 Jun 2024 14:16:48 +0200 Subject: [PATCH 6/7] Raise minimum Cython version to 3.0.10 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 48c02c49..8a581695 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "setuptools-scm>=8", "packaging", # same as scikit-learn - "Cython>=3.0.8", + "Cython>=3.0.10", # building against numpy 2.x is compatible with numpy 1.x "numpy>=2.0.0rc2", From ce1f061af3981ab86ef295f9223c8d1e514a65d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Sun, 2 Jun 2024 21:35:49 +0200 Subject: [PATCH 7/7] Update output for scikit-learn 1.5 --- doc/user_guide/00-introduction.ipynb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/user_guide/00-introduction.ipynb b/doc/user_guide/00-introduction.ipynb index 3b1b6471..14cb3888 100644 --- a/doc/user_guide/00-introduction.ipynb +++ b/doc/user_guide/00-introduction.ipynb @@ -1187,15 +1187,15 @@ "" ], "text/plain": [ - " param_select__k params split0_test_score split1_test_score \\\n", - "4 5 {'select__k': 5} 0.716093 0.719862 \n", - "3 4 {'select__k': 4} 0.697368 0.722332 \n", - "7 8 {'select__k': 8} 0.706478 0.723320 \n", - "5 6 {'select__k': 6} 0.704453 0.719368 \n", - "6 7 {'select__k': 7} 0.700405 0.719368 \n", - "1 2 {'select__k': 2} 0.699393 0.717885 \n", - "0 1 {'select__k': 1} 0.698887 0.707510 \n", - "2 3 {'select__k': 3} 0.708502 0.714427 \n", + " param_select__k params split0_test_score split1_test_score \\\n", + "4 5 {'select__k': 5} 0.716093 0.719862 \n", + "3 4 {'select__k': 4} 0.697368 0.722332 \n", + "7 8 {'select__k': 8} 0.706478 0.723320 \n", + "5 6 {'select__k': 6} 0.704453 0.719368 \n", + "6 7 {'select__k': 7} 0.700405 0.719368 \n", + "1 2 {'select__k': 2} 0.699393 0.717885 \n", + "0 1 {'select__k': 1} 0.698887 0.707510 \n", + "2 3 {'select__k': 3} 0.708502 0.714427 \n", "\n", " split2_test_score mean_test_score std_test_score rank_test_score \\\n", "4 0.716685 0.717547 0.001655 1 \n",