From fa9fbc75c71899c90639acc0e253e701cb077e4c Mon Sep 17 00:00:00 2001 From: valverde Date: Thu, 21 Nov 2024 21:23:52 +0900 Subject: [PATCH 01/11] Add confidence intervals to the cumulative incidence estimator for competing risks. Tests are included. There are some minimal differences in the results of the confidence intervals with respect to the implementation in the R package cmprsk, so we add some tolerance to the assertions. Apart from the Aalen estimator used in cmprsk we include two other estimators, but these are not tested so thoroughly, because we couldn't find data sources. --- doc/api/datasets.rst | 1 + sksurv/datasets/__init__.py | 1 + sksurv/datasets/base.py | 78 ++++++ sksurv/datasets/data/README.md | 3 + sksurv/datasets/data/cgvhd.txt | 101 ++++++++ sksurv/nonparametric.py | 161 +++++++++++- tests/test_nonparametric.py | 446 ++++++++++++++++++++++++++++++++- 7 files changed, 785 insertions(+), 6 deletions(-) create mode 100644 sksurv/datasets/data/cgvhd.txt diff --git a/doc/api/datasets.rst b/doc/api/datasets.rst index 627c7a45..67c93eee 100644 --- a/doc/api/datasets.rst +++ b/doc/api/datasets.rst @@ -9,6 +9,7 @@ Datasets load_aids load_arff_files_standardized load_bmt + load_cgvhd load_breast_cancer load_flchain load_gbsg2 diff --git a/sksurv/datasets/__init__.py b/sksurv/datasets/__init__.py index d00f17e2..51649e96 100644 --- a/sksurv/datasets/__init__.py +++ b/sksurv/datasets/__init__.py @@ -4,6 +4,7 @@ load_arff_files_standardized, # noqa: F401 load_bmt, # noqa: F401 load_breast_cancer, # noqa: F401 + load_cgvhd, # noqa: F401 load_flchain, # noqa: F401 load_gbsg2, # noqa: F401 load_veterans_lung_cancer, # noqa: F401 diff --git a/sksurv/datasets/base.py b/sksurv/datasets/base.py index 10e9bd57..183711a9 100644 --- a/sksurv/datasets/base.py +++ b/sksurv/datasets/base.py @@ -13,6 +13,7 @@ "load_arff_files_standardized", "load_aids", "load_bmt", + "load_cgvhd", "load_breast_cancer", "load_flchain", "load_gbsg2", @@ -474,9 +475,86 @@ def load_bmt(): .. [1] https://doi.org/10.1038/sj.bmt.1705727 Scrucca, L., Santucci, A. & Aversa, F.: "Competing risk analysis using R: an easy guide for clinicians. Bone Marrow Transplant 40, 381–387 (2007)" + .. [2] https://luca-scr.github.io/R/bmt.csv """ full_path = _get_data_path("bmt.arff") data = loadarff(full_path) data["ftime"] = data["ftime"].astype(int) return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True) + + +def load_cgvhd(): + r"""Load and return data from multicentre randomized clinical trial + initiated for patients with a myeloid malignancy who were to + undergo an allogeneic bone marrow transplant. + + The dataset has 100 samples and 4 features::: + + 1. dx: Diagnosis: AML=acute myeloid leukaemia, CML=chronic myeloid leukaemia, MDS=myelodysplastic syndrome + 2. tx: Randomized treatment: BM=cell harvested from the bone marrow, PB=cell harvested from the peripheral blood + 3. extent: Extent of disease: L=limited, E=extensive + 5. age: Age (years) + + The rest of the columns correspond to outcome variables::: + + 6. survtime: Time from date of transplant to death or last follow-up (Years) + 7. reltime: Time from date of transplant to relapse or last follow-up (Years) + 8. agvhtime: Time from date of transplant to acute GVHD or last follow-up (Years) + 9. cgvhtime: Time from date of transplant to chronic GVHD or last follow-up (Years) + 10. stat Status: 1=Dead, 0=Alive + 11. rcens Relapse: 1=Yes, 0=No + 4. agvhdgd: Grade of acute GVHD + 12. agvh: Acute GVHD: 1=Yes, 0=No + 13. cgvh: Chronic GVHD: 1=Yes, 0=No + + The last column (14) is a patient ID. + + Columns 6,7 and 9 contain the time to death, relapse and CGVHD + calculated in years (survtime, reltime, cgvhtime) and the + respective indicator variables are in columns 10,11 and 13 (stat, + rcens, cgvh). The earliest time that any of these events happened + is calculated by taking the minimum of the observed times. The + censoring variable cens is coded as 0 when no events were + observed, 1 if CGVHD was observed as first event, 2 if a relapse + was observed as the first event and 3 if death occurred before + either of the events: The endpoint (status) are therefore: + + 0. Survival (Right-censored data). 4 patients (4%) + 1. Chronic graft versus host disease (CGVHD). 86 events (86%) + 2. Relapse (TRM). 5 events (5%) + 3. Death. 5 events (5%) + + See [1]_ for further description and [2]_ for the dataset. + + Returns + ------- + x : pandas.DataFrame + The measurements for each patient. + + y : structured array with 2 fields + *status*: Integer indicating the endpoint: 0-(right censored data), 1-(GCVHD), 2-(relapse), 3-(death) + + *ftime*: total length of follow-up or time of event. + + References + ---------- + .. [1] Melania Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006 + + .. [2] https://drive.google.com/file/d/1FPM264pE\_-F8DB7lvFeLB1yQ3HDo7c-i/view + https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics + """ + full_path = _get_data_path("cgvhd.txt") + + df = pd.read_csv(full_path) + df["ftime"] = df[["survtime", "reltime", "cgvhtime"]].min(axis=1) + df["status"] = ( + ((df["ftime"] == df["cgvhtime"]) & (df["cgvh"] == 1)).astype(int) + + 2 * ((df["ftime"] == df["reltime"]) & (df["rcens"] == 1)).astype(int) + + 3 * ((df["ftime"] == df["survtime"]) & (df["stat"] == 1)).astype(int) + ) + df = df[["ftime", "status", "tx"]] + ftime, event = df["ftime"].values, df["status"].values + + # return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True) + return None, {"ftime": ftime, "status": event} diff --git a/sksurv/datasets/data/README.md b/sksurv/datasets/data/README.md index 567d89a5..5f6d2db2 100644 --- a/sksurv/datasets/data/README.md +++ b/sksurv/datasets/data/README.md @@ -12,6 +12,7 @@ for survival analysis. | veteran | [Veteran's Lung Cancer][Kalbfleisch2008] | 137 | 6 | 128 (93.4%) | Death | | whas500 | [Worcester Heart Attack Study][Hosmer2008] | 500 | 14 | 215 (43.0%) | Death | | BMT | [Leukemia HSC Transplant][Scrucca2007] | 35 | 1 | 24 (68.6%) | Transplant related death or relapse | +| GCVHD | [GCVHD][Pintilie2006] | 100 | 4 | 96 (96%) | Chronic graft disease (GCVHD), relapse or death | [Desmedt2007]: http://dx.doi.org/10.1158/1078-0432.CCR-06-2765 "Desmedt, C., Piette, F., Loi et al.: Strong Time Dependence of the 76-Gene Prognostic Signature for Node-Negative Breast Cancer Patients in the TRANSBIG Multicenter Independent Validation Series. Clin. Cancer Res. 13(11), 3207–14 (2007)" @@ -24,3 +25,5 @@ for survival analysis. [Schumacher1994]: http://ascopubs.org/doi/abs/10.1200/jco.1994.12.10.2086 "Schumacher, M., Basert, G., Bojar, H., et al. Randomized 2 × 2 trial evaluating hormonal treatment and the duration of chemotherapy in node-positive breast cancer patients. Journal of Clinical Oncology 12, 2086–2093. (1994)" [Scrucca2007]: https://doi.org/10.1038/sj.bmt.1705727 "Scrucca, L., Santucci, A. & Aversa, F. Competing risk analysis using R: an easy guide for clinicians. Bone Marrow Transplant 40, 381–387 (2007)" + +[Pintilie2006]: https://www.wiley.com/en-us/Competing+Risks%3A+A+Practical+Perspective-p-9780470870693 "Melania Pintilie: Competing Risks: A Practical Perspective. John Wiley & Sons, (2006)" diff --git a/sksurv/datasets/data/cgvhd.txt b/sksurv/datasets/data/cgvhd.txt new file mode 100644 index 00000000..49e06ce9 --- /dev/null +++ b/sksurv/datasets/data/cgvhd.txt @@ -0,0 +1,101 @@ +"dx","tx","extent","agvhdgd","age","survtime","reltime","agvhtime","cgvhtime","stat","rcens","agvh","cgvh","stnum" +"CML","PB","L",1,36,4.895,4.895,0.099,0.520,0,0,1,1, 1 +"AML","PB","L",3,57,3.474,0.753,0.101,0.408,1,1,1,1, 2 +"CML","PB","L",0,48,4.950,4.950,4.950,0.348,0,0,0,1, 3 +"AML","PB","L",2,52,4.643,4.643,0.057,0.482,0,0,1,1, 4 +"AML","PB","L",3,45,4.066,4.066,0.137,0.378,0,0,1,1, 5 +"AML","PB","L",3,47,1.558,0.416,0.055,1.558,1,1,1,0, 6 +"CML","PB","L",1,40,4.512,4.512,0.090,0.381,0,0,1,1, 7 +"AML","PB","L",3,38,4.041,4.041,0.082,0.914,0,0,1,1, 8 +"AML","PB","L",2,41,4.164,4.164,0.055,0.923,0,0,1,1, 9 +"CML","PB","L",0,50,4.011,4.011,4.011,0.397,0,0,0,1, 10 +"CML","PB","L",1,56,3.945,3.945,0.047,0.479,0,0,1,1, 11 +"CML","PB","L",2,56,4.361,4.361,0.079,0.991,0,0,1,1, 12 +"AML","PB","L",1,54,0.841,0.654,0.077,0.474,1,1,1,1, 13 +"CML","PB","L",3,25,2.951,2.951,0.164,0.339,0,0,1,1, 14 +"CML","PB","L",4,40,0.586,0.586,0.055,0.277,1,0,1,1, 15 +"CML","PB","L",0,41,3.559,3.559,3.559,0.367,0,0,0,1, 16 +"CML","PB","L",2,57,3.422,3.422,0.131,0.742,0,0,1,1, 17 +"CML","PB","L",3,62,0.408,0.408,0.408,0.408,0,0,1,1, 18 +"CML","PB","L",1,29,3.428,3.428,0.090,0.958,0,0,1,1, 19 +"AML","PB","L",1,44,0.063,0.063,0.014,0.063,1,0,1,0, 20 +"CML","PB","L",2,40,1.572,1.572,0.090,0.282,1,0,1,1, 21 +"CML","PB","L",1,54,1.013,1.013,0.093,0.413,1,0,1,1, 22 +"AML","PB","L",2,37,3.023,3.023,0.074,0.394,0,0,1,1, 23 +"AML","PB","L",1,58,2.979,2.979,0.079,0.342,0,0,1,1, 24 +"CML","PB","L",3,39,2.817,2.817,0.049,0.367,0,0,1,1, 25 +"CML","PB","L",2,31,2.804,2.804,0.137,0.277,0,0,1,1, 26 +"CML","PB","L",2,45,2.609,2.609,0.252,0.367,0,0,1,1, 27 +"AML","PB","L",0,48,2.508,2.508,2.508,0.331,0,0,0,1, 28 +"CML","PB","L",0,53,0.665,0.665,0.665,0.320,1,0,0,1, 29 +"CML","PB","L",0,29,2.497,2.497,2.497,0.329,0,0,0,1, 30 +"CML","PB","L",0,27,1.799,1.799,1.799,0.444,1,0,0,1, 31 +"AML","PB","L",3,45,0.471,0.438,0.071,0.471,1,1,1,0, 32 +"CML","PB","L",1,39,2.031,2.031,0.112,0.964,0,0,1,1, 33 +"CML","PB","L",3,49,2.073,2.073,0.063,0.564,0,0,1,1, 34 +"AML","PB","L",1,37,0.999,0.750,0.274,0.402,1,1,1,1, 35 +"AML","PB","L",3,53,0.427,0.427,0.055,0.277,1,0,1,1, 36 +"CML","PB","L",1,48,1.766,1.766,0.216,0.400,0,0,1,1, 37 +"AML","PB","L",1,59,1.555,1.555,0.178,0.446,0,0,1,1, 38 +"CML","PB","L",2,33,1.670,1.670,0.110,0.474,0,0,1,1, 39 +"CML","PB","L",0,38,1.607,1.607,1.607,0.329,0,0,0,1, 40 +"CML","PB","L",4,37,1.511,1.511,0.055,0.323,0,0,1,1, 41 +"AML","PB","L",3,41,1.287,1.287,0.049,0.392,0,0,1,1, 42 +"AML","PB","E",1,64,1.227,1.227,0.230,0.496,0,0,1,1, 43 +"CML","PB","L",3,32,1.300,1.300,0.063,0.630,0,0,1,1, 44 +"CML","PB","L",0,41,1.270,1.270,1.270,0.383,0,0,0,1, 45 +"AML","PB","E",1,56,1.205,1.205,0.074,1.205,0,0,1,0, 46 +"CML","PB","L",1,50,1.147,1.147,0.131,0.361,0,0,1,1, 47 +"CML","PB","L",3,37,1.109,1.109,0.055,0.277,0,0,1,1, 48 +"CML","PB","L",0,27,0.994,0.994,0.994,0.287,0,0,0,1, 49 +"CML","BM","L",3,45,4.572,4.572,0.066,0.619,0,0,1,1, 50 +"AML","BM","L",3,45,4.616,4.616,0.101,0.452,0,0,1,1, 51 +"AML","BM","L",2,42,4.000,4.000,0.027,0.290,0,0,1,1, 52 +"CML","BM","L",0,22,4.238,4.238,4.238,0.479,0,0,0,1, 53 +"AML","BM","L",4,47,0.110,0.110,0.074,0.110,1,0,1,0, 54 +"AML","BM","L",2,48,4.030,4.030,0.101,0.857,0,0,1,1, 55 +"AML","BM","L",2,49,3.124,2.527,0.115,1.993,1,1,1,1, 56 +"CML","BM","L",2,38,0.515,0.515,0.079,0.463,1,0,1,1, 57 +"CML","BM","L",1,39,4.222,3.149,0.085,0.496,0,1,1,1, 58 +"CML","BM","L",3,41,4.027,4.027,0.104,0.422,0,0,1,1, 59 +"CML","BM","L",2,46,1.969,1.969,0.038,0.307,1,0,1,1, 60 +"AML","BM","L",0,24,3.792,3.792,3.792,0.701,0,0,0,1, 61 +"AML","BM","L",3,32,0.427,0.427,0.041,0.279,1,0,1,1, 62 +"CML","BM","L",0,36,3.340,3.340,3.340,0.419,0,0,0,1, 63 +"CML","BM","L",1,53,3.504,0.720,0.112,0.616,0,1,1,1, 64 +"CML","BM","L",0,52,3.685,3.685,3.685,0.331,0,0,0,1, 65 +"CML","BM","L",1,59,0.181,0.181,0.049,0.181,1,0,1,0, 66 +"CML","BM","L",3,42,0.736,0.736,0.090,0.567,1,0,1,1, 67 +"CML","BM","L",1,65,0.287,0.287,0.052,0.287,1,0,1,0, 68 +"CML","BM","E",0,60,0.057,0.057,0.057,0.057,0,0,0,0, 69 +"CML","BM","L",2,61,3.107,3.107,0.088,0.764,0,0,1,1, 70 +"CML","BM","L",1,55,3.088,3.088,0.110,0.381,0,0,1,1, 71 +"AML","BM","E",0,48,0.446,0.274,0.446,0.446,1,1,0,0, 72 +"AML","BM","E",0,49,2.776,2.776,2.776,2.776,0,0,0,0, 73 +"CML","BM","L",0,36,0.693,0.172,0.693,0.635,1,1,0,1, 74 +"AML","BM","L",1,48,2.010,2.010,0.077,0.553,0,0,1,1, 75 +"CML","BM","L",0,47,2.374,2.374,2.374,0.287,0,0,0,1, 76 +"AML","BM","L",3,43,1.079,1.079,0.088,0.345,1,0,1,1, 77 +"CML","BM","L",0,56,2.604,2.604,2.604,0.375,0,0,0,1, 78 +"CML","BM","L",1,56,2.478,2.478,0.170,0.517,0,0,1,1, 79 +"CML","BM","L",0,36,2.338,2.338,2.338,0.457,0,0,0,1, 80 +"CML","BM","L",2,52,2.300,2.300,0.049,0.345,0,0,1,1, 81 +"CML","BM","E",1,44,0.219,0.219,0.145,0.219,1,0,1,0, 82 +"AML","BM","L",3,32,2.127,2.127,0.118,0.422,0,0,1,1, 83 +"AML","BM","L",1,44,2.034,2.034,0.096,0.479,0,0,1,1, 84 +"CML","BM","L",0,45,2.034,2.034,2.034,0.290,0,0,0,1, 85 +"AML","BM","L",3,48,2.007,2.007,0.088,0.350,0,0,1,1, 86 +"CML","BM","L",0,48,1.183,1.183,1.183,0.372,0,0,0,1, 87 +"AML","BM","L",3,42,0.375,0.375,0.096,0.277,1,0,1,1, 88 +"AML","BM","E",2,24,0.353,0.301,0.096,0.353,1,1,1,0, 89 +"CML","BM","L",2,26,1.566,1.566,0.137,0.474,0,0,1,1, 90 +"CML","BM","L",2,34,1.588,1.588,0.129,0.465,0,0,1,1, 91 +"CML","BM","L",0,57,1.243,1.243,1.243,0.433,0,0,0,1, 92 +"CML","BM","L",3,51,1.555,1.555,0.090,0.359,0,0,1,1, 93 +"AML","BM","L",2,54,1.202,1.202,0.192,1.202,0,0,1,0, 94 +"AML","BM","E",0,20,1.251,1.251,1.251,0.408,0,0,0,1, 95 +"AML","BM","L",2,39,1.114,1.114,0.074,0.402,0,0,1,1, 96 +"AML","BM","L",0,49,1.150,1.150,1.150,0.350,0,0,0,1, 97 +"CML","BM","L",1,42,0.997,0.997,0.142,0.411,0,0,1,1, 98 +"CML","BM","L",0,44,1.057,1.057,1.057,0.301,0,0,0,1, 99 +"CML","BM","L",1,56,1.125,1.125,0.129,0.320,0,0,1,1,100 diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index 21e45fad..9adec750 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -601,7 +601,42 @@ def predict_ipcw(self, y): return weights -def cumulative_incidence_competing_risks(event, time_exit, time_min=None): +def _cr_ci_logmlog(cum_inc, sigma_t, z): + """Compute the pointwise log-minus-log transformed confidence intervals""" + eps = np.finfo(cum_inc.dtype).eps + log_cum_i = np.zeros_like(cum_inc) + np.log(cum_inc, where=cum_inc > eps, out=log_cum_i) + theta = np.zeros_like(cum_inc) + den = cum_inc * log_cum_i + np.divide(sigma_t, den, where=den < -eps, out=theta) + theta = z * np.multiply.outer(np.array([-1, 1]), theta) + ci = np.exp(log_cum_i * np.exp(theta)) + ci[:, cum_inc <= eps] = 0.0 + ci[:, 1.0 - cum_inc <= eps] = 1.0 + return ci + + +def _cum_inc_cr_ci_estimator(cum_inc, var, conf_level, conf_type): + if conf_type not in {"log-log"}: + raise ValueError(f"conf_type must be None or a str among {{'log-log'}}, but was {conf_type!r}") + + if not isinstance(conf_level, numbers.Real) or not np.isfinite(conf_level) or conf_level <= 0 or conf_level >= 1.0: + raise ValueError(f"conf_level must be a float in the range (0.0, 1.0), but was {conf_level!r}") + + z = stats.norm.isf((1.0 - conf_level) / 2.0) + sigma = np.sqrt(var) + ci = _cr_ci_logmlog(cum_inc[1:], sigma, z) + return ci + + +def cumulative_incidence_competing_risks( + event, + time_exit, + time_min=None, + conf_level=0.95, + conf_type=None, + var_type=None, +): """Non-parametric estimator of Cumulative Incidence function in the case of competing risks. See [1]_ for further description. @@ -621,6 +656,19 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): Compute estimator conditional on survival at least up to the specified time. + conf_level : float, optional, default: 0.95 + The level for a two-sided confidence interval on the survival curves. + + conf_type : None or {'log-log'}, optional, default: None. + The type of confidence intervals to estimate. + If `None`, no confidence intervals are estimated. + If "log-log", estimate confidence intervals using + the log hazard or :math:`log(-log(S(t)))`. + + var_type : None or one of {'Dinse', 'Dinse_Approx', 'Aalen'}, optional, default: None + The method for estimating the variance of the estimator. + Only valid if conf_type is valid. + Returns ------- time : array, shape = (n_times,) @@ -631,6 +679,12 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): The first dimension indicates total risk (``cum_incidence[0]``), the dimension `i=1,..,n_risks` the incidence for each competing risk. + conf_int : array, shape = (2, n_risks + 1, n_times) + Pointwise confidence interval of the Kaplan-Meier estimator + at each unique time point (last index) + for all possible risks (second index) including overall risk (i=0). + Only provided if `conf_type` is not None. + Examples -------- Creating cumulative incidence curves: @@ -640,10 +694,12 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): >>> event = bmt_df["status"] >>> time = bmt_df["ftime"] >>> n_risks = event.max() - >>> x, y = cumulative_incidence_competing_risks(event, time) + >>> x, y, conf_int = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Aalen") >>> plt.step(x, y[0], where="post", label="Total risk") + >>> plt.fill_between(x, conf_int[0,0], conf_int[1,0], alpha=0.25, step="post") >>> for i in range(1, n_risks + 1): >>> plt.step(x, y[i], where="post", label=f"{i}-risk") + >>> plt.fill_between(x, conf_int[0,i], conf_int[1,i], alpha=0.25, step="post") >>> plt.ylim(0, 1) >>> plt.legend() >>> plt.show() @@ -680,4 +736,103 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): cum_inc[1 : n_risks + 1] = np.cumsum((ratio[:, 1 : n_risks + 1].T * kpe_prime), axis=1) cum_inc[0] = 1.0 - kpe - return uniq_times, cum_inc + if conf_type is None: + return uniq_times, cum_inc + + if var_type == "Dinse": + var = var_dinse(n_events_cr, kpe_prime, n_at_risk, cum_inc) + elif var_type == "Dinse_Approx": + var = var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc) + elif var_type == "Aalen": + var = var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc) + else: + raise ValueError(f"{var_type=} not implemented.") + + _x, _y, conf_int_km = kaplan_meier_estimator(event > 0, time_exit, conf_type="log-log") + ci = np.empty(shape=(2, n_risks + 1, n_t), dtype=conf_int_km.dtype) + ci[:, 0, :] = 1 - conf_int_km + ci[:, 1:, :] = _cum_inc_cr_ci_estimator(cum_inc, var, conf_level, conf_type) + + return uniq_times, cum_inc, ci + + +def var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc): + """ + Variance estimator from Dinse and Larson, Biometrika (1986), 379 + See Section 4, Eqs. 6. + This is an approximation from the var_dinse, so that one should be preferred. + However, this seems to be more common in the literature. + """ + dr = n_events_cr[:, 0] + dr_cr = n_events_cr[:, 1:].T + irt = cum_inc[1:, :, np.newaxis] - cum_inc[1:, np.newaxis, :] + mask = np.tril(np.ones_like(irt[0])) + + var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, dr / (n_at_risk * (n_at_risk - dr))) + var_b = np.cumsum(((n_at_risk - dr_cr) / n_at_risk) * (dr_cr / n_at_risk**2) * kpe_prime**2, axis=1) + var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, dr_cr, kpe_prime / n_at_risk**2) + + var = var_a + var_b + var_c + return var + + +def var_dinse(n_events_cr, kpe_prime, n_at_risk): + """ + Variance estimator from Dinse and Larson, Biometrika (1986), 379 + See Section 4, Eqs. 4 and 5 + """ + dr = n_events_cr[:, 0] + dr_cr = n_events_cr[:, 1:].T + theta = dr_cr * kpe_prime / n_at_risk + x = dr / (n_at_risk * (n_at_risk - dr)) + cprod = np.cumprod(1 + x) / (1 + x) + + n_t = dr.size + i_idx = np.arange(n_t)[:, None, None] + j_idx = np.arange(n_t)[None, :, None] + k_idx = np.arange(n_t)[None, None, :] + mask = ((j_idx < i_idx) & (k_idx > j_idx) & (k_idx <= i_idx)).astype(int) + + _v1 = np.zeros_like(theta) + np.divide((n_at_risk - dr_cr), n_at_risk * dr_cr, out=_v1, where=dr_cr > 0) + v1 = np.cumsum(theta**2 * ((1 + _v1) * cprod - 1), axis=1) + + corr = (1 - 1 / n_at_risk) * cprod - 1 + v2 = 2 * np.einsum("rj,rk,ijk->ri", theta * corr, theta, mask) + var = v1 + v2 + + return var + + +def var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): + """ + Variance estimator from Aalen + Aalen, O. (1978a). Nonparametric estimation of partial transition + probabilities in multiple decrement models. Annals of Statistics, 6, 534–545. + We implement it as shown in + M. Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006, Eq. 4.5 + This seems to be the estimator used in cmprsk, but there are some numerical differences with our implementation. + """ + dr = n_events_cr[:, 0] + dr_cr = n_events_cr[:, 1:].T + irt = cum_inc[1:, :, np.newaxis] - cum_inc[1:, np.newaxis, :] + mask = np.tril(np.ones_like(irt[0])) + + _va = np.zeros_like(kpe_prime) + den_a = (n_at_risk - 1) * (n_at_risk - dr) + np.divide(dr, den_a, out=_va, where=den_a > 0) + var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, _va) + + _vb = np.zeros_like(kpe_prime) + den_b = (n_at_risk - 1) * n_at_risk**2 + np.divide(1.0, den_b, out=_vb, where=den_b > 0) + var_b = np.cumsum((n_at_risk - dr_cr) * dr_cr * _vb * kpe_prime**2, axis=1) + + _vca = dr_cr * (n_at_risk - dr_cr) + _vcb = np.zeros_like(kpe_prime) + den_c = n_at_risk * (n_at_risk - dr) * (n_at_risk - 1) + np.divide(kpe_prime, den_c, out=_vcb, where=den_c > 0) + var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, _vca, _vcb) + + var = var_a + var_b + var_c + return var diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py index 7c46d25b..2c16eb23 100644 --- a/tests/test_nonparametric.py +++ b/tests/test_nonparametric.py @@ -1,11 +1,11 @@ from os.path import dirname, join import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal import pandas as pd import pytest -from sksurv.datasets import load_bmt +from sksurv.datasets import load_bmt, load_cgvhd from sksurv.nonparametric import ( CensoringDistributionEstimator, SurvivalFunctionEstimator, @@ -6496,6 +6496,415 @@ def data_three_competing_cases(self): return event, time, true_x, true_y +class CGVHD_DataSets(FixtureParameterFactory): + _, df = load_cgvhd() + event, ftime = df["status"], df["ftime"] + + def data_full(self): + event = self.event + time = self.ftime + + true_x = np.array( + [ + 0.057, + 0.063, + 0.11, + 0.172, + 0.181, + 0.219, + 0.274, + 0.277, + 0.279, + 0.282, + 0.287, + 0.29, + 0.301, + 0.307, + 0.32, + 0.323, + 0.329, + 0.331, + 0.339, + 0.342, + 0.345, + 0.348, + 0.35, + 0.359, + 0.361, + 0.367, + 0.372, + 0.375, + 0.378, + 0.381, + 0.383, + 0.392, + 0.394, + 0.397, + 0.4, + 0.402, + 0.408, + 0.411, + 0.413, + 0.416, + 0.419, + 0.422, + 0.433, + 0.438, + 0.444, + 0.446, + 0.452, + 0.457, + 0.463, + 0.465, + 0.474, + 0.479, + 0.482, + 0.496, + 0.517, + 0.52, + 0.553, + 0.564, + 0.567, + 0.616, + 0.619, + 0.63, + 0.701, + 0.742, + 0.764, + 0.857, + 0.914, + 0.923, + 0.958, + 0.964, + 0.991, + 1.202, + 1.205, + 1.993, + 2.776, + ] + ) + true_y = np.array( + [ + [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0.0505050505050505, + 0.0606060606060606, + 0.0707070707070707, + 0.0909090909090909, + 0.111111111111111, + 0.121212121212121, + 0.131313131313131, + 0.151515151515152, + 0.161616161616162, + 0.181818181818182, + 0.202020202020202, + 0.212121212121212, + 0.222222222222222, + 0.242424242424243, + 0.252525252525253, + 0.272727272727273, + 0.282828282828283, + 0.292929292929293, + 0.323232323232323, + 0.333333333333334, + 0.343434343434344, + 0.353535353535354, + 0.373737373737374, + 0.383838383838384, + 0.393939393939394, + 0.404040404040404, + 0.414141414141414, + 0.424242424242425, + 0.444444444444445, + 0.474747474747475, + 0.484848484848485, + 0.494949494949495, + 0.494949494949495, + 0.505050505050505, + 0.525252525252526, + 0.535353535353536, + 0.535353535353536, + 0.545454545454546, + 0.555555555555556, + 0.565656565656566, + 0.575757575757576, + 0.585858585858586, + 0.595959595959596, + 0.626262626262626, + 0.656565656565657, + 0.666666666666667, + 0.686868686868687, + 0.696969696969697, + 0.707070707070707, + 0.717171717171718, + 0.727272727272728, + 0.737373737373738, + 0.747474747474748, + 0.757575757575758, + 0.767676767676768, + 0.777777777777779, + 0.787878787878789, + 0.797979797979799, + 0.808080808080809, + 0.818181818181819, + 0.828282828282829, + 0.838383838383839, + 0.848484848484849, + 0.858585858585859, + 0.858585858585859, + 0.858585858585859, + 0.878787878787879, + 0.878787878787879, + ], + [ + 0, + 0, + 0, + 0.0101010101010101, + 0.0101010101010101, + 0.0101010101010101, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + ], + [ + 0, + 0.0101010101010101, + 0.0202020202020202, + 0.0202020202020202, + 0.0303030303030303, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + ], + ] + ) + + cix = np.array( + [ + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, 8.76630585e-04, np.nan, np.nan, 4.98093393e-02], + [np.nan, np.nan, 3.86470442e-03, np.nan, np.nan, 6.45435439e-02], + [np.nan, 8.76353733e-04, 3.86470442e-03, np.nan, 4.98160434e-02, 6.45435439e-02], + [np.nan, 8.76353733e-04, 8.09297347e-03, np.nan, 4.98160434e-02, 7.90139451e-02], + [np.nan, 8.76353733e-04, 1.31071435e-02, np.nan, 4.98160434e-02, 9.29739191e-02], + [np.nan, 3.86346901e-03, 1.31071435e-02, np.nan, 6.45537226e-02, 9.29739191e-02], + [1.86735626e-02, 3.86346901e-03, 1.31071435e-02, 1.06521413e-01, 6.45537226e-02, 9.29739191e-02], + [2.46648637e-02, 3.86346901e-03, 1.31071435e-02, 1.19714550e-01, 6.45537226e-02, 9.29739191e-02], + [3.09922063e-02, 3.86346901e-03, 1.31071435e-02, 1.32625202e-01, 6.45537226e-02, 9.29739191e-02], + [4.44345647e-02, 3.86346901e-03, 1.86753126e-02, 1.57768941e-01, 6.45537226e-02, 1.06515798e-01], + [5.86926671e-02, 3.86346901e-03, 1.86753126e-02, 1.82197926e-01, 6.45537226e-02, 1.06515798e-01], + [6.60668761e-02, 8.08749124e-03, 1.86753126e-02, 1.94196068e-01, 7.90421599e-02, 1.06515798e-01], + [7.35824051e-02, 8.08749124e-03, 1.86753126e-02, 2.06068244e-01, 7.90421599e-02, 1.06515798e-01], + [8.89863978e-02, 8.08749124e-03, 1.86753126e-02, 2.29479009e-01, 7.90421599e-02, 1.06515798e-01], + [9.68543051e-02, 8.08749124e-03, 1.86753126e-02, 2.41035520e-01, 7.90421599e-02, 1.06515798e-01], + [1.12882999e-01, 8.08749124e-03, 1.86753126e-02, 2.63883900e-01, 7.90421599e-02, 1.06515798e-01], + [1.29260771e-01, 8.08749124e-03, 1.86753126e-02, 2.86415701e-01, 7.90421599e-02, 1.06515798e-01], + [1.37567828e-01, 8.08749124e-03, 1.86753126e-02, 2.97574049e-01, 7.90421599e-02, 1.06515798e-01], + [1.45948291e-01, 8.08749124e-03, 1.86753126e-02, 3.08665304e-01, 7.90421599e-02, 1.06515798e-01], + [1.62916013e-01, 8.08749124e-03, 1.86753126e-02, 3.30658087e-01, 7.90421599e-02, 1.06515798e-01], + [1.71496808e-01, 8.08749124e-03, 1.86753126e-02, 3.41565431e-01, 7.90421599e-02, 1.06515798e-01], + [1.88840363e-01, 8.08749124e-03, 1.86753126e-02, 3.63212079e-01, 7.90421599e-02, 1.06515798e-01], + [1.97598250e-01, 8.08749124e-03, 1.86753126e-02, 3.73955784e-01, 7.90421599e-02, 1.06515798e-01], + [2.06411125e-01, 8.08749124e-03, 1.86753126e-02, 3.84648459e-01, 7.90421599e-02, 1.06515798e-01], + [2.33164884e-01, 8.08749124e-03, 1.86753126e-02, 4.16432824e-01, 7.90421599e-02, 1.06515798e-01], + [2.42181998e-01, 8.08749124e-03, 1.86753126e-02, 4.26935045e-01, 7.90421599e-02, 1.06515798e-01], + [2.51246943e-01, 8.08749124e-03, 1.86753126e-02, 4.37392426e-01, 7.90421599e-02, 1.06515798e-01], + [2.60358645e-01, 8.08749124e-03, 1.86753126e-02, 4.47805885e-01, 7.90421599e-02, 1.06515798e-01], + [2.78719164e-01, 8.08749124e-03, 1.86753126e-02, 4.68503631e-01, 7.90421599e-02, 1.06515798e-01], + [2.87965610e-01, 8.08749124e-03, 1.86753126e-02, 4.78790029e-01, 7.90421599e-02, 1.06515798e-01], + [2.97255414e-01, 8.08749124e-03, 1.86753126e-02, 4.89035373e-01, 7.90421599e-02, 1.06515798e-01], + [3.06587917e-01, 8.08749124e-03, 1.86753126e-02, 4.99240206e-01, 7.90421599e-02, 1.06515798e-01], + [3.15962523e-01, 8.08749124e-03, 1.86753126e-02, 5.09405012e-01, 7.90421599e-02, 1.06515798e-01], + [3.25378702e-01, 8.08749124e-03, 1.86753126e-02, 5.19530217e-01, 7.90421599e-02, 1.06515798e-01], + [3.44334814e-01, 8.08749124e-03, 1.86753126e-02, 5.39662456e-01, 7.90421599e-02, 1.06515798e-01], + [3.73072188e-01, 8.08749124e-03, 1.86753126e-02, 5.69570192e-01, 7.90421599e-02, 1.06515798e-01], + [3.82729571e-01, 8.08749124e-03, 1.86753126e-02, 5.79463982e-01, 7.90421599e-02, 1.06515798e-01], + [3.92426305e-01, 8.08749124e-03, 1.86753126e-02, 5.89319725e-01, 7.90421599e-02, 1.06515798e-01], + [3.92426305e-01, 1.30725842e-02, 1.86753126e-02, 5.89319725e-01, 9.31084543e-02, 1.06515798e-01], + [4.02161016e-01, 1.30725842e-02, 1.86753126e-02, 5.99138501e-01, 9.31084543e-02, 1.06515798e-01], + [4.21749168e-01, 1.30725842e-02, 1.86753126e-02, 6.18660773e-01, 9.31084543e-02, 1.06515798e-01], + [4.31601113e-01, 1.30725842e-02, 1.86753126e-02, 6.28365328e-01, 9.31084543e-02, 1.06515798e-01], + [4.31601113e-01, 1.86073989e-02, 1.86753126e-02, 6.28365328e-01, 1.06734142e-01, 1.06515798e-01], + [4.41490527e-01, 1.86073989e-02, 1.86753126e-02, 6.38032889e-01, 1.06734142e-01, 1.06515798e-01], + [4.51419138e-01, 1.86073989e-02, 1.86753126e-02, 6.47661932e-01, 1.06734142e-01, 1.06515798e-01], + [4.61387078e-01, 1.86073989e-02, 1.86753126e-02, 6.57252226e-01, 1.06734142e-01, 1.06515798e-01], + [4.71394524e-01, 1.86073989e-02, 1.86753126e-02, 6.66803488e-01, 1.06734142e-01, 1.06515798e-01], + [4.81441701e-01, 1.86073989e-02, 1.86753126e-02, 6.76315386e-01, 1.06734142e-01, 1.06515798e-01], + [4.91528876e-01, 1.86073989e-02, 1.86753126e-02, 6.85787537e-01, 1.06734142e-01, 1.06515798e-01], + [5.22042223e-01, 1.86073989e-02, 1.86753126e-02, 7.13954859e-01, 1.06734142e-01, 1.06515798e-01], + [5.52931085e-01, 1.86073989e-02, 1.86753126e-02, 7.41744842e-01, 1.06734142e-01, 1.06515798e-01], + [5.63308855e-01, 1.86073989e-02, 1.86753126e-02, 7.50923965e-01, 1.06734142e-01, 1.06515798e-01], + [5.84202761e-01, 1.86073989e-02, 1.86753126e-02, 7.69142314e-01, 1.06734142e-01, 1.06515798e-01], + [5.94714799e-01, 1.86073989e-02, 1.86753126e-02, 7.78183131e-01, 1.06734142e-01, 1.06515798e-01], + [6.05273508e-01, 1.86073989e-02, 1.86753126e-02, 7.87175387e-01, 1.06734142e-01, 1.06515798e-01], + [6.15879903e-01, 1.86073989e-02, 1.86753126e-02, 7.96117716e-01, 1.06734142e-01, 1.06515798e-01], + [6.26535089e-01, 1.86073989e-02, 1.86753126e-02, 8.05008614e-01, 1.06734142e-01, 1.06515798e-01], + [6.37240259e-01, 1.86073989e-02, 1.86753126e-02, 8.13846424e-01, 1.06734142e-01, 1.06515798e-01], + [6.47996704e-01, 1.86073989e-02, 1.86753126e-02, 8.22629319e-01, 1.06734142e-01, 1.06515798e-01], + [6.58805813e-01, 1.86073989e-02, 1.86753126e-02, 8.31355277e-01, 1.06734142e-01, 1.06515798e-01], + [6.69669074e-01, 1.86073989e-02, 1.86753126e-02, 8.40022063e-01, 1.06734142e-01, 1.06515798e-01], + [6.80588073e-01, 1.86073989e-02, 1.86753126e-02, 8.48627195e-01, 1.06734142e-01, 1.06515798e-01], + [6.91564481e-01, 1.86073989e-02, 1.86753126e-02, 8.57167918e-01, 1.06734142e-01, 1.06515798e-01], + [7.02600038e-01, 1.86073989e-02, 1.86753126e-02, 8.65641160e-01, 1.06734142e-01, 1.06515798e-01], + [7.13696504e-01, 1.86073989e-02, 1.86753126e-02, 8.74043497e-01, 1.06734142e-01, 1.06515798e-01], + [7.24855578e-01, 1.86073989e-02, 1.86753126e-02, 8.82371106e-01, 1.06734142e-01, 1.06515798e-01], + [7.36078733e-01, 1.86073989e-02, 1.86753126e-02, 8.90619734e-01, 1.06734142e-01, 1.06515798e-01], + [7.47366885e-01, 1.86073989e-02, 1.86753126e-02, 8.98784683e-01, 1.06734142e-01, 1.06515798e-01], + [7.58719674e-01, 1.86073989e-02, 1.86753126e-02, 9.06860885e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], + [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], + ] + ).T + + true_ci = np.empty(shape=(2, 3, 75), dtype=cix.dtype) + true_ci[0], true_ci[1] = cix[0:3], cix[3:] + + return event, time, true_x, true_y, true_ci + + class TestCumIncCompetingRisks: @staticmethod @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) @@ -6540,6 +6949,37 @@ def test_truncated(event, time, true_x, true_y): assert_array_equal(x, true_x) _x_km, true_km = kaplan_meier_estimator(event > 0, time, time_min=time_min) - assert_array_almost_equal(y[0], 1.0 - true_km) + assert_array_almost_equal(y[0], 1 - true_km) + assert_array_almost_equal(y[1:], true_y) + assert_array_almost_equal(y[0], y[1:].sum(axis=0)) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y, true_ci", CGVHD_DataSets().get_cases()) + def test_ci(event, time, true_x, true_y, true_ci): + x, y, ci = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Aalen") + + assert_array_equal(x, true_x) + _x_km, true_km, true_ci_0 = kaplan_meier_estimator(event > 0, time, conf_type="log-log") + assert_array_almost_equal(y[0], 1 - true_km) assert_array_almost_equal(y[1:], true_y) assert_array_almost_equal(y[0], y[1:].sum(axis=0)) + + assert_allclose(ci[:, 0, :], 1 - true_ci_0, rtol=5.0e-4) + + ci_1 = ci[:, 1:, :] + nan_mask = np.isnan(true_ci) + assert_allclose(np.ma.masked_array(ci_1, nan_mask), np.ma.masked_array(true_ci, nan_mask), rtol=5.0e-4) + assert_array_equal(np.ma.masked_array(ci_1, ~nan_mask), np.ma.masked_array(np.zeros_like(ci_1), ~nan_mask)) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y, true_ci", CGVHD_DataSets().get_cases()) + def test_scrucca_ci(event, time, true_x, true_y, true_ci): + x, y, ci_full = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Dinse_Approx") + + t = 0.5 + it = np.argmax(t < x) - 1 + ci = ci_full[:, 1, it] + true_ci = np.array([0.5855, 0.7683]) + + assert x[it] <= t < x[it + 1] + assert_allclose(ci, true_ci, rtol=5.0e-4) From da316fcf354c48c8424fc7bebcd6934dd2d86b16 Mon Sep 17 00:00:00 2001 From: valverde Date: Fri, 6 Dec 2024 17:33:49 +0900 Subject: [PATCH 02/11] Remove MS-DOS new line characters from data files. --- sksurv/datasets/data/cgvhd.txt | 202 ++++++++++++++++----------------- 1 file changed, 101 insertions(+), 101 deletions(-) diff --git a/sksurv/datasets/data/cgvhd.txt b/sksurv/datasets/data/cgvhd.txt index 49e06ce9..f5e90e32 100644 --- a/sksurv/datasets/data/cgvhd.txt +++ b/sksurv/datasets/data/cgvhd.txt @@ -1,101 +1,101 @@ -"dx","tx","extent","agvhdgd","age","survtime","reltime","agvhtime","cgvhtime","stat","rcens","agvh","cgvh","stnum" -"CML","PB","L",1,36,4.895,4.895,0.099,0.520,0,0,1,1, 1 -"AML","PB","L",3,57,3.474,0.753,0.101,0.408,1,1,1,1, 2 -"CML","PB","L",0,48,4.950,4.950,4.950,0.348,0,0,0,1, 3 -"AML","PB","L",2,52,4.643,4.643,0.057,0.482,0,0,1,1, 4 -"AML","PB","L",3,45,4.066,4.066,0.137,0.378,0,0,1,1, 5 -"AML","PB","L",3,47,1.558,0.416,0.055,1.558,1,1,1,0, 6 -"CML","PB","L",1,40,4.512,4.512,0.090,0.381,0,0,1,1, 7 -"AML","PB","L",3,38,4.041,4.041,0.082,0.914,0,0,1,1, 8 -"AML","PB","L",2,41,4.164,4.164,0.055,0.923,0,0,1,1, 9 -"CML","PB","L",0,50,4.011,4.011,4.011,0.397,0,0,0,1, 10 -"CML","PB","L",1,56,3.945,3.945,0.047,0.479,0,0,1,1, 11 -"CML","PB","L",2,56,4.361,4.361,0.079,0.991,0,0,1,1, 12 -"AML","PB","L",1,54,0.841,0.654,0.077,0.474,1,1,1,1, 13 -"CML","PB","L",3,25,2.951,2.951,0.164,0.339,0,0,1,1, 14 -"CML","PB","L",4,40,0.586,0.586,0.055,0.277,1,0,1,1, 15 -"CML","PB","L",0,41,3.559,3.559,3.559,0.367,0,0,0,1, 16 -"CML","PB","L",2,57,3.422,3.422,0.131,0.742,0,0,1,1, 17 -"CML","PB","L",3,62,0.408,0.408,0.408,0.408,0,0,1,1, 18 -"CML","PB","L",1,29,3.428,3.428,0.090,0.958,0,0,1,1, 19 -"AML","PB","L",1,44,0.063,0.063,0.014,0.063,1,0,1,0, 20 -"CML","PB","L",2,40,1.572,1.572,0.090,0.282,1,0,1,1, 21 -"CML","PB","L",1,54,1.013,1.013,0.093,0.413,1,0,1,1, 22 -"AML","PB","L",2,37,3.023,3.023,0.074,0.394,0,0,1,1, 23 -"AML","PB","L",1,58,2.979,2.979,0.079,0.342,0,0,1,1, 24 -"CML","PB","L",3,39,2.817,2.817,0.049,0.367,0,0,1,1, 25 -"CML","PB","L",2,31,2.804,2.804,0.137,0.277,0,0,1,1, 26 -"CML","PB","L",2,45,2.609,2.609,0.252,0.367,0,0,1,1, 27 -"AML","PB","L",0,48,2.508,2.508,2.508,0.331,0,0,0,1, 28 -"CML","PB","L",0,53,0.665,0.665,0.665,0.320,1,0,0,1, 29 -"CML","PB","L",0,29,2.497,2.497,2.497,0.329,0,0,0,1, 30 -"CML","PB","L",0,27,1.799,1.799,1.799,0.444,1,0,0,1, 31 -"AML","PB","L",3,45,0.471,0.438,0.071,0.471,1,1,1,0, 32 -"CML","PB","L",1,39,2.031,2.031,0.112,0.964,0,0,1,1, 33 -"CML","PB","L",3,49,2.073,2.073,0.063,0.564,0,0,1,1, 34 -"AML","PB","L",1,37,0.999,0.750,0.274,0.402,1,1,1,1, 35 -"AML","PB","L",3,53,0.427,0.427,0.055,0.277,1,0,1,1, 36 -"CML","PB","L",1,48,1.766,1.766,0.216,0.400,0,0,1,1, 37 -"AML","PB","L",1,59,1.555,1.555,0.178,0.446,0,0,1,1, 38 -"CML","PB","L",2,33,1.670,1.670,0.110,0.474,0,0,1,1, 39 -"CML","PB","L",0,38,1.607,1.607,1.607,0.329,0,0,0,1, 40 -"CML","PB","L",4,37,1.511,1.511,0.055,0.323,0,0,1,1, 41 -"AML","PB","L",3,41,1.287,1.287,0.049,0.392,0,0,1,1, 42 -"AML","PB","E",1,64,1.227,1.227,0.230,0.496,0,0,1,1, 43 -"CML","PB","L",3,32,1.300,1.300,0.063,0.630,0,0,1,1, 44 -"CML","PB","L",0,41,1.270,1.270,1.270,0.383,0,0,0,1, 45 -"AML","PB","E",1,56,1.205,1.205,0.074,1.205,0,0,1,0, 46 -"CML","PB","L",1,50,1.147,1.147,0.131,0.361,0,0,1,1, 47 -"CML","PB","L",3,37,1.109,1.109,0.055,0.277,0,0,1,1, 48 -"CML","PB","L",0,27,0.994,0.994,0.994,0.287,0,0,0,1, 49 -"CML","BM","L",3,45,4.572,4.572,0.066,0.619,0,0,1,1, 50 -"AML","BM","L",3,45,4.616,4.616,0.101,0.452,0,0,1,1, 51 -"AML","BM","L",2,42,4.000,4.000,0.027,0.290,0,0,1,1, 52 -"CML","BM","L",0,22,4.238,4.238,4.238,0.479,0,0,0,1, 53 -"AML","BM","L",4,47,0.110,0.110,0.074,0.110,1,0,1,0, 54 -"AML","BM","L",2,48,4.030,4.030,0.101,0.857,0,0,1,1, 55 -"AML","BM","L",2,49,3.124,2.527,0.115,1.993,1,1,1,1, 56 -"CML","BM","L",2,38,0.515,0.515,0.079,0.463,1,0,1,1, 57 -"CML","BM","L",1,39,4.222,3.149,0.085,0.496,0,1,1,1, 58 -"CML","BM","L",3,41,4.027,4.027,0.104,0.422,0,0,1,1, 59 -"CML","BM","L",2,46,1.969,1.969,0.038,0.307,1,0,1,1, 60 -"AML","BM","L",0,24,3.792,3.792,3.792,0.701,0,0,0,1, 61 -"AML","BM","L",3,32,0.427,0.427,0.041,0.279,1,0,1,1, 62 -"CML","BM","L",0,36,3.340,3.340,3.340,0.419,0,0,0,1, 63 -"CML","BM","L",1,53,3.504,0.720,0.112,0.616,0,1,1,1, 64 -"CML","BM","L",0,52,3.685,3.685,3.685,0.331,0,0,0,1, 65 -"CML","BM","L",1,59,0.181,0.181,0.049,0.181,1,0,1,0, 66 -"CML","BM","L",3,42,0.736,0.736,0.090,0.567,1,0,1,1, 67 -"CML","BM","L",1,65,0.287,0.287,0.052,0.287,1,0,1,0, 68 -"CML","BM","E",0,60,0.057,0.057,0.057,0.057,0,0,0,0, 69 -"CML","BM","L",2,61,3.107,3.107,0.088,0.764,0,0,1,1, 70 -"CML","BM","L",1,55,3.088,3.088,0.110,0.381,0,0,1,1, 71 -"AML","BM","E",0,48,0.446,0.274,0.446,0.446,1,1,0,0, 72 -"AML","BM","E",0,49,2.776,2.776,2.776,2.776,0,0,0,0, 73 -"CML","BM","L",0,36,0.693,0.172,0.693,0.635,1,1,0,1, 74 -"AML","BM","L",1,48,2.010,2.010,0.077,0.553,0,0,1,1, 75 -"CML","BM","L",0,47,2.374,2.374,2.374,0.287,0,0,0,1, 76 -"AML","BM","L",3,43,1.079,1.079,0.088,0.345,1,0,1,1, 77 -"CML","BM","L",0,56,2.604,2.604,2.604,0.375,0,0,0,1, 78 -"CML","BM","L",1,56,2.478,2.478,0.170,0.517,0,0,1,1, 79 -"CML","BM","L",0,36,2.338,2.338,2.338,0.457,0,0,0,1, 80 -"CML","BM","L",2,52,2.300,2.300,0.049,0.345,0,0,1,1, 81 -"CML","BM","E",1,44,0.219,0.219,0.145,0.219,1,0,1,0, 82 -"AML","BM","L",3,32,2.127,2.127,0.118,0.422,0,0,1,1, 83 -"AML","BM","L",1,44,2.034,2.034,0.096,0.479,0,0,1,1, 84 -"CML","BM","L",0,45,2.034,2.034,2.034,0.290,0,0,0,1, 85 -"AML","BM","L",3,48,2.007,2.007,0.088,0.350,0,0,1,1, 86 -"CML","BM","L",0,48,1.183,1.183,1.183,0.372,0,0,0,1, 87 -"AML","BM","L",3,42,0.375,0.375,0.096,0.277,1,0,1,1, 88 -"AML","BM","E",2,24,0.353,0.301,0.096,0.353,1,1,1,0, 89 -"CML","BM","L",2,26,1.566,1.566,0.137,0.474,0,0,1,1, 90 -"CML","BM","L",2,34,1.588,1.588,0.129,0.465,0,0,1,1, 91 -"CML","BM","L",0,57,1.243,1.243,1.243,0.433,0,0,0,1, 92 -"CML","BM","L",3,51,1.555,1.555,0.090,0.359,0,0,1,1, 93 -"AML","BM","L",2,54,1.202,1.202,0.192,1.202,0,0,1,0, 94 -"AML","BM","E",0,20,1.251,1.251,1.251,0.408,0,0,0,1, 95 -"AML","BM","L",2,39,1.114,1.114,0.074,0.402,0,0,1,1, 96 -"AML","BM","L",0,49,1.150,1.150,1.150,0.350,0,0,0,1, 97 -"CML","BM","L",1,42,0.997,0.997,0.142,0.411,0,0,1,1, 98 -"CML","BM","L",0,44,1.057,1.057,1.057,0.301,0,0,0,1, 99 -"CML","BM","L",1,56,1.125,1.125,0.129,0.320,0,0,1,1,100 +"dx","tx","extent","agvhdgd","age","survtime","reltime","agvhtime","cgvhtime","stat","rcens","agvh","cgvh","stnum" +"CML","PB","L",1,36,4.895,4.895,0.099,0.520,0,0,1,1, 1 +"AML","PB","L",3,57,3.474,0.753,0.101,0.408,1,1,1,1, 2 +"CML","PB","L",0,48,4.950,4.950,4.950,0.348,0,0,0,1, 3 +"AML","PB","L",2,52,4.643,4.643,0.057,0.482,0,0,1,1, 4 +"AML","PB","L",3,45,4.066,4.066,0.137,0.378,0,0,1,1, 5 +"AML","PB","L",3,47,1.558,0.416,0.055,1.558,1,1,1,0, 6 +"CML","PB","L",1,40,4.512,4.512,0.090,0.381,0,0,1,1, 7 +"AML","PB","L",3,38,4.041,4.041,0.082,0.914,0,0,1,1, 8 +"AML","PB","L",2,41,4.164,4.164,0.055,0.923,0,0,1,1, 9 +"CML","PB","L",0,50,4.011,4.011,4.011,0.397,0,0,0,1, 10 +"CML","PB","L",1,56,3.945,3.945,0.047,0.479,0,0,1,1, 11 +"CML","PB","L",2,56,4.361,4.361,0.079,0.991,0,0,1,1, 12 +"AML","PB","L",1,54,0.841,0.654,0.077,0.474,1,1,1,1, 13 +"CML","PB","L",3,25,2.951,2.951,0.164,0.339,0,0,1,1, 14 +"CML","PB","L",4,40,0.586,0.586,0.055,0.277,1,0,1,1, 15 +"CML","PB","L",0,41,3.559,3.559,3.559,0.367,0,0,0,1, 16 +"CML","PB","L",2,57,3.422,3.422,0.131,0.742,0,0,1,1, 17 +"CML","PB","L",3,62,0.408,0.408,0.408,0.408,0,0,1,1, 18 +"CML","PB","L",1,29,3.428,3.428,0.090,0.958,0,0,1,1, 19 +"AML","PB","L",1,44,0.063,0.063,0.014,0.063,1,0,1,0, 20 +"CML","PB","L",2,40,1.572,1.572,0.090,0.282,1,0,1,1, 21 +"CML","PB","L",1,54,1.013,1.013,0.093,0.413,1,0,1,1, 22 +"AML","PB","L",2,37,3.023,3.023,0.074,0.394,0,0,1,1, 23 +"AML","PB","L",1,58,2.979,2.979,0.079,0.342,0,0,1,1, 24 +"CML","PB","L",3,39,2.817,2.817,0.049,0.367,0,0,1,1, 25 +"CML","PB","L",2,31,2.804,2.804,0.137,0.277,0,0,1,1, 26 +"CML","PB","L",2,45,2.609,2.609,0.252,0.367,0,0,1,1, 27 +"AML","PB","L",0,48,2.508,2.508,2.508,0.331,0,0,0,1, 28 +"CML","PB","L",0,53,0.665,0.665,0.665,0.320,1,0,0,1, 29 +"CML","PB","L",0,29,2.497,2.497,2.497,0.329,0,0,0,1, 30 +"CML","PB","L",0,27,1.799,1.799,1.799,0.444,1,0,0,1, 31 +"AML","PB","L",3,45,0.471,0.438,0.071,0.471,1,1,1,0, 32 +"CML","PB","L",1,39,2.031,2.031,0.112,0.964,0,0,1,1, 33 +"CML","PB","L",3,49,2.073,2.073,0.063,0.564,0,0,1,1, 34 +"AML","PB","L",1,37,0.999,0.750,0.274,0.402,1,1,1,1, 35 +"AML","PB","L",3,53,0.427,0.427,0.055,0.277,1,0,1,1, 36 +"CML","PB","L",1,48,1.766,1.766,0.216,0.400,0,0,1,1, 37 +"AML","PB","L",1,59,1.555,1.555,0.178,0.446,0,0,1,1, 38 +"CML","PB","L",2,33,1.670,1.670,0.110,0.474,0,0,1,1, 39 +"CML","PB","L",0,38,1.607,1.607,1.607,0.329,0,0,0,1, 40 +"CML","PB","L",4,37,1.511,1.511,0.055,0.323,0,0,1,1, 41 +"AML","PB","L",3,41,1.287,1.287,0.049,0.392,0,0,1,1, 42 +"AML","PB","E",1,64,1.227,1.227,0.230,0.496,0,0,1,1, 43 +"CML","PB","L",3,32,1.300,1.300,0.063,0.630,0,0,1,1, 44 +"CML","PB","L",0,41,1.270,1.270,1.270,0.383,0,0,0,1, 45 +"AML","PB","E",1,56,1.205,1.205,0.074,1.205,0,0,1,0, 46 +"CML","PB","L",1,50,1.147,1.147,0.131,0.361,0,0,1,1, 47 +"CML","PB","L",3,37,1.109,1.109,0.055,0.277,0,0,1,1, 48 +"CML","PB","L",0,27,0.994,0.994,0.994,0.287,0,0,0,1, 49 +"CML","BM","L",3,45,4.572,4.572,0.066,0.619,0,0,1,1, 50 +"AML","BM","L",3,45,4.616,4.616,0.101,0.452,0,0,1,1, 51 +"AML","BM","L",2,42,4.000,4.000,0.027,0.290,0,0,1,1, 52 +"CML","BM","L",0,22,4.238,4.238,4.238,0.479,0,0,0,1, 53 +"AML","BM","L",4,47,0.110,0.110,0.074,0.110,1,0,1,0, 54 +"AML","BM","L",2,48,4.030,4.030,0.101,0.857,0,0,1,1, 55 +"AML","BM","L",2,49,3.124,2.527,0.115,1.993,1,1,1,1, 56 +"CML","BM","L",2,38,0.515,0.515,0.079,0.463,1,0,1,1, 57 +"CML","BM","L",1,39,4.222,3.149,0.085,0.496,0,1,1,1, 58 +"CML","BM","L",3,41,4.027,4.027,0.104,0.422,0,0,1,1, 59 +"CML","BM","L",2,46,1.969,1.969,0.038,0.307,1,0,1,1, 60 +"AML","BM","L",0,24,3.792,3.792,3.792,0.701,0,0,0,1, 61 +"AML","BM","L",3,32,0.427,0.427,0.041,0.279,1,0,1,1, 62 +"CML","BM","L",0,36,3.340,3.340,3.340,0.419,0,0,0,1, 63 +"CML","BM","L",1,53,3.504,0.720,0.112,0.616,0,1,1,1, 64 +"CML","BM","L",0,52,3.685,3.685,3.685,0.331,0,0,0,1, 65 +"CML","BM","L",1,59,0.181,0.181,0.049,0.181,1,0,1,0, 66 +"CML","BM","L",3,42,0.736,0.736,0.090,0.567,1,0,1,1, 67 +"CML","BM","L",1,65,0.287,0.287,0.052,0.287,1,0,1,0, 68 +"CML","BM","E",0,60,0.057,0.057,0.057,0.057,0,0,0,0, 69 +"CML","BM","L",2,61,3.107,3.107,0.088,0.764,0,0,1,1, 70 +"CML","BM","L",1,55,3.088,3.088,0.110,0.381,0,0,1,1, 71 +"AML","BM","E",0,48,0.446,0.274,0.446,0.446,1,1,0,0, 72 +"AML","BM","E",0,49,2.776,2.776,2.776,2.776,0,0,0,0, 73 +"CML","BM","L",0,36,0.693,0.172,0.693,0.635,1,1,0,1, 74 +"AML","BM","L",1,48,2.010,2.010,0.077,0.553,0,0,1,1, 75 +"CML","BM","L",0,47,2.374,2.374,2.374,0.287,0,0,0,1, 76 +"AML","BM","L",3,43,1.079,1.079,0.088,0.345,1,0,1,1, 77 +"CML","BM","L",0,56,2.604,2.604,2.604,0.375,0,0,0,1, 78 +"CML","BM","L",1,56,2.478,2.478,0.170,0.517,0,0,1,1, 79 +"CML","BM","L",0,36,2.338,2.338,2.338,0.457,0,0,0,1, 80 +"CML","BM","L",2,52,2.300,2.300,0.049,0.345,0,0,1,1, 81 +"CML","BM","E",1,44,0.219,0.219,0.145,0.219,1,0,1,0, 82 +"AML","BM","L",3,32,2.127,2.127,0.118,0.422,0,0,1,1, 83 +"AML","BM","L",1,44,2.034,2.034,0.096,0.479,0,0,1,1, 84 +"CML","BM","L",0,45,2.034,2.034,2.034,0.290,0,0,0,1, 85 +"AML","BM","L",3,48,2.007,2.007,0.088,0.350,0,0,1,1, 86 +"CML","BM","L",0,48,1.183,1.183,1.183,0.372,0,0,0,1, 87 +"AML","BM","L",3,42,0.375,0.375,0.096,0.277,1,0,1,1, 88 +"AML","BM","E",2,24,0.353,0.301,0.096,0.353,1,1,1,0, 89 +"CML","BM","L",2,26,1.566,1.566,0.137,0.474,0,0,1,1, 90 +"CML","BM","L",2,34,1.588,1.588,0.129,0.465,0,0,1,1, 91 +"CML","BM","L",0,57,1.243,1.243,1.243,0.433,0,0,0,1, 92 +"CML","BM","L",3,51,1.555,1.555,0.090,0.359,0,0,1,1, 93 +"AML","BM","L",2,54,1.202,1.202,0.192,1.202,0,0,1,0, 94 +"AML","BM","E",0,20,1.251,1.251,1.251,0.408,0,0,0,1, 95 +"AML","BM","L",2,39,1.114,1.114,0.074,0.402,0,0,1,1, 96 +"AML","BM","L",0,49,1.150,1.150,1.150,0.350,0,0,0,1, 97 +"CML","BM","L",1,42,0.997,0.997,0.142,0.411,0,0,1,1, 98 +"CML","BM","L",0,44,1.057,1.057,1.057,0.301,0,0,0,1, 99 +"CML","BM","L",1,56,1.125,1.125,0.129,0.320,0,0,1,1,100 From 5ef4d0abd529e14c905a43dd714faa0c15a4fa30 Mon Sep 17 00:00:00 2001 From: valverde Date: Fri, 6 Dec 2024 21:47:27 +0900 Subject: [PATCH 03/11] Move txt data file to arff format. --- sksurv/datasets/base.py | 21 +++--- sksurv/datasets/data/cgvhd.arff | 118 ++++++++++++++++++++++++++++++++ sksurv/datasets/data/cgvhd.txt | 101 --------------------------- sksurv/nonparametric.py | 14 ++-- 4 files changed, 134 insertions(+), 120 deletions(-) create mode 100644 sksurv/datasets/data/cgvhd.arff delete mode 100644 sksurv/datasets/data/cgvhd.txt diff --git a/sksurv/datasets/base.py b/sksurv/datasets/base.py index 183711a9..43970590 100644 --- a/sksurv/datasets/base.py +++ b/sksurv/datasets/base.py @@ -544,17 +544,14 @@ def load_cgvhd(): .. [2] https://drive.google.com/file/d/1FPM264pE\_-F8DB7lvFeLB1yQ3HDo7c-i/view https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics """ - full_path = _get_data_path("cgvhd.txt") - - df = pd.read_csv(full_path) - df["ftime"] = df[["survtime", "reltime", "cgvhtime"]].min(axis=1) - df["status"] = ( - ((df["ftime"] == df["cgvhtime"]) & (df["cgvh"] == 1)).astype(int) - + 2 * ((df["ftime"] == df["reltime"]) & (df["rcens"] == 1)).astype(int) - + 3 * ((df["ftime"] == df["survtime"]) & (df["stat"] == 1)).astype(int) + full_path = _get_data_path("cgvhd.arff") + data = loadarff(full_path) + data["ftime"] = data[["survtime", "reltime", "cgvhtime"]].min(axis=1) + data["status"] = ( + ((data["ftime"] == data["cgvhtime"]) & (data["cgvh"] == "1")).astype(int) + + 2 * ((data["ftime"] == data["reltime"]) & (data["rcens"] == "1")).astype(int) + + 3 * ((data["ftime"] == data["survtime"]) & (data["stat"] == "1")).astype(int) ) - df = df[["ftime", "status", "tx"]] - ftime, event = df["ftime"].values, df["status"].values + data = data[["ftime", "status", "dx", "tx", "extent", "age"]] - # return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True) - return None, {"ftime": ftime, "status": event} + return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True) diff --git a/sksurv/datasets/data/cgvhd.arff b/sksurv/datasets/data/cgvhd.arff new file mode 100644 index 00000000..634d06ab --- /dev/null +++ b/sksurv/datasets/data/cgvhd.arff @@ -0,0 +1,118 @@ +@RELATION CGVHD + +@ATTRIBUTE dx {CML, AML} +@ATTRIBUTE tx {PB, BM} +@ATTRIBUTE extent {L, E} +@ATTRIBUTE agvhdgd NUMERIC +@ATTRIBUTE age NUMERIC +@ATTRIBUTE survtime NUMERIC +@ATTRIBUTE reltime NUMERIC +@ATTRIBUTE agvhtime NUMERIC +@ATTRIBUTE cgvhtime NUMERIC +@ATTRIBUTE stat {0, 1} +@ATTRIBUTE rcens {0, 1} +@ATTRIBUTE agvh {1, 0} +@ATTRIBUTE cgvh {1, 0} +@ATTRIBUTE stnum NUMERIC + +@DATA +CML,PB,L,1,36,4.895,4.895,0.099,0.52,0,0,1,1,1 +AML,PB,L,3,57,3.474,0.753,0.101,0.408,1,1,1,1,2 +CML,PB,L,0,48,4.95,4.95,4.95,0.348,0,0,0,1,3 +AML,PB,L,2,52,4.643,4.643,0.057,0.482,0,0,1,1,4 +AML,PB,L,3,45,4.066,4.066,0.137,0.378,0,0,1,1,5 +AML,PB,L,3,47,1.558,0.416,0.055,1.558,1,1,1,0,6 +CML,PB,L,1,40,4.512,4.512,0.09,0.381,0,0,1,1,7 +AML,PB,L,3,38,4.041,4.041,0.082,0.914,0,0,1,1,8 +AML,PB,L,2,41,4.164,4.164,0.055,0.923,0,0,1,1,9 +CML,PB,L,0,50,4.011,4.011,4.011,0.397,0,0,0,1,10 +CML,PB,L,1,56,3.945,3.945,0.047,0.479,0,0,1,1,11 +CML,PB,L,2,56,4.361,4.361,0.079,0.991,0,0,1,1,12 +AML,PB,L,1,54,0.841,0.654,0.077,0.474,1,1,1,1,13 +CML,PB,L,3,25,2.951,2.951,0.164,0.339,0,0,1,1,14 +CML,PB,L,4,40,0.586,0.586,0.055,0.277,1,0,1,1,15 +CML,PB,L,0,41,3.559,3.559,3.559,0.367,0,0,0,1,16 +CML,PB,L,2,57,3.422,3.422,0.131,0.742,0,0,1,1,17 +CML,PB,L,3,62,0.408,0.408,0.408,0.408,0,0,1,1,18 +CML,PB,L,1,29,3.428,3.428,0.09,0.958,0,0,1,1,19 +AML,PB,L,1,44,0.063,0.063,0.014,0.063,1,0,1,0,20 +CML,PB,L,2,40,1.572,1.572,0.09,0.282,1,0,1,1,21 +CML,PB,L,1,54,1.013,1.013,0.093,0.413,1,0,1,1,22 +AML,PB,L,2,37,3.023,3.023,0.074,0.394,0,0,1,1,23 +AML,PB,L,1,58,2.979,2.979,0.079,0.342,0,0,1,1,24 +CML,PB,L,3,39,2.817,2.817,0.049,0.367,0,0,1,1,25 +CML,PB,L,2,31,2.804,2.804,0.137,0.277,0,0,1,1,26 +CML,PB,L,2,45,2.609,2.609,0.252,0.367,0,0,1,1,27 +AML,PB,L,0,48,2.508,2.508,2.508,0.331,0,0,0,1,28 +CML,PB,L,0,53,0.665,0.665,0.665,0.32,1,0,0,1,29 +CML,PB,L,0,29,2.497,2.497,2.497,0.329,0,0,0,1,30 +CML,PB,L,0,27,1.799,1.799,1.799,0.444,1,0,0,1,31 +AML,PB,L,3,45,0.471,0.438,0.071,0.471,1,1,1,0,32 +CML,PB,L,1,39,2.031,2.031,0.112,0.964,0,0,1,1,33 +CML,PB,L,3,49,2.073,2.073,0.063,0.564,0,0,1,1,34 +AML,PB,L,1,37,0.999,0.75,0.274,0.402,1,1,1,1,35 +AML,PB,L,3,53,0.427,0.427,0.055,0.277,1,0,1,1,36 +CML,PB,L,1,48,1.766,1.766,0.216,0.4,0,0,1,1,37 +AML,PB,L,1,59,1.555,1.555,0.178,0.446,0,0,1,1,38 +CML,PB,L,2,33,1.67,1.67,0.11,0.474,0,0,1,1,39 +CML,PB,L,0,38,1.607,1.607,1.607,0.329,0,0,0,1,40 +CML,PB,L,4,37,1.511,1.511,0.055,0.323,0,0,1,1,41 +AML,PB,L,3,41,1.287,1.287,0.049,0.392,0,0,1,1,42 +AML,PB,E,1,64,1.227,1.227,0.23,0.496,0,0,1,1,43 +CML,PB,L,3,32,1.3,1.3,0.063,0.63,0,0,1,1,44 +CML,PB,L,0,41,1.27,1.27,1.27,0.383,0,0,0,1,45 +AML,PB,E,1,56,1.205,1.205,0.074,1.205,0,0,1,0,46 +CML,PB,L,1,50,1.147,1.147,0.131,0.361,0,0,1,1,47 +CML,PB,L,3,37,1.109,1.109,0.055,0.277,0,0,1,1,48 +CML,PB,L,0,27,0.994,0.994,0.994,0.287,0,0,0,1,49 +CML,BM,L,3,45,4.572,4.572,0.066,0.619,0,0,1,1,50 +AML,BM,L,3,45,4.616,4.616,0.101,0.452,0,0,1,1,51 +AML,BM,L,2,42,4.0,4.0,0.027,0.29,0,0,1,1,52 +CML,BM,L,0,22,4.238,4.238,4.238,0.479,0,0,0,1,53 +AML,BM,L,4,47,0.11,0.11,0.074,0.11,1,0,1,0,54 +AML,BM,L,2,48,4.03,4.03,0.101,0.857,0,0,1,1,55 +AML,BM,L,2,49,3.124,2.527,0.115,1.993,1,1,1,1,56 +CML,BM,L,2,38,0.515,0.515,0.079,0.463,1,0,1,1,57 +CML,BM,L,1,39,4.222,3.149,0.085,0.496,0,1,1,1,58 +CML,BM,L,3,41,4.027,4.027,0.104,0.422,0,0,1,1,59 +CML,BM,L,2,46,1.969,1.969,0.038,0.307,1,0,1,1,60 +AML,BM,L,0,24,3.792,3.792,3.792,0.701,0,0,0,1,61 +AML,BM,L,3,32,0.427,0.427,0.041,0.279,1,0,1,1,62 +CML,BM,L,0,36,3.34,3.34,3.34,0.419,0,0,0,1,63 +CML,BM,L,1,53,3.504,0.72,0.112,0.616,0,1,1,1,64 +CML,BM,L,0,52,3.685,3.685,3.685,0.331,0,0,0,1,65 +CML,BM,L,1,59,0.181,0.181,0.049,0.181,1,0,1,0,66 +CML,BM,L,3,42,0.736,0.736,0.09,0.567,1,0,1,1,67 +CML,BM,L,1,65,0.287,0.287,0.052,0.287,1,0,1,0,68 +CML,BM,E,0,60,0.057,0.057,0.057,0.057,0,0,0,0,69 +CML,BM,L,2,61,3.107,3.107,0.088,0.764,0,0,1,1,70 +CML,BM,L,1,55,3.088,3.088,0.11,0.381,0,0,1,1,71 +AML,BM,E,0,48,0.446,0.274,0.446,0.446,1,1,0,0,72 +AML,BM,E,0,49,2.776,2.776,2.776,2.776,0,0,0,0,73 +CML,BM,L,0,36,0.693,0.172,0.693,0.635,1,1,0,1,74 +AML,BM,L,1,48,2.01,2.01,0.077,0.553,0,0,1,1,75 +CML,BM,L,0,47,2.374,2.374,2.374,0.287,0,0,0,1,76 +AML,BM,L,3,43,1.079,1.079,0.088,0.345,1,0,1,1,77 +CML,BM,L,0,56,2.604,2.604,2.604,0.375,0,0,0,1,78 +CML,BM,L,1,56,2.478,2.478,0.17,0.517,0,0,1,1,79 +CML,BM,L,0,36,2.338,2.338,2.338,0.457,0,0,0,1,80 +CML,BM,L,2,52,2.3,2.3,0.049,0.345,0,0,1,1,81 +CML,BM,E,1,44,0.219,0.219,0.145,0.219,1,0,1,0,82 +AML,BM,L,3,32,2.127,2.127,0.118,0.422,0,0,1,1,83 +AML,BM,L,1,44,2.034,2.034,0.096,0.479,0,0,1,1,84 +CML,BM,L,0,45,2.034,2.034,2.034,0.29,0,0,0,1,85 +AML,BM,L,3,48,2.007,2.007,0.088,0.35,0,0,1,1,86 +CML,BM,L,0,48,1.183,1.183,1.183,0.372,0,0,0,1,87 +AML,BM,L,3,42,0.375,0.375,0.096,0.277,1,0,1,1,88 +AML,BM,E,2,24,0.353,0.301,0.096,0.353,1,1,1,0,89 +CML,BM,L,2,26,1.566,1.566,0.137,0.474,0,0,1,1,90 +CML,BM,L,2,34,1.588,1.588,0.129,0.465,0,0,1,1,91 +CML,BM,L,0,57,1.243,1.243,1.243,0.433,0,0,0,1,92 +CML,BM,L,3,51,1.555,1.555,0.09,0.359,0,0,1,1,93 +AML,BM,L,2,54,1.202,1.202,0.192,1.202,0,0,1,0,94 +AML,BM,E,0,20,1.251,1.251,1.251,0.408,0,0,0,1,95 +AML,BM,L,2,39,1.114,1.114,0.074,0.402,0,0,1,1,96 +AML,BM,L,0,49,1.15,1.15,1.15,0.35,0,0,0,1,97 +CML,BM,L,1,42,0.997,0.997,0.142,0.411,0,0,1,1,98 +CML,BM,L,0,44,1.057,1.057,1.057,0.301,0,0,0,1,99 +CML,BM,L,1,56,1.125,1.125,0.129,0.32,0,0,1,1,100 diff --git a/sksurv/datasets/data/cgvhd.txt b/sksurv/datasets/data/cgvhd.txt deleted file mode 100644 index f5e90e32..00000000 --- a/sksurv/datasets/data/cgvhd.txt +++ /dev/null @@ -1,101 +0,0 @@ -"dx","tx","extent","agvhdgd","age","survtime","reltime","agvhtime","cgvhtime","stat","rcens","agvh","cgvh","stnum" -"CML","PB","L",1,36,4.895,4.895,0.099,0.520,0,0,1,1, 1 -"AML","PB","L",3,57,3.474,0.753,0.101,0.408,1,1,1,1, 2 -"CML","PB","L",0,48,4.950,4.950,4.950,0.348,0,0,0,1, 3 -"AML","PB","L",2,52,4.643,4.643,0.057,0.482,0,0,1,1, 4 -"AML","PB","L",3,45,4.066,4.066,0.137,0.378,0,0,1,1, 5 -"AML","PB","L",3,47,1.558,0.416,0.055,1.558,1,1,1,0, 6 -"CML","PB","L",1,40,4.512,4.512,0.090,0.381,0,0,1,1, 7 -"AML","PB","L",3,38,4.041,4.041,0.082,0.914,0,0,1,1, 8 -"AML","PB","L",2,41,4.164,4.164,0.055,0.923,0,0,1,1, 9 -"CML","PB","L",0,50,4.011,4.011,4.011,0.397,0,0,0,1, 10 -"CML","PB","L",1,56,3.945,3.945,0.047,0.479,0,0,1,1, 11 -"CML","PB","L",2,56,4.361,4.361,0.079,0.991,0,0,1,1, 12 -"AML","PB","L",1,54,0.841,0.654,0.077,0.474,1,1,1,1, 13 -"CML","PB","L",3,25,2.951,2.951,0.164,0.339,0,0,1,1, 14 -"CML","PB","L",4,40,0.586,0.586,0.055,0.277,1,0,1,1, 15 -"CML","PB","L",0,41,3.559,3.559,3.559,0.367,0,0,0,1, 16 -"CML","PB","L",2,57,3.422,3.422,0.131,0.742,0,0,1,1, 17 -"CML","PB","L",3,62,0.408,0.408,0.408,0.408,0,0,1,1, 18 -"CML","PB","L",1,29,3.428,3.428,0.090,0.958,0,0,1,1, 19 -"AML","PB","L",1,44,0.063,0.063,0.014,0.063,1,0,1,0, 20 -"CML","PB","L",2,40,1.572,1.572,0.090,0.282,1,0,1,1, 21 -"CML","PB","L",1,54,1.013,1.013,0.093,0.413,1,0,1,1, 22 -"AML","PB","L",2,37,3.023,3.023,0.074,0.394,0,0,1,1, 23 -"AML","PB","L",1,58,2.979,2.979,0.079,0.342,0,0,1,1, 24 -"CML","PB","L",3,39,2.817,2.817,0.049,0.367,0,0,1,1, 25 -"CML","PB","L",2,31,2.804,2.804,0.137,0.277,0,0,1,1, 26 -"CML","PB","L",2,45,2.609,2.609,0.252,0.367,0,0,1,1, 27 -"AML","PB","L",0,48,2.508,2.508,2.508,0.331,0,0,0,1, 28 -"CML","PB","L",0,53,0.665,0.665,0.665,0.320,1,0,0,1, 29 -"CML","PB","L",0,29,2.497,2.497,2.497,0.329,0,0,0,1, 30 -"CML","PB","L",0,27,1.799,1.799,1.799,0.444,1,0,0,1, 31 -"AML","PB","L",3,45,0.471,0.438,0.071,0.471,1,1,1,0, 32 -"CML","PB","L",1,39,2.031,2.031,0.112,0.964,0,0,1,1, 33 -"CML","PB","L",3,49,2.073,2.073,0.063,0.564,0,0,1,1, 34 -"AML","PB","L",1,37,0.999,0.750,0.274,0.402,1,1,1,1, 35 -"AML","PB","L",3,53,0.427,0.427,0.055,0.277,1,0,1,1, 36 -"CML","PB","L",1,48,1.766,1.766,0.216,0.400,0,0,1,1, 37 -"AML","PB","L",1,59,1.555,1.555,0.178,0.446,0,0,1,1, 38 -"CML","PB","L",2,33,1.670,1.670,0.110,0.474,0,0,1,1, 39 -"CML","PB","L",0,38,1.607,1.607,1.607,0.329,0,0,0,1, 40 -"CML","PB","L",4,37,1.511,1.511,0.055,0.323,0,0,1,1, 41 -"AML","PB","L",3,41,1.287,1.287,0.049,0.392,0,0,1,1, 42 -"AML","PB","E",1,64,1.227,1.227,0.230,0.496,0,0,1,1, 43 -"CML","PB","L",3,32,1.300,1.300,0.063,0.630,0,0,1,1, 44 -"CML","PB","L",0,41,1.270,1.270,1.270,0.383,0,0,0,1, 45 -"AML","PB","E",1,56,1.205,1.205,0.074,1.205,0,0,1,0, 46 -"CML","PB","L",1,50,1.147,1.147,0.131,0.361,0,0,1,1, 47 -"CML","PB","L",3,37,1.109,1.109,0.055,0.277,0,0,1,1, 48 -"CML","PB","L",0,27,0.994,0.994,0.994,0.287,0,0,0,1, 49 -"CML","BM","L",3,45,4.572,4.572,0.066,0.619,0,0,1,1, 50 -"AML","BM","L",3,45,4.616,4.616,0.101,0.452,0,0,1,1, 51 -"AML","BM","L",2,42,4.000,4.000,0.027,0.290,0,0,1,1, 52 -"CML","BM","L",0,22,4.238,4.238,4.238,0.479,0,0,0,1, 53 -"AML","BM","L",4,47,0.110,0.110,0.074,0.110,1,0,1,0, 54 -"AML","BM","L",2,48,4.030,4.030,0.101,0.857,0,0,1,1, 55 -"AML","BM","L",2,49,3.124,2.527,0.115,1.993,1,1,1,1, 56 -"CML","BM","L",2,38,0.515,0.515,0.079,0.463,1,0,1,1, 57 -"CML","BM","L",1,39,4.222,3.149,0.085,0.496,0,1,1,1, 58 -"CML","BM","L",3,41,4.027,4.027,0.104,0.422,0,0,1,1, 59 -"CML","BM","L",2,46,1.969,1.969,0.038,0.307,1,0,1,1, 60 -"AML","BM","L",0,24,3.792,3.792,3.792,0.701,0,0,0,1, 61 -"AML","BM","L",3,32,0.427,0.427,0.041,0.279,1,0,1,1, 62 -"CML","BM","L",0,36,3.340,3.340,3.340,0.419,0,0,0,1, 63 -"CML","BM","L",1,53,3.504,0.720,0.112,0.616,0,1,1,1, 64 -"CML","BM","L",0,52,3.685,3.685,3.685,0.331,0,0,0,1, 65 -"CML","BM","L",1,59,0.181,0.181,0.049,0.181,1,0,1,0, 66 -"CML","BM","L",3,42,0.736,0.736,0.090,0.567,1,0,1,1, 67 -"CML","BM","L",1,65,0.287,0.287,0.052,0.287,1,0,1,0, 68 -"CML","BM","E",0,60,0.057,0.057,0.057,0.057,0,0,0,0, 69 -"CML","BM","L",2,61,3.107,3.107,0.088,0.764,0,0,1,1, 70 -"CML","BM","L",1,55,3.088,3.088,0.110,0.381,0,0,1,1, 71 -"AML","BM","E",0,48,0.446,0.274,0.446,0.446,1,1,0,0, 72 -"AML","BM","E",0,49,2.776,2.776,2.776,2.776,0,0,0,0, 73 -"CML","BM","L",0,36,0.693,0.172,0.693,0.635,1,1,0,1, 74 -"AML","BM","L",1,48,2.010,2.010,0.077,0.553,0,0,1,1, 75 -"CML","BM","L",0,47,2.374,2.374,2.374,0.287,0,0,0,1, 76 -"AML","BM","L",3,43,1.079,1.079,0.088,0.345,1,0,1,1, 77 -"CML","BM","L",0,56,2.604,2.604,2.604,0.375,0,0,0,1, 78 -"CML","BM","L",1,56,2.478,2.478,0.170,0.517,0,0,1,1, 79 -"CML","BM","L",0,36,2.338,2.338,2.338,0.457,0,0,0,1, 80 -"CML","BM","L",2,52,2.300,2.300,0.049,0.345,0,0,1,1, 81 -"CML","BM","E",1,44,0.219,0.219,0.145,0.219,1,0,1,0, 82 -"AML","BM","L",3,32,2.127,2.127,0.118,0.422,0,0,1,1, 83 -"AML","BM","L",1,44,2.034,2.034,0.096,0.479,0,0,1,1, 84 -"CML","BM","L",0,45,2.034,2.034,2.034,0.290,0,0,0,1, 85 -"AML","BM","L",3,48,2.007,2.007,0.088,0.350,0,0,1,1, 86 -"CML","BM","L",0,48,1.183,1.183,1.183,0.372,0,0,0,1, 87 -"AML","BM","L",3,42,0.375,0.375,0.096,0.277,1,0,1,1, 88 -"AML","BM","E",2,24,0.353,0.301,0.096,0.353,1,1,1,0, 89 -"CML","BM","L",2,26,1.566,1.566,0.137,0.474,0,0,1,1, 90 -"CML","BM","L",2,34,1.588,1.588,0.129,0.465,0,0,1,1, 91 -"CML","BM","L",0,57,1.243,1.243,1.243,0.433,0,0,0,1, 92 -"CML","BM","L",3,51,1.555,1.555,0.090,0.359,0,0,1,1, 93 -"AML","BM","L",2,54,1.202,1.202,0.192,1.202,0,0,1,0, 94 -"AML","BM","E",0,20,1.251,1.251,1.251,0.408,0,0,0,1, 95 -"AML","BM","L",2,39,1.114,1.114,0.074,0.402,0,0,1,1, 96 -"AML","BM","L",0,49,1.150,1.150,1.150,0.350,0,0,0,1, 97 -"CML","BM","L",1,42,0.997,0.997,0.142,0.411,0,0,1,1, 98 -"CML","BM","L",0,44,1.057,1.057,1.057,0.301,0,0,0,1, 99 -"CML","BM","L",1,56,1.125,1.125,0.129,0.320,0,0,1,1,100 diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index 9adec750..3093ec11 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -740,11 +740,11 @@ def cumulative_incidence_competing_risks( return uniq_times, cum_inc if var_type == "Dinse": - var = var_dinse(n_events_cr, kpe_prime, n_at_risk, cum_inc) + var = _var_dinse(n_events_cr, kpe_prime, n_at_risk, cum_inc) elif var_type == "Dinse_Approx": - var = var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc) + var = _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc) elif var_type == "Aalen": - var = var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc) + var = _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc) else: raise ValueError(f"{var_type=} not implemented.") @@ -756,11 +756,11 @@ def cumulative_incidence_competing_risks( return uniq_times, cum_inc, ci -def var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc): +def _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc): """ Variance estimator from Dinse and Larson, Biometrika (1986), 379 See Section 4, Eqs. 6. - This is an approximation from the var_dinse, so that one should be preferred. + This is an approximation from the _var_dinse, so that one should be preferred. However, this seems to be more common in the literature. """ dr = n_events_cr[:, 0] @@ -776,7 +776,7 @@ def var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc): return var -def var_dinse(n_events_cr, kpe_prime, n_at_risk): +def _var_dinse(n_events_cr, kpe_prime, n_at_risk): """ Variance estimator from Dinse and Larson, Biometrika (1986), 379 See Section 4, Eqs. 4 and 5 @@ -804,7 +804,7 @@ def var_dinse(n_events_cr, kpe_prime, n_at_risk): return var -def var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): +def _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): """ Variance estimator from Aalen Aalen, O. (1978a). Nonparametric estimation of partial transition From a04c6bc983c0ee1cc736c54a959b8476da748c8a Mon Sep 17 00:00:00 2001 From: valverde Date: Sat, 7 Dec 2024 23:29:37 +0900 Subject: [PATCH 04/11] Add tests for wrong parameters. --- sksurv/nonparametric.py | 2 +- tests/test_nonparametric.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index 3093ec11..2dbeb032 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -657,7 +657,7 @@ def cumulative_incidence_competing_risks( the specified time. conf_level : float, optional, default: 0.95 - The level for a two-sided confidence interval on the survival curves. + The level for a two-sided confidence interval on the cumulative incidence curves. conf_type : None or {'log-log'}, optional, default: None. The type of confidence intervals to estimate. diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py index 2c16eb23..93016cb9 100644 --- a/tests/test_nonparametric.py +++ b/tests/test_nonparametric.py @@ -1,4 +1,5 @@ from os.path import dirname, join +import re import numpy as np from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal @@ -6983,3 +6984,31 @@ def test_scrucca_ci(event, time, true_x, true_y, true_ci): assert x[it] <= t < x[it + 1] assert_allclose(ci, true_ci, rtol=5.0e-4) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + @pytest.mark.parametrize("conf_level", [None, -1, 1.0, 3.0, np.inf, np.nan]) + def test_invalid_conf_level_competing_risks(event, time, true_x, true_y, conf_level): + msg = f"conf_level must be a float in the range (0.0, 1.0), but was {conf_level}" + with pytest.raises(ValueError, match=re.escape(msg)): + cumulative_incidence_competing_risks( + event, time, conf_level=conf_level, conf_type="log-log", var_type="Dinse_Approx" + ) + + @staticmethod + @pytest.mark.parametrize("conf_type", ["None", -1, "", "not"]) + @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + def test_invalid_conf_type_competing_risks(event, time, true_x, true_y, conf_type): + msg = f"conf_type must be None or a str among {{'log-log'}}, but was {conf_type!r}" + with pytest.raises(ValueError, match=msg): + cumulative_incidence_competing_risks( + event, time, conf_level=0.9, conf_type=conf_type, var_type="Dinse_Approx" + ) + + @staticmethod + @pytest.mark.parametrize("var_type", ["None", "dinse", 1, "", "not"]) + @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + def test_invalid_var_type_competing_risks(event, time, true_x, true_y, var_type): + msg = f"{var_type=} not implemented." + with pytest.raises(ValueError, match=msg): + cumulative_incidence_competing_risks(event, time, conf_level=0.95, conf_type="log-log", var_type=var_type) From ed170a46e8584f8153338523e827feb9b17cff11 Mon Sep 17 00:00:00 2001 From: valverde Date: Thu, 12 Dec 2024 23:03:57 +0900 Subject: [PATCH 05/11] Add regression tests to cover the Dinse variance estimator. --- sksurv/nonparametric.py | 2 +- tests/test_nonparametric.py | 840 +++++++++++++++++++++++++++++++----- 2 files changed, 741 insertions(+), 101 deletions(-) diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index 2dbeb032..4eb3e0f5 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -740,7 +740,7 @@ def cumulative_incidence_competing_risks( return uniq_times, cum_inc if var_type == "Dinse": - var = _var_dinse(n_events_cr, kpe_prime, n_at_risk, cum_inc) + var = _var_dinse(n_events_cr, kpe_prime, n_at_risk) elif var_type == "Dinse_Approx": var = _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc) elif var_type == "Aalen": diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py index 93016cb9..3e7efd87 100644 --- a/tests/test_nonparametric.py +++ b/tests/test_nonparametric.py @@ -6327,7 +6327,8 @@ def data_AML(self): return event, time, true_x, true_y - def data_three_competing_cases(self): + @staticmethod + def data_three_competing_cases(): time = np.array( [ 5.3, @@ -6498,12 +6499,720 @@ def data_three_competing_cases(self): class CGVHD_DataSets(FixtureParameterFactory): - _, df = load_cgvhd() - event, ftime = df["status"], df["ftime"] - def data_full(self): - event = self.event - time = self.ftime + _ci_Aalen = np.array( + [ + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, 8.76630585e-04, np.nan, np.nan, 4.98093393e-02], + [np.nan, np.nan, 3.86470442e-03, np.nan, np.nan, 6.45435439e-02], + [np.nan, 8.76353733e-04, 3.86470442e-03, np.nan, 4.98160434e-02, 6.45435439e-02], + [np.nan, 8.76353733e-04, 8.09297347e-03, np.nan, 4.98160434e-02, 7.90139451e-02], + [np.nan, 8.76353733e-04, 1.31071435e-02, np.nan, 4.98160434e-02, 9.29739191e-02], + [np.nan, 3.86346901e-03, 1.31071435e-02, np.nan, 6.45537226e-02, 9.29739191e-02], + [1.86735626e-02, 3.86346901e-03, 1.31071435e-02, 1.06521413e-01, 6.45537226e-02, 9.29739191e-02], + [2.46648637e-02, 3.86346901e-03, 1.31071435e-02, 1.19714550e-01, 6.45537226e-02, 9.29739191e-02], + [3.09922063e-02, 3.86346901e-03, 1.31071435e-02, 1.32625202e-01, 6.45537226e-02, 9.29739191e-02], + [4.44345647e-02, 3.86346901e-03, 1.86753126e-02, 1.57768941e-01, 6.45537226e-02, 1.06515798e-01], + [5.86926671e-02, 3.86346901e-03, 1.86753126e-02, 1.82197926e-01, 6.45537226e-02, 1.06515798e-01], + [6.60668761e-02, 8.08749124e-03, 1.86753126e-02, 1.94196068e-01, 7.90421599e-02, 1.06515798e-01], + [7.35824051e-02, 8.08749124e-03, 1.86753126e-02, 2.06068244e-01, 7.90421599e-02, 1.06515798e-01], + [8.89863978e-02, 8.08749124e-03, 1.86753126e-02, 2.29479009e-01, 7.90421599e-02, 1.06515798e-01], + [9.68543051e-02, 8.08749124e-03, 1.86753126e-02, 2.41035520e-01, 7.90421599e-02, 1.06515798e-01], + [1.12882999e-01, 8.08749124e-03, 1.86753126e-02, 2.63883900e-01, 7.90421599e-02, 1.06515798e-01], + [1.29260771e-01, 8.08749124e-03, 1.86753126e-02, 2.86415701e-01, 7.90421599e-02, 1.06515798e-01], + [1.37567828e-01, 8.08749124e-03, 1.86753126e-02, 2.97574049e-01, 7.90421599e-02, 1.06515798e-01], + [1.45948291e-01, 8.08749124e-03, 1.86753126e-02, 3.08665304e-01, 7.90421599e-02, 1.06515798e-01], + [1.62916013e-01, 8.08749124e-03, 1.86753126e-02, 3.30658087e-01, 7.90421599e-02, 1.06515798e-01], + [1.71496808e-01, 8.08749124e-03, 1.86753126e-02, 3.41565431e-01, 7.90421599e-02, 1.06515798e-01], + [1.88840363e-01, 8.08749124e-03, 1.86753126e-02, 3.63212079e-01, 7.90421599e-02, 1.06515798e-01], + [1.97598250e-01, 8.08749124e-03, 1.86753126e-02, 3.73955784e-01, 7.90421599e-02, 1.06515798e-01], + [2.06411125e-01, 8.08749124e-03, 1.86753126e-02, 3.84648459e-01, 7.90421599e-02, 1.06515798e-01], + [2.33164884e-01, 8.08749124e-03, 1.86753126e-02, 4.16432824e-01, 7.90421599e-02, 1.06515798e-01], + [2.42181998e-01, 8.08749124e-03, 1.86753126e-02, 4.26935045e-01, 7.90421599e-02, 1.06515798e-01], + [2.51246943e-01, 8.08749124e-03, 1.86753126e-02, 4.37392426e-01, 7.90421599e-02, 1.06515798e-01], + [2.60358645e-01, 8.08749124e-03, 1.86753126e-02, 4.47805885e-01, 7.90421599e-02, 1.06515798e-01], + [2.78719164e-01, 8.08749124e-03, 1.86753126e-02, 4.68503631e-01, 7.90421599e-02, 1.06515798e-01], + [2.87965610e-01, 8.08749124e-03, 1.86753126e-02, 4.78790029e-01, 7.90421599e-02, 1.06515798e-01], + [2.97255414e-01, 8.08749124e-03, 1.86753126e-02, 4.89035373e-01, 7.90421599e-02, 1.06515798e-01], + [3.06587917e-01, 8.08749124e-03, 1.86753126e-02, 4.99240206e-01, 7.90421599e-02, 1.06515798e-01], + [3.15962523e-01, 8.08749124e-03, 1.86753126e-02, 5.09405012e-01, 7.90421599e-02, 1.06515798e-01], + [3.25378702e-01, 8.08749124e-03, 1.86753126e-02, 5.19530217e-01, 7.90421599e-02, 1.06515798e-01], + [3.44334814e-01, 8.08749124e-03, 1.86753126e-02, 5.39662456e-01, 7.90421599e-02, 1.06515798e-01], + [3.73072188e-01, 8.08749124e-03, 1.86753126e-02, 5.69570192e-01, 7.90421599e-02, 1.06515798e-01], + [3.82729571e-01, 8.08749124e-03, 1.86753126e-02, 5.79463982e-01, 7.90421599e-02, 1.06515798e-01], + [3.92426305e-01, 8.08749124e-03, 1.86753126e-02, 5.89319725e-01, 7.90421599e-02, 1.06515798e-01], + [3.92426305e-01, 1.30725842e-02, 1.86753126e-02, 5.89319725e-01, 9.31084543e-02, 1.06515798e-01], + [4.02161016e-01, 1.30725842e-02, 1.86753126e-02, 5.99138501e-01, 9.31084543e-02, 1.06515798e-01], + [4.21749168e-01, 1.30725842e-02, 1.86753126e-02, 6.18660773e-01, 9.31084543e-02, 1.06515798e-01], + [4.31601113e-01, 1.30725842e-02, 1.86753126e-02, 6.28365328e-01, 9.31084543e-02, 1.06515798e-01], + [4.31601113e-01, 1.86073989e-02, 1.86753126e-02, 6.28365328e-01, 1.06734142e-01, 1.06515798e-01], + [4.41490527e-01, 1.86073989e-02, 1.86753126e-02, 6.38032889e-01, 1.06734142e-01, 1.06515798e-01], + [4.51419138e-01, 1.86073989e-02, 1.86753126e-02, 6.47661932e-01, 1.06734142e-01, 1.06515798e-01], + [4.61387078e-01, 1.86073989e-02, 1.86753126e-02, 6.57252226e-01, 1.06734142e-01, 1.06515798e-01], + [4.71394524e-01, 1.86073989e-02, 1.86753126e-02, 6.66803488e-01, 1.06734142e-01, 1.06515798e-01], + [4.81441701e-01, 1.86073989e-02, 1.86753126e-02, 6.76315386e-01, 1.06734142e-01, 1.06515798e-01], + [4.91528876e-01, 1.86073989e-02, 1.86753126e-02, 6.85787537e-01, 1.06734142e-01, 1.06515798e-01], + [5.22042223e-01, 1.86073989e-02, 1.86753126e-02, 7.13954859e-01, 1.06734142e-01, 1.06515798e-01], + [5.52931085e-01, 1.86073989e-02, 1.86753126e-02, 7.41744842e-01, 1.06734142e-01, 1.06515798e-01], + [5.63308855e-01, 1.86073989e-02, 1.86753126e-02, 7.50923965e-01, 1.06734142e-01, 1.06515798e-01], + [5.84202761e-01, 1.86073989e-02, 1.86753126e-02, 7.69142314e-01, 1.06734142e-01, 1.06515798e-01], + [5.94714799e-01, 1.86073989e-02, 1.86753126e-02, 7.78183131e-01, 1.06734142e-01, 1.06515798e-01], + [6.05273508e-01, 1.86073989e-02, 1.86753126e-02, 7.87175387e-01, 1.06734142e-01, 1.06515798e-01], + [6.15879903e-01, 1.86073989e-02, 1.86753126e-02, 7.96117716e-01, 1.06734142e-01, 1.06515798e-01], + [6.26535089e-01, 1.86073989e-02, 1.86753126e-02, 8.05008614e-01, 1.06734142e-01, 1.06515798e-01], + [6.37240259e-01, 1.86073989e-02, 1.86753126e-02, 8.13846424e-01, 1.06734142e-01, 1.06515798e-01], + [6.47996704e-01, 1.86073989e-02, 1.86753126e-02, 8.22629319e-01, 1.06734142e-01, 1.06515798e-01], + [6.58805813e-01, 1.86073989e-02, 1.86753126e-02, 8.31355277e-01, 1.06734142e-01, 1.06515798e-01], + [6.69669074e-01, 1.86073989e-02, 1.86753126e-02, 8.40022063e-01, 1.06734142e-01, 1.06515798e-01], + [6.80588073e-01, 1.86073989e-02, 1.86753126e-02, 8.48627195e-01, 1.06734142e-01, 1.06515798e-01], + [6.91564481e-01, 1.86073989e-02, 1.86753126e-02, 8.57167918e-01, 1.06734142e-01, 1.06515798e-01], + [7.02600038e-01, 1.86073989e-02, 1.86753126e-02, 8.65641160e-01, 1.06734142e-01, 1.06515798e-01], + [7.13696504e-01, 1.86073989e-02, 1.86753126e-02, 8.74043497e-01, 1.06734142e-01, 1.06515798e-01], + [7.24855578e-01, 1.86073989e-02, 1.86753126e-02, 8.82371106e-01, 1.06734142e-01, 1.06515798e-01], + [7.36078733e-01, 1.86073989e-02, 1.86753126e-02, 8.90619734e-01, 1.06734142e-01, 1.06515798e-01], + [7.47366885e-01, 1.86073989e-02, 1.86753126e-02, 8.98784683e-01, 1.06734142e-01, 1.06515798e-01], + [7.58719674e-01, 1.86073989e-02, 1.86753126e-02, 9.06860885e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], + [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], + ] + ).T + + true_ci_Aalen = np.empty(shape=(2, 3, 75), dtype=_ci_Aalen.dtype) + true_ci_Aalen[0], true_ci_Aalen[1] = _ci_Aalen[0:3], _ci_Aalen[3:] + + true_ci_Dinse = np.array( + [ + [ + [ + 0.00000000e00, + 6.95370457e-02, + 7.83645959e-02, + 9.10033947e-02, + 1.04072435e-01, + 1.17082504e-01, + 1.29929377e-01, + 1.91645154e-01, + 2.03566151e-01, + 2.15372208e-01, + 2.50181089e-01, + 2.72943077e-01, + 2.95396099e-01, + 3.06516413e-01, + 3.28560479e-01, + 3.39489609e-01, + 3.61173565e-01, + 3.82637980e-01, + 3.93292024e-01, + 4.03895875e-01, + 4.24957989e-01, + 4.35418464e-01, + 4.56203039e-01, + 4.66528789e-01, + 4.76811151e-01, + 5.07403932e-01, + 5.17518446e-01, + 5.27592116e-01, + 5.37625278e-01, + 5.57571153e-01, + 5.67484281e-01, + 5.77357731e-01, + 5.87191592e-01, + 5.96985903e-01, + 6.06740660e-01, + 6.26131260e-01, + 6.54917692e-01, + 6.64432391e-01, + 6.73906170e-01, + 6.83338631e-01, + 6.92729319e-01, + 7.11383257e-01, + 7.20645286e-01, + 7.29863092e-01, + 7.39035885e-01, + 7.48162792e-01, + 7.57242852e-01, + 7.66275008e-01, + 7.75258098e-01, + 7.84190844e-01, + 8.10672265e-01, + 8.36640715e-01, + 8.45172633e-01, + 8.62033521e-01, + 8.70355794e-01, + 8.78600789e-01, + 8.86764002e-01, + 8.94840306e-01, + 9.02823839e-01, + 9.10707840e-01, + 9.18484458e-01, + 9.26144486e-01, + 9.33677016e-01, + 9.41068967e-01, + 9.48304429e-01, + 9.55363721e-01, + 9.62222015e-01, + 9.68847216e-01, + 9.75196572e-01, + 9.81210932e-01, + 9.86804273e-01, + 9.86804273e-01, + 9.86804273e-01, + 9.97547733e-01, + 9.97547733e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 1.87821217e-02, + 2.47938240e-02, + 3.11403565e-02, + 4.46180263e-02, + 5.89057082e-02, + 6.62937254e-02, + 7.38218326e-02, + 8.92495123e-02, + 9.71286072e-02, + 1.13178498e-01, + 1.29576058e-01, + 1.37892532e-01, + 1.46282113e-01, + 1.63267218e-01, + 1.71856311e-01, + 1.89215709e-01, + 1.97981168e-01, + 2.06801390e-01, + 2.33575873e-01, + 2.42599500e-01, + 2.51670763e-01, + 2.60788594e-01, + 2.79160804e-01, + 2.88412837e-01, + 2.97708055e-01, + 3.07045802e-01, + 3.16425486e-01, + 3.25846579e-01, + 3.44812020e-01, + 3.73562193e-01, + 3.83223577e-01, + 3.92924166e-01, + 3.92924166e-01, + 4.02662628e-01, + 4.22257828e-01, + 4.32113121e-01, + 4.32113121e-01, + 4.42005809e-01, + 4.51937570e-01, + 4.61908540e-01, + 4.71918900e-01, + 4.81968878e-01, + 4.92058747e-01, + 5.22579250e-01, + 5.53474503e-01, + 5.63854512e-01, + 5.84752549e-01, + 5.95266834e-01, + 6.05827834e-01, + 6.16436604e-01, + 6.27094295e-01, + 6.37802162e-01, + 6.48561574e-01, + 6.59374024e-01, + 6.70241138e-01, + 6.81164688e-01, + 6.92146607e-01, + 7.03189005e-01, + 7.14294193e-01, + 7.25464710e-01, + 7.36703387e-01, + 7.48013460e-01, + 7.59398874e-01, + 7.70865154e-01, + 7.70865154e-01, + 7.70865154e-01, + 7.88426707e-01, + 7.88426707e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 8.89768861e-04, + 8.89768861e-04, + 8.89768861e-04, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 1.31592295e-02, + 1.31592295e-02, + 1.31592295e-02, + 1.31592295e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + ], + [ + 0.00000000e00, + 8.90045062e-04, + 3.90328157e-03, + 3.90328157e-03, + 8.15640607e-03, + 1.31938914e-02, + 1.31938914e-02, + 1.31938914e-02, + 1.31938914e-02, + 1.31938914e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + ], + ], + [ + [ + 0.00000000e00, + 1.42906439e-03, + 5.09108441e-03, + 9.87499475e-03, + 1.53589729e-02, + 2.13379730e-02, + 2.76935814e-02, + 6.31255365e-02, + 7.07260642e-02, + 7.84551064e-02, + 1.02305770e-01, + 1.18678738e-01, + 1.35374220e-01, + 1.43831682e-01, + 1.60948022e-01, + 1.69600857e-01, + 1.87083052e-01, + 2.04786409e-01, + 2.13716449e-01, + 2.22696630e-01, + 2.40801966e-01, + 2.49924703e-01, + 2.68305071e-01, + 2.77560890e-01, + 2.86859395e-01, + 3.15004317e-01, + 3.24467187e-01, + 3.33969872e-01, + 3.43511976e-01, + 3.62713124e-01, + 3.72371634e-01, + 3.82068491e-01, + 3.91803549e-01, + 4.01576703e-01, + 4.11387895e-01, + 4.31124387e-01, + 4.61015540e-01, + 4.71056261e-01, + 4.81135884e-01, + 4.91254721e-01, + 5.01413140e-01, + 5.21850459e-01, + 5.32130377e-01, + 5.42451918e-01, + 5.52815752e-01, + 5.63222628e-01, + 5.73673369e-01, + 5.84168889e-01, + 5.94710192e-01, + 6.05298388e-01, + 6.37357181e-01, + 6.69890244e-01, + 6.80848947e-01, + 7.02952411e-01, + 7.14103031e-01, + 7.25324091e-01, + 7.36619526e-01, + 7.47993807e-01, + 7.59452038e-01, + 7.71000091e-01, + 7.82644771e-01, + 7.94394028e-01, + 8.06257243e-01, + 8.18245604e-01, + 8.30372631e-01, + 8.42654895e-01, + 8.55113054e-01, + 8.67773361e-01, + 8.80669917e-01, + 8.93848051e-01, + 9.07369171e-01, + 9.07369171e-01, + 9.07369171e-01, + 9.20581987e-01, + 9.20581987e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 1.06174101e-01, + 1.19356660e-01, + 1.32257411e-01, + 1.57383382e-01, + 1.81801442e-01, + 1.93794468e-01, + 2.05662904e-01, + 2.29066656e-01, + 2.40619922e-01, + 2.63462402e-01, + 2.85989093e-01, + 2.97145175e-01, + 3.08234358e-01, + 3.30223582e-01, + 3.41129420e-01, + 3.62773614e-01, + 3.73516353e-01, + 3.84208235e-01, + 4.15991275e-01, + 4.26493370e-01, + 4.36950786e-01, + 4.47364439e-01, + 4.68063058e-01, + 4.78350116e-01, + 4.88596271e-01, + 4.98802067e-01, + 5.08967984e-01, + 5.19094449e-01, + 5.39229669e-01, + 5.69142997e-01, + 5.79038911e-01, + 5.88896921e-01, + 5.88896921e-01, + 5.98718077e-01, + 6.18245569e-01, + 6.27952927e-01, + 6.27952927e-01, + 6.37623389e-01, + 6.47255475e-01, + 6.56848951e-01, + 6.66403536e-01, + 6.75918897e-01, + 6.85394650e-01, + 7.13573847e-01, + 7.41376949e-01, + 7.50560569e-01, + 7.68788476e-01, + 7.77834150e-01, + 7.86831381e-01, + 7.95778794e-01, + 8.04674874e-01, + 8.13517948e-01, + 8.22306165e-01, + 8.31037478e-01, + 8.39709609e-01, + 8.48320023e-01, + 8.56865884e-01, + 8.65344005e-01, + 8.73750790e-01, + 8.82082148e-01, + 8.90333387e-01, + 8.98499057e-01, + 9.06572688e-01, + 9.14546288e-01, + 9.14546288e-01, + 9.14546288e-01, + 9.32175909e-01, + 9.32175909e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 4.94939423e-02, + 4.94939423e-02, + 4.94939423e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 9.27718816e-02, + 9.27718816e-02, + 9.27718816e-02, + 9.27718816e-02, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + ], + [ + 0.00000000e00, + 4.94873690e-02, + 6.42275460e-02, + 6.42275460e-02, + 7.86890219e-02, + 9.26379119e-02, + 9.26379119e-02, + 9.26379119e-02, + 9.26379119e-02, + 9.26379119e-02, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + ], + ], + ] + ) + + @staticmethod + def data_full(): + + _, df = load_cgvhd() + event, ftime = df["status"], df["ftime"] true_x = np.array( [ @@ -6820,90 +7529,7 @@ def data_full(self): ] ) - cix = np.array( - [ - [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - [np.nan, np.nan, 8.76630585e-04, np.nan, np.nan, 4.98093393e-02], - [np.nan, np.nan, 3.86470442e-03, np.nan, np.nan, 6.45435439e-02], - [np.nan, 8.76353733e-04, 3.86470442e-03, np.nan, 4.98160434e-02, 6.45435439e-02], - [np.nan, 8.76353733e-04, 8.09297347e-03, np.nan, 4.98160434e-02, 7.90139451e-02], - [np.nan, 8.76353733e-04, 1.31071435e-02, np.nan, 4.98160434e-02, 9.29739191e-02], - [np.nan, 3.86346901e-03, 1.31071435e-02, np.nan, 6.45537226e-02, 9.29739191e-02], - [1.86735626e-02, 3.86346901e-03, 1.31071435e-02, 1.06521413e-01, 6.45537226e-02, 9.29739191e-02], - [2.46648637e-02, 3.86346901e-03, 1.31071435e-02, 1.19714550e-01, 6.45537226e-02, 9.29739191e-02], - [3.09922063e-02, 3.86346901e-03, 1.31071435e-02, 1.32625202e-01, 6.45537226e-02, 9.29739191e-02], - [4.44345647e-02, 3.86346901e-03, 1.86753126e-02, 1.57768941e-01, 6.45537226e-02, 1.06515798e-01], - [5.86926671e-02, 3.86346901e-03, 1.86753126e-02, 1.82197926e-01, 6.45537226e-02, 1.06515798e-01], - [6.60668761e-02, 8.08749124e-03, 1.86753126e-02, 1.94196068e-01, 7.90421599e-02, 1.06515798e-01], - [7.35824051e-02, 8.08749124e-03, 1.86753126e-02, 2.06068244e-01, 7.90421599e-02, 1.06515798e-01], - [8.89863978e-02, 8.08749124e-03, 1.86753126e-02, 2.29479009e-01, 7.90421599e-02, 1.06515798e-01], - [9.68543051e-02, 8.08749124e-03, 1.86753126e-02, 2.41035520e-01, 7.90421599e-02, 1.06515798e-01], - [1.12882999e-01, 8.08749124e-03, 1.86753126e-02, 2.63883900e-01, 7.90421599e-02, 1.06515798e-01], - [1.29260771e-01, 8.08749124e-03, 1.86753126e-02, 2.86415701e-01, 7.90421599e-02, 1.06515798e-01], - [1.37567828e-01, 8.08749124e-03, 1.86753126e-02, 2.97574049e-01, 7.90421599e-02, 1.06515798e-01], - [1.45948291e-01, 8.08749124e-03, 1.86753126e-02, 3.08665304e-01, 7.90421599e-02, 1.06515798e-01], - [1.62916013e-01, 8.08749124e-03, 1.86753126e-02, 3.30658087e-01, 7.90421599e-02, 1.06515798e-01], - [1.71496808e-01, 8.08749124e-03, 1.86753126e-02, 3.41565431e-01, 7.90421599e-02, 1.06515798e-01], - [1.88840363e-01, 8.08749124e-03, 1.86753126e-02, 3.63212079e-01, 7.90421599e-02, 1.06515798e-01], - [1.97598250e-01, 8.08749124e-03, 1.86753126e-02, 3.73955784e-01, 7.90421599e-02, 1.06515798e-01], - [2.06411125e-01, 8.08749124e-03, 1.86753126e-02, 3.84648459e-01, 7.90421599e-02, 1.06515798e-01], - [2.33164884e-01, 8.08749124e-03, 1.86753126e-02, 4.16432824e-01, 7.90421599e-02, 1.06515798e-01], - [2.42181998e-01, 8.08749124e-03, 1.86753126e-02, 4.26935045e-01, 7.90421599e-02, 1.06515798e-01], - [2.51246943e-01, 8.08749124e-03, 1.86753126e-02, 4.37392426e-01, 7.90421599e-02, 1.06515798e-01], - [2.60358645e-01, 8.08749124e-03, 1.86753126e-02, 4.47805885e-01, 7.90421599e-02, 1.06515798e-01], - [2.78719164e-01, 8.08749124e-03, 1.86753126e-02, 4.68503631e-01, 7.90421599e-02, 1.06515798e-01], - [2.87965610e-01, 8.08749124e-03, 1.86753126e-02, 4.78790029e-01, 7.90421599e-02, 1.06515798e-01], - [2.97255414e-01, 8.08749124e-03, 1.86753126e-02, 4.89035373e-01, 7.90421599e-02, 1.06515798e-01], - [3.06587917e-01, 8.08749124e-03, 1.86753126e-02, 4.99240206e-01, 7.90421599e-02, 1.06515798e-01], - [3.15962523e-01, 8.08749124e-03, 1.86753126e-02, 5.09405012e-01, 7.90421599e-02, 1.06515798e-01], - [3.25378702e-01, 8.08749124e-03, 1.86753126e-02, 5.19530217e-01, 7.90421599e-02, 1.06515798e-01], - [3.44334814e-01, 8.08749124e-03, 1.86753126e-02, 5.39662456e-01, 7.90421599e-02, 1.06515798e-01], - [3.73072188e-01, 8.08749124e-03, 1.86753126e-02, 5.69570192e-01, 7.90421599e-02, 1.06515798e-01], - [3.82729571e-01, 8.08749124e-03, 1.86753126e-02, 5.79463982e-01, 7.90421599e-02, 1.06515798e-01], - [3.92426305e-01, 8.08749124e-03, 1.86753126e-02, 5.89319725e-01, 7.90421599e-02, 1.06515798e-01], - [3.92426305e-01, 1.30725842e-02, 1.86753126e-02, 5.89319725e-01, 9.31084543e-02, 1.06515798e-01], - [4.02161016e-01, 1.30725842e-02, 1.86753126e-02, 5.99138501e-01, 9.31084543e-02, 1.06515798e-01], - [4.21749168e-01, 1.30725842e-02, 1.86753126e-02, 6.18660773e-01, 9.31084543e-02, 1.06515798e-01], - [4.31601113e-01, 1.30725842e-02, 1.86753126e-02, 6.28365328e-01, 9.31084543e-02, 1.06515798e-01], - [4.31601113e-01, 1.86073989e-02, 1.86753126e-02, 6.28365328e-01, 1.06734142e-01, 1.06515798e-01], - [4.41490527e-01, 1.86073989e-02, 1.86753126e-02, 6.38032889e-01, 1.06734142e-01, 1.06515798e-01], - [4.51419138e-01, 1.86073989e-02, 1.86753126e-02, 6.47661932e-01, 1.06734142e-01, 1.06515798e-01], - [4.61387078e-01, 1.86073989e-02, 1.86753126e-02, 6.57252226e-01, 1.06734142e-01, 1.06515798e-01], - [4.71394524e-01, 1.86073989e-02, 1.86753126e-02, 6.66803488e-01, 1.06734142e-01, 1.06515798e-01], - [4.81441701e-01, 1.86073989e-02, 1.86753126e-02, 6.76315386e-01, 1.06734142e-01, 1.06515798e-01], - [4.91528876e-01, 1.86073989e-02, 1.86753126e-02, 6.85787537e-01, 1.06734142e-01, 1.06515798e-01], - [5.22042223e-01, 1.86073989e-02, 1.86753126e-02, 7.13954859e-01, 1.06734142e-01, 1.06515798e-01], - [5.52931085e-01, 1.86073989e-02, 1.86753126e-02, 7.41744842e-01, 1.06734142e-01, 1.06515798e-01], - [5.63308855e-01, 1.86073989e-02, 1.86753126e-02, 7.50923965e-01, 1.06734142e-01, 1.06515798e-01], - [5.84202761e-01, 1.86073989e-02, 1.86753126e-02, 7.69142314e-01, 1.06734142e-01, 1.06515798e-01], - [5.94714799e-01, 1.86073989e-02, 1.86753126e-02, 7.78183131e-01, 1.06734142e-01, 1.06515798e-01], - [6.05273508e-01, 1.86073989e-02, 1.86753126e-02, 7.87175387e-01, 1.06734142e-01, 1.06515798e-01], - [6.15879903e-01, 1.86073989e-02, 1.86753126e-02, 7.96117716e-01, 1.06734142e-01, 1.06515798e-01], - [6.26535089e-01, 1.86073989e-02, 1.86753126e-02, 8.05008614e-01, 1.06734142e-01, 1.06515798e-01], - [6.37240259e-01, 1.86073989e-02, 1.86753126e-02, 8.13846424e-01, 1.06734142e-01, 1.06515798e-01], - [6.47996704e-01, 1.86073989e-02, 1.86753126e-02, 8.22629319e-01, 1.06734142e-01, 1.06515798e-01], - [6.58805813e-01, 1.86073989e-02, 1.86753126e-02, 8.31355277e-01, 1.06734142e-01, 1.06515798e-01], - [6.69669074e-01, 1.86073989e-02, 1.86753126e-02, 8.40022063e-01, 1.06734142e-01, 1.06515798e-01], - [6.80588073e-01, 1.86073989e-02, 1.86753126e-02, 8.48627195e-01, 1.06734142e-01, 1.06515798e-01], - [6.91564481e-01, 1.86073989e-02, 1.86753126e-02, 8.57167918e-01, 1.06734142e-01, 1.06515798e-01], - [7.02600038e-01, 1.86073989e-02, 1.86753126e-02, 8.65641160e-01, 1.06734142e-01, 1.06515798e-01], - [7.13696504e-01, 1.86073989e-02, 1.86753126e-02, 8.74043497e-01, 1.06734142e-01, 1.06515798e-01], - [7.24855578e-01, 1.86073989e-02, 1.86753126e-02, 8.82371106e-01, 1.06734142e-01, 1.06515798e-01], - [7.36078733e-01, 1.86073989e-02, 1.86753126e-02, 8.90619734e-01, 1.06734142e-01, 1.06515798e-01], - [7.47366885e-01, 1.86073989e-02, 1.86753126e-02, 8.98784683e-01, 1.06734142e-01, 1.06515798e-01], - [7.58719674e-01, 1.86073989e-02, 1.86753126e-02, 9.06860885e-01, 1.06734142e-01, 1.06515798e-01], - [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], - [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], - [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], - [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], - [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], - ] - ).T - - true_ci = np.empty(shape=(2, 3, 75), dtype=cix.dtype) - true_ci[0], true_ci[1] = cix[0:3], cix[3:] - - return event, time, true_x, true_y, true_ci + return event, ftime, true_x, true_y class TestCumIncCompetingRisks: @@ -6955,8 +7581,8 @@ def test_truncated(event, time, true_x, true_y): assert_array_almost_equal(y[0], y[1:].sum(axis=0)) @staticmethod - @pytest.mark.parametrize("event, time, true_x, true_y, true_ci", CGVHD_DataSets().get_cases()) - def test_ci(event, time, true_x, true_y, true_ci): + @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) + def test_ci(event, time, true_x, true_y): x, y, ci = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Aalen") assert_array_equal(x, true_x) @@ -6967,14 +7593,15 @@ def test_ci(event, time, true_x, true_y, true_ci): assert_allclose(ci[:, 0, :], 1 - true_ci_0, rtol=5.0e-4) + true_ci = CGVHD_DataSets().true_ci_Aalen ci_1 = ci[:, 1:, :] nan_mask = np.isnan(true_ci) assert_allclose(np.ma.masked_array(ci_1, nan_mask), np.ma.masked_array(true_ci, nan_mask), rtol=5.0e-4) assert_array_equal(np.ma.masked_array(ci_1, ~nan_mask), np.ma.masked_array(np.zeros_like(ci_1), ~nan_mask)) @staticmethod - @pytest.mark.parametrize("event, time, true_x, true_y, true_ci", CGVHD_DataSets().get_cases()) - def test_scrucca_ci(event, time, true_x, true_y, true_ci): + @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) + def test_dinse_approx_ci(event, time, true_x, true_y): x, y, ci_full = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Dinse_Approx") t = 0.5 @@ -6985,6 +7612,21 @@ def test_scrucca_ci(event, time, true_x, true_y, true_ci): assert x[it] <= t < x[it + 1] assert_allclose(ci, true_ci, rtol=5.0e-4) + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) + def test_dinse_ci_regression(event, time, true_x, true_y): + x, y, ci = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Dinse") + + assert_array_equal(x, true_x) + _x_km, true_km, true_ci_0 = kaplan_meier_estimator(event > 0, time, conf_type="log-log") + + assert_array_almost_equal(y[0], 1 - true_km) + assert_array_almost_equal(y[1:], true_y) + assert_array_almost_equal(y[0], y[1:].sum(axis=0)) + + true_ci = CGVHD_DataSets.true_ci_Dinse + assert_array_almost_equal(ci, true_ci) + @staticmethod @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) @pytest.mark.parametrize("conf_level", [None, -1, 1.0, 3.0, np.inf, np.nan]) @@ -6992,22 +7634,20 @@ def test_invalid_conf_level_competing_risks(event, time, true_x, true_y, conf_le msg = f"conf_level must be a float in the range (0.0, 1.0), but was {conf_level}" with pytest.raises(ValueError, match=re.escape(msg)): cumulative_incidence_competing_risks( - event, time, conf_level=conf_level, conf_type="log-log", var_type="Dinse_Approx" + event, time, conf_level=conf_level, conf_type="log-log", var_type="Aalen" ) @staticmethod - @pytest.mark.parametrize("conf_type", ["None", -1, "", "not"]) @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + @pytest.mark.parametrize("conf_type", ["None", -1, "", "not"]) def test_invalid_conf_type_competing_risks(event, time, true_x, true_y, conf_type): msg = f"conf_type must be None or a str among {{'log-log'}}, but was {conf_type!r}" with pytest.raises(ValueError, match=msg): - cumulative_incidence_competing_risks( - event, time, conf_level=0.9, conf_type=conf_type, var_type="Dinse_Approx" - ) + cumulative_incidence_competing_risks(event, time, conf_level=0.9, conf_type=conf_type, var_type="Aalen") @staticmethod - @pytest.mark.parametrize("var_type", ["None", "dinse", 1, "", "not"]) @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + @pytest.mark.parametrize("var_type", ["None", "dinse", 1, "", "not"]) def test_invalid_var_type_competing_risks(event, time, true_x, true_y, var_type): msg = f"{var_type=} not implemented." with pytest.raises(ValueError, match=msg): From 344e15da742132245cf63e4c50096d8561cb0109 Mon Sep 17 00:00:00 2001 From: valverde Date: Thu, 12 Dec 2024 23:52:45 +0900 Subject: [PATCH 06/11] Add comments to clarify the meaning of the CI tests. --- tests/test_nonparametric.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py index 3e7efd87..82927dc1 100644 --- a/tests/test_nonparametric.py +++ b/tests/test_nonparametric.py @@ -7583,6 +7583,7 @@ def test_truncated(event, time, true_x, true_y): @staticmethod @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) def test_ci(event, time, true_x, true_y): + "The true CI values were generated using the cmprsk R package." x, y, ci = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Aalen") assert_array_equal(x, true_x) @@ -7602,6 +7603,9 @@ def test_ci(event, time, true_x, true_y): @staticmethod @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) def test_dinse_approx_ci(event, time, true_x, true_y): + """The true CI values are taken from + M. Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006, + """ x, y, ci_full = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Dinse_Approx") t = 0.5 @@ -7615,6 +7619,10 @@ def test_dinse_approx_ci(event, time, true_x, true_y): @staticmethod @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) def test_dinse_ci_regression(event, time, true_x, true_y): + """An external implementation of the Dinse variance haven't been found, + so the true CI values are taken from this internal code. + This is just thus a regression test. + """ x, y, ci = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Dinse") assert_array_equal(x, true_x) From 90a379cf5b202feef14b1c255e4f2ddc8f37568e Mon Sep 17 00:00:00 2001 From: valverde Date: Tue, 7 Jan 2025 10:33:19 +0900 Subject: [PATCH 07/11] Changes in the pull request review: - Better documentation. - Some errors with `::` in Rest are corrected. - Clearer processing of the cum_inc = 0 case when calculating Confidence Intervals. --- sksurv/datasets/base.py | 64 ++++++++++++++++++++++++------------- sksurv/nonparametric.py | 28 +++++++++------- tests/test_nonparametric.py | 2 +- 3 files changed, 60 insertions(+), 34 deletions(-) diff --git a/sksurv/datasets/base.py b/sksurv/datasets/base.py index 43970590..9e386fc3 100644 --- a/sksurv/datasets/base.py +++ b/sksurv/datasets/base.py @@ -489,26 +489,47 @@ def load_cgvhd(): initiated for patients with a myeloid malignancy who were to undergo an allogeneic bone marrow transplant. - The dataset has 100 samples and 4 features::: + The dataset has 100 samples. + + +-------+------------+----------------------------------------------+-------------------------------------------+ + | Index | Name | Description | Encoding | + +=======+============+==============================================+===========================================+ + | 1 | dx | Diagnosis | | AML=acute myeloid leukaemia | + | | | | | CML=chronic myeloid leukaemia | + | | | | | MDS=myelodysplastic syndrome | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 2 | tx | Randomized treatment | | BM=cell harvested from the bone marrow | + | | | | | PB=cell harvested from peripheral blood | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 3 | extent | Extent of disease | L=limited, E=extensive | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 4 | agvhdgd | Grade of acute GVHD | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 5 | age | Age | Years | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 6 | survtime | Time from date of transplant to death | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 7 | reltime | Time from date of transplant to relapse | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 8 | agvhtime | Time from date of transplant to acute GVHD | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 9 | cgvhtime | Time from date of transplant to chronic GVHD | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 10 | stat | Status | 1=Dead, 0=Alive | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 11 | rcens | Relapse | 1=Yes, 0=No | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 12 | agvh | Acute GVHD | 1=Yes, 0=No | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 13 | cgvh | Chronic GVHD | 1=Yes, 0=No | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 14 | patient ID | | | + +-------+------------+----------------------------------------------+-------------------------------------------+ - 1. dx: Diagnosis: AML=acute myeloid leukaemia, CML=chronic myeloid leukaemia, MDS=myelodysplastic syndrome - 2. tx: Randomized treatment: BM=cell harvested from the bone marrow, PB=cell harvested from the peripheral blood - 3. extent: Extent of disease: L=limited, E=extensive - 5. age: Age (years) - - The rest of the columns correspond to outcome variables::: - - 6. survtime: Time from date of transplant to death or last follow-up (Years) - 7. reltime: Time from date of transplant to relapse or last follow-up (Years) - 8. agvhtime: Time from date of transplant to acute GVHD or last follow-up (Years) - 9. cgvhtime: Time from date of transplant to chronic GVHD or last follow-up (Years) - 10. stat Status: 1=Dead, 0=Alive - 11. rcens Relapse: 1=Yes, 0=No - 4. agvhdgd: Grade of acute GVHD - 12. agvh: Acute GVHD: 1=Yes, 0=No - 13. cgvh: Chronic GVHD: 1=Yes, 0=No - - The last column (14) is a patient ID. Columns 6,7 and 9 contain the time to death, relapse and CGVHD calculated in years (survtime, reltime, cgvhtime) and the @@ -518,7 +539,7 @@ def load_cgvhd(): censoring variable cens is coded as 0 when no events were observed, 1 if CGVHD was observed as first event, 2 if a relapse was observed as the first event and 3 if death occurred before - either of the events: The endpoint (status) are therefore: + either of the events: The endpoint (status) are therefore:: 0. Survival (Right-censored data). 4 patients (4%) 1. Chronic graft versus host disease (CGVHD). 86 events (86%) @@ -541,8 +562,7 @@ def load_cgvhd(): ---------- .. [1] Melania Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006 - .. [2] https://drive.google.com/file/d/1FPM264pE\_-F8DB7lvFeLB1yQ3HDo7c-i/view - https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics + .. [2] https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics """ full_path = _get_data_path("cgvhd.arff") data = loadarff(full_path) diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index 4eb3e0f5..c5693e05 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -604,15 +604,15 @@ def predict_ipcw(self, y): def _cr_ci_logmlog(cum_inc, sigma_t, z): """Compute the pointwise log-minus-log transformed confidence intervals""" eps = np.finfo(cum_inc.dtype).eps + zero_count = cum_inc <= eps log_cum_i = np.zeros_like(cum_inc) - np.log(cum_inc, where=cum_inc > eps, out=log_cum_i) + np.log(cum_inc, where=~zero_count, out=log_cum_i) theta = np.zeros_like(cum_inc) den = cum_inc * log_cum_i - np.divide(sigma_t, den, where=den < -eps, out=theta) + np.divide(sigma_t, den, where=~zero_count, out=theta) theta = z * np.multiply.outer(np.array([-1, 1]), theta) ci = np.exp(log_cum_i * np.exp(theta)) - ci[:, cum_inc <= eps] = 0.0 - ci[:, 1.0 - cum_inc <= eps] = 1.0 + ci[:, zero_count] = 0.0 return ci @@ -635,7 +635,7 @@ def cumulative_incidence_competing_risks( time_min=None, conf_level=0.95, conf_type=None, - var_type=None, + var_type="Dinse", ): """Non-parametric estimator of Cumulative Incidence function in the case of competing risks. @@ -665,8 +665,9 @@ def cumulative_incidence_competing_risks( If "log-log", estimate confidence intervals using the log hazard or :math:`log(-log(S(t)))`. - var_type : None or one of {'Dinse', 'Dinse_Approx', 'Aalen'}, optional, default: None + var_type : None or one of {'Dinse', 'Dinse_Approx', 'Aalen'}, optional, default: 'Dinse' The method for estimating the variance of the estimator. + See [2]_, [3]_ and [4]_ for each of the methods. Only valid if conf_type is valid. Returns @@ -708,6 +709,11 @@ def cumulative_incidence_competing_risks( ---------- .. [1] Kalbfleisch, J.D. and Prentice, R.L. (2002) The Statistical Analysis of Failure Time Data. 2nd Edition, John Wiley and Sons, New York. + .. [2] Dinse and Larson, Biometrika (1986), 379. Sect. 4, Eqs. 4 and 5. + .. [3] Dinse and Larson, Biometrika (1986), 379. Sect. 4, Eq. 6. + .. [4] Aalen, O. (1978a). Annals of Statistics, 6, 534–545. + We implement the formula in M. Pintilie: "Competing Risks: A Practical Perspective". + John Wiley & Sons, 2006, Eq. 4.5 """ event, time_exit = check_y_survival(event, time_exit, allow_all_censored=True, competing_risks=True) check_consistent_length(event, time_exit) @@ -746,7 +752,7 @@ def cumulative_incidence_competing_risks( elif var_type == "Aalen": var = _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc) else: - raise ValueError(f"{var_type=} not implemented.") + raise ValueError(f"{var_type=} must be one of 'Dinse', 'Dinse_approx' or 'Aalen'.") _x, _y, conf_int_km = kaplan_meier_estimator(event > 0, time_exit, conf_type="log-log") ci = np.empty(shape=(2, n_risks + 1, n_t), dtype=conf_int_km.dtype) @@ -768,9 +774,9 @@ def _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc): irt = cum_inc[1:, :, np.newaxis] - cum_inc[1:, np.newaxis, :] mask = np.tril(np.ones_like(irt[0])) - var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, dr / (n_at_risk * (n_at_risk - dr))) + var_a = np.sum(irt**2 * mask * (dr / (n_at_risk * (n_at_risk - dr))), axis=2) var_b = np.cumsum(((n_at_risk - dr_cr) / n_at_risk) * (dr_cr / n_at_risk**2) * kpe_prime**2, axis=1) - var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, dr_cr, kpe_prime / n_at_risk**2) + var_c = -2 * np.sum(irt * mask * dr_cr[:, np.newaxis, :] * (kpe_prime / n_at_risk**2), axis=2) var = var_a + var_b + var_c return var @@ -821,7 +827,7 @@ def _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): _va = np.zeros_like(kpe_prime) den_a = (n_at_risk - 1) * (n_at_risk - dr) np.divide(dr, den_a, out=_va, where=den_a > 0) - var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, _va) + var_a = np.sum(irt**2 * mask * _va, axis=2) _vb = np.zeros_like(kpe_prime) den_b = (n_at_risk - 1) * n_at_risk**2 @@ -832,7 +838,7 @@ def _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): _vcb = np.zeros_like(kpe_prime) den_c = n_at_risk * (n_at_risk - dr) * (n_at_risk - 1) np.divide(kpe_prime, den_c, out=_vcb, where=den_c > 0) - var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, _vca, _vcb) + var_c = -2 * np.sum(irt * mask * _vca[:, np.newaxis, :] * _vcb, axis=2) var = var_a + var_b + var_c return var diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py index 82927dc1..4b661c0a 100644 --- a/tests/test_nonparametric.py +++ b/tests/test_nonparametric.py @@ -7657,6 +7657,6 @@ def test_invalid_conf_type_competing_risks(event, time, true_x, true_y, conf_typ @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) @pytest.mark.parametrize("var_type", ["None", "dinse", 1, "", "not"]) def test_invalid_var_type_competing_risks(event, time, true_x, true_y, var_type): - msg = f"{var_type=} not implemented." + msg = f"{var_type=} must be one of 'Dinse', 'Dinse_approx' or 'Aalen'." with pytest.raises(ValueError, match=msg): cumulative_incidence_competing_risks(event, time, conf_level=0.95, conf_type="log-log", var_type=var_type) From f61777632366be03cfedafd2e8ca5e2270e568c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Tue, 7 Jan 2025 20:03:38 +0000 Subject: [PATCH 08/11] Improve API doc of load_cgvhd() --- sksurv/datasets/base.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sksurv/datasets/base.py b/sksurv/datasets/base.py index 9e386fc3..bdd98da9 100644 --- a/sksurv/datasets/base.py +++ b/sksurv/datasets/base.py @@ -527,10 +527,9 @@ def load_cgvhd(): +-------+------------+----------------------------------------------+-------------------------------------------+ | 13 | cgvh | Chronic GVHD | 1=Yes, 0=No | +-------+------------+----------------------------------------------+-------------------------------------------+ - | 14 | patient ID | | | + | 14 | stnum | patient ID | | +-------+------------+----------------------------------------------+-------------------------------------------+ - Columns 6,7 and 9 contain the time to death, relapse and CGVHD calculated in years (survtime, reltime, cgvhtime) and the respective indicator variables are in columns 10,11 and 13 (stat, @@ -539,12 +538,19 @@ def load_cgvhd(): censoring variable cens is coded as 0 when no events were observed, 1 if CGVHD was observed as first event, 2 if a relapse was observed as the first event and 3 if death occurred before - either of the events: The endpoint (status) are therefore:: - - 0. Survival (Right-censored data). 4 patients (4%) - 1. Chronic graft versus host disease (CGVHD). 86 events (86%) - 2. Relapse (TRM). 5 events (5%) - 3. Death. 5 events (5%) + either of the events: The endpoint (status) is therefore defined as + + +-------+-------------------------------------------+-----------------+ + | Value | Description | Count (%) | + +=======+===========================================+=================+ + | 0 | Survival (Right-censored data) | 4 patients (4%) | + +-------+-------------------------------------------+-----------------+ + | 1 | Chronic graft versus host disease (CGVHD) | 86 events (86%) | + +-------+-------------------------------------------+-----------------+ + | 2 | Relapse (TRM) | 5 events (5%) | + +-------+-------------------------------------------+-----------------+ + | 3 | Death | 5 events (5%) | + +-------+-------------------------------------------+-----------------+ See [1]_ for further description and [2]_ for the dataset. @@ -554,7 +560,7 @@ def load_cgvhd(): The measurements for each patient. y : structured array with 2 fields - *status*: Integer indicating the endpoint: 0-(right censored data), 1-(GCVHD), 2-(relapse), 3-(death) + *status*: Integer indicating the endpoint: 0: right censored data; 1: GCVHD; 2: relapse; 3: death. *ftime*: total length of follow-up or time of event. From 6d389db6261dd6e74ae5d814fa9bd7567bc1e384 Mon Sep 17 00:00:00 2001 From: valverde Date: Thu, 9 Jan 2025 14:50:46 +0900 Subject: [PATCH 09/11] Better documentation of the CGVHD dataset. Reverting to use einsum. Other couple of minor changes. --- sksurv/datasets/base.py | 7 +++---- sksurv/nonparametric.py | 28 ++++++++++++++++------------ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/sksurv/datasets/base.py b/sksurv/datasets/base.py index bdd98da9..e590877a 100644 --- a/sksurv/datasets/base.py +++ b/sksurv/datasets/base.py @@ -489,14 +489,13 @@ def load_cgvhd(): initiated for patients with a myeloid malignancy who were to undergo an allogeneic bone marrow transplant. - The dataset has 100 samples. + The available dataset [1]_ is a 100 size subsample of the full data set. See [2]_ for further details. +-------+------------+----------------------------------------------+-------------------------------------------+ | Index | Name | Description | Encoding | +=======+============+==============================================+===========================================+ | 1 | dx | Diagnosis | | AML=acute myeloid leukaemia | | | | | | CML=chronic myeloid leukaemia | - | | | | | MDS=myelodysplastic syndrome | +-------+------------+----------------------------------------------+-------------------------------------------+ | 2 | tx | Randomized treatment | | BM=cell harvested from the bone marrow | | | | | | PB=cell harvested from peripheral blood | @@ -566,9 +565,9 @@ def load_cgvhd(): References ---------- - .. [1] Melania Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006 + .. [1] https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics - .. [2] https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics + .. [2] Melania Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006 """ full_path = _get_data_path("cgvhd.arff") data = loadarff(full_path) diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index c5693e05..eb2a164d 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -604,15 +604,15 @@ def predict_ipcw(self, y): def _cr_ci_logmlog(cum_inc, sigma_t, z): """Compute the pointwise log-minus-log transformed confidence intervals""" eps = np.finfo(cum_inc.dtype).eps - zero_count = cum_inc <= eps + non_zero_count = cum_inc > eps log_cum_i = np.zeros_like(cum_inc) - np.log(cum_inc, where=~zero_count, out=log_cum_i) + np.log(cum_inc, where=non_zero_count, out=log_cum_i) theta = np.zeros_like(cum_inc) den = cum_inc * log_cum_i - np.divide(sigma_t, den, where=~zero_count, out=theta) + np.divide(sigma_t, den, where=non_zero_count, out=theta) theta = z * np.multiply.outer(np.array([-1, 1]), theta) ci = np.exp(log_cum_i * np.exp(theta)) - ci[:, zero_count] = 0.0 + ci[:, ~non_zero_count] = 0.0 return ci @@ -774,9 +774,11 @@ def _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc): irt = cum_inc[1:, :, np.newaxis] - cum_inc[1:, np.newaxis, :] mask = np.tril(np.ones_like(irt[0])) - var_a = np.sum(irt**2 * mask * (dr / (n_at_risk * (n_at_risk - dr))), axis=2) + # var_a = np.sum(irt**2 * mask * (dr / (n_at_risk * (n_at_risk - dr))), axis=2) + var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, dr / (n_at_risk * (n_at_risk - dr))) var_b = np.cumsum(((n_at_risk - dr_cr) / n_at_risk) * (dr_cr / n_at_risk**2) * kpe_prime**2, axis=1) - var_c = -2 * np.sum(irt * mask * dr_cr[:, np.newaxis, :] * (kpe_prime / n_at_risk**2), axis=2) + # var_c = -2 * np.sum(irt * mask * dr_cr[:, np.newaxis, :] * (kpe_prime / n_at_risk**2), axis=2) + var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, dr_cr, kpe_prime / n_at_risk**2) var = var_a + var_b + var_c return var @@ -793,10 +795,10 @@ def _var_dinse(n_events_cr, kpe_prime, n_at_risk): x = dr / (n_at_risk * (n_at_risk - dr)) cprod = np.cumprod(1 + x) / (1 + x) - n_t = dr.size - i_idx = np.arange(n_t)[:, None, None] - j_idx = np.arange(n_t)[None, :, None] - k_idx = np.arange(n_t)[None, None, :] + nt_range = np.arange(dr.size) + i_idx = nt_range[:, None, None] + j_idx = nt_range[None, :, None] + k_idx = nt_range[None, None, :] mask = ((j_idx < i_idx) & (k_idx > j_idx) & (k_idx <= i_idx)).astype(int) _v1 = np.zeros_like(theta) @@ -827,7 +829,8 @@ def _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): _va = np.zeros_like(kpe_prime) den_a = (n_at_risk - 1) * (n_at_risk - dr) np.divide(dr, den_a, out=_va, where=den_a > 0) - var_a = np.sum(irt**2 * mask * _va, axis=2) + # var_a = np.sum(irt**2 * mask * _va, axis=2) + var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, _va) _vb = np.zeros_like(kpe_prime) den_b = (n_at_risk - 1) * n_at_risk**2 @@ -838,7 +841,8 @@ def _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): _vcb = np.zeros_like(kpe_prime) den_c = n_at_risk * (n_at_risk - dr) * (n_at_risk - 1) np.divide(kpe_prime, den_c, out=_vcb, where=den_c > 0) - var_c = -2 * np.sum(irt * mask * _vca[:, np.newaxis, :] * _vcb, axis=2) + # var_c = -2 * np.sum(irt * mask * _vca[:, np.newaxis, :] * _vcb, axis=2) + var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, _vca, _vcb) var = var_a + var_b + var_c return var From 32f4bc076e27e9f039a1f3ea70fe8d7bf5b9118e Mon Sep 17 00:00:00 2001 From: valverde Date: Thu, 9 Jan 2025 15:34:14 +0900 Subject: [PATCH 10/11] Refactor the calculation of confidence intervals. --- sksurv/nonparametric.py | 43 +++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index eb2a164d..52eab0cb 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -188,17 +188,20 @@ def _compute_counts_truncated(event, time_enter, time_exit): return uniq_times, event_counts, total_counts -def _ci_logmlog(prob_survival, sigma_t, z): - """Compute the pointwise log-minus-log transformed confidence intervals""" - eps = np.finfo(prob_survival.dtype).eps - log_p = np.zeros_like(prob_survival) - np.log(prob_survival, where=prob_survival > eps, out=log_p) - theta = np.zeros_like(prob_survival) +def _ci_logmlog(s, sigma_t, z): + """Compute the pointwise log-minus-log transformed confidence intervals. + s refers to the prob_survival or the cum_inc (for the competing risks case). + sigma_t is the square root of the estimator of the log of the variance of s. + """ + eps = np.finfo(s.dtype).eps + mask = s > eps + log_p = np.zeros_like(s) + np.log(s, where=mask, out=log_p) + theta = np.zeros_like(s) np.true_divide(sigma_t, log_p, where=log_p < -eps, out=theta) - theta = np.array([[-1], [1]]) * theta * z + theta = z * np.multiply.outer([-1, 1], theta) ci = np.exp(np.exp(theta) * log_p) - ci[:, prob_survival <= eps] = 0.0 - ci[:, 1.0 - prob_survival <= eps] = 1.0 + ci[:, ~mask] = 0.0 return ci @@ -601,31 +604,17 @@ def predict_ipcw(self, y): return weights -def _cr_ci_logmlog(cum_inc, sigma_t, z): - """Compute the pointwise log-minus-log transformed confidence intervals""" - eps = np.finfo(cum_inc.dtype).eps - non_zero_count = cum_inc > eps - log_cum_i = np.zeros_like(cum_inc) - np.log(cum_inc, where=non_zero_count, out=log_cum_i) - theta = np.zeros_like(cum_inc) - den = cum_inc * log_cum_i - np.divide(sigma_t, den, where=non_zero_count, out=theta) - theta = z * np.multiply.outer(np.array([-1, 1]), theta) - ci = np.exp(log_cum_i * np.exp(theta)) - ci[:, ~non_zero_count] = 0.0 - return ci - - def _cum_inc_cr_ci_estimator(cum_inc, var, conf_level, conf_type): if conf_type not in {"log-log"}: raise ValueError(f"conf_type must be None or a str among {{'log-log'}}, but was {conf_type!r}") if not isinstance(conf_level, numbers.Real) or not np.isfinite(conf_level) or conf_level <= 0 or conf_level >= 1.0: raise ValueError(f"conf_level must be a float in the range (0.0, 1.0), but was {conf_level!r}") - + eps = np.finfo(var.dtype).eps z = stats.norm.isf((1.0 - conf_level) / 2.0) - sigma = np.sqrt(var) - ci = _cr_ci_logmlog(cum_inc[1:], sigma, z) + sigma = np.zeros_like(var) + np.divide(np.sqrt(var), cum_inc[1:], where=var > eps, out=sigma) + ci = _ci_logmlog(cum_inc[1:], sigma, z) return ci From 28d765225ea657f7e183343d3c128f56a971a401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Sun, 12 Jan 2025 20:43:19 +0000 Subject: [PATCH 11/11] Fix API doc of _ci_logmlog --- sksurv/nonparametric.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index 52eab0cb..5d0d1f81 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -189,9 +189,13 @@ def _compute_counts_truncated(event, time_enter, time_exit): def _ci_logmlog(s, sigma_t, z): - """Compute the pointwise log-minus-log transformed confidence intervals. + r"""Compute the pointwise log-minus-log transformed confidence intervals. s refers to the prob_survival or the cum_inc (for the competing risks case). - sigma_t is the square root of the estimator of the log of the variance of s. + sigma_t is the square root of the variance of the log of the estimator of s. + + .. math:: + + \sigma_t = \mathrm{Var}(\log(\hat{S}(t))) """ eps = np.finfo(s.dtype).eps mask = s > eps