diff --git a/doc/api/datasets.rst b/doc/api/datasets.rst index 627c7a45..67c93eee 100644 --- a/doc/api/datasets.rst +++ b/doc/api/datasets.rst @@ -9,6 +9,7 @@ Datasets load_aids load_arff_files_standardized load_bmt + load_cgvhd load_breast_cancer load_flchain load_gbsg2 diff --git a/sksurv/datasets/__init__.py b/sksurv/datasets/__init__.py index d00f17e2..51649e96 100644 --- a/sksurv/datasets/__init__.py +++ b/sksurv/datasets/__init__.py @@ -4,6 +4,7 @@ load_arff_files_standardized, # noqa: F401 load_bmt, # noqa: F401 load_breast_cancer, # noqa: F401 + load_cgvhd, # noqa: F401 load_flchain, # noqa: F401 load_gbsg2, # noqa: F401 load_veterans_lung_cancer, # noqa: F401 diff --git a/sksurv/datasets/base.py b/sksurv/datasets/base.py index 10e9bd57..e590877a 100644 --- a/sksurv/datasets/base.py +++ b/sksurv/datasets/base.py @@ -13,6 +13,7 @@ "load_arff_files_standardized", "load_aids", "load_bmt", + "load_cgvhd", "load_breast_cancer", "load_flchain", "load_gbsg2", @@ -474,9 +475,108 @@ def load_bmt(): .. [1] https://doi.org/10.1038/sj.bmt.1705727 Scrucca, L., Santucci, A. & Aversa, F.: "Competing risk analysis using R: an easy guide for clinicians. Bone Marrow Transplant 40, 381–387 (2007)" + .. [2] https://luca-scr.github.io/R/bmt.csv """ full_path = _get_data_path("bmt.arff") data = loadarff(full_path) data["ftime"] = data["ftime"].astype(int) return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True) + + +def load_cgvhd(): + r"""Load and return data from multicentre randomized clinical trial + initiated for patients with a myeloid malignancy who were to + undergo an allogeneic bone marrow transplant. + + The available dataset [1]_ is a 100 size subsample of the full data set. See [2]_ for further details. + + +-------+------------+----------------------------------------------+-------------------------------------------+ + | Index | Name | Description | Encoding | + +=======+============+==============================================+===========================================+ + | 1 | dx | Diagnosis | | AML=acute myeloid leukaemia | + | | | | | CML=chronic myeloid leukaemia | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 2 | tx | Randomized treatment | | BM=cell harvested from the bone marrow | + | | | | | PB=cell harvested from peripheral blood | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 3 | extent | Extent of disease | L=limited, E=extensive | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 4 | agvhdgd | Grade of acute GVHD | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 5 | age | Age | Years | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 6 | survtime | Time from date of transplant to death | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 7 | reltime | Time from date of transplant to relapse | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 8 | agvhtime | Time from date of transplant to acute GVHD | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 9 | cgvhtime | Time from date of transplant to chronic GVHD | Years | + | | | or last follow-up | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 10 | stat | Status | 1=Dead, 0=Alive | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 11 | rcens | Relapse | 1=Yes, 0=No | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 12 | agvh | Acute GVHD | 1=Yes, 0=No | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 13 | cgvh | Chronic GVHD | 1=Yes, 0=No | + +-------+------------+----------------------------------------------+-------------------------------------------+ + | 14 | stnum | patient ID | | + +-------+------------+----------------------------------------------+-------------------------------------------+ + + Columns 6,7 and 9 contain the time to death, relapse and CGVHD + calculated in years (survtime, reltime, cgvhtime) and the + respective indicator variables are in columns 10,11 and 13 (stat, + rcens, cgvh). The earliest time that any of these events happened + is calculated by taking the minimum of the observed times. The + censoring variable cens is coded as 0 when no events were + observed, 1 if CGVHD was observed as first event, 2 if a relapse + was observed as the first event and 3 if death occurred before + either of the events: The endpoint (status) is therefore defined as + + +-------+-------------------------------------------+-----------------+ + | Value | Description | Count (%) | + +=======+===========================================+=================+ + | 0 | Survival (Right-censored data) | 4 patients (4%) | + +-------+-------------------------------------------+-----------------+ + | 1 | Chronic graft versus host disease (CGVHD) | 86 events (86%) | + +-------+-------------------------------------------+-----------------+ + | 2 | Relapse (TRM) | 5 events (5%) | + +-------+-------------------------------------------+-----------------+ + | 3 | Death | 5 events (5%) | + +-------+-------------------------------------------+-----------------+ + + See [1]_ for further description and [2]_ for the dataset. + + Returns + ------- + x : pandas.DataFrame + The measurements for each patient. + + y : structured array with 2 fields + *status*: Integer indicating the endpoint: 0: right censored data; 1: GCVHD; 2: relapse; 3: death. + + *ftime*: total length of follow-up or time of event. + + References + ---------- + .. [1] https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics + + .. [2] Melania Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006 + """ + full_path = _get_data_path("cgvhd.arff") + data = loadarff(full_path) + data["ftime"] = data[["survtime", "reltime", "cgvhtime"]].min(axis=1) + data["status"] = ( + ((data["ftime"] == data["cgvhtime"]) & (data["cgvh"] == "1")).astype(int) + + 2 * ((data["ftime"] == data["reltime"]) & (data["rcens"] == "1")).astype(int) + + 3 * ((data["ftime"] == data["survtime"]) & (data["stat"] == "1")).astype(int) + ) + data = data[["ftime", "status", "dx", "tx", "extent", "age"]] + + return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True) diff --git a/sksurv/datasets/data/README.md b/sksurv/datasets/data/README.md index 567d89a5..5f6d2db2 100644 --- a/sksurv/datasets/data/README.md +++ b/sksurv/datasets/data/README.md @@ -12,6 +12,7 @@ for survival analysis. | veteran | [Veteran's Lung Cancer][Kalbfleisch2008] | 137 | 6 | 128 (93.4%) | Death | | whas500 | [Worcester Heart Attack Study][Hosmer2008] | 500 | 14 | 215 (43.0%) | Death | | BMT | [Leukemia HSC Transplant][Scrucca2007] | 35 | 1 | 24 (68.6%) | Transplant related death or relapse | +| GCVHD | [GCVHD][Pintilie2006] | 100 | 4 | 96 (96%) | Chronic graft disease (GCVHD), relapse or death | [Desmedt2007]: http://dx.doi.org/10.1158/1078-0432.CCR-06-2765 "Desmedt, C., Piette, F., Loi et al.: Strong Time Dependence of the 76-Gene Prognostic Signature for Node-Negative Breast Cancer Patients in the TRANSBIG Multicenter Independent Validation Series. Clin. Cancer Res. 13(11), 3207–14 (2007)" @@ -24,3 +25,5 @@ for survival analysis. [Schumacher1994]: http://ascopubs.org/doi/abs/10.1200/jco.1994.12.10.2086 "Schumacher, M., Basert, G., Bojar, H., et al. Randomized 2 × 2 trial evaluating hormonal treatment and the duration of chemotherapy in node-positive breast cancer patients. Journal of Clinical Oncology 12, 2086–2093. (1994)" [Scrucca2007]: https://doi.org/10.1038/sj.bmt.1705727 "Scrucca, L., Santucci, A. & Aversa, F. Competing risk analysis using R: an easy guide for clinicians. Bone Marrow Transplant 40, 381–387 (2007)" + +[Pintilie2006]: https://www.wiley.com/en-us/Competing+Risks%3A+A+Practical+Perspective-p-9780470870693 "Melania Pintilie: Competing Risks: A Practical Perspective. John Wiley & Sons, (2006)" diff --git a/sksurv/datasets/data/cgvhd.arff b/sksurv/datasets/data/cgvhd.arff new file mode 100644 index 00000000..634d06ab --- /dev/null +++ b/sksurv/datasets/data/cgvhd.arff @@ -0,0 +1,118 @@ +@RELATION CGVHD + +@ATTRIBUTE dx {CML, AML} +@ATTRIBUTE tx {PB, BM} +@ATTRIBUTE extent {L, E} +@ATTRIBUTE agvhdgd NUMERIC +@ATTRIBUTE age NUMERIC +@ATTRIBUTE survtime NUMERIC +@ATTRIBUTE reltime NUMERIC +@ATTRIBUTE agvhtime NUMERIC +@ATTRIBUTE cgvhtime NUMERIC +@ATTRIBUTE stat {0, 1} +@ATTRIBUTE rcens {0, 1} +@ATTRIBUTE agvh {1, 0} +@ATTRIBUTE cgvh {1, 0} +@ATTRIBUTE stnum NUMERIC + +@DATA +CML,PB,L,1,36,4.895,4.895,0.099,0.52,0,0,1,1,1 +AML,PB,L,3,57,3.474,0.753,0.101,0.408,1,1,1,1,2 +CML,PB,L,0,48,4.95,4.95,4.95,0.348,0,0,0,1,3 +AML,PB,L,2,52,4.643,4.643,0.057,0.482,0,0,1,1,4 +AML,PB,L,3,45,4.066,4.066,0.137,0.378,0,0,1,1,5 +AML,PB,L,3,47,1.558,0.416,0.055,1.558,1,1,1,0,6 +CML,PB,L,1,40,4.512,4.512,0.09,0.381,0,0,1,1,7 +AML,PB,L,3,38,4.041,4.041,0.082,0.914,0,0,1,1,8 +AML,PB,L,2,41,4.164,4.164,0.055,0.923,0,0,1,1,9 +CML,PB,L,0,50,4.011,4.011,4.011,0.397,0,0,0,1,10 +CML,PB,L,1,56,3.945,3.945,0.047,0.479,0,0,1,1,11 +CML,PB,L,2,56,4.361,4.361,0.079,0.991,0,0,1,1,12 +AML,PB,L,1,54,0.841,0.654,0.077,0.474,1,1,1,1,13 +CML,PB,L,3,25,2.951,2.951,0.164,0.339,0,0,1,1,14 +CML,PB,L,4,40,0.586,0.586,0.055,0.277,1,0,1,1,15 +CML,PB,L,0,41,3.559,3.559,3.559,0.367,0,0,0,1,16 +CML,PB,L,2,57,3.422,3.422,0.131,0.742,0,0,1,1,17 +CML,PB,L,3,62,0.408,0.408,0.408,0.408,0,0,1,1,18 +CML,PB,L,1,29,3.428,3.428,0.09,0.958,0,0,1,1,19 +AML,PB,L,1,44,0.063,0.063,0.014,0.063,1,0,1,0,20 +CML,PB,L,2,40,1.572,1.572,0.09,0.282,1,0,1,1,21 +CML,PB,L,1,54,1.013,1.013,0.093,0.413,1,0,1,1,22 +AML,PB,L,2,37,3.023,3.023,0.074,0.394,0,0,1,1,23 +AML,PB,L,1,58,2.979,2.979,0.079,0.342,0,0,1,1,24 +CML,PB,L,3,39,2.817,2.817,0.049,0.367,0,0,1,1,25 +CML,PB,L,2,31,2.804,2.804,0.137,0.277,0,0,1,1,26 +CML,PB,L,2,45,2.609,2.609,0.252,0.367,0,0,1,1,27 +AML,PB,L,0,48,2.508,2.508,2.508,0.331,0,0,0,1,28 +CML,PB,L,0,53,0.665,0.665,0.665,0.32,1,0,0,1,29 +CML,PB,L,0,29,2.497,2.497,2.497,0.329,0,0,0,1,30 +CML,PB,L,0,27,1.799,1.799,1.799,0.444,1,0,0,1,31 +AML,PB,L,3,45,0.471,0.438,0.071,0.471,1,1,1,0,32 +CML,PB,L,1,39,2.031,2.031,0.112,0.964,0,0,1,1,33 +CML,PB,L,3,49,2.073,2.073,0.063,0.564,0,0,1,1,34 +AML,PB,L,1,37,0.999,0.75,0.274,0.402,1,1,1,1,35 +AML,PB,L,3,53,0.427,0.427,0.055,0.277,1,0,1,1,36 +CML,PB,L,1,48,1.766,1.766,0.216,0.4,0,0,1,1,37 +AML,PB,L,1,59,1.555,1.555,0.178,0.446,0,0,1,1,38 +CML,PB,L,2,33,1.67,1.67,0.11,0.474,0,0,1,1,39 +CML,PB,L,0,38,1.607,1.607,1.607,0.329,0,0,0,1,40 +CML,PB,L,4,37,1.511,1.511,0.055,0.323,0,0,1,1,41 +AML,PB,L,3,41,1.287,1.287,0.049,0.392,0,0,1,1,42 +AML,PB,E,1,64,1.227,1.227,0.23,0.496,0,0,1,1,43 +CML,PB,L,3,32,1.3,1.3,0.063,0.63,0,0,1,1,44 +CML,PB,L,0,41,1.27,1.27,1.27,0.383,0,0,0,1,45 +AML,PB,E,1,56,1.205,1.205,0.074,1.205,0,0,1,0,46 +CML,PB,L,1,50,1.147,1.147,0.131,0.361,0,0,1,1,47 +CML,PB,L,3,37,1.109,1.109,0.055,0.277,0,0,1,1,48 +CML,PB,L,0,27,0.994,0.994,0.994,0.287,0,0,0,1,49 +CML,BM,L,3,45,4.572,4.572,0.066,0.619,0,0,1,1,50 +AML,BM,L,3,45,4.616,4.616,0.101,0.452,0,0,1,1,51 +AML,BM,L,2,42,4.0,4.0,0.027,0.29,0,0,1,1,52 +CML,BM,L,0,22,4.238,4.238,4.238,0.479,0,0,0,1,53 +AML,BM,L,4,47,0.11,0.11,0.074,0.11,1,0,1,0,54 +AML,BM,L,2,48,4.03,4.03,0.101,0.857,0,0,1,1,55 +AML,BM,L,2,49,3.124,2.527,0.115,1.993,1,1,1,1,56 +CML,BM,L,2,38,0.515,0.515,0.079,0.463,1,0,1,1,57 +CML,BM,L,1,39,4.222,3.149,0.085,0.496,0,1,1,1,58 +CML,BM,L,3,41,4.027,4.027,0.104,0.422,0,0,1,1,59 +CML,BM,L,2,46,1.969,1.969,0.038,0.307,1,0,1,1,60 +AML,BM,L,0,24,3.792,3.792,3.792,0.701,0,0,0,1,61 +AML,BM,L,3,32,0.427,0.427,0.041,0.279,1,0,1,1,62 +CML,BM,L,0,36,3.34,3.34,3.34,0.419,0,0,0,1,63 +CML,BM,L,1,53,3.504,0.72,0.112,0.616,0,1,1,1,64 +CML,BM,L,0,52,3.685,3.685,3.685,0.331,0,0,0,1,65 +CML,BM,L,1,59,0.181,0.181,0.049,0.181,1,0,1,0,66 +CML,BM,L,3,42,0.736,0.736,0.09,0.567,1,0,1,1,67 +CML,BM,L,1,65,0.287,0.287,0.052,0.287,1,0,1,0,68 +CML,BM,E,0,60,0.057,0.057,0.057,0.057,0,0,0,0,69 +CML,BM,L,2,61,3.107,3.107,0.088,0.764,0,0,1,1,70 +CML,BM,L,1,55,3.088,3.088,0.11,0.381,0,0,1,1,71 +AML,BM,E,0,48,0.446,0.274,0.446,0.446,1,1,0,0,72 +AML,BM,E,0,49,2.776,2.776,2.776,2.776,0,0,0,0,73 +CML,BM,L,0,36,0.693,0.172,0.693,0.635,1,1,0,1,74 +AML,BM,L,1,48,2.01,2.01,0.077,0.553,0,0,1,1,75 +CML,BM,L,0,47,2.374,2.374,2.374,0.287,0,0,0,1,76 +AML,BM,L,3,43,1.079,1.079,0.088,0.345,1,0,1,1,77 +CML,BM,L,0,56,2.604,2.604,2.604,0.375,0,0,0,1,78 +CML,BM,L,1,56,2.478,2.478,0.17,0.517,0,0,1,1,79 +CML,BM,L,0,36,2.338,2.338,2.338,0.457,0,0,0,1,80 +CML,BM,L,2,52,2.3,2.3,0.049,0.345,0,0,1,1,81 +CML,BM,E,1,44,0.219,0.219,0.145,0.219,1,0,1,0,82 +AML,BM,L,3,32,2.127,2.127,0.118,0.422,0,0,1,1,83 +AML,BM,L,1,44,2.034,2.034,0.096,0.479,0,0,1,1,84 +CML,BM,L,0,45,2.034,2.034,2.034,0.29,0,0,0,1,85 +AML,BM,L,3,48,2.007,2.007,0.088,0.35,0,0,1,1,86 +CML,BM,L,0,48,1.183,1.183,1.183,0.372,0,0,0,1,87 +AML,BM,L,3,42,0.375,0.375,0.096,0.277,1,0,1,1,88 +AML,BM,E,2,24,0.353,0.301,0.096,0.353,1,1,1,0,89 +CML,BM,L,2,26,1.566,1.566,0.137,0.474,0,0,1,1,90 +CML,BM,L,2,34,1.588,1.588,0.129,0.465,0,0,1,1,91 +CML,BM,L,0,57,1.243,1.243,1.243,0.433,0,0,0,1,92 +CML,BM,L,3,51,1.555,1.555,0.09,0.359,0,0,1,1,93 +AML,BM,L,2,54,1.202,1.202,0.192,1.202,0,0,1,0,94 +AML,BM,E,0,20,1.251,1.251,1.251,0.408,0,0,0,1,95 +AML,BM,L,2,39,1.114,1.114,0.074,0.402,0,0,1,1,96 +AML,BM,L,0,49,1.15,1.15,1.15,0.35,0,0,0,1,97 +CML,BM,L,1,42,0.997,0.997,0.142,0.411,0,0,1,1,98 +CML,BM,L,0,44,1.057,1.057,1.057,0.301,0,0,0,1,99 +CML,BM,L,1,56,1.125,1.125,0.129,0.32,0,0,1,1,100 diff --git a/sksurv/nonparametric.py b/sksurv/nonparametric.py index 21e45fad..5d0d1f81 100644 --- a/sksurv/nonparametric.py +++ b/sksurv/nonparametric.py @@ -188,17 +188,24 @@ def _compute_counts_truncated(event, time_enter, time_exit): return uniq_times, event_counts, total_counts -def _ci_logmlog(prob_survival, sigma_t, z): - """Compute the pointwise log-minus-log transformed confidence intervals""" - eps = np.finfo(prob_survival.dtype).eps - log_p = np.zeros_like(prob_survival) - np.log(prob_survival, where=prob_survival > eps, out=log_p) - theta = np.zeros_like(prob_survival) +def _ci_logmlog(s, sigma_t, z): + r"""Compute the pointwise log-minus-log transformed confidence intervals. + s refers to the prob_survival or the cum_inc (for the competing risks case). + sigma_t is the square root of the variance of the log of the estimator of s. + + .. math:: + + \sigma_t = \mathrm{Var}(\log(\hat{S}(t))) + """ + eps = np.finfo(s.dtype).eps + mask = s > eps + log_p = np.zeros_like(s) + np.log(s, where=mask, out=log_p) + theta = np.zeros_like(s) np.true_divide(sigma_t, log_p, where=log_p < -eps, out=theta) - theta = np.array([[-1], [1]]) * theta * z + theta = z * np.multiply.outer([-1, 1], theta) ci = np.exp(np.exp(theta) * log_p) - ci[:, prob_survival <= eps] = 0.0 - ci[:, 1.0 - prob_survival <= eps] = 1.0 + ci[:, ~mask] = 0.0 return ci @@ -601,7 +608,28 @@ def predict_ipcw(self, y): return weights -def cumulative_incidence_competing_risks(event, time_exit, time_min=None): +def _cum_inc_cr_ci_estimator(cum_inc, var, conf_level, conf_type): + if conf_type not in {"log-log"}: + raise ValueError(f"conf_type must be None or a str among {{'log-log'}}, but was {conf_type!r}") + + if not isinstance(conf_level, numbers.Real) or not np.isfinite(conf_level) or conf_level <= 0 or conf_level >= 1.0: + raise ValueError(f"conf_level must be a float in the range (0.0, 1.0), but was {conf_level!r}") + eps = np.finfo(var.dtype).eps + z = stats.norm.isf((1.0 - conf_level) / 2.0) + sigma = np.zeros_like(var) + np.divide(np.sqrt(var), cum_inc[1:], where=var > eps, out=sigma) + ci = _ci_logmlog(cum_inc[1:], sigma, z) + return ci + + +def cumulative_incidence_competing_risks( + event, + time_exit, + time_min=None, + conf_level=0.95, + conf_type=None, + var_type="Dinse", +): """Non-parametric estimator of Cumulative Incidence function in the case of competing risks. See [1]_ for further description. @@ -621,6 +649,20 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): Compute estimator conditional on survival at least up to the specified time. + conf_level : float, optional, default: 0.95 + The level for a two-sided confidence interval on the cumulative incidence curves. + + conf_type : None or {'log-log'}, optional, default: None. + The type of confidence intervals to estimate. + If `None`, no confidence intervals are estimated. + If "log-log", estimate confidence intervals using + the log hazard or :math:`log(-log(S(t)))`. + + var_type : None or one of {'Dinse', 'Dinse_Approx', 'Aalen'}, optional, default: 'Dinse' + The method for estimating the variance of the estimator. + See [2]_, [3]_ and [4]_ for each of the methods. + Only valid if conf_type is valid. + Returns ------- time : array, shape = (n_times,) @@ -631,6 +673,12 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): The first dimension indicates total risk (``cum_incidence[0]``), the dimension `i=1,..,n_risks` the incidence for each competing risk. + conf_int : array, shape = (2, n_risks + 1, n_times) + Pointwise confidence interval of the Kaplan-Meier estimator + at each unique time point (last index) + for all possible risks (second index) including overall risk (i=0). + Only provided if `conf_type` is not None. + Examples -------- Creating cumulative incidence curves: @@ -640,10 +688,12 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): >>> event = bmt_df["status"] >>> time = bmt_df["ftime"] >>> n_risks = event.max() - >>> x, y = cumulative_incidence_competing_risks(event, time) + >>> x, y, conf_int = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Aalen") >>> plt.step(x, y[0], where="post", label="Total risk") + >>> plt.fill_between(x, conf_int[0,0], conf_int[1,0], alpha=0.25, step="post") >>> for i in range(1, n_risks + 1): >>> plt.step(x, y[i], where="post", label=f"{i}-risk") + >>> plt.fill_between(x, conf_int[0,i], conf_int[1,i], alpha=0.25, step="post") >>> plt.ylim(0, 1) >>> plt.legend() >>> plt.show() @@ -652,6 +702,11 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): ---------- .. [1] Kalbfleisch, J.D. and Prentice, R.L. (2002) The Statistical Analysis of Failure Time Data. 2nd Edition, John Wiley and Sons, New York. + .. [2] Dinse and Larson, Biometrika (1986), 379. Sect. 4, Eqs. 4 and 5. + .. [3] Dinse and Larson, Biometrika (1986), 379. Sect. 4, Eq. 6. + .. [4] Aalen, O. (1978a). Annals of Statistics, 6, 534–545. + We implement the formula in M. Pintilie: "Competing Risks: A Practical Perspective". + John Wiley & Sons, 2006, Eq. 4.5 """ event, time_exit = check_y_survival(event, time_exit, allow_all_censored=True, competing_risks=True) check_consistent_length(event, time_exit) @@ -680,4 +735,107 @@ def cumulative_incidence_competing_risks(event, time_exit, time_min=None): cum_inc[1 : n_risks + 1] = np.cumsum((ratio[:, 1 : n_risks + 1].T * kpe_prime), axis=1) cum_inc[0] = 1.0 - kpe - return uniq_times, cum_inc + if conf_type is None: + return uniq_times, cum_inc + + if var_type == "Dinse": + var = _var_dinse(n_events_cr, kpe_prime, n_at_risk) + elif var_type == "Dinse_Approx": + var = _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc) + elif var_type == "Aalen": + var = _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc) + else: + raise ValueError(f"{var_type=} must be one of 'Dinse', 'Dinse_approx' or 'Aalen'.") + + _x, _y, conf_int_km = kaplan_meier_estimator(event > 0, time_exit, conf_type="log-log") + ci = np.empty(shape=(2, n_risks + 1, n_t), dtype=conf_int_km.dtype) + ci[:, 0, :] = 1 - conf_int_km + ci[:, 1:, :] = _cum_inc_cr_ci_estimator(cum_inc, var, conf_level, conf_type) + + return uniq_times, cum_inc, ci + + +def _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc): + """ + Variance estimator from Dinse and Larson, Biometrika (1986), 379 + See Section 4, Eqs. 6. + This is an approximation from the _var_dinse, so that one should be preferred. + However, this seems to be more common in the literature. + """ + dr = n_events_cr[:, 0] + dr_cr = n_events_cr[:, 1:].T + irt = cum_inc[1:, :, np.newaxis] - cum_inc[1:, np.newaxis, :] + mask = np.tril(np.ones_like(irt[0])) + + # var_a = np.sum(irt**2 * mask * (dr / (n_at_risk * (n_at_risk - dr))), axis=2) + var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, dr / (n_at_risk * (n_at_risk - dr))) + var_b = np.cumsum(((n_at_risk - dr_cr) / n_at_risk) * (dr_cr / n_at_risk**2) * kpe_prime**2, axis=1) + # var_c = -2 * np.sum(irt * mask * dr_cr[:, np.newaxis, :] * (kpe_prime / n_at_risk**2), axis=2) + var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, dr_cr, kpe_prime / n_at_risk**2) + + var = var_a + var_b + var_c + return var + + +def _var_dinse(n_events_cr, kpe_prime, n_at_risk): + """ + Variance estimator from Dinse and Larson, Biometrika (1986), 379 + See Section 4, Eqs. 4 and 5 + """ + dr = n_events_cr[:, 0] + dr_cr = n_events_cr[:, 1:].T + theta = dr_cr * kpe_prime / n_at_risk + x = dr / (n_at_risk * (n_at_risk - dr)) + cprod = np.cumprod(1 + x) / (1 + x) + + nt_range = np.arange(dr.size) + i_idx = nt_range[:, None, None] + j_idx = nt_range[None, :, None] + k_idx = nt_range[None, None, :] + mask = ((j_idx < i_idx) & (k_idx > j_idx) & (k_idx <= i_idx)).astype(int) + + _v1 = np.zeros_like(theta) + np.divide((n_at_risk - dr_cr), n_at_risk * dr_cr, out=_v1, where=dr_cr > 0) + v1 = np.cumsum(theta**2 * ((1 + _v1) * cprod - 1), axis=1) + + corr = (1 - 1 / n_at_risk) * cprod - 1 + v2 = 2 * np.einsum("rj,rk,ijk->ri", theta * corr, theta, mask) + var = v1 + v2 + + return var + + +def _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc): + """ + Variance estimator from Aalen + Aalen, O. (1978a). Nonparametric estimation of partial transition + probabilities in multiple decrement models. Annals of Statistics, 6, 534–545. + We implement it as shown in + M. Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006, Eq. 4.5 + This seems to be the estimator used in cmprsk, but there are some numerical differences with our implementation. + """ + dr = n_events_cr[:, 0] + dr_cr = n_events_cr[:, 1:].T + irt = cum_inc[1:, :, np.newaxis] - cum_inc[1:, np.newaxis, :] + mask = np.tril(np.ones_like(irt[0])) + + _va = np.zeros_like(kpe_prime) + den_a = (n_at_risk - 1) * (n_at_risk - dr) + np.divide(dr, den_a, out=_va, where=den_a > 0) + # var_a = np.sum(irt**2 * mask * _va, axis=2) + var_a = np.einsum("rjk,jk,k->rj", irt**2, mask, _va) + + _vb = np.zeros_like(kpe_prime) + den_b = (n_at_risk - 1) * n_at_risk**2 + np.divide(1.0, den_b, out=_vb, where=den_b > 0) + var_b = np.cumsum((n_at_risk - dr_cr) * dr_cr * _vb * kpe_prime**2, axis=1) + + _vca = dr_cr * (n_at_risk - dr_cr) + _vcb = np.zeros_like(kpe_prime) + den_c = n_at_risk * (n_at_risk - dr) * (n_at_risk - 1) + np.divide(kpe_prime, den_c, out=_vcb, where=den_c > 0) + # var_c = -2 * np.sum(irt * mask * _vca[:, np.newaxis, :] * _vcb, axis=2) + var_c = -2 * np.einsum("rjk,jk,rk,k->rj", irt, mask, _vca, _vcb) + + var = var_a + var_b + var_c + return var diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py index 7c46d25b..4b661c0a 100644 --- a/tests/test_nonparametric.py +++ b/tests/test_nonparametric.py @@ -1,11 +1,12 @@ from os.path import dirname, join +import re import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal import pandas as pd import pytest -from sksurv.datasets import load_bmt +from sksurv.datasets import load_bmt, load_cgvhd from sksurv.nonparametric import ( CensoringDistributionEstimator, SurvivalFunctionEstimator, @@ -6326,7 +6327,8 @@ def data_AML(self): return event, time, true_x, true_y - def data_three_competing_cases(self): + @staticmethod + def data_three_competing_cases(): time = np.array( [ 5.3, @@ -6496,6 +6498,1040 @@ def data_three_competing_cases(self): return event, time, true_x, true_y +class CGVHD_DataSets(FixtureParameterFactory): + + _ci_Aalen = np.array( + [ + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, 8.76630585e-04, np.nan, np.nan, 4.98093393e-02], + [np.nan, np.nan, 3.86470442e-03, np.nan, np.nan, 6.45435439e-02], + [np.nan, 8.76353733e-04, 3.86470442e-03, np.nan, 4.98160434e-02, 6.45435439e-02], + [np.nan, 8.76353733e-04, 8.09297347e-03, np.nan, 4.98160434e-02, 7.90139451e-02], + [np.nan, 8.76353733e-04, 1.31071435e-02, np.nan, 4.98160434e-02, 9.29739191e-02], + [np.nan, 3.86346901e-03, 1.31071435e-02, np.nan, 6.45537226e-02, 9.29739191e-02], + [1.86735626e-02, 3.86346901e-03, 1.31071435e-02, 1.06521413e-01, 6.45537226e-02, 9.29739191e-02], + [2.46648637e-02, 3.86346901e-03, 1.31071435e-02, 1.19714550e-01, 6.45537226e-02, 9.29739191e-02], + [3.09922063e-02, 3.86346901e-03, 1.31071435e-02, 1.32625202e-01, 6.45537226e-02, 9.29739191e-02], + [4.44345647e-02, 3.86346901e-03, 1.86753126e-02, 1.57768941e-01, 6.45537226e-02, 1.06515798e-01], + [5.86926671e-02, 3.86346901e-03, 1.86753126e-02, 1.82197926e-01, 6.45537226e-02, 1.06515798e-01], + [6.60668761e-02, 8.08749124e-03, 1.86753126e-02, 1.94196068e-01, 7.90421599e-02, 1.06515798e-01], + [7.35824051e-02, 8.08749124e-03, 1.86753126e-02, 2.06068244e-01, 7.90421599e-02, 1.06515798e-01], + [8.89863978e-02, 8.08749124e-03, 1.86753126e-02, 2.29479009e-01, 7.90421599e-02, 1.06515798e-01], + [9.68543051e-02, 8.08749124e-03, 1.86753126e-02, 2.41035520e-01, 7.90421599e-02, 1.06515798e-01], + [1.12882999e-01, 8.08749124e-03, 1.86753126e-02, 2.63883900e-01, 7.90421599e-02, 1.06515798e-01], + [1.29260771e-01, 8.08749124e-03, 1.86753126e-02, 2.86415701e-01, 7.90421599e-02, 1.06515798e-01], + [1.37567828e-01, 8.08749124e-03, 1.86753126e-02, 2.97574049e-01, 7.90421599e-02, 1.06515798e-01], + [1.45948291e-01, 8.08749124e-03, 1.86753126e-02, 3.08665304e-01, 7.90421599e-02, 1.06515798e-01], + [1.62916013e-01, 8.08749124e-03, 1.86753126e-02, 3.30658087e-01, 7.90421599e-02, 1.06515798e-01], + [1.71496808e-01, 8.08749124e-03, 1.86753126e-02, 3.41565431e-01, 7.90421599e-02, 1.06515798e-01], + [1.88840363e-01, 8.08749124e-03, 1.86753126e-02, 3.63212079e-01, 7.90421599e-02, 1.06515798e-01], + [1.97598250e-01, 8.08749124e-03, 1.86753126e-02, 3.73955784e-01, 7.90421599e-02, 1.06515798e-01], + [2.06411125e-01, 8.08749124e-03, 1.86753126e-02, 3.84648459e-01, 7.90421599e-02, 1.06515798e-01], + [2.33164884e-01, 8.08749124e-03, 1.86753126e-02, 4.16432824e-01, 7.90421599e-02, 1.06515798e-01], + [2.42181998e-01, 8.08749124e-03, 1.86753126e-02, 4.26935045e-01, 7.90421599e-02, 1.06515798e-01], + [2.51246943e-01, 8.08749124e-03, 1.86753126e-02, 4.37392426e-01, 7.90421599e-02, 1.06515798e-01], + [2.60358645e-01, 8.08749124e-03, 1.86753126e-02, 4.47805885e-01, 7.90421599e-02, 1.06515798e-01], + [2.78719164e-01, 8.08749124e-03, 1.86753126e-02, 4.68503631e-01, 7.90421599e-02, 1.06515798e-01], + [2.87965610e-01, 8.08749124e-03, 1.86753126e-02, 4.78790029e-01, 7.90421599e-02, 1.06515798e-01], + [2.97255414e-01, 8.08749124e-03, 1.86753126e-02, 4.89035373e-01, 7.90421599e-02, 1.06515798e-01], + [3.06587917e-01, 8.08749124e-03, 1.86753126e-02, 4.99240206e-01, 7.90421599e-02, 1.06515798e-01], + [3.15962523e-01, 8.08749124e-03, 1.86753126e-02, 5.09405012e-01, 7.90421599e-02, 1.06515798e-01], + [3.25378702e-01, 8.08749124e-03, 1.86753126e-02, 5.19530217e-01, 7.90421599e-02, 1.06515798e-01], + [3.44334814e-01, 8.08749124e-03, 1.86753126e-02, 5.39662456e-01, 7.90421599e-02, 1.06515798e-01], + [3.73072188e-01, 8.08749124e-03, 1.86753126e-02, 5.69570192e-01, 7.90421599e-02, 1.06515798e-01], + [3.82729571e-01, 8.08749124e-03, 1.86753126e-02, 5.79463982e-01, 7.90421599e-02, 1.06515798e-01], + [3.92426305e-01, 8.08749124e-03, 1.86753126e-02, 5.89319725e-01, 7.90421599e-02, 1.06515798e-01], + [3.92426305e-01, 1.30725842e-02, 1.86753126e-02, 5.89319725e-01, 9.31084543e-02, 1.06515798e-01], + [4.02161016e-01, 1.30725842e-02, 1.86753126e-02, 5.99138501e-01, 9.31084543e-02, 1.06515798e-01], + [4.21749168e-01, 1.30725842e-02, 1.86753126e-02, 6.18660773e-01, 9.31084543e-02, 1.06515798e-01], + [4.31601113e-01, 1.30725842e-02, 1.86753126e-02, 6.28365328e-01, 9.31084543e-02, 1.06515798e-01], + [4.31601113e-01, 1.86073989e-02, 1.86753126e-02, 6.28365328e-01, 1.06734142e-01, 1.06515798e-01], + [4.41490527e-01, 1.86073989e-02, 1.86753126e-02, 6.38032889e-01, 1.06734142e-01, 1.06515798e-01], + [4.51419138e-01, 1.86073989e-02, 1.86753126e-02, 6.47661932e-01, 1.06734142e-01, 1.06515798e-01], + [4.61387078e-01, 1.86073989e-02, 1.86753126e-02, 6.57252226e-01, 1.06734142e-01, 1.06515798e-01], + [4.71394524e-01, 1.86073989e-02, 1.86753126e-02, 6.66803488e-01, 1.06734142e-01, 1.06515798e-01], + [4.81441701e-01, 1.86073989e-02, 1.86753126e-02, 6.76315386e-01, 1.06734142e-01, 1.06515798e-01], + [4.91528876e-01, 1.86073989e-02, 1.86753126e-02, 6.85787537e-01, 1.06734142e-01, 1.06515798e-01], + [5.22042223e-01, 1.86073989e-02, 1.86753126e-02, 7.13954859e-01, 1.06734142e-01, 1.06515798e-01], + [5.52931085e-01, 1.86073989e-02, 1.86753126e-02, 7.41744842e-01, 1.06734142e-01, 1.06515798e-01], + [5.63308855e-01, 1.86073989e-02, 1.86753126e-02, 7.50923965e-01, 1.06734142e-01, 1.06515798e-01], + [5.84202761e-01, 1.86073989e-02, 1.86753126e-02, 7.69142314e-01, 1.06734142e-01, 1.06515798e-01], + [5.94714799e-01, 1.86073989e-02, 1.86753126e-02, 7.78183131e-01, 1.06734142e-01, 1.06515798e-01], + [6.05273508e-01, 1.86073989e-02, 1.86753126e-02, 7.87175387e-01, 1.06734142e-01, 1.06515798e-01], + [6.15879903e-01, 1.86073989e-02, 1.86753126e-02, 7.96117716e-01, 1.06734142e-01, 1.06515798e-01], + [6.26535089e-01, 1.86073989e-02, 1.86753126e-02, 8.05008614e-01, 1.06734142e-01, 1.06515798e-01], + [6.37240259e-01, 1.86073989e-02, 1.86753126e-02, 8.13846424e-01, 1.06734142e-01, 1.06515798e-01], + [6.47996704e-01, 1.86073989e-02, 1.86753126e-02, 8.22629319e-01, 1.06734142e-01, 1.06515798e-01], + [6.58805813e-01, 1.86073989e-02, 1.86753126e-02, 8.31355277e-01, 1.06734142e-01, 1.06515798e-01], + [6.69669074e-01, 1.86073989e-02, 1.86753126e-02, 8.40022063e-01, 1.06734142e-01, 1.06515798e-01], + [6.80588073e-01, 1.86073989e-02, 1.86753126e-02, 8.48627195e-01, 1.06734142e-01, 1.06515798e-01], + [6.91564481e-01, 1.86073989e-02, 1.86753126e-02, 8.57167918e-01, 1.06734142e-01, 1.06515798e-01], + [7.02600038e-01, 1.86073989e-02, 1.86753126e-02, 8.65641160e-01, 1.06734142e-01, 1.06515798e-01], + [7.13696504e-01, 1.86073989e-02, 1.86753126e-02, 8.74043497e-01, 1.06734142e-01, 1.06515798e-01], + [7.24855578e-01, 1.86073989e-02, 1.86753126e-02, 8.82371106e-01, 1.06734142e-01, 1.06515798e-01], + [7.36078733e-01, 1.86073989e-02, 1.86753126e-02, 8.90619734e-01, 1.06734142e-01, 1.06515798e-01], + [7.47366885e-01, 1.86073989e-02, 1.86753126e-02, 8.98784683e-01, 1.06734142e-01, 1.06515798e-01], + [7.58719674e-01, 1.86073989e-02, 1.86753126e-02, 9.06860885e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.70133729e-01, 1.86073989e-02, 1.86753126e-02, 9.14843249e-01, 1.06734142e-01, 1.06515798e-01], + [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], + [7.81200203e-01, 1.86073989e-02, 1.86753126e-02, 9.34620546e-01, 1.06734142e-01, 1.06515798e-01], + ] + ).T + + true_ci_Aalen = np.empty(shape=(2, 3, 75), dtype=_ci_Aalen.dtype) + true_ci_Aalen[0], true_ci_Aalen[1] = _ci_Aalen[0:3], _ci_Aalen[3:] + + true_ci_Dinse = np.array( + [ + [ + [ + 0.00000000e00, + 6.95370457e-02, + 7.83645959e-02, + 9.10033947e-02, + 1.04072435e-01, + 1.17082504e-01, + 1.29929377e-01, + 1.91645154e-01, + 2.03566151e-01, + 2.15372208e-01, + 2.50181089e-01, + 2.72943077e-01, + 2.95396099e-01, + 3.06516413e-01, + 3.28560479e-01, + 3.39489609e-01, + 3.61173565e-01, + 3.82637980e-01, + 3.93292024e-01, + 4.03895875e-01, + 4.24957989e-01, + 4.35418464e-01, + 4.56203039e-01, + 4.66528789e-01, + 4.76811151e-01, + 5.07403932e-01, + 5.17518446e-01, + 5.27592116e-01, + 5.37625278e-01, + 5.57571153e-01, + 5.67484281e-01, + 5.77357731e-01, + 5.87191592e-01, + 5.96985903e-01, + 6.06740660e-01, + 6.26131260e-01, + 6.54917692e-01, + 6.64432391e-01, + 6.73906170e-01, + 6.83338631e-01, + 6.92729319e-01, + 7.11383257e-01, + 7.20645286e-01, + 7.29863092e-01, + 7.39035885e-01, + 7.48162792e-01, + 7.57242852e-01, + 7.66275008e-01, + 7.75258098e-01, + 7.84190844e-01, + 8.10672265e-01, + 8.36640715e-01, + 8.45172633e-01, + 8.62033521e-01, + 8.70355794e-01, + 8.78600789e-01, + 8.86764002e-01, + 8.94840306e-01, + 9.02823839e-01, + 9.10707840e-01, + 9.18484458e-01, + 9.26144486e-01, + 9.33677016e-01, + 9.41068967e-01, + 9.48304429e-01, + 9.55363721e-01, + 9.62222015e-01, + 9.68847216e-01, + 9.75196572e-01, + 9.81210932e-01, + 9.86804273e-01, + 9.86804273e-01, + 9.86804273e-01, + 9.97547733e-01, + 9.97547733e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 1.87821217e-02, + 2.47938240e-02, + 3.11403565e-02, + 4.46180263e-02, + 5.89057082e-02, + 6.62937254e-02, + 7.38218326e-02, + 8.92495123e-02, + 9.71286072e-02, + 1.13178498e-01, + 1.29576058e-01, + 1.37892532e-01, + 1.46282113e-01, + 1.63267218e-01, + 1.71856311e-01, + 1.89215709e-01, + 1.97981168e-01, + 2.06801390e-01, + 2.33575873e-01, + 2.42599500e-01, + 2.51670763e-01, + 2.60788594e-01, + 2.79160804e-01, + 2.88412837e-01, + 2.97708055e-01, + 3.07045802e-01, + 3.16425486e-01, + 3.25846579e-01, + 3.44812020e-01, + 3.73562193e-01, + 3.83223577e-01, + 3.92924166e-01, + 3.92924166e-01, + 4.02662628e-01, + 4.22257828e-01, + 4.32113121e-01, + 4.32113121e-01, + 4.42005809e-01, + 4.51937570e-01, + 4.61908540e-01, + 4.71918900e-01, + 4.81968878e-01, + 4.92058747e-01, + 5.22579250e-01, + 5.53474503e-01, + 5.63854512e-01, + 5.84752549e-01, + 5.95266834e-01, + 6.05827834e-01, + 6.16436604e-01, + 6.27094295e-01, + 6.37802162e-01, + 6.48561574e-01, + 6.59374024e-01, + 6.70241138e-01, + 6.81164688e-01, + 6.92146607e-01, + 7.03189005e-01, + 7.14294193e-01, + 7.25464710e-01, + 7.36703387e-01, + 7.48013460e-01, + 7.59398874e-01, + 7.70865154e-01, + 7.70865154e-01, + 7.70865154e-01, + 7.88426707e-01, + 7.88426707e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 8.89768861e-04, + 8.89768861e-04, + 8.89768861e-04, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 3.90205526e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 8.15097968e-03, + 1.31592295e-02, + 1.31592295e-02, + 1.31592295e-02, + 1.31592295e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + 1.87158794e-02, + ], + [ + 0.00000000e00, + 8.90045062e-04, + 3.90328157e-03, + 3.90328157e-03, + 8.15640607e-03, + 1.31938914e-02, + 1.31938914e-02, + 1.31938914e-02, + 1.31938914e-02, + 1.31938914e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + 1.87838537e-02, + ], + ], + [ + [ + 0.00000000e00, + 1.42906439e-03, + 5.09108441e-03, + 9.87499475e-03, + 1.53589729e-02, + 2.13379730e-02, + 2.76935814e-02, + 6.31255365e-02, + 7.07260642e-02, + 7.84551064e-02, + 1.02305770e-01, + 1.18678738e-01, + 1.35374220e-01, + 1.43831682e-01, + 1.60948022e-01, + 1.69600857e-01, + 1.87083052e-01, + 2.04786409e-01, + 2.13716449e-01, + 2.22696630e-01, + 2.40801966e-01, + 2.49924703e-01, + 2.68305071e-01, + 2.77560890e-01, + 2.86859395e-01, + 3.15004317e-01, + 3.24467187e-01, + 3.33969872e-01, + 3.43511976e-01, + 3.62713124e-01, + 3.72371634e-01, + 3.82068491e-01, + 3.91803549e-01, + 4.01576703e-01, + 4.11387895e-01, + 4.31124387e-01, + 4.61015540e-01, + 4.71056261e-01, + 4.81135884e-01, + 4.91254721e-01, + 5.01413140e-01, + 5.21850459e-01, + 5.32130377e-01, + 5.42451918e-01, + 5.52815752e-01, + 5.63222628e-01, + 5.73673369e-01, + 5.84168889e-01, + 5.94710192e-01, + 6.05298388e-01, + 6.37357181e-01, + 6.69890244e-01, + 6.80848947e-01, + 7.02952411e-01, + 7.14103031e-01, + 7.25324091e-01, + 7.36619526e-01, + 7.47993807e-01, + 7.59452038e-01, + 7.71000091e-01, + 7.82644771e-01, + 7.94394028e-01, + 8.06257243e-01, + 8.18245604e-01, + 8.30372631e-01, + 8.42654895e-01, + 8.55113054e-01, + 8.67773361e-01, + 8.80669917e-01, + 8.93848051e-01, + 9.07369171e-01, + 9.07369171e-01, + 9.07369171e-01, + 9.20581987e-01, + 9.20581987e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 1.06174101e-01, + 1.19356660e-01, + 1.32257411e-01, + 1.57383382e-01, + 1.81801442e-01, + 1.93794468e-01, + 2.05662904e-01, + 2.29066656e-01, + 2.40619922e-01, + 2.63462402e-01, + 2.85989093e-01, + 2.97145175e-01, + 3.08234358e-01, + 3.30223582e-01, + 3.41129420e-01, + 3.62773614e-01, + 3.73516353e-01, + 3.84208235e-01, + 4.15991275e-01, + 4.26493370e-01, + 4.36950786e-01, + 4.47364439e-01, + 4.68063058e-01, + 4.78350116e-01, + 4.88596271e-01, + 4.98802067e-01, + 5.08967984e-01, + 5.19094449e-01, + 5.39229669e-01, + 5.69142997e-01, + 5.79038911e-01, + 5.88896921e-01, + 5.88896921e-01, + 5.98718077e-01, + 6.18245569e-01, + 6.27952927e-01, + 6.27952927e-01, + 6.37623389e-01, + 6.47255475e-01, + 6.56848951e-01, + 6.66403536e-01, + 6.75918897e-01, + 6.85394650e-01, + 7.13573847e-01, + 7.41376949e-01, + 7.50560569e-01, + 7.68788476e-01, + 7.77834150e-01, + 7.86831381e-01, + 7.95778794e-01, + 8.04674874e-01, + 8.13517948e-01, + 8.22306165e-01, + 8.31037478e-01, + 8.39709609e-01, + 8.48320023e-01, + 8.56865884e-01, + 8.65344005e-01, + 8.73750790e-01, + 8.82082148e-01, + 8.90333387e-01, + 8.98499057e-01, + 9.06572688e-01, + 9.14546288e-01, + 9.14546288e-01, + 9.14546288e-01, + 9.32175909e-01, + 9.32175909e-01, + ], + [ + 0.00000000e00, + 0.00000000e00, + 0.00000000e00, + 4.94939423e-02, + 4.94939423e-02, + 4.94939423e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 6.42375366e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 7.87167076e-02, + 9.27718816e-02, + 9.27718816e-02, + 9.27718816e-02, + 9.27718816e-02, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + 1.06385776e-01, + ], + [ + 0.00000000e00, + 4.94873690e-02, + 6.42275460e-02, + 6.42275460e-02, + 7.86890219e-02, + 9.26379119e-02, + 9.26379119e-02, + 9.26379119e-02, + 9.26379119e-02, + 9.26379119e-02, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + 1.06168577e-01, + ], + ], + ] + ) + + @staticmethod + def data_full(): + + _, df = load_cgvhd() + event, ftime = df["status"], df["ftime"] + + true_x = np.array( + [ + 0.057, + 0.063, + 0.11, + 0.172, + 0.181, + 0.219, + 0.274, + 0.277, + 0.279, + 0.282, + 0.287, + 0.29, + 0.301, + 0.307, + 0.32, + 0.323, + 0.329, + 0.331, + 0.339, + 0.342, + 0.345, + 0.348, + 0.35, + 0.359, + 0.361, + 0.367, + 0.372, + 0.375, + 0.378, + 0.381, + 0.383, + 0.392, + 0.394, + 0.397, + 0.4, + 0.402, + 0.408, + 0.411, + 0.413, + 0.416, + 0.419, + 0.422, + 0.433, + 0.438, + 0.444, + 0.446, + 0.452, + 0.457, + 0.463, + 0.465, + 0.474, + 0.479, + 0.482, + 0.496, + 0.517, + 0.52, + 0.553, + 0.564, + 0.567, + 0.616, + 0.619, + 0.63, + 0.701, + 0.742, + 0.764, + 0.857, + 0.914, + 0.923, + 0.958, + 0.964, + 0.991, + 1.202, + 1.205, + 1.993, + 2.776, + ] + ) + true_y = np.array( + [ + [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0.0505050505050505, + 0.0606060606060606, + 0.0707070707070707, + 0.0909090909090909, + 0.111111111111111, + 0.121212121212121, + 0.131313131313131, + 0.151515151515152, + 0.161616161616162, + 0.181818181818182, + 0.202020202020202, + 0.212121212121212, + 0.222222222222222, + 0.242424242424243, + 0.252525252525253, + 0.272727272727273, + 0.282828282828283, + 0.292929292929293, + 0.323232323232323, + 0.333333333333334, + 0.343434343434344, + 0.353535353535354, + 0.373737373737374, + 0.383838383838384, + 0.393939393939394, + 0.404040404040404, + 0.414141414141414, + 0.424242424242425, + 0.444444444444445, + 0.474747474747475, + 0.484848484848485, + 0.494949494949495, + 0.494949494949495, + 0.505050505050505, + 0.525252525252526, + 0.535353535353536, + 0.535353535353536, + 0.545454545454546, + 0.555555555555556, + 0.565656565656566, + 0.575757575757576, + 0.585858585858586, + 0.595959595959596, + 0.626262626262626, + 0.656565656565657, + 0.666666666666667, + 0.686868686868687, + 0.696969696969697, + 0.707070707070707, + 0.717171717171718, + 0.727272727272728, + 0.737373737373738, + 0.747474747474748, + 0.757575757575758, + 0.767676767676768, + 0.777777777777779, + 0.787878787878789, + 0.797979797979799, + 0.808080808080809, + 0.818181818181819, + 0.828282828282829, + 0.838383838383839, + 0.848484848484849, + 0.858585858585859, + 0.858585858585859, + 0.858585858585859, + 0.878787878787879, + 0.878787878787879, + ], + [ + 0, + 0, + 0, + 0.0101010101010101, + 0.0101010101010101, + 0.0101010101010101, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0202020202020202, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0303030303030303, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + ], + [ + 0, + 0.0101010101010101, + 0.0202020202020202, + 0.0202020202020202, + 0.0303030303030303, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0404040404040404, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + 0.0505050505050505, + ], + ] + ) + + return event, ftime, true_x, true_y + + class TestCumIncCompetingRisks: @staticmethod @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) @@ -6540,6 +7576,87 @@ def test_truncated(event, time, true_x, true_y): assert_array_equal(x, true_x) _x_km, true_km = kaplan_meier_estimator(event > 0, time, time_min=time_min) - assert_array_almost_equal(y[0], 1.0 - true_km) + assert_array_almost_equal(y[0], 1 - true_km) + assert_array_almost_equal(y[1:], true_y) + assert_array_almost_equal(y[0], y[1:].sum(axis=0)) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) + def test_ci(event, time, true_x, true_y): + "The true CI values were generated using the cmprsk R package." + x, y, ci = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Aalen") + + assert_array_equal(x, true_x) + _x_km, true_km, true_ci_0 = kaplan_meier_estimator(event > 0, time, conf_type="log-log") + assert_array_almost_equal(y[0], 1 - true_km) + assert_array_almost_equal(y[1:], true_y) + assert_array_almost_equal(y[0], y[1:].sum(axis=0)) + + assert_allclose(ci[:, 0, :], 1 - true_ci_0, rtol=5.0e-4) + + true_ci = CGVHD_DataSets().true_ci_Aalen + ci_1 = ci[:, 1:, :] + nan_mask = np.isnan(true_ci) + assert_allclose(np.ma.masked_array(ci_1, nan_mask), np.ma.masked_array(true_ci, nan_mask), rtol=5.0e-4) + assert_array_equal(np.ma.masked_array(ci_1, ~nan_mask), np.ma.masked_array(np.zeros_like(ci_1), ~nan_mask)) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) + def test_dinse_approx_ci(event, time, true_x, true_y): + """The true CI values are taken from + M. Pintilie: "Competing Risks: A Practical Perspective". John Wiley & Sons, 2006, + """ + x, y, ci_full = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Dinse_Approx") + + t = 0.5 + it = np.argmax(t < x) - 1 + ci = ci_full[:, 1, it] + true_ci = np.array([0.5855, 0.7683]) + + assert x[it] <= t < x[it + 1] + assert_allclose(ci, true_ci, rtol=5.0e-4) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", CGVHD_DataSets().get_cases()) + def test_dinse_ci_regression(event, time, true_x, true_y): + """An external implementation of the Dinse variance haven't been found, + so the true CI values are taken from this internal code. + This is just thus a regression test. + """ + x, y, ci = cumulative_incidence_competing_risks(event, time, conf_type="log-log", var_type="Dinse") + + assert_array_equal(x, true_x) + _x_km, true_km, true_ci_0 = kaplan_meier_estimator(event > 0, time, conf_type="log-log") + + assert_array_almost_equal(y[0], 1 - true_km) assert_array_almost_equal(y[1:], true_y) assert_array_almost_equal(y[0], y[1:].sum(axis=0)) + + true_ci = CGVHD_DataSets.true_ci_Dinse + assert_array_almost_equal(ci, true_ci) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + @pytest.mark.parametrize("conf_level", [None, -1, 1.0, 3.0, np.inf, np.nan]) + def test_invalid_conf_level_competing_risks(event, time, true_x, true_y, conf_level): + msg = f"conf_level must be a float in the range (0.0, 1.0), but was {conf_level}" + with pytest.raises(ValueError, match=re.escape(msg)): + cumulative_incidence_competing_risks( + event, time, conf_level=conf_level, conf_type="log-log", var_type="Aalen" + ) + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + @pytest.mark.parametrize("conf_type", ["None", -1, "", "not"]) + def test_invalid_conf_type_competing_risks(event, time, true_x, true_y, conf_type): + msg = f"conf_type must be None or a str among {{'log-log'}}, but was {conf_type!r}" + with pytest.raises(ValueError, match=msg): + cumulative_incidence_competing_risks(event, time, conf_level=0.9, conf_type=conf_type, var_type="Aalen") + + @staticmethod + @pytest.mark.parametrize("event, time, true_x, true_y", SimpleDataBMTCases().get_cases()) + @pytest.mark.parametrize("var_type", ["None", "dinse", 1, "", "not"]) + def test_invalid_var_type_competing_risks(event, time, true_x, true_y, var_type): + msg = f"{var_type=} must be one of 'Dinse', 'Dinse_approx' or 'Aalen'." + with pytest.raises(ValueError, match=msg): + cumulative_incidence_competing_risks(event, time, conf_level=0.95, conf_type="log-log", var_type=var_type)