From 5995e095dabb4e7501cfd84e8e76064c4a435c0a Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 13 Jul 2025 11:17:08 +0300 Subject: [PATCH 01/51] chore: close #102 --- src/rtichoke/helpers/sandbox_observable_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 1f808e6..6018f2b 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -1082,7 +1082,7 @@ def to_pl(df): ) -def create_list_data_to_adjust_polars( +def create_list_data_to_adjust( probs_dict, reals_dict, times_dict, stratified_by, by ): # reference_groups = list(probs_dict.keys()) From 384541756fd6bfd299eb004f0e93a69aa7c1d0d6 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 13 Jul 2025 14:31:54 +0300 Subject: [PATCH 02/51] chore: close #106 --- .../helpers/sandbox_observable_helpers.py | 32 ++----------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 6018f2b..7775630 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -119,7 +119,7 @@ def extract_crude_estimate(data_to_adjust: pd.DataFrame) -> pd.DataFrame: return final.to_pandas() -def add_cutoff_strata_polars(data: pl.DataFrame, by: float) -> pl.DataFrame: +def add_cutoff_strata(data: pl.DataFrame, by: float) -> pl.DataFrame: def transform_group(group: pl.DataFrame) -> pl.DataFrame: # Convert to NumPy for numeric ops probs = group["probs"].to_numpy() @@ -170,34 +170,6 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame: transformed_groups = [transform_group(group) for group in grouped.values()] return pl.concat(transformed_groups) - -def add_cutoff_strata(data, by): - result = data.copy() - - grouped = result.groupby("reference_group") - - def transform_group(group): - group["strata_probability_threshold"] = pd.cut( - group["probs"], - bins=create_breaks_values(group["probs"], "probability_threshold", by), - include_lowest=True, - ) - - group["strata_ppcr"] = ( - pd.qcut(-group["probs"], q=int(1 / by), labels=False, duplicates="drop") + 1 - ) - - group["strata_ppcr"] = (group["strata_ppcr"] / (1 / by)).astype(str) - - return group - - result = grouped.apply(transform_group) - - result = result.reset_index(drop=True) - - return result - - def create_strata_combinations_polars(stratified_by: str, by: float) -> pl.DataFrame: if stratified_by == "probability_threshold": breaks = create_breaks_values(None, "probability_threshold", by) @@ -1106,7 +1078,7 @@ def create_list_data_to_adjust( ).with_columns(pl.col("reference_group").cast(reference_group_enum)) # Apply strata - data_to_adjust = add_cutoff_strata_polars(data_to_adjust, by=by) + data_to_adjust = add_cutoff_strata(data_to_adjust, by=by) data_to_adjust = pivot_longer_strata(data_to_adjust) reals_labels = [ From 24fd72fc8d2faecf62af5c898de8bd4caeacd180 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 13 Jul 2025 15:23:16 +0300 Subject: [PATCH 03/51] chore: fix #107 --- .../helpers/sandbox_observable_helpers.py | 67 +++++++++---------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 7775630..d386dd7 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ 
b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -119,51 +119,46 @@ def extract_crude_estimate(data_to_adjust: pd.DataFrame) -> pd.DataFrame: return final.to_pandas() -def add_cutoff_strata(data: pl.DataFrame, by: float) -> pl.DataFrame: +def add_cutoff_strata(data: pl.DataFrame, by: float, stratified_by) -> pl.DataFrame: def transform_group(group: pl.DataFrame) -> pl.DataFrame: - # Convert to NumPy for numeric ops + probs = group["probs"].to_numpy() + columns_to_add = [] - # --- Compute strata_probability_threshold --- - breaks = create_breaks_values(probs, "probability_threshold", by) - # strata_prob = np.digitize(probs, breaks, right=False) - 1 - # Clamp indices to avoid out-of-bounds error when accessing breaks[i+1] - # strata_prob = np.clip(strata_prob, 0, len(breaks) - 2) - # strata_prob_labels = [ - # f"({breaks[i]:.3f}, {breaks[i+1]:.3f}]" for i in strata_prob - # ] + if "probability_threshold" in stratified_by: + breaks = create_breaks_values(probs, "probability_threshold", by) + last_bin_index = len(breaks) - 2 - last_bin_index = len(breaks) - 2 + bin_indices = np.digitize(probs, bins=breaks, right=False) - 1 + bin_indices = np.where(probs == 1.0, last_bin_index, bin_indices) - bin_indices = np.digitize(probs, bins=breaks, right=False) - 1 - bin_indices = np.where(probs == 1.0, last_bin_index, bin_indices) + lower_bounds = breaks[bin_indices] + upper_bounds = breaks[bin_indices + 1] - lower_bounds = breaks[bin_indices] - upper_bounds = breaks[bin_indices + 1] + include_upper_bounds = bin_indices == last_bin_index - include_upper_bounds = bin_indices == last_bin_index + strata_prob_labels = np.where( + include_upper_bounds, + [f"[{lo:.2f}, {hi:.2f}]" for lo, hi in zip(lower_bounds, upper_bounds)], + [f"[{lo:.2f}, {hi:.2f})" for lo, hi in zip(lower_bounds, upper_bounds)], + ).astype(str) - strata_prob_labels = np.where( - include_upper_bounds, - [f"[{lo:.2f}, {hi:.2f}]" for lo, hi in zip(lower_bounds, upper_bounds)], - [f"[{lo:.2f}, {hi:.2f})" for lo, hi in zip(lower_bounds, upper_bounds)], - ) + columns_to_add.append( + pl.Series("strata_probability_threshold", strata_prob_labels) + ) + + if "ppcr" in stratified_by: # --- Compute strata_ppcr as quantiles on -probs --- - try: - q = int(1 / by) - quantile_edges = np.quantile(-probs, np.linspace(0, 1, q)) - strata_ppcr = np.digitize(-probs, quantile_edges, right=False) - strata_ppcr = (strata_ppcr / (1 / by)).astype(str) - except ValueError: - strata_ppcr = np.array(["1"] * len(probs)) # fallback for small group - - return group.with_columns( - [ - pl.Series("strata_probability_threshold", strata_prob_labels), - pl.Series("strata_ppcr", strata_ppcr), - ] - ) + try: + q = int(1 / by) + quantile_edges = np.quantile(-probs, np.linspace(0, 1, q)) + strata_ppcr = np.digitize(-probs, quantile_edges, right=False) + strata_ppcr = (strata_ppcr / (1 / by)).astype(str) + except ValueError: + strata_ppcr = np.array(["1"] * len(probs)) # fallback for small group + + return group.with_columns(columns_to_add) # Apply per-group transformation grouped = data.partition_by("reference_group", as_dict=True) @@ -1078,7 +1073,7 @@ def create_list_data_to_adjust( ).with_columns(pl.col("reference_group").cast(reference_group_enum)) # Apply strata - data_to_adjust = add_cutoff_strata(data_to_adjust, by=by) + data_to_adjust = add_cutoff_strata(data_to_adjust, by=by, stratified_by=stratified_by) data_to_adjust = pivot_longer_strata(data_to_adjust) reals_labels = [ From 12a55878a8b4c1dc5b6a387a8bbe96389dbb623e Mon Sep 17 00:00:00 2001 From: Uriah Finkel 
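
A minimal sketch of the two binning rules that PATCH 03 makes conditional on `stratified_by`, using plain numpy; the breaks sequence is inlined here as a stand-in for the module's `create_breaks_values` (that inlining is an assumption for illustration):

    import numpy as np

    probs = np.array([0.03, 0.42, 0.58, 1.0])
    by = 0.25

    # probability_threshold strata: half-open bins, closed on the right
    # only for the last bin so that probs == 1.0 still lands inside it
    breaks = np.arange(0.0, 1.0 + by, by)   # stand-in for create_breaks_values
    last_bin = len(breaks) - 2
    idx = np.digitize(probs, bins=breaks, right=False) - 1
    idx = np.where(probs == 1.0, last_bin, idx)
    labels = [
        f"[{breaks[i]:.2f}, {breaks[i + 1]:.2f}]" if i == last_bin
        else f"[{breaks[i]:.2f}, {breaks[i + 1]:.2f})"
        for i in idx
    ]
    # ['[0.00, 0.25)', '[0.25, 0.50)', '[0.50, 0.75)', '[0.75, 1.00]']

    # ppcr strata: quantile edges on -probs, so the top scores get rank 1;
    # dividing by q turns the rank into the predicted-positive-rate label
    q = int(1 / by)
    edges = np.quantile(-probs, np.linspace(0, 1, q))
    ppcr = np.digitize(-probs, edges, right=False)
    ppcr_labels = (ppcr / q).astype(str)     # '0.25' = top quarter of scores
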
Date: Sun, 13 Jul 2025 15:29:06 +0300 Subject: [PATCH 04/51] build: add dcurves and plotly to dev dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 46065df..89a554b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,8 @@ dev = [ "monkeytype>=23.3.0", "marimo>=0.14.7", "pre-commit>=4.2.0", + "dcurves>=1.1.5", + "plotly>=5.24.1", ] [tool.uv.workspace] From 65e13f12864a215093fc4bf3cd0e5a01d8f6c1e5 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 13 Jul 2025 15:37:22 +0300 Subject: [PATCH 05/51] build: close #104 --- pyproject.toml | 4 +-- uv.lock | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 89a554b..e613e7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,5 +49,5 @@ dev = [ members = ["rtichoke"] [build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" +requires = ["uv_build>=0.7.20,<0.8.0"] +build-backend = "uv_build" diff --git a/uv.lock b/uv.lock index a8f5377..3e83a47 100644 --- a/uv.lock +++ b/uv.lock @@ -848,6 +848,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "dcurves" +version = "1.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lifelines" }, + { name = "matplotlib", version = "3.9.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "matplotlib", version = "3.10.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pandas" }, + { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "scipy", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "setuptools" }, + { name = "statsmodels" }, + { name = "typing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/1d/1f1512680c305d0effabec5733676e1f4d2a8f2db246969de69d31c1007e/dcurves-1.1.5.tar.gz", hash = "sha256:08de2dd9a5c3e8917e1a6d483785b9e01c6c7ae2b5afdc08c328a7cd16cc4c12", size = 4903731, upload-time = "2025-07-07T16:46:38.73Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/2a/9d71fd5b8132067e442e9c5e7184d2db58987e076401254fa06261f85c85/dcurves-1.1.5-py3-none-any.whl", hash = "sha256:168689c2056471f43503803ef8177bf0e196f498d283e75f061f86c567a9c89b", size = 94024, upload-time = "2025-07-07T16:46:36.837Z" }, +] + [[package]] name = "debugpy" version = "1.8.14" @@ -3095,6 +3116,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, ] +[[package]] +name = "patsy" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/81/74f6a65b848ffd16c18f920620ce999fe45fe27f01ab3911260ce4ed85e4/patsy-1.0.1.tar.gz", hash = "sha256:e786a9391eec818c054e359b737bbce692f051aee4c661f4141cc88fb459c0c4", size = 396010, upload-time = "2024-11-12T14:10:54.642Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c", size = 232923, upload-time = "2024-11-12T14:10:52.85Z" }, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -4186,6 +4221,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "dcurves" }, { name = "ipykernel" }, { name = "jupyter" }, { name = "lifelines" }, @@ -4193,6 +4229,7 @@ dev = [ { name = "monkeytype" }, { name = "mypy" }, { name = "myst-nb", marker = "python_full_version < '4'" }, + { name = "plotly" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-cov" }, @@ -4222,6 +4259,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "dcurves", specifier = ">=1.1.5" }, { name = "ipykernel", specifier = ">=6.29.5" }, { name = "jupyter", specifier = ">=1.0.0,<2.0.0" }, { name = "lifelines", specifier = ">=0.30.0" }, @@ -4229,6 +4267,7 @@ dev = [ { name = "monkeytype", specifier = ">=23.3.0" }, { name = "mypy", specifier = ">=1.2.0,<2.0.0" }, { name = "myst-nb", marker = "python_full_version >= '3.9' and python_full_version < '4'", specifier = ">=0.17.1,<1.0.0" }, + { name = "plotly", specifier = ">=5.24.1" }, { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=7.3.0,<8.0.0" }, { name = "pytest-cov", specifier = ">=4.0.0,<5.0.0" }, @@ -4685,6 +4724,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/95/38ef0cd7fa11eaba6a99b3c4f5ac948d8bc6ff199aabd327a29cc000840c/starlette-0.47.1-py3-none-any.whl", hash = "sha256:5e11c9f5c7c3f24959edbf2dffdc01bba860228acf657129467d8a7468591527", size = 72747, upload-time = "2025-06-21T04:03:15.705Z" }, ] +[[package]] +name = "statsmodels" +version = "0.14.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "patsy" }, + { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "scipy", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/64/cc/8c1bf59bf8203dea1bf2ea811cfe667d7bcc6909c83d8afb02b08e30f50b/statsmodels-0.14.5.tar.gz", hash = "sha256:de260e58cccfd2ceddf835b55a357233d6ca853a1aa4f90f7553a52cc71c6ddf", size = 20525016, upload-time = "2025-07-07T12:14:23.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2c/55b2a5d10c1a211ecab3f792021d2581bbe1c5ca0a1059f6715dddc6899d/statsmodels-0.14.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9fc2b5cdc0c95cba894849651fec1fa1511d365e3eb72b0cc75caac44077cd48", size = 10058241, upload-time = "2025-07-07T12:13:16.286Z" }, + { url = "https://files.pythonhosted.org/packages/66/d9/6967475805de06691e951072d05e40e3f1c71b6221bb92401193ee19bd2a/statsmodels-0.14.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b8d96b0bbaeabd3a557c35cc7249baa9cfbc6dd305c32a9f2cbdd7f46c037e7f", size = 9734017, upload-time = "2025-07-07T12:05:08.498Z" }, + { url = "https://files.pythonhosted.org/packages/df/a8/803c280419a7312e2472969fe72cf461c1210a27770a662cbe3b5cd7c6fe/statsmodels-0.14.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:145bc39b2cb201efb6c83cc3f2163c269e63b0d4809801853dec6f440bd3bc37", size = 10459677, upload-time = "2025-07-07T14:21:51.809Z" }, + { url = "https://files.pythonhosted.org/packages/a1/25/edf20acbd670934b02cd9344e29c9a03ce040122324b3491bb075ae76b2d/statsmodels-0.14.5-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7c14fb2617bb819fb2532e1424e1da2b98a3419a80e95f33365a72d437d474e", size = 10678631, upload-time = "2025-07-07T14:22:05.496Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/8b1e38310272e766abd6093607000a81827420a3348f09eff08a9e54cbaf/statsmodels-0.14.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1e9742d8a5ac38a3bfc4b7f4b0681903920f20cbbf466d72b1fd642033846108", size = 10699273, upload-time = "2025-07-07T14:22:19.487Z" }, + { url = "https://files.pythonhosted.org/packages/d1/6f/6de51f1077b7cef34611f1d6721392ea170153251b4d977efcf6d100f779/statsmodels-0.14.5-cp310-cp310-win_amd64.whl", hash = "sha256:1cab9e6fce97caf4239cdb2df375806937da5d0b7ba2699b13af33a07f438464", size = 9644785, upload-time = "2025-07-07T12:05:20.927Z" }, + { url = "https://files.pythonhosted.org/packages/14/30/fd49902b30416b828de763e161c0d6e2cc04d119ae4fbdd3f3b43dc8f1be/statsmodels-0.14.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4b7091a8442076c708c926de3603653a160955e80a2b6d931475b7bb8ddc02e5", size = 10053330, upload-time = "2025-07-07T12:07:39.689Z" }, + { url = "https://files.pythonhosted.org/packages/ca/c1/2654541ff6f5790d01d1e5ba36405fde873f4a854f473e90b4fe56b37333/statsmodels-0.14.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:128872be8f3208f4446d91ea9e4261823902fc7997fee7e1a983eb62fd3b7c6e", size = 9735555, upload-time = "2025-07-07T12:13:28.935Z" }, + { url = "https://files.pythonhosted.org/packages/ce/da/6ebb64d0db4e86c0d2d9cde89e03247702da0ab191789f7813d4f9a348da/statsmodels-0.14.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f2ad5aee04ae7196c429df2174df232c057e478c5fa63193d01c8ec9aae04d31", size = 10307522, upload-time = "2025-07-07T14:22:32.853Z" }, + { url = "https://files.pythonhosted.org/packages/67/49/ac803ca093ec3845184a752a91cd84511245e1f97103b15cfe32794a3bb0/statsmodels-0.14.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:f402fc793458dd6d96e099acb44cd1de1428565bf7ef3030878a8daff091f08a", size = 10474665, upload-time = "2025-07-07T14:22:46.011Z" }, + { url = "https://files.pythonhosted.org/packages/f0/c8/ae82feb00582f4814fac5d2cb3ec32f93866b413cf5878b2fe93688ec63c/statsmodels-0.14.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:26c028832730aebfbfd4e7501694e1f9ad31ec8536e776716673f4e7afd4059a", size = 10713120, upload-time = "2025-07-07T14:23:00.067Z" }, + { url = "https://files.pythonhosted.org/packages/05/ac/4276459ea71aa46e2967ea283fc88ee5631c11f29a06787e16cf4aece1b8/statsmodels-0.14.5-cp311-cp311-win_amd64.whl", hash = "sha256:ec56f771d9529cdc17ed2fb2a950d100b6e83a7c5372aae8ac5bb065c474b856", size = 9640980, upload-time = "2025-07-07T12:05:33.085Z" }, + { url = "https://files.pythonhosted.org/packages/5f/a5/fcc4f5f16355660ce7a1742e28a43e3a9391b492fc4ff29fdd6893e81c05/statsmodels-0.14.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:37e7364a39f9aa3b51d15a208c2868b90aadb8412f868530f5cba9197cb00eaa", size = 10042891, upload-time = "2025-07-07T12:13:41.671Z" }, + { url = "https://files.pythonhosted.org/packages/1c/6f/db0cf5efa48277ac6218d9b981c8fd5e63c4c43e0d9d65015fdc38eed0ef/statsmodels-0.14.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4263d7f4d0f1d5ac6eb4db22e1ee34264a14d634b9332c975c9d9109b6b46e12", size = 9698912, upload-time = "2025-07-07T12:07:54.674Z" }, + { url = "https://files.pythonhosted.org/packages/4a/93/4ddc3bc4a59c51e6a57c49df1b889882c40d9e141e855b3517f6a8de3232/statsmodels-0.14.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:86224f6e36f38486e471e75759d241fe2912d8bc25ab157d54ee074c6aedbf45", size = 10237801, upload-time = "2025-07-07T14:23:12.593Z" }, + { url = "https://files.pythonhosted.org/packages/66/de/dc6bf2f6e8c8eb4c5815560ebdbdf2d69a767bc0f65fde34bc086cf5b36d/statsmodels-0.14.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c3dd760a6fa80cd5e0371685c697bb9c2c0e6e1f394d975e596a1e6d0bbb9372", size = 10424154, upload-time = "2025-07-07T14:23:25.365Z" }, + { url = "https://files.pythonhosted.org/packages/16/4f/2d5a8d14bebdf2b03b3ea89b8c6a2c837bb406ba5b7a41add8bd303bce29/statsmodels-0.14.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6264fb00e02f858b86bd01ef2dc05055a71d4a0cc7551b9976b07b0f0e6cf24f", size = 10652915, upload-time = "2025-07-07T14:23:39.337Z" }, + { url = "https://files.pythonhosted.org/packages/df/4c/2feda3a9f0e17444a84ba5398ada6a4d2e1b8f832760048f04e2b8ea0c41/statsmodels-0.14.5-cp312-cp312-win_amd64.whl", hash = "sha256:b2ed065bfbaf8bb214c7201656df840457c2c8c65e1689e3eb09dc7440f9c61c", size = 9611236, upload-time = "2025-07-07T12:08:06.794Z" }, + { url = "https://files.pythonhosted.org/packages/84/fd/4c374108cf108b3130240a5b45847a61f70ddf973429044a81a05189b046/statsmodels-0.14.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:906263134dd1a640e55ecb01fda4a9be7b9e08558dba9e4c4943a486fdb0c9c8", size = 10013958, upload-time = "2025-07-07T14:35:01.04Z" }, + { url = "https://files.pythonhosted.org/packages/5a/36/bf3d7f0e36acd3ba9ec0babd79ace25506b6872780cbd710fb7cd31f0fa2/statsmodels-0.14.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9118f76344f77cffbb3a9cbcff8682b325be5eed54a4b3253e09da77a74263d3", size = 9674243, upload-time = "2025-07-07T12:08:22.571Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/ce/a55a6f37b5277683ceccd965a5828b24672bbc427db6b3969ae0b0fc29fb/statsmodels-0.14.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9dc4ee159070557c9a6c000625d85f653de437772fe7086857cff68f501afe45", size = 10219521, upload-time = "2025-07-07T14:23:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/1e/48/973da1ee8bc0743519759e74c3615b39acdc3faf00e0a0710f8c856d8c9d/statsmodels-0.14.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a085d47c8ef5387279a991633883d0e700de2b0acc812d7032d165888627bef", size = 10453538, upload-time = "2025-07-07T14:24:06.959Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d6/18903fb707afd31cf1edaec5201964dbdacb2bfae9a22558274647a7c88f/statsmodels-0.14.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9f866b2ebb2904b47c342d00def83c526ef2eb1df6a9a3c94ba5fe63d0005aec", size = 10681584, upload-time = "2025-07-07T14:24:21.038Z" }, + { url = "https://files.pythonhosted.org/packages/44/d6/80df1bbbfcdc50bff4152f43274420fa9856d56e234d160d6206eb1f5827/statsmodels-0.14.5-cp313-cp313-win_amd64.whl", hash = "sha256:2a06bca03b7a492f88c8106103ab75f1a5ced25de90103a89f3a287518017939", size = 9604641, upload-time = "2025-07-07T12:08:36.23Z" }, + { url = "https://files.pythonhosted.org/packages/39/2d/3ab5a8e736b94a91434a70dcbdc4363775711ef17c733e6bde5f24cb2f62/statsmodels-0.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b23b8f646dd78ef5e8d775d879208f8dc0a73418b41c16acac37361ff9ab7738", size = 10077385, upload-time = "2025-07-07T12:13:55.07Z" }, + { url = "https://files.pythonhosted.org/packages/44/ec/091dc1e69bbc84139e3409e45ac26e285ef41eb67116d13e094cdde7804d/statsmodels-0.14.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e5e26b21d2920905764fb0860957d08b5ba2fae4466ef41b1f7c53ecf9fc7fa", size = 9752723, upload-time = "2025-07-07T12:08:52.238Z" }, + { url = "https://files.pythonhosted.org/packages/72/0a/0ab3a900fc3245ebdaaca59018567b1e23bcab13c9eea2d7b3d8ffcbb82e/statsmodels-0.14.5-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a060c7e0841c549c8ce2825fd6687e6757e305d9c11c9a73f6c5a0ce849bb69", size = 10470566, upload-time = "2025-07-07T14:33:03.356Z" }, + { url = "https://files.pythonhosted.org/packages/2b/58/08e21dda0d52e4119b0e1eab8e865ce3e9c6bf59f0f879a9448deb827e83/statsmodels-0.14.5-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56da20def5350d676388213a330fd40ed15d0e8dd0bb1b92c0e4b0f2a65d3ad2", size = 10678264, upload-time = "2025-07-07T14:33:17.141Z" }, + { url = "https://files.pythonhosted.org/packages/fe/7d/3608f14237daccc0f3116b006ee3a42ca0e4dbe296496950624934138171/statsmodels-0.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:afb37ca1d70d99b5fd876e8574ea46372298ae0f0a8b17e4cf0a9afd2373ae62", size = 9658081, upload-time = "2025-07-07T12:09:04.856Z" }, +] + [[package]] name = "tabulate" version = "0.9.0" @@ -4851,6 +4938,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c5/3f/b0e8db149896005adc938a1e7f371d6d7e9eca4053a29b108978ed15e0c2/types_python_dateutil-2.9.0.20250516-py3-none-any.whl", hash = "sha256:2b2b3f57f9c6a61fba26a9c0ffb9ea5681c9b83e69cd897c6b5f668d9c0cab93", size = 14356, upload-time = "2025-05-16T03:06:57.249Z" }, ] +[[package]] +name = "typing" +version = "3.7.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/05/d9/6eebe19d46bd05360c9a9aae822e67a80f9242aabbfc58b641b957546607/typing-3.7.4.3.tar.gz", hash = "sha256:1187fb9c82fd670d10aa07bbb6cfcfe4bdda42d6fab8d5134f04e8c4d0b71cc9", size = 78592, upload-time = "2020-07-13T23:19:43.758Z" } + [[package]] name = "typing-extensions" version = "4.14.0" From e15089061704b43829e682b83a3f997d0c9051da Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Wed, 16 Jul 2025 19:32:48 +0300 Subject: [PATCH 06/51] chore: close #117 --- src/rtichoke/calibration/calibration.py | 46 +++++++++---------- src/rtichoke/discrimination/gains.py | 46 ++++++++++++++++++- src/rtichoke/discrimination/lift.py | 46 ++++++++++++++++++- .../discrimination/precision_recall.py | 46 ++++++++++++++++++- src/rtichoke/discrimination/roc.py | 46 ++++++++++++++++++- src/rtichoke/utility/decision.py | 46 ++++++++++++++++++- 6 files changed, 242 insertions(+), 34 deletions(-) diff --git a/src/rtichoke/calibration/calibration.py b/src/rtichoke/calibration/calibration.py index 0c1f81d..d2a8820 100644 --- a/src/rtichoke/calibration/calibration.py +++ b/src/rtichoke/calibration/calibration.py @@ -15,7 +15,28 @@ def create_calibration_curve( reals: Dict[str, List[int]], calibration_type: str = "discrete", size: Optional[int] = None, - color_values: List[str] = None, + color_values: Optional[List[str]] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Creates Calibration Curve @@ -31,29 +52,6 @@ def create_calibration_curve( Returns: Figure: _description_ """ - if color_values is None: - color_values = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ] rtichoke_response = send_requests_to_rtichoke_r( dictionary_to_send={ diff --git a/src/rtichoke/discrimination/gains.py b/src/rtichoke/discrimination/gains.py index 06fc222..e2a552b 100644 --- a/src/rtichoke/discrimination/gains.py +++ b/src/rtichoke/discrimination/gains.py @@ -15,7 +15,28 @@ def create_gains_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Gains Curve @@ -48,7 +69,28 @@ def create_gains_curve( def plot_gains_curve( performance_data: DataFrame, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Gains Curve diff --git a/src/rtichoke/discrimination/lift.py b/src/rtichoke/discrimination/lift.py index 02d3c0e..a796c29 
100644 --- a/src/rtichoke/discrimination/lift.py +++ b/src/rtichoke/discrimination/lift.py @@ -15,7 +15,28 @@ def create_lift_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Lift Curve @@ -48,7 +69,28 @@ def create_lift_curve( def plot_lift_curve( performance_data: DataFrame, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Lift Curve diff --git a/src/rtichoke/discrimination/precision_recall.py b/src/rtichoke/discrimination/precision_recall.py index 3c0723e..274e36e 100644 --- a/src/rtichoke/discrimination/precision_recall.py +++ b/src/rtichoke/discrimination/precision_recall.py @@ -15,7 +15,28 @@ def create_precision_recall_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Precision Recall Curve @@ -48,7 +69,28 @@ def create_precision_recall_curve( def plot_precision_recall_curve( performance_data: DataFrame, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Precision Recall Curve diff --git a/src/rtichoke/discrimination/roc.py b/src/rtichoke/discrimination/roc.py index 084232d..4c1c3bf 100644 --- a/src/rtichoke/discrimination/roc.py +++ b/src/rtichoke/discrimination/roc.py @@ -15,7 +15,28 @@ def create_roc_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create ROC Curve @@ -48,7 +69,28 @@ def create_roc_curve( def plot_roc_curve( performance_data: DataFrame, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + 
"#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot ROC Curve diff --git a/src/rtichoke/utility/decision.py b/src/rtichoke/utility/decision.py index 4de7d0e..62436e8 100644 --- a/src/rtichoke/utility/decision.py +++ b/src/rtichoke/utility/decision.py @@ -18,7 +18,28 @@ def create_decision_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Decision Curve @@ -64,7 +85,28 @@ def plot_decision_curve( min_p_threshold: int = 0, max_p_threshold: int = 1, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Decision Curve From 7b841789509e5bb89ba05925506cf5f09c1f4df0 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Wed, 16 Jul 2025 20:02:05 +0300 Subject: [PATCH 07/51] chore: fix #118 --- .../helpers/sandbox_observable_helpers.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index d386dd7..cd6f9e4 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -121,7 +121,6 @@ def extract_crude_estimate(data_to_adjust: pd.DataFrame) -> pd.DataFrame: def add_cutoff_strata(data: pl.DataFrame, by: float, stratified_by) -> pl.DataFrame: def transform_group(group: pl.DataFrame) -> pl.DataFrame: - probs = group["probs"].to_numpy() columns_to_add = [] @@ -148,8 +147,7 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame: ) if "ppcr" in stratified_by: - - # --- Compute strata_ppcr as quantiles on -probs --- + # --- Compute strata_ppcr as quantiles on -probs --- try: q = int(1 / by) quantile_edges = np.quantile(-probs, np.linspace(0, 1, q)) @@ -165,6 +163,7 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame: transformed_groups = [transform_group(group) for group in grouped.values()] return pl.concat(transformed_groups) + def create_strata_combinations_polars(stratified_by: str, by: float) -> pl.DataFrame: if stratified_by == "probability_threshold": breaks = create_breaks_values(None, "probability_threshold", by) @@ -1049,9 +1048,7 @@ def to_pl(df): ) -def create_list_data_to_adjust( - probs_dict, reals_dict, times_dict, stratified_by, by -): +def create_list_data_to_adjust(probs_dict, reals_dict, times_dict, stratified_by, by): # reference_groups = list(probs_dict.keys()) reference_group_labels = list(probs_dict.keys()) num_reals = len(reals_dict) @@ -1073,7 +1070,9 @@ def create_list_data_to_adjust( ).with_columns(pl.col("reference_group").cast(reference_group_enum)) # Apply strata - data_to_adjust = add_cutoff_strata(data_to_adjust, by=by, 
stratified_by=stratified_by) + data_to_adjust = add_cutoff_strata( + data_to_adjust, by=by, stratified_by=stratified_by + ) data_to_adjust = pivot_longer_strata(data_to_adjust) reals_labels = [ @@ -1156,7 +1155,7 @@ def ensure_arrow_safe(df: pd.DataFrame) -> pd.DataFrame: def extract_aj_estimate_by_assumptions( df: pl.DataFrame, assumption_sets: list[dict], - fixed_time_horizons: pl.Series, + fixed_time_horizons: list[float], ) -> pl.DataFrame: aj_dfs = [] From d8f9d3eb74f518848aa5e84e2d0a442f339bcf08 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Wed, 16 Jul 2025 20:08:43 +0300 Subject: [PATCH 08/51] chore: close #116 --- monkeytype.sqlite3 | Bin 8192 -> 0 bytes pyproject.toml | 3 +- uv.lock | 161 +-------------------------------------------- 3 files changed, 3 insertions(+), 161 deletions(-) delete mode 100644 monkeytype.sqlite3 diff --git a/monkeytype.sqlite3 b/monkeytype.sqlite3 deleted file mode 100644 index da60bb23ce07f85bba42b4d465bbe1049b66aefe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8192 zcmeI#v1`IW6bA4+DHH^;Ti3U#ATCbXswH%BX${3KoXN9@m!v%}Lq_|T_a$;pf@_iQ zamVpK-f;Z3w|-ykJ=3moh4D03PfBa`LZp<^a(i+^wf#eAfS()2DuXjw1d0=FQe}QI!X7eDlp|W~@!UGZ`PRr@hyY zc#*`EES9U7uFuf8Pn0<}p7T`pa7yA$GL@{T^4c;19*zO|t+1 diff --git a/pyproject.toml b/pyproject.toml index e613e7e..51ca413 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,6 @@ dev = [ "myst-nb<1.0.0,>=0.17.1; python_version ~= \"3.9\"", "sphinx-autoapi<3.0.0,>=2.1.0", "sphinx-rtd-theme<2.0.0,>=1.2.0", - "mypy>=1.2.0,<2.0.0", "pytest-cov<5.0.0,>=4.0.0", "pytest<8.0.0,>=7.3.0", "pyzmq<27.0.0,>=26.3.0", @@ -38,11 +37,11 @@ dev = [ "ipykernel>=6.29.5", "lifelines>=0.30.0", "uv>=0.6.11", - "monkeytype>=23.3.0", "marimo>=0.14.7", "pre-commit>=4.2.0", "dcurves>=1.1.5", "plotly>=5.24.1", + "ty>=0.0.1a12", ] [tool.uv.workspace] diff --git a/uv.lock b/uv.lock index 3e83a47..cf8ca01 100644 --- a/uv.lock +++ b/uv.lock @@ -1976,79 +1976,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/1d/50ad811d1c5dae091e4cf046beba925bcae0a610e79ae4c538f996f63ed5/kiwisolver-1.4.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:65ea09a5a3faadd59c2ce96dc7bf0f364986a315949dc6374f04396b0d60e09b", size = 71762, upload-time = "2024-12-24T18:30:48.903Z" }, ] -[[package]] -name = "libcst" -version = "1.8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyyaml", marker = "python_full_version < '3.13'" }, - { name = "pyyaml-ft", marker = "python_full_version >= '3.13'" }, - { name = "typing-extensions", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/89/aa/b52d195b167958fe1bd106a260f64cc80ec384f6ac2a9cda874d8803df06/libcst-1.8.2.tar.gz", hash = "sha256:66e82cedba95a6176194a817be4232c720312f8be6d2c8f3847f3317d95a0c7f", size = 881534, upload-time = "2025-06-13T20:56:37.915Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/2e/1d7f67d2ef6f875e9e8798c024f7cb3af3fe861e417bff485c69b655ac96/libcst-1.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:67d9720d91f507c87b3e5f070627ad640a00bc6cfdf5635f8c6ee9f2964cf71c", size = 2195106, upload-time = "2025-06-13T20:54:49.166Z" }, - { url = "https://files.pythonhosted.org/packages/82/d0/3d94fee2685f263fd8d85a83e2537fcc78b644eae450738bf2c72604f0df/libcst-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:94b7c032b72566077614a02baab1929739fd0af0cc1d46deaba4408b870faef2", size = 2080577, upload-time = "2025-06-13T20:54:51.518Z" }, - { url = 
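
PATCH 07 also corrects the annotation of `fixed_time_horizons` in `extract_aj_estimate_by_assumptions` from `pl.Series` to `list[float]`, matching how callers pass plain horizon values. A minimal sketch of reading an Aalen-Johansen cumulative incidence at such horizons, assuming lifelines (already a dev dependency); the example data and the elision of the module's assumption-set handling are illustrative only:

    import numpy as np
    from lifelines import AalenJohansenFitter

    durations = np.array([2.0, 3.5, 4.0, 6.0, 8.0])
    events = np.array([1, 0, 2, 1, 0])   # 0 = censored, 1/2 = competing events

    ajf = AalenJohansenFitter()
    ajf.fit(durations, events, event_of_interest=1)

    fixed_time_horizons: list[float] = [3.0, 5.0]
    cif = ajf.cumulative_density_         # step function indexed by event time
    at_horizons = {
        t: float(cif[cif.index <= t].iloc[-1, 0]) for t in fixed_time_horizons
    }
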
"https://files.pythonhosted.org/packages/14/87/c9b49bebb9a930fdcb59bf841f1c45719d2a4a39c3eb7efacfd30a2bfb0a/libcst-1.8.2-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:11ea148902e3e1688afa392087c728ac3a843e54a87d334d1464d2097d3debb7", size = 2404076, upload-time = "2025-06-13T20:54:53.303Z" }, - { url = "https://files.pythonhosted.org/packages/49/fa/9ca145aa9033f9a8362a5663ceb28dfb67082574de8118424b6b8e445e7a/libcst-1.8.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:22c9473a2cc53faabcc95a0ac6ca4e52d127017bf34ba9bc0f8e472e44f7b38e", size = 2219813, upload-time = "2025-06-13T20:54:55.351Z" }, - { url = "https://files.pythonhosted.org/packages/0c/25/496a025c09e96116437a57fd34abefe84c041d930f832c6e42d84d9e028c/libcst-1.8.2-cp310-cp310-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b5269b96367e65793a7714608f6d906418eb056d59eaac9bba980486aabddbed", size = 2189782, upload-time = "2025-06-13T20:54:57.013Z" }, - { url = "https://files.pythonhosted.org/packages/b3/75/826b5772192826d70480efe93bab3e4f0b4a24d31031f45547257ad5f9a8/libcst-1.8.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:d20e932ddd9a389da57b060c26e84a24118c96ff6fc5dcc7b784da24e823b694", size = 2312403, upload-time = "2025-06-13T20:54:58.996Z" }, - { url = "https://files.pythonhosted.org/packages/93/f4/316fa14ea6c61ea8755672d60e012558f0216300b3819e72bebc7864a507/libcst-1.8.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a553d452004e44b841788f6faa7231a02157527ddecc89dbbe5b689b74822226", size = 2280566, upload-time = "2025-06-13T20:55:00.707Z" }, - { url = "https://files.pythonhosted.org/packages/fc/52/74b69350db379b1646739288b88ffab2981b2ad48407faf03df3768d7d2f/libcst-1.8.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7fe762c4c390039b79b818cbc725d8663586b25351dc18a2704b0e357d69b924", size = 2388508, upload-time = "2025-06-13T20:55:02.769Z" }, - { url = "https://files.pythonhosted.org/packages/bc/c6/fa92699b537ed65e93c2869144e23bdf156ec81ae7b84b4f34cbc20d6048/libcst-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:5c513e64eff0f7bf2a908e2d987a98653eb33e1062ce2afd3a84af58159a24f9", size = 2093260, upload-time = "2025-06-13T20:55:04.771Z" }, - { url = "https://files.pythonhosted.org/packages/b0/ac/4ec4ae9da311f72cd97e930c325bb605e9ad0baaafcafadb0588e1dc5c4e/libcst-1.8.2-cp310-cp310-win_arm64.whl", hash = "sha256:41613fe08e647213546c7c59a5a1fc5484666e7d4cab6e80260c612acbb20e8c", size = 1985236, upload-time = "2025-06-13T20:55:06.317Z" }, - { url = "https://files.pythonhosted.org/packages/c5/73/f0a4d807bff6931e3d8c3180472cf43d63a121aa60be895425fba2ed4f3a/libcst-1.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:688a03bac4dfb9afc5078ec01d53c21556381282bdf1a804dd0dbafb5056de2a", size = 2195040, upload-time = "2025-06-13T20:55:08.117Z" }, - { url = "https://files.pythonhosted.org/packages/e5/fa/ede0cfc410e498e1279eb489603f31077d2ca112d84e1327b04b508c0cbe/libcst-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c34060ff2991707c710250463ae9f415ebb21653f2f5b013c61c9c376ff9b715", size = 2080304, upload-time = "2025-06-13T20:55:09.729Z" }, - { url = "https://files.pythonhosted.org/packages/39/8d/59f7c488dbedf96454c07038dea72ee2a38de13d52b4f796a875a1dc45a6/libcst-1.8.2-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f54f5c4176d60e7cd6b0880e18fb3fa8501ae046069151721cab457c7c538a3d", size = 2403816, upload-time = "2025-06-13T20:55:11.527Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/c2/af8d6cc0c6dcd1a5d0ed5cf846be242354513139a9358e005c63252c6ab7/libcst-1.8.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d11992561de0ad29ec2800230fbdcbef9efaa02805d5c633a73ab3cf2ba51bf1", size = 2219415, upload-time = "2025-06-13T20:55:13.144Z" }, - { url = "https://files.pythonhosted.org/packages/b6/b8/1638698d6c33bdb4397ee6f60e534e7504ef2cd1447b24104df65623dedb/libcst-1.8.2-cp311-cp311-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fa3b807c2d2b34397c135d19ad6abb20c47a2ddb7bf65d90455f2040f7797e1e", size = 2189568, upload-time = "2025-06-13T20:55:15.119Z" }, - { url = "https://files.pythonhosted.org/packages/05/16/51c1015dada47b8464c5fa0cbf70fecc5fce0facd07d05a5cb6e7eb68b88/libcst-1.8.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b0110140738be1287e3724080a101e7cec6ae708008b7650c9d8a1c1788ec03a", size = 2312018, upload-time = "2025-06-13T20:55:16.831Z" }, - { url = "https://files.pythonhosted.org/packages/d5/ea/8d24158f345ea2921d0d7ff49a6bf86fd4a08b0f05735f14a84ea9e28fa9/libcst-1.8.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a50618f4819a97ef897e055ac7aaf1cad5df84c206f33be35b0759d671574197", size = 2279875, upload-time = "2025-06-13T20:55:18.418Z" }, - { url = "https://files.pythonhosted.org/packages/73/fd/0441cc1bcf188300aaa41ca5d473919a00939cc7f4934b3b08b23c8740c1/libcst-1.8.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e9bb599c175dc34a4511f0e26d5b5374fbcc91ea338871701a519e95d52f3c28", size = 2388060, upload-time = "2025-06-13T20:55:20.304Z" }, - { url = "https://files.pythonhosted.org/packages/f8/fc/28f6380eefd58543f80589b77cab81eb038e7cc86f7c34a815a287dba82f/libcst-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:96e2363e1f6e44bd7256bbbf3a53140743f821b5133046e6185491e0d9183447", size = 2093117, upload-time = "2025-06-13T20:55:21.977Z" }, - { url = "https://files.pythonhosted.org/packages/ef/db/cdbd1531bca276c44bc485e40c3156e770e01020f8c1a737282bf884d69f/libcst-1.8.2-cp311-cp311-win_arm64.whl", hash = "sha256:f5391d71bd7e9e6c73dcb3ee8d8c63b09efc14ce6e4dad31568d4838afc9aae0", size = 1985285, upload-time = "2025-06-13T20:55:24.438Z" }, - { url = "https://files.pythonhosted.org/packages/31/2d/8726bf8ea8252e8fd1e48980753eef5449622c5f6cf731102bc43dcdc2c6/libcst-1.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2e8c1dfa854e700fcf6cd79b2796aa37d55697a74646daf5ea47c7c764bac31c", size = 2185942, upload-time = "2025-06-13T20:55:26.105Z" }, - { url = "https://files.pythonhosted.org/packages/99/b3/565d24db8daed66eae7653c1fc1bc97793d49d5d3bcef530450ee8da882c/libcst-1.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b5c57a3c1976c365678eb0730bcb140d40510990cb77df9a91bb5c41d587ba6", size = 2072622, upload-time = "2025-06-13T20:55:27.548Z" }, - { url = "https://files.pythonhosted.org/packages/8c/d6/5a433e8a58eeb5c5d46635cfe958d0605f598d87977d4560484e3662d438/libcst-1.8.2-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:0f23409add2aaebbb6d8e881babab43c2d979f051b8bd8aed5fe779ea180a4e8", size = 2402738, upload-time = "2025-06-13T20:55:29.539Z" }, - { url = "https://files.pythonhosted.org/packages/85/e4/0dd752c1880b570118fa91ac127589e6cf577ddcb2eef1aaf8b81ecc3f79/libcst-1.8.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b88e9104c456590ad0ef0e82851d4fc03e9aa9d621fa8fdd4cd0907152a825ae", size = 2219932, upload-time = "2025-06-13T20:55:31.17Z" }, - { url = 
"https://files.pythonhosted.org/packages/42/bc/fceae243c6a329477ac6d4edb887bcaa2ae7a3686158d8d9b9abb3089c37/libcst-1.8.2-cp312-cp312-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5ba3ea570c8fb6fc44f71aa329edc7c668e2909311913123d0d7ab8c65fc357", size = 2191891, upload-time = "2025-06-13T20:55:33.066Z" }, - { url = "https://files.pythonhosted.org/packages/7d/7d/eb341bdc11f1147e7edeccffd0f2f785eff014e72134f5e46067472012b0/libcst-1.8.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:460fcf3562f078781e1504983cb11909eb27a1d46eaa99e65c4b0fafdc298298", size = 2311927, upload-time = "2025-06-13T20:55:34.614Z" }, - { url = "https://files.pythonhosted.org/packages/d8/19/78bfc7aa5a542574d2ab0768210d084901dec5fc373103ca119905408cf2/libcst-1.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c1381ddbd1066d543e05d580c15beacf671e1469a0b2adb6dba58fec311f4eed", size = 2281098, upload-time = "2025-06-13T20:55:36.089Z" }, - { url = "https://files.pythonhosted.org/packages/83/37/a41788a72dc06ed3566606f7cf50349c9918cee846eeae45d1bac03d54c2/libcst-1.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a70e40ce7600e1b32e293bb9157e9de3b69170e2318ccb219102f1abb826c94a", size = 2387649, upload-time = "2025-06-13T20:55:37.797Z" }, - { url = "https://files.pythonhosted.org/packages/bb/df/7a49576c9fd55cdfd8bcfb725273aa4ee7dc41e87609f3451a4901d68057/libcst-1.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:3ece08ba778b6eeea74d9c705e9af2d1b4e915e9bc6de67ad173b962e575fcc0", size = 2094574, upload-time = "2025-06-13T20:55:39.833Z" }, - { url = "https://files.pythonhosted.org/packages/29/60/27381e194d2af08bfd0fed090c905b2732907b69da48d97d86c056d70790/libcst-1.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:5efd1bf6ee5840d1b0b82ec8e0b9c64f182fa5a7c8aad680fbd918c4fa3826e0", size = 1984568, upload-time = "2025-06-13T20:55:41.511Z" }, - { url = "https://files.pythonhosted.org/packages/11/9c/e3d4c7f1eb5c23907f905f84a4da271b60cd15b746ac794d42ea18bb105e/libcst-1.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08e9dca4ab6f8551794ce7ec146f86def6a82da41750cbed2c07551345fa10d3", size = 2185848, upload-time = "2025-06-13T20:55:43.653Z" }, - { url = "https://files.pythonhosted.org/packages/59/e0/635cbb205d42fd296c01ab5cd1ba485b0aee92bffe061de587890c81f1bf/libcst-1.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8310521f2ccb79b5c4345750d475b88afa37bad930ab5554735f85ad5e3add30", size = 2072510, upload-time = "2025-06-13T20:55:45.287Z" }, - { url = "https://files.pythonhosted.org/packages/fe/45/8911cfe9413fd690a024a1ff2c8975f060dd721160178679d3f6a21f939e/libcst-1.8.2-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:da2d8b008aff72acd5a4a588491abdda1b446f17508e700f26df9be80d8442ae", size = 2403226, upload-time = "2025-06-13T20:55:46.927Z" }, - { url = "https://files.pythonhosted.org/packages/38/83/819d2b1b1fd870ad34ce4f34ec68704ca69bf48ef2d7665483115f267ec4/libcst-1.8.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:be821d874ce8b26cbadd7277fa251a9b37f6d2326f8b5682b6fc8966b50a3a59", size = 2220669, upload-time = "2025-06-13T20:55:48.597Z" }, - { url = "https://files.pythonhosted.org/packages/d4/2f/2c4742bf834f88a9803095915c4f41cafefb7b04bde66ea86f74668b4b7b/libcst-1.8.2-cp313-cp313-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f74b0bc7378ad5afcf25ac9d0367b4dbba50f6f6468faa41f5dfddcf8bf9c0f8", size = 2191919, upload-time = "2025-06-13T20:55:50.092Z" }, - { url = 
"https://files.pythonhosted.org/packages/64/f4/107e13815f1ee5aad642d4eb4671c0273ee737f3832e3dbca9603b39f8d9/libcst-1.8.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b68ea4a6018abfea1f68d50f74de7d399172684c264eb09809023e2c8696fc23", size = 2311965, upload-time = "2025-06-13T20:55:51.974Z" }, - { url = "https://files.pythonhosted.org/packages/03/63/2948b6e4be367ad375d273a8ad00df573029cffe5ac8f6c09398c250de5b/libcst-1.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2e264307ec49b2c72480422abafe80457f90b4e6e693b7ddf8a23d24b5c24001", size = 2281704, upload-time = "2025-06-13T20:55:54.036Z" }, - { url = "https://files.pythonhosted.org/packages/c8/d3/590cde9c8c386d5f4f05fdef3394c437ea51060478a5141ff4a1f289e747/libcst-1.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5d5519962ce7c72d81888fb0c09e58e308ba4c376e76bcd853b48151063d6a8", size = 2387511, upload-time = "2025-06-13T20:55:55.538Z" }, - { url = "https://files.pythonhosted.org/packages/96/3d/ba5e36c663028043fc607dc33e5c390c7f73136fb15a890fb3710ee9d158/libcst-1.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:b62aa11d6b74ed5545e58ac613d3f63095e5fd0254b3e0d1168fda991b9a6b41", size = 2094526, upload-time = "2025-06-13T20:55:57.486Z" }, - { url = "https://files.pythonhosted.org/packages/a5/34/530ca3b972dddad562f266c81190bea29376f8ba70054ea7b45b114504cd/libcst-1.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9c2bd4ac288a9cdb7ffc3229a9ce8027a66a3fd3f2ab9e13da60f5fbfe91f3b2", size = 1984627, upload-time = "2025-06-13T20:55:59.017Z" }, - { url = "https://files.pythonhosted.org/packages/19/9f/491f7b8d9d93444cd9bf711156ee1f122c38d25b903599e363d669acc8ab/libcst-1.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:08a8c7d9922ca6eed24e2c13a3c552b3c186af8fc78e5d4820b58487d780ec19", size = 2175415, upload-time = "2025-06-13T20:56:01.157Z" }, - { url = "https://files.pythonhosted.org/packages/2e/fe/4d13437f453f92687246aa7c5138e102ee5186fe96609ee4c598bb9f9ecb/libcst-1.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bba7c2b5063e8ada5a5477f9fa0c01710645426b5a8628ec50d558542a0a292e", size = 2063719, upload-time = "2025-06-13T20:56:02.787Z" }, - { url = "https://files.pythonhosted.org/packages/94/59/758ae142c6607f275269021362b731e0f22ff5c9aa7cc67b0ed3a6bc930f/libcst-1.8.2-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:d97c9fe13aacfbefded6861f5200dcb8e837da7391a9bdeb44ccb133705990af", size = 2380624, upload-time = "2025-06-13T20:56:04.909Z" }, - { url = "https://files.pythonhosted.org/packages/ac/c5/31d214a0bcb3523243a9b5643b597ff653d6ec9e1f3326cfcc16bcbf185d/libcst-1.8.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d2194ae959630aae4176a4b75bd320b3274c20bef2a5ca6b8d6fc96d3c608edf", size = 2208801, upload-time = "2025-06-13T20:56:06.983Z" }, - { url = "https://files.pythonhosted.org/packages/70/16/a53f852322b266c63b492836a5c4968f192ee70fb52795a79feb4924e9ed/libcst-1.8.2-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0be639f5b2e1999a4b4a82a0f4633969f97336f052d0c131627983589af52f56", size = 2179557, upload-time = "2025-06-13T20:56:09.09Z" }, - { url = "https://files.pythonhosted.org/packages/fa/49/12a5664c73107187ba3af14869d3878fca1fd4c37f6fbb9adb943cb7a791/libcst-1.8.2-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6753e50904e05c27915933da41518ecd7a8ca4dd3602112ba44920c6e353a455", size = 2302499, upload-time = "2025-06-13T20:56:10.751Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/46/2d62552a9346a040c045d6619b645d59bb707a586318121f099abd0cd5c4/libcst-1.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:706d07106af91c343150be86caeae1ea3851b74aa0730fcbbf8cd089e817f818", size = 2271070, upload-time = "2025-06-13T20:56:12.445Z" }, - { url = "https://files.pythonhosted.org/packages/af/67/b625fd6ae22575255aade0a24f45e1d430b7e7279729c9c51d4faac982d2/libcst-1.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd4310ea8ddc49cc8872e083737cf806299b17f93159a1f354d59aa08993e876", size = 2380767, upload-time = "2025-06-13T20:56:13.995Z" }, - { url = "https://files.pythonhosted.org/packages/e6/84/fb88f2ffdb045ff7323a6c05dd3d243a9eb3cb3517a6269dee43fbfb9990/libcst-1.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:51bbafdd847529e8a16d1965814ed17831af61452ee31943c414cb23451de926", size = 2083403, upload-time = "2025-06-13T20:56:15.959Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8f/da755d6d517eb8ec9664afae967b00a9b8dd567bbbb350e261359c1b47fc/libcst-1.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:4f14f5045766646ed9e8826b959c6d07194788babed1e0ba08c94ea4f39517e3", size = 1974355, upload-time = "2025-06-13T20:56:18.064Z" }, - { url = "https://files.pythonhosted.org/packages/2e/55/7c223ffc44fa623cc4c6c45e932d8e0724e31c8daede8a66d6a53ccd49a1/libcst-1.8.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:f69582e24667715e3860d80d663f1caeb2398110077e23cc0a1e0066a851f5ab", size = 2195291, upload-time = "2025-06-13T20:56:20.114Z" }, - { url = "https://files.pythonhosted.org/packages/77/3a/dced5455963238f1ebedd28cf48bfd5e5d84c847132846a2567f5beaf7fc/libcst-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ba85f9e6a7f37ef998168aa3fd28d263d7f83016bd306a4508a2394e5e793b4", size = 2080544, upload-time = "2025-06-13T20:56:22.096Z" }, - { url = "https://files.pythonhosted.org/packages/da/ec/2bce80fb362961191e3ac67a38619780f9bd5203732ad95962458a3b71c0/libcst-1.8.2-cp39-cp39-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:43ccaa6c54daa1749cec53710c70d47150965574d4c6d4c4f2e3f87b9bf9f591", size = 2404396, upload-time = "2025-06-13T20:56:24.215Z" }, - { url = "https://files.pythonhosted.org/packages/6a/33/dd10a5ad783f3c1edc55fe97f5cbfe3924f6a7ce3556464538640a348e04/libcst-1.8.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8a81d816c2088d2055112af5ecd82fdfbe8ff277600e94255e2639b07de10234", size = 2219446, upload-time = "2025-06-13T20:56:25.84Z" }, - { url = "https://files.pythonhosted.org/packages/dd/66/e7a208e5208bbd37b5be989e22b7abd117c40866b7880e7c447f4fb8ee46/libcst-1.8.2-cp39-cp39-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:449f9ff8a5025dcd5c8d4ad28f6c291de5de89e4c044b0bda96b45bef8999b75", size = 2189946, upload-time = "2025-06-13T20:56:27.472Z" }, - { url = "https://files.pythonhosted.org/packages/08/6f/5ef938f947e7cdd83bdffb6929697e7f27b0ae4a6f84a7f30e044690ba1c/libcst-1.8.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:36d5ab95f39f855521585b0e819dc2d4d1b2a4080bad04c2f3de1e387a5d2233", size = 2312416, upload-time = "2025-06-13T20:56:29.49Z" }, - { url = "https://files.pythonhosted.org/packages/04/5b/2f965ae65ef12bc0800a35c5668df3eda26437f6a8bcc0f5520b02f3c3a5/libcst-1.8.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:207575dec2dae722acf6ab39b4b361151c65f8f895fd37edf9d384f5541562e1", size = 2280429, upload-time = "2025-06-13T20:56:30.995Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/1d/f67e6cb1146c0b546f095baf0d6ff6fa561bd61c1e1a5357e9557a16d501/libcst-1.8.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:52a1067cf31d9e9e4be514b253bea6276f1531dd7de6ab0917df8ce5b468a820", size = 2388615, upload-time = "2025-06-13T20:56:32.655Z" }, - { url = "https://files.pythonhosted.org/packages/b7/83/b4d659782e88f46c073ea5cbd9a4e99bf7ea17883632371795f91121b220/libcst-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:59e8f611c977206eba294c296c2d29a1c1b1b88206cb97cd0d4847c1a3d923e7", size = 2093194, upload-time = "2025-06-13T20:56:34.348Z" }, - { url = "https://files.pythonhosted.org/packages/01/4a/3614b732cb25a3bba93ffde84b9e006007c687a9c84d22e64add56dee5fd/libcst-1.8.2-cp39-cp39-win_arm64.whl", hash = "sha256:ae22376633cfa3db21c4eed2870d1c36b5419289975a41a45f34a085b2d9e6ea", size = 1985259, upload-time = "2025-06-13T20:56:36.337Z" }, -] - [[package]] name = "lifelines" version = "0.30.0" @@ -2474,19 +2401,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410, upload-time = "2025-03-19T14:27:23.451Z" }, ] -[[package]] -name = "monkeytype" -version = "23.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "libcst" }, - { name = "mypy-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/de/66/7006d51ed537648107c28086f8c390030b4b4c5524b77598a3bbb657d3ec/MonkeyType-23.3.0.tar.gz", hash = "sha256:f2595db34d57cdddbde5a990117a50a22f373dbb917a2a0fa91ffbe07dfe0313", size = 34847, upload-time = "2023-03-20T14:08:01.69Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/6d/de1fd4624ba300a98cc22f4db38f24bf89e660b6fc0be2740406347e5bca/MonkeyType-23.3.0-py3-none-any.whl", hash = "sha256:38ce8ad6568190f54c334b9fe835608af29b40a33ad448ecae749ae8790cdbf9", size = 40850, upload-time = "2023-03-20T14:07:58.815Z" }, -] - [[package]] name = "multidict" version = "6.6.3" @@ -2604,51 +2518,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d8/30/9aec301e9772b098c1f5c0ca0279237c9766d94b97802e9888010c64b0ed/multidict-6.6.3-py3-none-any.whl", hash = "sha256:8db10f29c7541fc5da4defd8cd697e1ca429db743fa716325f236079b96f775a", size = 12313, upload-time = "2025-06-30T15:53:45.437Z" }, ] -[[package]] -name = "mypy" -version = "1.16.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mypy-extensions" }, - { name = "pathspec" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/81/69/92c7fa98112e4d9eb075a239caa4ef4649ad7d441545ccffbd5e34607cbb/mypy-1.16.1.tar.gz", hash = "sha256:6bd00a0a2094841c5e47e7374bb42b83d64c527a502e3334e1173a0c24437bab", size = 3324747, upload-time = "2025-06-16T16:51:35.145Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/12/2bf23a80fcef5edb75de9a1e295d778e0f46ea89eb8b115818b663eff42b/mypy-1.16.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4f0fed1022a63c6fec38f28b7fc77fca47fd490445c69d0a66266c59dd0b88a", size = 10958644, upload-time = "2025-06-16T16:51:11.649Z" }, - { url = "https://files.pythonhosted.org/packages/08/50/bfe47b3b278eacf348291742fd5e6613bbc4b3434b72ce9361896417cfe5/mypy-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:86042bbf9f5a05ea000d3203cf87aa9d0ccf9a01f73f71c58979eb9249f46d72", size = 10087033, upload-time = "2025-06-16T16:35:30.089Z" }, - { url = "https://files.pythonhosted.org/packages/21/de/40307c12fe25675a0776aaa2cdd2879cf30d99eec91b898de00228dc3ab5/mypy-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ea7469ee5902c95542bea7ee545f7006508c65c8c54b06dc2c92676ce526f3ea", size = 11875645, upload-time = "2025-06-16T16:35:48.49Z" }, - { url = "https://files.pythonhosted.org/packages/a6/d8/85bdb59e4a98b7a31495bd8f1a4445d8ffc86cde4ab1f8c11d247c11aedc/mypy-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:352025753ef6a83cb9e7f2427319bb7875d1fdda8439d1e23de12ab164179574", size = 12616986, upload-time = "2025-06-16T16:48:39.526Z" }, - { url = "https://files.pythonhosted.org/packages/0e/d0/bb25731158fa8f8ee9e068d3e94fcceb4971fedf1424248496292512afe9/mypy-1.16.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff9fa5b16e4c1364eb89a4d16bcda9987f05d39604e1e6c35378a2987c1aac2d", size = 12878632, upload-time = "2025-06-16T16:36:08.195Z" }, - { url = "https://files.pythonhosted.org/packages/2d/11/822a9beb7a2b825c0cb06132ca0a5183f8327a5e23ef89717c9474ba0bc6/mypy-1.16.1-cp310-cp310-win_amd64.whl", hash = "sha256:1256688e284632382f8f3b9e2123df7d279f603c561f099758e66dd6ed4e8bd6", size = 9484391, upload-time = "2025-06-16T16:37:56.151Z" }, - { url = "https://files.pythonhosted.org/packages/9a/61/ec1245aa1c325cb7a6c0f8570a2eee3bfc40fa90d19b1267f8e50b5c8645/mypy-1.16.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:472e4e4c100062488ec643f6162dd0d5208e33e2f34544e1fc931372e806c0cc", size = 10890557, upload-time = "2025-06-16T16:37:21.421Z" }, - { url = "https://files.pythonhosted.org/packages/6b/bb/6eccc0ba0aa0c7a87df24e73f0ad34170514abd8162eb0c75fd7128171fb/mypy-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea16e2a7d2714277e349e24d19a782a663a34ed60864006e8585db08f8ad1782", size = 10012921, upload-time = "2025-06-16T16:51:28.659Z" }, - { url = "https://files.pythonhosted.org/packages/5f/80/b337a12e2006715f99f529e732c5f6a8c143bb58c92bb142d5ab380963a5/mypy-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08e850ea22adc4d8a4014651575567b0318ede51e8e9fe7a68f25391af699507", size = 11802887, upload-time = "2025-06-16T16:50:53.627Z" }, - { url = "https://files.pythonhosted.org/packages/d9/59/f7af072d09793d581a745a25737c7c0a945760036b16aeb620f658a017af/mypy-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22d76a63a42619bfb90122889b903519149879ddbf2ba4251834727944c8baca", size = 12531658, upload-time = "2025-06-16T16:33:55.002Z" }, - { url = "https://files.pythonhosted.org/packages/82/c4/607672f2d6c0254b94a646cfc45ad589dd71b04aa1f3d642b840f7cce06c/mypy-1.16.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2c7ce0662b6b9dc8f4ed86eb7a5d505ee3298c04b40ec13b30e572c0e5ae17c4", size = 12732486, upload-time = "2025-06-16T16:37:03.301Z" }, - { url = "https://files.pythonhosted.org/packages/b6/5e/136555ec1d80df877a707cebf9081bd3a9f397dedc1ab9750518d87489ec/mypy-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:211287e98e05352a2e1d4e8759c5490925a7c784ddc84207f4714822f8cf99b6", size = 9479482, upload-time = "2025-06-16T16:47:37.48Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/d6/39482e5fcc724c15bf6280ff5806548c7185e0c090712a3736ed4d07e8b7/mypy-1.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:af4792433f09575d9eeca5c63d7d90ca4aeceda9d8355e136f80f8967639183d", size = 11066493, upload-time = "2025-06-16T16:47:01.683Z" }, - { url = "https://files.pythonhosted.org/packages/e6/e5/26c347890efc6b757f4d5bb83f4a0cf5958b8cf49c938ac99b8b72b420a6/mypy-1.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66df38405fd8466ce3517eda1f6640611a0b8e70895e2a9462d1d4323c5eb4b9", size = 10081687, upload-time = "2025-06-16T16:48:19.367Z" }, - { url = "https://files.pythonhosted.org/packages/44/c7/b5cb264c97b86914487d6a24bd8688c0172e37ec0f43e93b9691cae9468b/mypy-1.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44e7acddb3c48bd2713994d098729494117803616e116032af192871aed80b79", size = 11839723, upload-time = "2025-06-16T16:49:20.912Z" }, - { url = "https://files.pythonhosted.org/packages/15/f8/491997a9b8a554204f834ed4816bda813aefda31cf873bb099deee3c9a99/mypy-1.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ab5eca37b50188163fa7c1b73c685ac66c4e9bdee4a85c9adac0e91d8895e15", size = 12722980, upload-time = "2025-06-16T16:37:40.929Z" }, - { url = "https://files.pythonhosted.org/packages/df/f0/2bd41e174b5fd93bc9de9a28e4fb673113633b8a7f3a607fa4a73595e468/mypy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb6229b2c9086247e21a83c309754b9058b438704ad2f6807f0d8227f6ebdd", size = 12903328, upload-time = "2025-06-16T16:34:35.099Z" }, - { url = "https://files.pythonhosted.org/packages/61/81/5572108a7bec2c46b8aff7e9b524f371fe6ab5efb534d38d6b37b5490da8/mypy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:1f0435cf920e287ff68af3d10a118a73f212deb2ce087619eb4e648116d1fe9b", size = 9562321, upload-time = "2025-06-16T16:48:58.823Z" }, - { url = "https://files.pythonhosted.org/packages/28/e3/96964af4a75a949e67df4b95318fe2b7427ac8189bbc3ef28f92a1c5bc56/mypy-1.16.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ddc91eb318c8751c69ddb200a5937f1232ee8efb4e64e9f4bc475a33719de438", size = 11063480, upload-time = "2025-06-16T16:47:56.205Z" }, - { url = "https://files.pythonhosted.org/packages/f5/4d/cd1a42b8e5be278fab7010fb289d9307a63e07153f0ae1510a3d7b703193/mypy-1.16.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:87ff2c13d58bdc4bbe7dc0dedfe622c0f04e2cb2a492269f3b418df2de05c536", size = 10090538, upload-time = "2025-06-16T16:46:43.92Z" }, - { url = "https://files.pythonhosted.org/packages/c9/4f/c3c6b4b66374b5f68bab07c8cabd63a049ff69796b844bc759a0ca99bb2a/mypy-1.16.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a7cfb0fe29fe5a9841b7c8ee6dffb52382c45acdf68f032145b75620acfbd6f", size = 11836839, upload-time = "2025-06-16T16:36:28.039Z" }, - { url = "https://files.pythonhosted.org/packages/b4/7e/81ca3b074021ad9775e5cb97ebe0089c0f13684b066a750b7dc208438403/mypy-1.16.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:051e1677689c9d9578b9c7f4d206d763f9bbd95723cd1416fad50db49d52f359", size = 12715634, upload-time = "2025-06-16T16:50:34.441Z" }, - { url = "https://files.pythonhosted.org/packages/e9/95/bdd40c8be346fa4c70edb4081d727a54d0a05382d84966869738cfa8a497/mypy-1.16.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d5d2309511cc56c021b4b4e462907c2b12f669b2dbeb68300110ec27723971be", size = 12895584, upload-time = 
"2025-06-16T16:34:54.857Z" }, - { url = "https://files.pythonhosted.org/packages/5a/fd/d486a0827a1c597b3b48b1bdef47228a6e9ee8102ab8c28f944cb83b65dc/mypy-1.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:4f58ac32771341e38a853c5d0ec0dfe27e18e27da9cdb8bbc882d2249c71a3ee", size = 9573886, upload-time = "2025-06-16T16:36:43.589Z" }, - { url = "https://files.pythonhosted.org/packages/49/5e/ed1e6a7344005df11dfd58b0fdd59ce939a0ba9f7ed37754bf20670b74db/mypy-1.16.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7fc688329af6a287567f45cc1cefb9db662defeb14625213a5b7da6e692e2069", size = 10959511, upload-time = "2025-06-16T16:47:21.945Z" }, - { url = "https://files.pythonhosted.org/packages/30/88/a7cbc2541e91fe04f43d9e4577264b260fecedb9bccb64ffb1a34b7e6c22/mypy-1.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e198ab3f55924c03ead626ff424cad1732d0d391478dfbf7bb97b34602395da", size = 10075555, upload-time = "2025-06-16T16:50:14.084Z" }, - { url = "https://files.pythonhosted.org/packages/93/f7/c62b1e31a32fbd1546cca5e0a2e5f181be5761265ad1f2e94f2a306fa906/mypy-1.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09aa4f91ada245f0a45dbc47e548fd94e0dd5a8433e0114917dc3b526912a30c", size = 11874169, upload-time = "2025-06-16T16:49:42.276Z" }, - { url = "https://files.pythonhosted.org/packages/c8/15/db580a28034657fb6cb87af2f8996435a5b19d429ea4dcd6e1c73d418e60/mypy-1.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13c7cd5b1cb2909aa318a90fd1b7e31f17c50b242953e7dd58345b2a814f6383", size = 12610060, upload-time = "2025-06-16T16:34:15.215Z" }, - { url = "https://files.pythonhosted.org/packages/ec/78/c17f48f6843048fa92d1489d3095e99324f2a8c420f831a04ccc454e2e51/mypy-1.16.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:58e07fb958bc5d752a280da0e890c538f1515b79a65757bbdc54252ba82e0b40", size = 12875199, upload-time = "2025-06-16T16:35:14.448Z" }, - { url = "https://files.pythonhosted.org/packages/bc/d6/ed42167d0a42680381653fd251d877382351e1bd2c6dd8a818764be3beb1/mypy-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:f895078594d918f93337a505f8add9bd654d1a24962b4c6ed9390e12531eb31b", size = 9487033, upload-time = "2025-06-16T16:49:57.907Z" }, - { url = "https://files.pythonhosted.org/packages/cf/d3/53e684e78e07c1a2bf7105715e5edd09ce951fc3f47cf9ed095ec1b7a037/mypy-1.16.1-py3-none-any.whl", hash = "sha256:5fc2ac4027d0ef28d6ba69a0343737a23c4d1b83672bf38d1fe237bdc0643b37", size = 2265923, upload-time = "2025-06-16T16:48:02.366Z" }, -] - [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3871,30 +3740,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/87/5124b1c1f2412bb95c59ec481eaf936cd32f0fe2a7b16b97b81c4c017a6a/PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", size = 162312, upload-time = "2024-08-06T20:33:49.073Z" }, ] -[[package]] -name = "pyyaml-ft" -version = "8.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/eb/5a0d575de784f9a1f94e2b1288c6886f13f34185e13117ed530f32b6f8a8/pyyaml_ft-8.0.0.tar.gz", hash = "sha256:0c947dce03954c7b5d38869ed4878b2e6ff1d44b08a0d84dc83fdad205ae39ab", size = 141057, upload-time = "2025-06-10T15:32:15.613Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/68/ba/a067369fe61a2e57fb38732562927d5bae088c73cb9bb5438736a9555b29/pyyaml_ft-8.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:8c1306282bc958bfda31237f900eb52c9bedf9b93a11f82e1aab004c9a5657a6", size = 187027, upload-time = "2025-06-10T15:31:48.722Z" }, - { url = "https://files.pythonhosted.org/packages/ad/c5/a3d2020ce5ccfc6aede0d45bcb870298652ac0cf199f67714d250e0cdf39/pyyaml_ft-8.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:30c5f1751625786c19de751e3130fc345ebcba6a86f6bddd6e1285342f4bbb69", size = 176146, upload-time = "2025-06-10T15:31:50.584Z" }, - { url = "https://files.pythonhosted.org/packages/e3/bb/23a9739291086ca0d3189eac7cd92b4d00e9fdc77d722ab610c35f9a82ba/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fa992481155ddda2e303fcc74c79c05eddcdbc907b888d3d9ce3ff3e2adcfb0", size = 746792, upload-time = "2025-06-10T15:31:52.304Z" }, - { url = "https://files.pythonhosted.org/packages/5f/c2/e8825f4ff725b7e560d62a3609e31d735318068e1079539ebfde397ea03e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cec6c92b4207004b62dfad1f0be321c9f04725e0f271c16247d8b39c3bf3ea42", size = 786772, upload-time = "2025-06-10T15:31:54.712Z" }, - { url = "https://files.pythonhosted.org/packages/35/be/58a4dcae8854f2fdca9b28d9495298fd5571a50d8430b1c3033ec95d2d0e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06237267dbcab70d4c0e9436d8f719f04a51123f0ca2694c00dd4b68c338e40b", size = 778723, upload-time = "2025-06-10T15:31:56.093Z" }, - { url = "https://files.pythonhosted.org/packages/86/ed/fed0da92b5d5d7340a082e3802d84c6dc9d5fa142954404c41a544c1cb92/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8a7f332bc565817644cdb38ffe4739e44c3e18c55793f75dddb87630f03fc254", size = 758478, upload-time = "2025-06-10T15:31:58.314Z" }, - { url = "https://files.pythonhosted.org/packages/f0/69/ac02afe286275980ecb2dcdc0156617389b7e0c0a3fcdedf155c67be2b80/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d10175a746be65f6feb86224df5d6bc5c049ebf52b89a88cf1cd78af5a367a8", size = 799159, upload-time = "2025-06-10T15:31:59.675Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ac/c492a9da2e39abdff4c3094ec54acac9747743f36428281fb186a03fab76/pyyaml_ft-8.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:58e1015098cf8d8aec82f360789c16283b88ca670fe4275ef6c48c5e30b22a96", size = 158779, upload-time = "2025-06-10T15:32:01.029Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9b/41998df3298960d7c67653669f37710fa2d568a5fc933ea24a6df60acaf6/pyyaml_ft-8.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5f3e2ceb790d50602b2fd4ec37abbd760a8c778e46354df647e7c5a4ebb", size = 191331, upload-time = "2025-06-10T15:32:02.602Z" }, - { url = "https://files.pythonhosted.org/packages/0f/16/2710c252ee04cbd74d9562ebba709e5a284faeb8ada88fcda548c9191b47/pyyaml_ft-8.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8d445bf6ea16bb93c37b42fdacfb2f94c8e92a79ba9e12768c96ecde867046d1", size = 182879, upload-time = "2025-06-10T15:32:04.466Z" }, - { url = "https://files.pythonhosted.org/packages/9a/40/ae8163519d937fa7bfa457b6f78439cc6831a7c2b170e4f612f7eda71815/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c56bb46b4fda34cbb92a9446a841da3982cdde6ea13de3fbd80db7eeeab8b49", size = 811277, upload-time = "2025-06-10T15:32:06.214Z" }, - { url = "https://files.pythonhosted.org/packages/f9/66/28d82dbff7f87b96f0eeac79b7d972a96b4980c1e445eb6a857ba91eda00/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:dab0abb46eb1780da486f022dce034b952c8ae40753627b27a626d803926483b", size = 831650, upload-time = "2025-06-10T15:32:08.076Z" }, - { url = "https://files.pythonhosted.org/packages/e8/df/161c4566facac7d75a9e182295c223060373d4116dead9cc53a265de60b9/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd48d639cab5ca50ad957b6dd632c7dd3ac02a1abe0e8196a3c24a52f5db3f7a", size = 815755, upload-time = "2025-06-10T15:32:09.435Z" }, - { url = "https://files.pythonhosted.org/packages/05/10/f42c48fa5153204f42eaa945e8d1fd7c10d6296841dcb2447bf7da1be5c4/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:052561b89d5b2a8e1289f326d060e794c21fa068aa11255fe71d65baf18a632e", size = 810403, upload-time = "2025-06-10T15:32:11.051Z" }, - { url = "https://files.pythonhosted.org/packages/d5/d2/e369064aa51009eb9245399fd8ad2c562bd0bcd392a00be44b2a824ded7c/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3bb4b927929b0cb162fb1605392a321e3333e48ce616cdcfa04a839271373255", size = 835581, upload-time = "2025-06-10T15:32:12.897Z" }, - { url = "https://files.pythonhosted.org/packages/c0/28/26534bed77109632a956977f60d8519049f545abc39215d086e33a61f1f2/pyyaml_ft-8.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:de04cfe9439565e32f178106c51dd6ca61afaa2907d143835d501d84703d3793", size = 171579, upload-time = "2025-06-10T15:32:14.34Z" }, -] - [[package]] name = "pyzmq" version = "26.4.0" @@ -4226,8 +4071,6 @@ dev = [ { name = "jupyter" }, { name = "lifelines" }, { name = "marimo" }, - { name = "monkeytype" }, - { name = "mypy" }, { name = "myst-nb", marker = "python_full_version < '4'" }, { name = "plotly" }, { name = "pre-commit" }, @@ -4237,6 +4080,7 @@ dev = [ { name = "ruff" }, { name = "sphinx-autoapi" }, { name = "sphinx-rtd-theme" }, + { name = "ty" }, { name = "uv" }, ] @@ -4264,8 +4108,6 @@ dev = [ { name = "jupyter", specifier = ">=1.0.0,<2.0.0" }, { name = "lifelines", specifier = ">=0.30.0" }, { name = "marimo", specifier = ">=0.14.7" }, - { name = "monkeytype", specifier = ">=23.3.0" }, - { name = "mypy", specifier = ">=1.2.0,<2.0.0" }, { name = "myst-nb", marker = "python_full_version >= '3.9' and python_full_version < '4'", specifier = ">=0.17.1,<1.0.0" }, { name = "plotly", specifier = ">=5.24.1" }, { name = "pre-commit", specifier = ">=4.2.0" }, @@ -4275,6 +4117,7 @@ dev = [ { name = "ruff", specifier = ">=0.11.0" }, { name = "sphinx-autoapi", specifier = ">=2.1.0,<3.0.0" }, { name = "sphinx-rtd-theme", specifier = ">=1.2.0,<2.0.0" }, + { name = "ty", specifier = ">=0.0.1a12" }, { name = "uv", specifier = ">=0.6.11" }, ] From 2b0484bfb909421aebb7c3d5b923b0f94f619649 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Wed, 16 Jul 2025 20:59:42 +0300 Subject: [PATCH 09/51] chore: remove unecessary code --- .../helpers/sandbox_observable_helpers.py | 109 ------------------ src/rtichoke/summary_report/summary_report.py | 4 +- 2 files changed, 2 insertions(+), 111 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index cd6f9e4..0e85693 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -96,29 +96,6 @@ def extract_aj_estimate(data_to_adjust, fixed_time_horizons): return result_df -def extract_crude_estimate(data_to_adjust: pd.DataFrame) -> pd.DataFrame: - df = safe_pl_from_pandas(data_to_adjust) - - crude_estimate = df.group_by(["strata", "reals", "fixed_time_horizon"]).agg( - 
pl.count().alias("reals_estimate") - ) - - unique_strata = df.select("strata").unique().to_series().to_list() - unique_reals = df.select("reals").unique().to_series().to_list() - unique_horizons = df.select("fixed_time_horizon").unique().to_series().to_list() - - all_combinations = pl.DataFrame( - itertools.product(unique_strata, unique_reals, unique_horizons), - schema=["strata", "reals", "fixed_time_horizon"], - ) - - final = all_combinations.join( - crude_estimate, on=["strata", "reals", "fixed_time_horizon"], how="left" - ).fill_null(0) - - return final.to_pandas() - - def add_cutoff_strata(data: pl.DataFrame, by: float, stratified_by) -> pl.DataFrame: def transform_group(group: pl.DataFrame) -> pl.DataFrame: probs = group["probs"].to_numpy() @@ -467,34 +444,6 @@ def pivot_longer_strata(data: pl.DataFrame) -> pl.DataFrame: return data_long -def update_administrative_censoring(data_to_adjust: pd.DataFrame) -> pd.DataFrame: - data_to_adjust = data_to_adjust.copy() - data_to_adjust["reals"] = data_to_adjust["reals"].astype(str) - - pl_data = safe_pl_from_pandas(data_to_adjust) - - # Define logic in Python and map it row-wise (this avoids any column reference issues) - def adjust(row): - t = row["times"] - h = row["fixed_time_horizon"] - r = row["reals"] - if t > h and r == "real_positives": - return "real_negatives" - if t < h and r == "real_negatives": - return "real_censored" - return r - - pl_data = pl_data.with_columns( - [ - pl.struct(["times", "fixed_time_horizon", "reals"]) - .map_elements(adjust) - .alias("reals") - ] - ) - - return pl_data.to_pandas() - - def map_reals_to_labels_polars(data: pl.DataFrame) -> pl.DataFrame: return data.with_columns( [ @@ -930,22 +879,6 @@ def extract_aj_estimate_for_strata(data_to_adjust, horizons): ) -def assign_and_explode(data: pd.DataFrame, fixed_time_horizons) -> pd.DataFrame: - # Ensure list type - if not isinstance(fixed_time_horizons, list): - fixed_time_horizons = [fixed_time_horizons] - - # Convert safely to Polars - df = safe_pl_from_pandas(data) - - # Add the repeated list to each row, then explode - df = df.with_columns( - pl.Series("fixed_time_horizon", [fixed_time_horizons] * df.height) - ).explode("fixed_time_horizon") - - return df.to_pandas() - - def assign_and_explode_polars( data: pl.DataFrame, fixed_time_horizons: list[float] ) -> pl.DataFrame: @@ -1103,23 +1036,6 @@ def create_list_data_to_adjust(probs_dict, reals_dict, times_dict, stratified_by return list_data_to_adjust -def safe_pl_from_pandas(df: pd.DataFrame) -> pl.DataFrame: - df = df.copy() - for col in df.select_dtypes(include="category").columns: - df[col] = df[col].astype(str) - for col in df.columns: - if df[col].dtype == "object": - try: - if any( - isinstance(val, pd._libs.interval.Interval) - for val in df[col].dropna() - ): - df[col] = df[col].astype(str) - except Exception: - df[col] = df[col].astype(str) - return pl.from_pandas(df) - - def ensure_no_categorical(df: pd.DataFrame) -> pd.DataFrame: df = df.copy() for col in df.select_dtypes(include="category").columns: @@ -1127,31 +1043,6 @@ def ensure_no_categorical(df: pd.DataFrame) -> pd.DataFrame: return df -def ensure_arrow_safe(df: pd.DataFrame) -> pd.DataFrame: - df = df.copy() - - # Convert all category columns to string - for col in df.select_dtypes(include="category").columns: - df[col] = df[col].astype(str) - - # Convert Interval and other Arrow-unsafe objects to string - for col in df.columns: - if df[col].dtype == "object": - try: - # Try to catch Interval or any other problematic type - if any( - 
isinstance(val, pd._libs.interval.Interval) - for val in df[col].dropna() - ): - df[col] = df[col].astype(str) - except Exception: - df[col] = df[col].astype( - str - ) # fallback: convert whole column to string - - return df - - def extract_aj_estimate_by_assumptions( df: pl.DataFrame, assumption_sets: list[dict], diff --git a/src/rtichoke/summary_report/summary_report.py b/src/rtichoke/summary_report/summary_report.py index 8e20dc8..9fc6fd3 100644 --- a/src/rtichoke/summary_report/summary_report.py +++ b/src/rtichoke/summary_report/summary_report.py @@ -4,7 +4,7 @@ from rtichoke.helpers.send_post_request_to_r_rtichoke import send_requests_to_rtichoke_r from rtichoke.helpers.sandbox_observable_helpers import ( - create_list_data_to_adjust_polars, + create_list_data_to_adjust, ) import subprocess @@ -67,7 +67,7 @@ def create_data_for_summary_report(probs, reals, times, fixed_time_horizons): stratified_by = ["probability_threshold", "ppcr"] by = 0.1 - list_data_to_adjust_polars = create_list_data_to_adjust_polars( + list_data_to_adjust_polars = create_list_data_to_adjust( probs, reals, times, stratified_by=stratified_by, by=by ) From 667f6c761ac6a21ea5aab17691b724299e1ce590 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 17 Jul 2025 05:55:29 +0300 Subject: [PATCH 10/51] chore: change dependency-groups --- pyproject.toml | 3 +- uv.lock | 117 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 51ca413..53db99f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,8 +28,6 @@ readme = "README.md" dev = [ "jupyter<2.0.0,>=1.0.0", "myst-nb<1.0.0,>=0.17.1; python_version ~= \"3.9\"", - "sphinx-autoapi<3.0.0,>=2.1.0", - "sphinx-rtd-theme<2.0.0,>=1.2.0", "pytest-cov<5.0.0,>=4.0.0", "pytest<8.0.0,>=7.3.0", "pyzmq<27.0.0,>=26.3.0", @@ -42,6 +40,7 @@ dev = [ "dcurves>=1.1.5", "plotly>=5.24.1", "ty>=0.0.1a12", + "scikit-learn>=1.6.1", ] [tool.uv.workspace] diff --git a/uv.lock b/uv.lock index cf8ca01..860ebc9 100644 --- a/uv.lock +++ b/uv.lock @@ -1497,6 +1497,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "joblib" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/fe/0f5a938c54105553436dbff7a61dc4fed4b1b2c98852f8833beaf4d5968f/joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444", size = 330475, upload-time = "2025-05-23T12:04:37.097Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746, upload-time = "2025-05-23T12:04:35.124Z" }, +] + [[package]] name = "json5" version = "0.12.0" @@ -4078,8 +4087,8 @@ dev = [ { name = "pytest-cov" }, { name = "pyzmq" }, { name = "ruff" }, - { name = "sphinx-autoapi" }, - { name = "sphinx-rtd-theme" }, + { name = "scikit-learn", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scikit-learn", version = "1.7.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version >= '3.10'" }, { name = "ty" }, { name = "uv" }, ] @@ -4115,8 +4124,7 @@ dev = [ { name = "pytest-cov", specifier = ">=4.0.0,<5.0.0" }, { name = "pyzmq", specifier = ">=26.3.0,<27.0.0" }, { name = "ruff", specifier = ">=0.11.0" }, - { name = "sphinx-autoapi", specifier = ">=2.1.0,<3.0.0" }, - { name = "sphinx-rtd-theme", specifier = ">=1.2.0,<2.0.0" }, + { name = "scikit-learn", specifier = ">=1.6.1" }, { name = "ty", specifier = ">=0.0.1a12" }, { name = "uv", specifier = ">=0.6.11" }, ] @@ -4146,6 +4154,98 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/33/4d3e79e4a84533d6cd526bfb42c020a23256ae5e4265d858bd1287831f7d/ruff-0.12.0-py3-none-win_arm64.whl", hash = "sha256:8cd24580405ad8c1cc64d61725bca091d6b6da7eb3d36f72cc605467069d7e8b", size = 10724946, upload-time = "2025-06-17T15:19:23.952Z" }, ] +[[package]] +name = "scikit-learn" +version = "1.6.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "threadpoolctl", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312, upload-time = "2025-01-10T08:07:55.348Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/3a/f4597eb41049110b21ebcbb0bcb43e4035017545daa5eedcfeb45c08b9c5/scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e", size = 12067702, upload-time = "2025-01-10T08:05:56.515Z" }, + { url = "https://files.pythonhosted.org/packages/37/19/0423e5e1fd1c6ec5be2352ba05a537a473c1677f8188b9306097d684b327/scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36", size = 11112765, upload-time = "2025-01-10T08:06:00.272Z" }, + { url = "https://files.pythonhosted.org/packages/70/95/d5cb2297a835b0f5fc9a77042b0a2d029866379091ab8b3f52cc62277808/scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8634c4bd21a2a813e0a7e3900464e6d593162a29dd35d25bdf0103b3fce60ed5", size = 12643991, upload-time = "2025-01-10T08:06:04.813Z" }, + { url = "https://files.pythonhosted.org/packages/b7/91/ab3c697188f224d658969f678be86b0968ccc52774c8ab4a86a07be13c25/scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775da975a471c4f6f467725dff0ced5c7ac7bda5e9316b260225b48475279a1b", size = 13497182, upload-time = "2025-01-10T08:06:08.42Z" }, + { url = "https://files.pythonhosted.org/packages/17/04/d5d556b6c88886c092cc989433b2bab62488e0f0dafe616a1d5c9cb0efb1/scikit_learn-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:8a600c31592bd7dab31e1c61b9bbd6dea1b3433e67d264d17ce1017dbdce8002", size = 11125517, upload-time = "2025-01-10T08:06:12.783Z" }, + { url = "https://files.pythonhosted.org/packages/6c/2a/e291c29670795406a824567d1dfc91db7b699799a002fdaa452bceea8f6e/scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33", size = 12102620, upload-time = "2025-01-10T08:06:16.675Z" }, + { url = "https://files.pythonhosted.org/packages/25/92/ee1d7a00bb6b8c55755d4984fd82608603a3cc59959245068ce32e7fb808/scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d", size = 11116234, upload-time = "2025-01-10T08:06:21.83Z" }, + { url = "https://files.pythonhosted.org/packages/30/cd/ed4399485ef364bb25f388ab438e3724e60dc218c547a407b6e90ccccaef/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2", size = 12592155, upload-time = "2025-01-10T08:06:27.309Z" }, + { url = "https://files.pythonhosted.org/packages/a8/f3/62fc9a5a659bb58a03cdd7e258956a5824bdc9b4bb3c5d932f55880be569/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8", size = 13497069, upload-time = "2025-01-10T08:06:32.515Z" }, + { url = "https://files.pythonhosted.org/packages/a1/a6/c5b78606743a1f28eae8f11973de6613a5ee87366796583fb74c67d54939/scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415", size = 11139809, upload-time = "2025-01-10T08:06:35.514Z" }, + { url = "https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516, upload-time = "2025-01-10T08:06:40.009Z" }, + { url = "https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837, upload-time = "2025-01-10T08:06:43.305Z" }, + { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728, upload-time = "2025-01-10T08:06:47.618Z" }, + { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700, upload-time = "2025-01-10T08:06:50.888Z" }, + { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613, upload-time = "2025-01-10T08:06:54.115Z" }, + { url = "https://files.pythonhosted.org/packages/2e/59/8eb1872ca87009bdcdb7f3cdc679ad557b992c12f4b61f9250659e592c63/scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322", size = 12010001, upload-time = "2025-01-10T08:06:58.613Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/05/f2fc4effc5b32e525408524c982c468c29d22f828834f0625c5ef3d601be/scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1", size = 11096360, upload-time = "2025-01-10T08:07:01.556Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e4/4195d52cf4f113573fb8ebc44ed5a81bd511a92c0228889125fac2f4c3d1/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348", size = 12209004, upload-time = "2025-01-10T08:07:06.931Z" }, + { url = "https://files.pythonhosted.org/packages/94/be/47e16cdd1e7fcf97d95b3cb08bde1abb13e627861af427a3651fcb80b517/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97", size = 13171776, upload-time = "2025-01-10T08:07:11.715Z" }, + { url = "https://files.pythonhosted.org/packages/34/b0/ca92b90859070a1487827dbc672f998da95ce83edce1270fc23f96f1f61a/scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb", size = 11071865, upload-time = "2025-01-10T08:07:16.088Z" }, + { url = "https://files.pythonhosted.org/packages/12/ae/993b0fb24a356e71e9a894e42b8a9eec528d4c70217353a1cd7a48bc25d4/scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236", size = 11955804, upload-time = "2025-01-10T08:07:20.385Z" }, + { url = "https://files.pythonhosted.org/packages/d6/54/32fa2ee591af44507eac86406fa6bba968d1eb22831494470d0a2e4a1eb1/scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35", size = 11100530, upload-time = "2025-01-10T08:07:23.675Z" }, + { url = "https://files.pythonhosted.org/packages/3f/58/55856da1adec655bdce77b502e94a267bf40a8c0b89f8622837f89503b5a/scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691", size = 12433852, upload-time = "2025-01-10T08:07:26.817Z" }, + { url = "https://files.pythonhosted.org/packages/ff/4f/c83853af13901a574f8f13b645467285a48940f185b690936bb700a50863/scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f", size = 11337256, upload-time = "2025-01-10T08:07:31.084Z" }, + { url = "https://files.pythonhosted.org/packages/d2/37/b305b759cc65829fe1b8853ff3e308b12cdd9d8884aa27840835560f2b42/scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6849dd3234e87f55dce1db34c89a810b489ead832aaf4d4550b7ea85628be6c1", size = 12101868, upload-time = "2025-01-10T08:07:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/83/74/f64379a4ed5879d9db744fe37cfe1978c07c66684d2439c3060d19a536d8/scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e", size = 11144062, upload-time = "2025-01-10T08:07:37.67Z" }, + { url = "https://files.pythonhosted.org/packages/fd/dc/d5457e03dc9c971ce2b0d750e33148dd060fefb8b7dc71acd6054e4bb51b/scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44a17798172df1d3c1065e8fcf9019183f06c87609b49a124ebdf57ae6cb0107", size = 12693173, upload-time = "2025-01-10T08:07:42.713Z" }, 
+ { url = "https://files.pythonhosted.org/packages/79/35/b1d2188967c3204c78fa79c9263668cf1b98060e8e58d1a730fe5b2317bb/scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b7a3b86e411e4bce21186e1c180d792f3d99223dcfa3b4f597ecc92fa1a422", size = 13518605, upload-time = "2025-01-10T08:07:46.551Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d8/8d603bdd26601f4b07e2363032b8565ab82eb857f93d86d0f7956fcf4523/scikit_learn-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:7a73d457070e3318e32bdb3aa79a8d990474f19035464dfd8bede2883ab5dc3b", size = 11155078, upload-time = "2025-01-10T08:07:51.376Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.7.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version >= '3.10'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "scipy", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/3b/29fa87e76b1d7b3b77cc1fcbe82e6e6b8cd704410705b008822de530277c/scikit_learn-1.7.0.tar.gz", hash = "sha256:c01e869b15aec88e2cdb73d27f15bdbe03bce8e2fb43afbe77c45d399e73a5a3", size = 7178217, upload-time = "2025-06-05T22:02:46.703Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/70/e725b1da11e7e833f558eb4d3ea8b7ed7100edda26101df074f1ae778235/scikit_learn-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9fe7f51435f49d97bd41d724bb3e11eeb939882af9c29c931a8002c357e8cdd5", size = 11728006, upload-time = "2025-06-05T22:01:43.007Z" }, + { url = "https://files.pythonhosted.org/packages/32/aa/43874d372e9dc51eb361f5c2f0a4462915c9454563b3abb0d9457c66b7e9/scikit_learn-1.7.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d0c93294e1e1acbee2d029b1f2a064f26bd928b284938d51d412c22e0c977eb3", size = 10726255, upload-time = "2025-06-05T22:01:46.082Z" }, + { url = "https://files.pythonhosted.org/packages/f5/1a/da73cc18e00f0b9ae89f7e4463a02fb6e0569778120aeab138d9554ecef0/scikit_learn-1.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf3755f25f145186ad8c403312f74fb90df82a4dfa1af19dc96ef35f57237a94", size = 12205657, upload-time = "2025-06-05T22:01:48.729Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f6/800cb3243dd0137ca6d98df8c9d539eb567ba0a0a39ecd245c33fab93510/scikit_learn-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2726c8787933add436fb66fb63ad18e8ef342dfb39bbbd19dc1e83e8f828a85a", size = 12877290, upload-time = "2025-06-05T22:01:51.073Z" }, + { url = "https://files.pythonhosted.org/packages/4c/bd/99c3ccb49946bd06318fe194a1c54fb7d57ac4fe1c2f4660d86b3a2adf64/scikit_learn-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:e2539bb58886a531b6e86a510c0348afaadd25005604ad35966a85c2ec378800", size = 10713211, upload-time = 
"2025-06-05T22:01:54.107Z" }, + { url = "https://files.pythonhosted.org/packages/5a/42/c6b41711c2bee01c4800ad8da2862c0b6d2956a399d23ce4d77f2ca7f0c7/scikit_learn-1.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ef09b1615e1ad04dc0d0054ad50634514818a8eb3ee3dee99af3bffc0ef5007", size = 11719657, upload-time = "2025-06-05T22:01:56.345Z" }, + { url = "https://files.pythonhosted.org/packages/a3/24/44acca76449e391b6b2522e67a63c0454b7c1f060531bdc6d0118fb40851/scikit_learn-1.7.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:7d7240c7b19edf6ed93403f43b0fcb0fe95b53bc0b17821f8fb88edab97085ef", size = 10712636, upload-time = "2025-06-05T22:01:59.093Z" }, + { url = "https://files.pythonhosted.org/packages/9f/1b/fcad1ccb29bdc9b96bcaa2ed8345d56afb77b16c0c47bafe392cc5d1d213/scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80bd3bd4e95381efc47073a720d4cbab485fc483966f1709f1fd559afac57ab8", size = 12242817, upload-time = "2025-06-05T22:02:01.43Z" }, + { url = "https://files.pythonhosted.org/packages/c6/38/48b75c3d8d268a3f19837cb8a89155ead6e97c6892bb64837183ea41db2b/scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dbe48d69aa38ecfc5a6cda6c5df5abef0c0ebdb2468e92437e2053f84abb8bc", size = 12873961, upload-time = "2025-06-05T22:02:03.951Z" }, + { url = "https://files.pythonhosted.org/packages/f4/5a/ba91b8c57aa37dbd80d5ff958576a9a8c14317b04b671ae7f0d09b00993a/scikit_learn-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:8fa979313b2ffdfa049ed07252dc94038def3ecd49ea2a814db5401c07f1ecfa", size = 10717277, upload-time = "2025-06-05T22:02:06.77Z" }, + { url = "https://files.pythonhosted.org/packages/70/3a/bffab14e974a665a3ee2d79766e7389572ffcaad941a246931c824afcdb2/scikit_learn-1.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c2c7243d34aaede0efca7a5a96d67fddaebb4ad7e14a70991b9abee9dc5c0379", size = 11646758, upload-time = "2025-06-05T22:02:09.51Z" }, + { url = "https://files.pythonhosted.org/packages/58/d8/f3249232fa79a70cb40595282813e61453c1e76da3e1a44b77a63dd8d0cb/scikit_learn-1.7.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f39f6a811bf3f15177b66c82cbe0d7b1ebad9f190737dcdef77cfca1ea3c19c", size = 10673971, upload-time = "2025-06-05T22:02:12.217Z" }, + { url = "https://files.pythonhosted.org/packages/67/93/eb14c50533bea2f77758abe7d60a10057e5f2e2cdcf0a75a14c6bc19c734/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63017a5f9a74963d24aac7590287149a8d0f1a0799bbe7173c0d8ba1523293c0", size = 11818428, upload-time = "2025-06-05T22:02:14.947Z" }, + { url = "https://files.pythonhosted.org/packages/08/17/804cc13b22a8663564bb0b55fb89e661a577e4e88a61a39740d58b909efe/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f8a0b1e73e9a08b7cc498bb2aeab36cdc1f571f8ab2b35c6e5d1c7115d97d", size = 12505887, upload-time = "2025-06-05T22:02:17.824Z" }, + { url = "https://files.pythonhosted.org/packages/68/c7/4e956281a077f4835458c3f9656c666300282d5199039f26d9de1dabd9be/scikit_learn-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:34cc8d9d010d29fb2b7cbcd5ccc24ffdd80515f65fe9f1e4894ace36b267ce19", size = 10668129, upload-time = "2025-06-05T22:02:20.536Z" }, + { url = "https://files.pythonhosted.org/packages/9a/c3/a85dcccdaf1e807e6f067fa95788a6485b0491d9ea44fd4c812050d04f45/scikit_learn-1.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5b7974f1f32bc586c90145df51130e02267e4b7e77cab76165c76cf43faca0d9", size = 11559841, 
upload-time = "2025-06-05T22:02:23.308Z" }, + { url = "https://files.pythonhosted.org/packages/d8/57/eea0de1562cc52d3196eae51a68c5736a31949a465f0b6bb3579b2d80282/scikit_learn-1.7.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:014e07a23fe02e65f9392898143c542a50b6001dbe89cb867e19688e468d049b", size = 10616463, upload-time = "2025-06-05T22:02:26.068Z" }, + { url = "https://files.pythonhosted.org/packages/10/a4/39717ca669296dfc3a62928393168da88ac9d8cbec88b6321ffa62c6776f/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7e7ced20582d3a5516fb6f405fd1d254e1f5ce712bfef2589f51326af6346e8", size = 11766512, upload-time = "2025-06-05T22:02:28.689Z" }, + { url = "https://files.pythonhosted.org/packages/d5/cd/a19722241d5f7b51e08351e1e82453e0057aeb7621b17805f31fcb57bb6c/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1babf2511e6ffd695da7a983b4e4d6de45dce39577b26b721610711081850906", size = 12461075, upload-time = "2025-06-05T22:02:31.233Z" }, + { url = "https://files.pythonhosted.org/packages/f3/bc/282514272815c827a9acacbe5b99f4f1a4bc5961053719d319480aee0812/scikit_learn-1.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:5abd2acff939d5bd4701283f009b01496832d50ddafa83c90125a4e41c33e314", size = 10652517, upload-time = "2025-06-05T22:02:34.139Z" }, + { url = "https://files.pythonhosted.org/packages/ea/78/7357d12b2e4c6674175f9a09a3ba10498cde8340e622715bcc71e532981d/scikit_learn-1.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e39d95a929b112047c25b775035c8c234c5ca67e681ce60d12413afb501129f7", size = 12111822, upload-time = "2025-06-05T22:02:36.904Z" }, + { url = "https://files.pythonhosted.org/packages/d0/0c/9c3715393343f04232f9d81fe540eb3831d0b4ec351135a145855295110f/scikit_learn-1.7.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:0521cb460426c56fee7e07f9365b0f45ec8ca7b2d696534ac98bfb85e7ae4775", size = 11325286, upload-time = "2025-06-05T22:02:39.739Z" }, + { url = "https://files.pythonhosted.org/packages/64/e0/42282ad3dd70b7c1a5f65c412ac3841f6543502a8d6263cae7b466612dc9/scikit_learn-1.7.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:317ca9f83acbde2883bd6bb27116a741bfcb371369706b4f9973cf30e9a03b0d", size = 12380865, upload-time = "2025-06-05T22:02:42.137Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d0/3ef4ab2c6be4aa910445cd09c5ef0b44512e3de2cfb2112a88bb647d2cf7/scikit_learn-1.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:126c09740a6f016e815ab985b21e3a0656835414521c81fc1a8da78b679bdb75", size = 11549609, upload-time = "2025-06-05T22:02:44.483Z" }, +] + [[package]] name = "scipy" version = "1.13.1" @@ -4647,6 +4747,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154, upload-time = "2024-03-12T14:34:36.569Z" }, ] +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + [[package]] name = "tinycss2" version = "1.4.0" From d28023adbaa6741ef6dd55fe0c3fd7eca065c8fe Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 17 Jul 2025 07:08:03 +0300 Subject: [PATCH 11/51] chore: close #112 --- pyproject.toml | 1 + .../helpers/sandbox_observable_helpers.py | 21 ++++++++++++------- uv.lock | 2 ++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 53db99f..da8060b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "ty>=0.0.1a5", "pandas>=2.2.3", "polarstate>=0.1.6", + "typing>=3.7.4.3", ] name = "rtichoke" version = "0.1.11" diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 0e85693..0f42b6a 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -5,6 +5,7 @@ import polars as pl from polarstate import predict_aj_estimates from polarstate import prepare_event_table +from typing import Dict, Union def extract_aj_estimate(data_to_adjust, fixed_time_horizons): @@ -981,7 +982,13 @@ def to_pl(df): ) -def create_list_data_to_adjust(probs_dict, reals_dict, times_dict, stratified_by, by): +def create_list_data_to_adjust( + probs_dict: Dict[str, np.ndarray], + reals_dict: Union[np.ndarray, Dict[str, np.ndarray]], + times_dict: Union[np.ndarray, Dict[str, np.ndarray]], + stratified_by, + by, +): # reference_groups = list(probs_dict.keys()) reference_group_labels = list(probs_dict.keys()) num_reals = len(reals_dict) @@ -991,14 +998,12 @@ def create_list_data_to_adjust(probs_dict, reals_dict, times_dict, stratified_by # Flatten and ensure list format data_to_adjust = pl.DataFrame( { - "reference_group": sum( - [[group] * num_reals for group in reference_group_labels], [] - ), - "probs": sum( - [probs_dict[group].tolist() for group in reference_group_labels], [] + "reference_group": np.repeat(reference_group_labels, num_reals), + "probs": np.concatenate( + [probs_dict[group] for group in reference_group_labels] ), - "reals": list(reals_dict) * len(reference_group_labels), - "times": list(times_dict) * len(reference_group_labels), + "reals": np.tile(np.asarray(reals_dict), len(reference_group_labels)), + "times": np.tile(np.asarray(times_dict), len(reference_group_labels)), } ).with_columns(pl.col("reference_group").cast(reference_group_enum)) diff --git a/uv.lock b/uv.lock index 860ebc9..5bf8f20 100644 --- a/uv.lock +++ b/uv.lock @@ -4071,6 +4071,7 @@ dependencies = [ { name = "sphinx-autoapi" }, { name = "sphinx-rtd-theme" }, { name = "ty" }, + { name = "typing" }, ] [package.dev-dependencies] @@ -4108,6 +4109,7 @@ requires-dist = [ { name = "sphinx-autoapi", specifier = ">=2.1.0,<3.0.0" }, { name = "sphinx-rtd-theme", specifier = ">=1.2.0,<2.0.0" }, { name = "ty", specifier = ">=0.0.1a5" }, + { name = "typing", specifier = ">=3.7.4.3" }, ] [package.metadata.requires-dev] From 9769da6c9d3592923c5de592eaa7664ac2dc7680 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 17 Jul 2025 15:50:11 +0300 Subject: [PATCH 12/51] chore: close #110 --- .../helpers/sandbox_observable_helpers.py | 47 +------------------ 1 file changed, 1 insertion(+), 46 deletions(-) diff --git 
a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 0f42b6a..dd5b298 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -276,7 +276,7 @@ def create_breaks_values(probs_vec, stratified_by, by): return breaks -def create_aj_data_combinations_polars( +def create_aj_data_combinations( reference_groups, fixed_time_horizons, stratified_by, by ): # Create strata combinations using Polars @@ -376,51 +376,6 @@ def create_aj_data_combinations_polars( ) -def create_aj_data_combinations( - reference_groups, fixed_time_horizons, stratified_by, by -): - strata_combinations = pd.concat( - [create_strata_combinations(x, by) for x in stratified_by], ignore_index=True - ) - - reals = pd.Categorical( - ["real_negatives", "real_positives", "real_competing", "real_censored"], - categories=[ - "real_negatives", - "real_positives", - "real_competing", - "real_censored", - ], - ordered=True, - ) - - censoring_assumptions = ["excluded", "adjusted"] - competing_assumptions = ["excluded", "adjusted_as_negative", "adjusted_as_censored"] - - combinations = list( - itertools.product( - reference_groups, - fixed_time_horizons, - reals, - censoring_assumptions, - competing_assumptions, - ) - ) - - df_combinations = pd.DataFrame( - combinations, - columns=[ - "reference_group", - "fixed_time_horizon", - "reals", - "censoring_assumption", - "competing_assumption", - ], - ) - - return df_combinations.merge(strata_combinations, how="cross") - - def pivot_longer_strata(data: pl.DataFrame) -> pl.DataFrame: # Identify id_vars and value_vars id_vars = [col for col in data.columns if not col.startswith("strata_")] From fc9f21804b7d2797e995caa4213f7a3fa83e95c7 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sat, 19 Jul 2025 06:13:50 +0300 Subject: [PATCH 13/51] refactor: fix #123 --- .../helpers/sandbox_observable_helpers.py | 518 ++++++------------ 1 file changed, 153 insertions(+), 365 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index dd5b298..21bf8f0 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -329,6 +329,7 @@ def create_aj_data_combinations( "excluded", "adjusted_as_negative", "adjusted_as_censored", + "adjusted_as_composite", ] competing_assumptions_enum = pl.Enum(competing_assumptions_labels) df_competing_assumptions = pl.DataFrame( @@ -445,38 +446,21 @@ def create_aj_data( """ Create AJ estimates per strata based on censoring and competing assumptions. 
""" - if ( - censoring_assumption == "adjusted" - and competing_assumption == "adjusted_as_negative" - ): - aj_estimates_per_strata_adj_adjneg = ( - reference_group_data.group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) - ) - .join( - pl.DataFrame( - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_negative", - } - ), - how="cross", - ) + + def aj_estimates_with_cross(df, extra_cols): + return df.join( + pl.DataFrame(extra_cols), + how="cross", ) - return aj_estimates_per_strata_adj_adjneg - elif ( - censoring_assumption == "excluded" - and competing_assumption == "adjusted_as_negative" - ): - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=pl.lit(fixed_time_horizons) - ).explode("fixed_time_horizon") + def explode_data(df): + return df.with_columns(fixed_time_horizon=pl.lit(fixed_time_horizons)).explode( + "fixed_time_horizon" + ) - aj_estimates_per_strata_censored = ( - exploded_data.filter( + def censored_count(df): + return ( + df.filter( (pl.col("times") < pl.col("fixed_time_horizon")) & (pl.col("reals") == 0) ) @@ -486,286 +470,181 @@ def create_aj_data( .with_columns(pl.col("real_censored_est").cast(pl.Float64)) ) - non_censored_data = exploded_data.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + def competing_count(df): + return ( + df.filter( + (pl.col("reals") == 2) + & (pl.col("times") < pl.col("fixed_time_horizon")) + ) + .group_by(["strata", "fixed_time_horizon"]) + .count() + .rename({"count": "real_competing_est"}) + .with_columns(pl.col("real_competing_est").cast(pl.Float64)) ) - aj_estimates_per_strata_noncensored = pl.concat( + def aj_estimates_per_horizon(df, horizons): + return pl.concat( [ - non_censored_data.filter( - pl.col("fixed_time_horizon") == fixed_time_horizon - ) + df.filter(pl.col("fixed_time_horizon") == h) .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons + .map_groups(lambda group: extract_aj_estimate_for_strata(group, [h])) + for h in horizons ], how="vertical", ) - return aj_estimates_per_strata_noncensored.join( - aj_estimates_per_strata_censored, on=["strata", "fixed_time_horizon"] - ).join( - pl.DataFrame( - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_negative", - } - ), - how="cross", - ) - - elif ( + if ( censoring_assumption == "adjusted" - and competing_assumption == "adjusted_as_censored" + and competing_assumption == "adjusted_as_negative" ): - aj_estimates_per_strata_adj_adjcens = ( - reference_group_data.with_columns( - [ - pl.when(pl.col("reals") == 2) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) - ) - .join( - pl.DataFrame( - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_censored", - } - ), - how="cross", - ) + aj_df = reference_group_data.group_by("strata").map_groups( + lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) + ) + return aj_estimates_with_cross( + aj_df, + { + "real_censored_est": 0.0, + "censoring_assumption": "adjusted", + "competing_assumption": "adjusted_as_negative", + }, ) - return aj_estimates_per_strata_adj_adjcens - elif ( + if ( censoring_assumption == "excluded" - and 
competing_assumption == "adjusted_as_censored" + and competing_assumption == "adjusted_as_negative" ): - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=pl.lit(fixed_time_horizons) - ).explode("fixed_time_horizon") - - aj_estimates_per_strata_censored = ( - exploded_data.filter( - (pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals") == 0 - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_censored_est"}) - .with_columns(pl.col("real_censored_est").cast(pl.Float64)) + exploded = explode_data(reference_group_data) + censored = censored_count(exploded) + non_censored = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + ) + aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) + return aj_estimates_with_cross( + aj_df.join(censored, on=["strata", "fixed_time_horizon"]), + { + "censoring_assumption": "excluded", + "competing_assumption": "adjusted_as_negative", + }, ) - non_censored_data = exploded_data.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals") > 0 - ).with_columns( + if ( + censoring_assumption == "adjusted" + and competing_assumption == "adjusted_as_censored" + ): + adjusted = reference_group_data.with_columns( [ - pl.when((pl.col("reals") == 2)) + pl.when(pl.col("reals") == 2) .then(pl.lit(0)) .otherwise(pl.col("reals")) .alias("reals") ] ) - - aj_estimates_per_strata_noncensored = pl.concat( - [ - non_censored_data.filter( - pl.col("fixed_time_horizon") == fixed_time_horizon - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical", + aj_df = adjusted.group_by("strata").map_groups( + lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) ) - - aj_estimates_per_strata_excl_adjcens = aj_estimates_per_strata_noncensored.join( - aj_estimates_per_strata_censored, on=["strata", "fixed_time_horizon"] - ).join( - pl.DataFrame( - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_censored", - } - ), - how="cross", + return aj_estimates_with_cross( + aj_df, + { + "real_censored_est": 0.0, + "censoring_assumption": "adjusted", + "competing_assumption": "adjusted_as_censored", + }, ) - return aj_estimates_per_strata_excl_adjcens - - elif censoring_assumption == "adjusted" and competing_assumption == "excluded": - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=fixed_time_horizons - ).explode("fixed_time_horizon") - - aj_estimates_per_strata_competing = ( - exploded_data.filter( - (pl.col("reals") == 2) - & (pl.col("times") < pl.col("fixed_time_horizon")) - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_competing_est"}) - .with_columns(pl.col("real_competing_est").cast(pl.Float64)) - ) - - non_competing_data = exploded_data.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals") != 2 + if ( + censoring_assumption == "excluded" + and competing_assumption == "adjusted_as_censored" + ): + exploded = explode_data(reference_group_data) + censored = censored_count(exploded) + non_censored = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) ).with_columns( [ - pl.when((pl.col("reals") == 2)) + pl.when(pl.col("reals") == 2) .then(pl.lit(0)) .otherwise(pl.col("reals")) .alias("reals") ] ) - - aj_estimates_per_strata_noncompeting = pl.concat( 
- [ - non_competing_data.filter( - pl.col("fixed_time_horizon") == fixed_time_horizon - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical", - ).select(pl.exclude("real_competing_est")) - - aj_estimates_per_strata_adj_excl = ( - aj_estimates_per_strata_competing.join( - aj_estimates_per_strata_noncompeting, - on=["strata", "fixed_time_horizon"], - ) - .join( - pl.DataFrame( - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "excluded", - } - ), - how="cross", - ) - .select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - "real_competing_est", - "real_censored_est", - "censoring_assumption", - "competing_assumption", - ] - ) - ) - - return aj_estimates_per_strata_adj_excl - - elif censoring_assumption == "excluded" and competing_assumption == "excluded": - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=pl.lit(fixed_time_horizons) - ).explode("fixed_time_horizon") - - print("Exploded data:", exploded_data) - - aj_estimates_per_strata_censored = ( - exploded_data.filter( - (pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals") == 0 - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_censored_est"}) - .with_columns(pl.col("real_censored_est").cast(pl.Float64)) + aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) + return aj_estimates_with_cross( + aj_df.join(censored, on=["strata", "fixed_time_horizon"]), + { + "censoring_assumption": "excluded", + "competing_assumption": "adjusted_as_censored", + }, ) - print("AJ estimates per strata censored:", aj_estimates_per_strata_censored) - - aj_estimates_per_strata_competing = ( - exploded_data.filter( - (pl.col("reals") == 2) - & (pl.col("times") < pl.col("fixed_time_horizon")) - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_competing_est"}) - .with_columns(pl.col("real_competing_est").cast(pl.Float64)) - ) - - print("AJ estimates per strata competing:", aj_estimates_per_strata_competing) - - non_censored_non_competing_data = exploded_data.filter( - ((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals") == 1) + if censoring_assumption == "adjusted" and competing_assumption == "excluded": + exploded = explode_data(reference_group_data) + competing = competing_count(exploded) + non_competing = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) + ).with_columns( + [ + pl.when(pl.col("reals") == 2) + .then(pl.lit(0)) + .otherwise(pl.col("reals")) + .alias("reals") + ] ) - - aj_estimates_per_strata_noncensored_noncompeting = pl.concat( + aj_df = aj_estimates_per_horizon(non_competing, fixed_time_horizons).select( + pl.exclude("real_competing_est") + ) + result = competing.join(aj_df, on=["strata", "fixed_time_horizon"]) + return aj_estimates_with_cross( + result, + { + "real_censored_est": 0.0, + "censoring_assumption": "adjusted", + "competing_assumption": "excluded", + }, + ).select( [ - non_censored_non_competing_data.filter( - pl.col("fixed_time_horizon") == fixed_time_horizon - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical", + "strata", + "fixed_time_horizon", + "real_negatives_est", + 
"real_positives_est", + "real_competing_est", + "real_censored_est", + "censoring_assumption", + "competing_assumption", + ] ) - aj_estimates_per_strata_excl_excl = ( - aj_estimates_per_strata_competing.join( - aj_estimates_per_strata_censored, on=["strata", "fixed_time_horizon"] - ) - .join( - aj_estimates_per_strata_noncensored_noncompeting, - on=["strata", "fixed_time_horizon"], - ) - .join( - pl.DataFrame( - { - "censoring_assumption": "excluded", - "competing_assumption": "excluded", - } - ), - how="cross", - ) - .select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - "real_competing_est", - "real_censored_est", - "censoring_assumption", - "competing_assumption", - ] - ) + if censoring_assumption == "excluded" and competing_assumption == "excluded": + exploded = explode_data(reference_group_data) + censored = censored_count(exploded) + competing = competing_count(exploded) + non_censored_non_competing = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) + ) + aj_df = aj_estimates_per_horizon( + non_censored_non_competing, fixed_time_horizons + ) + result = competing.join(censored, on=["strata", "fixed_time_horizon"]).join( + aj_df, on=["strata", "fixed_time_horizon"] + ) + return aj_estimates_with_cross( + result, + { + "censoring_assumption": "excluded", + "competing_assumption": "excluded", + }, + ).select( + [ + "strata", + "fixed_time_horizon", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "real_censored_est", + "censoring_assumption", + "competing_assumption", + ] ) - return aj_estimates_per_strata_excl_excl - def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: all_combinations = data.select(["strata", "reals", "fixed_time_horizon"]).unique() @@ -845,98 +724,6 @@ def assign_and_explode_polars( ) -def extract_aj_estimate_by_assumptions_polars( - data_to_adjust: pl.DataFrame, - fixed_time_horizons: list[float], - censoring_assumption="excluded", - competing_assumption="excluded", -) -> pl.DataFrame: - def to_pd(df): - return df.to_pandas() - - def to_pl(df): - return pl.from_pandas(df) - - if censoring_assumption == "excluded" and competing_assumption == "excluded": - aj_estimate_data = ( - assign_and_explode_polars(data_to_adjust, fixed_time_horizons) - .pipe(update_administrative_censoring_polars) - .pipe(extract_crude_estimate_polars) - ) - - aj_estimate_data = aj_estimate_data.with_columns( - pl.col("reals_estimate").cast(pl.Float64).alias("reals_estimate") - ) - - aj_estimate_data = aj_estimate_data.with_columns( - pl.col("strata").cast(pl.Categorical).alias("strata") - ) - - aj_estimate_data = aj_estimate_data.with_columns( - pl.col("fixed_time_horizon").cast(pl.Int64).alias("fixed_time_horizon") - ) - - if censoring_assumption == "adjusted" and competing_assumption == "excluded": - exploded = assign_and_explode_polars(data_to_adjust, fixed_time_horizons) - exploded = update_administrative_censoring_polars(exploded) - - # Separate "real_competing" for crude estimation - real_competing_data = exploded.filter( - pl.col("reals_labels") == "real_competing" - ) - non_competing_data = exploded.filter(pl.col("reals_labels") != "real_competing") - - # Crude estimate for "real_competing" using Polars - aj_estimate_competing = extract_crude_estimate_polars(real_competing_data) - - aj_estimate_competing = aj_estimate_competing.with_columns( - pl.col("strata").cast(pl.Categorical).alias("strata") - ) - - aj_estimate_competing = 
aj_estimate_competing.with_columns( - pl.col("fixed_time_horizon").cast(pl.Int64).alias("fixed_time_horizon") - ) - - aj_estimate_competing = aj_estimate_competing.with_columns( - pl.col("reals_estimate").cast(pl.Float64).alias("reals_estimate") - ) - - # Aalen-Johansen estimate for non-competing using Lifelines (pandas) - aj_estimate_adjusted_list = [ - extract_aj_estimate( - to_pd(non_competing_data.filter(pl.col("fixed_time_horizon") == h)), - fixed_time_horizons=[h], - ) - for h in fixed_time_horizons - ] - - # Combine results - aj_estimate_adjusted = to_pl( - pd.concat(aj_estimate_adjusted_list, ignore_index=True) - ) - - reals_labels = [ - "real_negatives", - "real_positives", - "real_competing", - "real_censored", - ] - reals_enum = pl.Enum(reals_labels) - - aj_estimate_adjusted = aj_estimate_adjusted.with_columns( - pl.col("reals").cast(reals_enum).alias("reals") - ) - - aj_estimate_data = pl.concat([aj_estimate_competing, aj_estimate_adjusted]) - - return aj_estimate_data.with_columns( - [ - pl.lit(censoring_assumption).alias("censoring_assumption"), - pl.lit(competing_assumption).alias("competing_assumption"), - ] - ) - - def create_list_data_to_adjust( probs_dict: Dict[str, np.ndarray], reals_dict: Union[np.ndarray, Dict[str, np.ndarray]], @@ -1005,12 +792,12 @@ def ensure_no_categorical(df: pd.DataFrame) -> pd.DataFrame: def extract_aj_estimate_by_assumptions( df: pl.DataFrame, - assumption_sets: list[dict], + assumptions_sets: list[dict], fixed_time_horizons: list[float], ) -> pl.DataFrame: aj_dfs = [] - for assumption in assumption_sets: + for assumption in assumptions_sets: censoring = assumption["censoring_assumption"] competing = assumption["competing_assumption"] @@ -1045,7 +832,7 @@ def extract_aj_estimate_by_assumptions( def create_adjusted_data( list_data_to_adjust_polars: dict[str, pl.DataFrame], - assumption_sets: list[dict[str, str]], + assumptions_sets: list[dict[str, str]], fixed_time_horizons: list[float], ) -> pl.DataFrame: all_results = [] @@ -1060,6 +847,7 @@ def create_adjusted_data( "excluded", "adjusted_as_negative", "adjusted_as_censored", + "adjusted_as_composite", ] competing_assumption_enum = pl.Enum(competing_assumption_labels) @@ -1068,7 +856,7 @@ def create_adjusted_data( aj_result = extract_aj_estimate_by_assumptions( input_df, - assumption_sets=assumption_sets, + assumptions_sets=assumptions_sets, fixed_time_horizons=fixed_time_horizons, ) From b6ba08fd4eb093882978ef740b21e700e093ed6c Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sat, 19 Jul 2025 06:20:09 +0300 Subject: [PATCH 14/51] feature: close #125 --- .../helpers/sandbox_observable_helpers.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 21bf8f0..52c88be 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -576,6 +576,30 @@ def aj_estimates_per_horizon(df, horizons): }, ) + if ( + censoring_assumption == "adjusted" + and competing_assumption == "adjusted_as_composite" + ): + adjusted = reference_group_data.with_columns( + [ + pl.when(pl.col("reals") == 2) + .then(pl.lit(1)) + .otherwise(pl.col("reals")) + .alias("reals") + ] + ) + aj_df = adjusted.group_by("strata").map_groups( + lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) + ) + return aj_estimates_with_cross( + aj_df, + { + "real_censored_est": 0.0, + "censoring_assumption": "adjusted", + 
"competing_assumption": "adjusted_as_censored", + }, + ) + if censoring_assumption == "adjusted" and competing_assumption == "excluded": exploded = explode_data(reference_group_data) competing = competing_count(exploded) From 8b78e9f18a7ac25646c6ce98b1129674b9f98c98 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sat, 19 Jul 2025 06:22:39 +0300 Subject: [PATCH 15/51] feature: close #126 --- .../helpers/sandbox_observable_helpers.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 52c88be..1f1593d 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -596,7 +596,32 @@ def aj_estimates_per_horizon(df, horizons): { "real_censored_est": 0.0, "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_censored", + "competing_assumption": "adjusted_as_composite", + }, + ) + + if ( + censoring_assumption == "excluded" + and competing_assumption == "adjusted_as_composite" + ): + exploded = explode_data(reference_group_data) + censored = censored_count(exploded) + non_censored = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + ).with_columns( + [ + pl.when(pl.col("reals") == 2) + .then(pl.lit(1)) + .otherwise(pl.col("reals")) + .alias("reals") + ] + ) + aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) + return aj_estimates_with_cross( + aj_df.join(censored, on=["strata", "fixed_time_horizon"]), + { + "censoring_assumption": "excluded", + "competing_assumption": "adjusted_as_composite", }, ) From 782494bcca1f64f42d97da8ebbfa1c766da370c0 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 20 Jul 2025 20:30:59 +0300 Subject: [PATCH 16/51] docs: close #121 --- docs/before_we_validate.qmd | 43 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index e448426..bdcc8ad 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -363,7 +363,7 @@ Performance Validation in the face of censored observations require assumptions TODO: add link to nan-van-geloven article -##### Exclude Censored Events +### Exclude Censored Events ```{mermaid} @@ -393,7 +393,7 @@ All censored events to be excluded. Underlying Assumption: Small amount of censored events. Violation of the assumption leads to: Overestimation of the observed outcomes. -##### Adjust Censored as partially seen Non-Event +### Adjust Censored as partially seen Non-Event Observed outcomes for each strata are estimated using the AJ-estimate (equivalent to CIF and KM): Each censored observation is assumed to be similar to the ones who weren't censored. @@ -410,7 +410,7 @@ Performance Validation in the face of competing observations require assumptions TODO: add link to nan-van-geloven article -##### Exclude Competing Events +### Exclude Competing Events ```{mermaid} @@ -441,7 +441,7 @@ All competing events to be excluded. Underlying Assumption: Small amount of competing events. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. -##### Adjust Competing Events as Censored (partially seen Non-Event) +### Adjust Competing Events as Censored (Partially seen Negatives) Check @@ -475,7 +475,7 @@ All competing events to be treated as censored. 
Underlying Assumption: We consider a patient experiencing a competing event equivalent to independent censoring.
Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen.

-##### Adjust Competing Events as Competing
+### Adjust Competing Events as Negatives (Definite Non-Events)

 All competing events to be treated as competing events for the primary event-of-interest, i.e. as definite non-events.

@@ -513,6 +513,39 @@ subgraph adj[Adjusted for Censoring]

 Underlying Assumption: We consider a patient experiencing a competing event as a definite non-event.
 Violation of the assumption leads to Underestimation of the observed outcomes if a competing event can be considered as a different form of the primary event.

+### Adjust Competing Events as Composite (Positives)
+
+All competing events to be treated as another case of the Primary Event.
+
+A patient experiencing a competing event might be seen as another case of "some-event".
+
+```{mermaid}
+
+graph LR
+subgraph adj[Adjusted for Censoring]
+    direction LR
+    S0["Non Event
0 ๐Ÿคจ"] -->S1["Any Event
1 ๐Ÿคข
Competing Event
2 ๐Ÿ’€"] + end + + + classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black + classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white + classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black + + class S0 nonEvent + class S1 primaryEvent + class S2 competingEvent + + linkStyle 0 stroke:#333 + style adj fill:#E3F09B,color:black + +``` + +There is no need for an underlying assumption, the answer is explicit. + +This heuristic is can be seen as a different underlying question - what is the probability of having any type of event, Primary or Competing? + + # What rtichoke from now on? ## Render Predictions Histogram From 6e0bc2d225a56cb0b849aa10ed1af1219b5a3d44 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Tue, 22 Jul 2025 07:51:51 +0300 Subject: [PATCH 17/51] docs: close #130 --- docs/before_we_validate.qmd | 120 +++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 36 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index bdcc8ad..1dbb104 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -255,53 +255,101 @@ For cases with observed time-to-event is shorter than the prediction time horizo - If adjusted: encoded as 0. - If excluded: counted with crude estimate. + +```{python} +#| echo: false + +import numpy as np + +# Inputs +times = np.array([5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0]) +reals = np.array([1, 1, 2, 1, 0, 1, 0, 1, 2, 1]) +time_horizons = [10, 30, 50] + +# Icons +def get_icon(outcome, t, h): + if outcome == 0: + return "๐Ÿคฌ" if t < h else "๐Ÿคจ" + elif outcome == 1: + return "๐Ÿคข" + elif outcome == 2: + return "๐Ÿ’€" + +# Displayed time +def get_time(outcome, t, h): + if outcome == 0: + return t if t < h else h + else: + return t + +# Final output +final_data = [] + +for i in range(len(times)): + id_ = i + 1 + t = times[i] + r = reals[i] + + for h in time_horizons: + outcome = r if t <= h else 0 # override outcome after horizon + final_data.append({ + "id": id_, + "time_horizon": h, + "time": get_time(outcome, t, h), + "real": get_icon(outcome, t, h) + }) + +ojs_define(data = final_data) + +``` + ```{ojs} //| echo: false -data = [ - { time: 1, real: "๐Ÿคจ", id: 1, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 1, time_horizon: 3 }, - { time: 5, real: "๐Ÿคจ", id: 1, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 2, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 2, time_horizon: 3 }, - { time: 5, real: "๐Ÿคจ", id: 2, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 3, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 3, time_horizon: 3 }, - { time: 4.6, real: "๐Ÿคฌ", id: 3, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 4, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 4, time_horizon: 3 }, - { time: 5, real: "๐Ÿคจ", id: 4, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 5, time_horizon: 1 }, - { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 3 }, - { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 6, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 6, time_horizon: 3 }, - { time: 4.4, real: "๐Ÿคฌ", id: 6, time_horizon: 5 }, - { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 1 }, - { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 3 }, - { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 8, time_horizon: 1 }, - { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 3 }, - { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 5 }, - { time: 0.8, real: 
"๐Ÿคข", id: 9, time_horizon: 1 }, - { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 3 }, - { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 10, time_horizon: 1 }, - { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 3 }, - { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 5 } -] +// data = [ +// { time: 1, real: "๐Ÿคจ", id: 1, time_horizon: 1 }, +// { time: 3, real: "๐Ÿคจ", id: 1, time_horizon: 3 }, +// { time: 5, real: "๐Ÿคจ", id: 1, time_horizon: 5 }, +// { time: 1, real: "๐Ÿคจ", id: 2, time_horizon: 1 }, +// { time: 3, real: "๐Ÿคจ", id: 2, time_horizon: 3 }, +// { time: 5, real: "๐Ÿคจ", id: 2, time_horizon: 5 }, +// { time: 1, real: "๐Ÿคจ", id: 3, time_horizon: 1 }, +// { time: 3, real: "๐Ÿคจ", id: 3, time_horizon: 3 }, +// { time: 4.6, real: "๐Ÿคฌ", id: 3, time_horizon: 5 }, +// { time: 1, real: "๐Ÿคจ", id: 4, time_horizon: 1 }, +// { time: 3, real: "๐Ÿคจ", id: 4, time_horizon: 3 }, +// { time: 5, real: "๐Ÿคจ", id: 4, time_horizon: 5 }, +// { time: 1, real: "๐Ÿคจ", id: 5, time_horizon: 1 }, +// { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 3 }, +// { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 5 }, +// { time: 1, real: "๐Ÿคจ", id: 6, time_horizon: 1 }, +// { time: 3, real: "๐Ÿคจ", id: 6, time_horizon: 3 }, +// { time: 4.4, real: "๐Ÿคฌ", id: 6, time_horizon: 5 }, +// { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 1 }, +// { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 3 }, +// { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 5 }, +// { time: 1, real: "๐Ÿคจ", id: 8, time_horizon: 1 }, +// { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 3 }, +// { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 5 }, +// { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 1 }, +// { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 3 }, +// { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 5 }, +// { time: 1, real: "๐Ÿคจ", id: 10, time_horizon: 1 }, +// { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 3 }, +// { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 5 } +// ] filteredData = data.filter((d) => d.time_horizon == timeHorizon) -viewof timeHorizon = Inputs.range([1, 5], { - step: 2, - value: 5, +viewof timeHorizon = Inputs.range([10, 50], { + step: 20, + value: 50, label: "Time Horizon" }) Plot.plot({ x: { - domain: [0, 5] + domain: [0, 50] }, y: { domain: [0, 11], From ecb032a4e6e185125f5202ca232fd642a22bb890 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Tue, 22 Jul 2025 07:55:48 +0300 Subject: [PATCH 18/51] chore: remove commented out observable data --- docs/before_we_validate.qmd | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index 1dbb104..2ca9a0c 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -306,39 +306,6 @@ ojs_define(data = final_data) ```{ojs} //| echo: false -// data = [ -// { time: 1, real: "๐Ÿคจ", id: 1, time_horizon: 1 }, -// { time: 3, real: "๐Ÿคจ", id: 1, time_horizon: 3 }, -// { time: 5, real: "๐Ÿคจ", id: 1, time_horizon: 5 }, -// { time: 1, real: "๐Ÿคจ", id: 2, time_horizon: 1 }, -// { time: 3, real: "๐Ÿคจ", id: 2, time_horizon: 3 }, -// { time: 5, real: "๐Ÿคจ", id: 2, time_horizon: 5 }, -// { time: 1, real: "๐Ÿคจ", id: 3, time_horizon: 1 }, -// { time: 3, real: "๐Ÿคจ", id: 3, time_horizon: 3 }, -// { time: 4.6, real: "๐Ÿคฌ", id: 3, time_horizon: 5 }, -// { time: 1, real: "๐Ÿคจ", id: 4, time_horizon: 1 }, -// { time: 3, real: "๐Ÿคจ", id: 4, time_horizon: 3 }, -// { time: 5, real: "๐Ÿคจ", id: 4, time_horizon: 5 }, -// { time: 1, 
real: "๐Ÿคจ", id: 5, time_horizon: 1 }, -// { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 3 }, -// { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 5 }, -// { time: 1, real: "๐Ÿคจ", id: 6, time_horizon: 1 }, -// { time: 3, real: "๐Ÿคจ", id: 6, time_horizon: 3 }, -// { time: 4.4, real: "๐Ÿคฌ", id: 6, time_horizon: 5 }, -// { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 1 }, -// { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 3 }, -// { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 5 }, -// { time: 1, real: "๐Ÿคจ", id: 8, time_horizon: 1 }, -// { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 3 }, -// { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 5 }, -// { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 1 }, -// { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 3 }, -// { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 5 }, -// { time: 1, real: "๐Ÿคจ", id: 10, time_horizon: 1 }, -// { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 3 }, -// { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 5 } -// ] - filteredData = data.filter((d) => d.time_horizon == timeHorizon) viewof timeHorizon = Inputs.range([10, 50], { From 648b16a0767b2770427dd3e7ad1be27d7c3f3420 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 27 Jul 2025 12:04:44 +0300 Subject: [PATCH 19/51] chore: close #135 --- docs/before_we_validate.qmd | 90 +++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index 2ca9a0c..f40f827 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -561,6 +561,7 @@ There is no need for an underlying assumption, the answer is explicit. This heuristic is can be seen as a different underlying question - what is the probability of having any type of event, Primary or Competing? + # What rtichoke from now on? ## Render Predictions Histogram @@ -571,3 +572,92 @@ For each requried combination of reference_group x predictions_strata x fixed_ti The sum of the AJ estimates for each predictions_strata is equal to the overal AJ estimate. 
+``` {ojs} + +Inputs.radio(["Adjusted", "Excluded"], {label: "Censored Assumption"}) + +Inputs.radio(["Adjusted as Censored", "Adjusted as Negative", "Adjusted as Composite", "Excluded"], {label: "Competing Assumption"}) + +``` + +```{python} +import polars as pl +from rtichoke.helpers.sandbox_observable_helpers import ( + create_aj_data, + create_list_data_to_adjust, + create_adjusted_data +) + +from polarstate import prepare_event_table + +times_and_reals = pl.DataFrame({ + "times": times, + "reals": reals +}) + +event_table = prepare_event_table(times_and_reals) + +# event_table_data = event_table.to_dicts() + +long_df = event_table.select( + pl.col("times"), + pl.col("overall_survival"), + pl.col("state_occupancy_probability_1_at_times"), + pl.col("state_occupancy_probability_2_at_times") +).unpivot( + index = "times", + variable_name="state", + value_name="aj_estimate" +).to_dicts() + + +ojs_define(long_df = long_df) + +``` + + +``` {ojs} + +long_df + +Plot.plot({ + x: { + label: "Time", + }, + y: { + label: "Aalen-Johansen Estimate", + stack: false + }, + color: { + legend: true, + domain: [ + "state_occupancy_probability_1_at_times", + "state_occupancy_probability_2_at_times", + "overall_survival" + ], + range: [ + "#4C5454", + "#C880B7", + "#E0E0E0", + ] + }, + marks: [ + Plot.lineY(long_df, { + x: "times", + y: "aj_estimate", + stroke: "state", + curve: "step-after" + }), + Plot.dot(long_df, { + x: "times", + y: "aj_estimate", + fill: "state", + r: 2.5, + tip: true + }), + Plot.ruleY([0]) + ] +}) + +``` + From 0466bbdb3a6c42e6816a29111619f9cf5ee032ec Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 28 Jul 2025 06:57:54 +0300 Subject: [PATCH 20/51] docs: close #133 --- docs/before_we_validate.qmd | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index f40f827..1fae6d2 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -261,9 +261,8 @@ For cases with observed time-to-event is shorter than the prediction time horizo import numpy as np -# Inputs -times = np.array([5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0]) -reals = np.array([1, 1, 2, 1, 0, 1, 0, 1, 2, 1]) +times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 4.3, 31.5]) +reals = np.array([1, 1, 1, 1, 0, 2, 1, 2, 0, 1]) time_horizons = [10, 30, 50] # Icons From c063231a6152c9f92d730b86af30af9a9b8525de Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Tue, 29 Jul 2025 06:09:44 +0300 Subject: [PATCH 21/51] chore: close #136 --- pyproject.toml | 3 ++- src/rtichoke/helpers/sandbox_observable_helpers.py | 4 +++- uv.lock | 10 ++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index da8060b..8da0fee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,8 +17,8 @@ dependencies = [ "pyarrow>=20.0.0", "ty>=0.0.1a5", "pandas>=2.2.3", - "polarstate>=0.1.6", "typing>=3.7.4.3", + "polarstate==0.1.8", ] name = "rtichoke" version = "0.1.11" @@ -42,6 +42,7 @@ dev = [ "plotly>=5.24.1", "ty>=0.0.1a12", "scikit-learn>=1.6.1", + "polarstate>=0.1.6", ] [tool.uv.workspace] diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 1f1593d..9ae2060 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -735,13 +735,15 @@ def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: def 
extract_aj_estimate_for_strata(data_to_adjust, horizons): n = data_to_adjust.height + event_table = prepare_event_table(data_to_adjust) + aj_estimate_for_strata_polars = predict_aj_estimates( event_table, pl.Series(horizons) ) aj_estimate_for_strata_polars = aj_estimate_for_strata_polars.rename( - {"fixed_time_horizons": "fixed_time_horizon"} + {"times": "fixed_time_horizon"} ) return aj_estimate_for_strata_polars.with_columns( diff --git a/uv.lock b/uv.lock index 5bf8f20..33a00b3 100644 --- a/uv.lock +++ b/uv.lock @@ -3187,14 +3187,14 @@ wheels = [ [[package]] name = "polarstate" -version = "0.1.6" +version = "0.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "polars" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/02/f1c77f1d76846202cd0c885fca02882c261e49068bfdc7936078f515a810/polarstate-0.1.6.tar.gz", hash = "sha256:350cd3978a06bcd049f57136a2eb163e3c9702cc32aca7c3ac6f703c943509ab", size = 4222089, upload-time = "2025-07-03T17:54:06.682Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/09/107eb49e8fc33392d1f226c353adbaa2c345e9e77b82039bf0b9edbfadfc/polarstate-0.1.8.tar.gz", hash = "sha256:803e158f5961f234700aa0121e47ca754ff76796d01dc0230e5133f020cbe7bd", size = 3155, upload-time = "2025-07-29T02:24:27.829Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/0c/f35a4e67d5daa8554d74f6b5be8f8b4ee8ce287a364bc4c1539656f7567c/polarstate-0.1.6-py3-none-any.whl", hash = "sha256:2cfc6de578cc714da83d51ca8d798ed85fc713a4d46263e730c67f8490e1c455", size = 3988, upload-time = "2025-07-03T17:54:05.811Z" }, + { url = "https://files.pythonhosted.org/packages/d4/68/c80de613a28e48be2ce15e00118bf8a018b74229349fd1c26347b352dcee/polarstate-0.1.8-py3-none-any.whl", hash = "sha256:710d791e67ea09c46f79030ecd1253e67a9d2f100aceb848c25967d88314db6a", size = 4403, upload-time = "2025-07-29T02:24:27.122Z" }, ] [[package]] @@ -4083,6 +4083,7 @@ dev = [ { name = "marimo" }, { name = "myst-nb", marker = "python_full_version < '4'" }, { name = "plotly" }, + { name = "polarstate" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-cov" }, @@ -4103,7 +4104,7 @@ requires-dist = [ { name = "papermill", specifier = ">=2.6.0" }, { name = "plotly", specifier = ">=5.13.1,<6.0.0" }, { name = "polars", specifier = ">=1.28.0" }, - { name = "polarstate", specifier = ">=0.1.6" }, + { name = "polarstate", specifier = "==0.1.8" }, { name = "pyarrow", specifier = ">=20.0.0" }, { name = "quartodoc", specifier = ">=0.9.1" }, { name = "sphinx-autoapi", specifier = ">=2.1.0,<3.0.0" }, @@ -4121,6 +4122,7 @@ dev = [ { name = "marimo", specifier = ">=0.14.7" }, { name = "myst-nb", marker = "python_full_version >= '3.9' and python_full_version < '4'", specifier = ">=0.17.1,<1.0.0" }, { name = "plotly", specifier = ">=5.24.1" }, + { name = "polarstate", specifier = ">=0.1.6" }, { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=7.3.0,<8.0.0" }, { name = "pytest-cov", specifier = ">=4.0.0,<5.0.0" }, From 5605c0baa1e06d3daf1883fe76c1b79580cde32c Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Tue, 29 Jul 2025 15:05:32 +0300 Subject: [PATCH 22/51] tests: close #137 --- .../helpers/sandbox_observable_helpers.py | 67 ++++++-- tests/test_rtichoke.py | 151 +++++++++++++++--- 2 files changed, 184 insertions(+), 34 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 9ae2060..3f58ecd 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py 
+++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -459,29 +459,37 @@ def explode_data(df): ) def censored_count(df): - return ( - df.filter( - (pl.col("times") < pl.col("fixed_time_horizon")) - & (pl.col("reals") == 0) + censored_count = ( + df.with_columns( + ( + (pl.col("times") < pl.col("fixed_time_horizon")) + & (pl.col("reals") == 0) + ) + .cast(pl.Float64) + .alias("is_censored") ) .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_censored_est"}) - .with_columns(pl.col("real_censored_est").cast(pl.Float64)) + .agg(pl.col("is_censored").sum().alias("real_censored_est")) ) + return censored_count + def competing_count(df): - return ( - df.filter( - (pl.col("reals") == 2) - & (pl.col("times") < pl.col("fixed_time_horizon")) + competing_count = ( + df.with_columns( + ( + (pl.col("times") < pl.col("fixed_time_horizon")) + & (pl.col("reals") == 2) + ) + .cast(pl.Float64) + .alias("is_competing") ) .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_competing_est"}) - .with_columns(pl.col("real_competing_est").cast(pl.Float64)) + .agg(pl.col("is_competing").sum().alias("real_competing_est")) ) + return competing_count + def aj_estimates_per_horizon(df, horizons): return pl.concat( [ @@ -519,6 +527,11 @@ def aj_estimates_per_horizon(df, horizons): (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) ) aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) + + print(exploded) + print(censored) + print(aj_df) + return aj_estimates_with_cross( aj_df.join(censored, on=["strata", "fixed_time_horizon"]), { @@ -628,6 +641,9 @@ def aj_estimates_per_horizon(df, horizons): if censoring_assumption == "adjusted" and competing_assumption == "excluded": exploded = explode_data(reference_group_data) competing = competing_count(exploded) + + print(competing) + non_competing = exploded.filter( (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) ).with_columns( @@ -642,6 +658,29 @@ def aj_estimates_per_horizon(df, horizons): pl.exclude("real_competing_est") ) result = competing.join(aj_df, on=["strata", "fixed_time_horizon"]) + + print( + aj_estimates_with_cross( + result, + { + "real_censored_est": 0.0, + "censoring_assumption": "adjusted", + "competing_assumption": "excluded", + }, + ).select( + [ + "strata", + "fixed_time_horizon", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "real_censored_est", + "censoring_assumption", + "competing_assumption", + ] + ) + ) + return aj_estimates_with_cross( result, { diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index 2822728..6b3f865 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -10,9 +10,137 @@ # from rtichoke import rtichoke import polars as pl from polars.testing import assert_frame_equal +import pytest -def test_create_aj_data() -> None: +def _expected( + negatives: list[float], + positives: list[float], + competing: list[float], + censored: list[float], + censoring: str, + competing_assump: str, +) -> pl.DataFrame: + return pl.DataFrame( + { + "strata": ["group1", "group1", "group1"], + "fixed_time_horizon": [1.0, 2.0, 3.0], + "real_negatives_est": negatives, + "real_positives_est": positives, + "real_competing_est": competing, + "real_censored_est": censored, + "censoring_assumption": [censoring] * 3, + "competing_assumption": [competing_assump] * 3, + } + ) + + +@pytest.mark.parametrize( + "censoring_assumption, competing_assumption, expected", + [ + ( + "adjusted", + 
"adjusted_as_negative", + _expected( + [4.0, 4.0, 8 / 3], + [0.0, 0.0, 4 / 3], + [1.0, 1.0, 1.0], + [0.0, 0.0, 0.0], + "adjusted", + "adjusted_as_negative", + ), + ), + ( + "excluded", + "adjusted_as_negative", + _expected( + [4.0, 4.0, 2.0], + [0.0, 0.0, 1.0], + [1.0, 1.0, 1.0], + [0.0, 0.0, 1.0], + "excluded", + "adjusted_as_negative", + ), + ), + ( + "adjusted", + "adjusted_as_censored", + _expected( + [5.0, 5.0, 10 / 3], + [0.0, 0.0, 5 / 3], + [0.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + "adjusted", + "adjusted_as_censored", + ), + ), + ( + "excluded", + "adjusted_as_censored", + _expected( + [5.0, 5.0, 8 / 3], + [0.0, 0.0, 4 / 3], + [0.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + "excluded", + "adjusted_as_censored", + ), + ), + ( + "adjusted", + "adjusted_as_composite", + _expected( + [4.0, 4.0, 8 / 3], + [1.0, 1.0, 7 / 3], + [0.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + "adjusted", + "adjusted_as_composite", + ), + ), + ( + "excluded", + "adjusted_as_composite", + _expected( + [4.0, 4.0, 2.0], + [1.0, 1.0, 2.0], + [0.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + "excluded", + "adjusted_as_composite", + ), + ), + ( + "adjusted", + "excluded", + _expected( + [5.0, 4.0, 8 / 3], + [0.0, 0.0, 4 / 3], + [0.0, 1.0, 1.0], + [0.0, 0.0, 0.0], + "adjusted", + "excluded", + ), + ), + ( + "excluded", + "excluded", + _expected( + [4.0, 4.0, 2.0], + [0.0, 0.0, 1.0], + [0.0, 1.0, 1.0], + [0.0, 0.0, 1.0], + "excluded", + "excluded", + ), + ), + ], +) +def test_create_aj_data( + censoring_assumption: str, + competing_assumption: str, + expected: pl.DataFrame, +) -> None: df = pl.DataFrame( { "strata": ["group1"] * 5, @@ -24,28 +152,11 @@ def test_create_aj_data() -> None: result = create_aj_data( df, - censoring_assumption="adjusted", - competing_assumption="adjusted_as_negative", + censoring_assumption=censoring_assumption, + competing_assumption=competing_assumption, fixed_time_horizons=horizons, ).sort("fixed_time_horizon") - expected = pl.DataFrame( - { - "strata": ["group1", "group1", "group1"], - "fixed_time_horizon": [1.0, 2.0, 3.0], - "real_negatives_est": [4.0, 4.0, 8 / 3], - "real_positives_est": [0.0, 0.0, 4 / 3], - "real_competing_est": [1.0, 1.0, 1.0], - "real_censored_est": [0.0, 0.0, 0.0], - "censoring_assumption": ["adjusted", "adjusted", "adjusted"], - "competing_assumption": [ - "adjusted_as_negative", - "adjusted_as_negative", - "adjusted_as_negative", - ], - } - ) - assert_frame_equal(result, expected) From bdc842b3a2ead344d2e454111594514be14b7d96 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Tue, 29 Jul 2025 20:24:48 +0300 Subject: [PATCH 23/51] refactor: close #139 --- .../helpers/sandbox_observable_helpers.py | 738 +++++++++++------- 1 file changed, 461 insertions(+), 277 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 3f58ecd..13e66af 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -448,290 +448,335 @@ def create_aj_data( """ def aj_estimates_with_cross(df, extra_cols): - return df.join( - pl.DataFrame(extra_cols), - how="cross", - ) - - def explode_data(df): - return df.with_columns(fixed_time_horizon=pl.lit(fixed_time_horizons)).explode( - "fixed_time_horizon" - ) - - def censored_count(df): - censored_count = ( - df.with_columns( - ( - (pl.col("times") < pl.col("fixed_time_horizon")) - & (pl.col("reals") == 0) - ) - .cast(pl.Float64) - .alias("is_censored") - ) - .group_by(["strata", "fixed_time_horizon"]) - 
.agg(pl.col("is_censored").sum().alias("real_censored_est")) - ) - - return censored_count - - def competing_count(df): - competing_count = ( - df.with_columns( - ( - (pl.col("times") < pl.col("fixed_time_horizon")) - & (pl.col("reals") == 2) - ) - .cast(pl.Float64) - .alias("is_competing") - ) - .group_by(["strata", "fixed_time_horizon"]) - .agg(pl.col("is_competing").sum().alias("real_competing_est")) - ) - - return competing_count - - def aj_estimates_per_horizon(df, horizons): - return pl.concat( - [ - df.filter(pl.col("fixed_time_horizon") == h) - .group_by("strata") - .map_groups(lambda group: extract_aj_estimate_for_strata(group, [h])) - for h in horizons - ], - how="vertical", - ) - - if ( - censoring_assumption == "adjusted" - and competing_assumption == "adjusted_as_negative" - ): - aj_df = reference_group_data.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) - ) - return aj_estimates_with_cross( - aj_df, - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_negative", - }, - ) + return df.join(pl.DataFrame(extra_cols), how="cross") - if ( - censoring_assumption == "excluded" - and competing_assumption == "adjusted_as_negative" - ): - exploded = explode_data(reference_group_data) - censored = censored_count(exploded) - non_censored = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) - ) - aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) - - print(exploded) - print(censored) - print(aj_df) - - return aj_estimates_with_cross( - aj_df.join(censored, on=["strata", "fixed_time_horizon"]), - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_negative", - }, - ) - - if ( - censoring_assumption == "adjusted" - and competing_assumption == "adjusted_as_censored" - ): - adjusted = reference_group_data.with_columns( - [ - pl.when(pl.col("reals") == 2) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) - aj_df = adjusted.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) - ) - return aj_estimates_with_cross( - aj_df, - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_censored", - }, - ) - - if ( - censoring_assumption == "excluded" - and competing_assumption == "adjusted_as_censored" - ): - exploded = explode_data(reference_group_data) - censored = censored_count(exploded) - non_censored = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) - ).with_columns( - [ - pl.when(pl.col("reals") == 2) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) - aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) - return aj_estimates_with_cross( - aj_df.join(censored, on=["strata", "fixed_time_horizon"]), - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_censored", - }, - ) + exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) - if ( - censoring_assumption == "adjusted" - and competing_assumption == "adjusted_as_composite" - ): - adjusted = reference_group_data.with_columns( - [ - pl.when(pl.col("reals") == 2) - .then(pl.lit(1)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) - aj_df = adjusted.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) - ) - return 
aj_estimates_with_cross( - aj_df, - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_composite", - }, - ) - - if ( - censoring_assumption == "excluded" - and competing_assumption == "adjusted_as_composite" - ): - exploded = explode_data(reference_group_data) - censored = censored_count(exploded) - non_censored = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) - ).with_columns( - [ - pl.when(pl.col("reals") == 2) - .then(pl.lit(1)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) - aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) - return aj_estimates_with_cross( - aj_df.join(censored, on=["strata", "fixed_time_horizon"]), - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_composite", - }, - ) + excluded_df = _excluded_events_df( + exploded, censoring_assumption, competing_assumption + ) - if censoring_assumption == "adjusted" and competing_assumption == "excluded": - exploded = explode_data(reference_group_data) - competing = competing_count(exploded) + aj_df = _aj_adjusted_events( + reference_group_data, + exploded, + censoring_assumption, + competing_assumption, + fixed_time_horizons, + ) - print(competing) + result = aj_df.join(excluded_df, on=["strata", "fixed_time_horizon"], how="left") - non_competing = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) - ).with_columns( - [ - pl.when(pl.col("reals") == 2) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) - aj_df = aj_estimates_per_horizon(non_competing, fixed_time_horizons).select( - pl.exclude("real_competing_est") - ) - result = competing.join(aj_df, on=["strata", "fixed_time_horizon"]) - - print( - aj_estimates_with_cross( - result, - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "excluded", - }, - ).select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - "real_competing_est", - "real_censored_est", - "censoring_assumption", - "competing_assumption", - ] - ) - ) + return aj_estimates_with_cross( + result, + { + "censoring_assumption": censoring_assumption, + "competing_assumption": competing_assumption, + }, + ).select( + [ + "strata", + "fixed_time_horizon", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "real_censored_est", + "censoring_assumption", + "competing_assumption", + ] + ) - return aj_estimates_with_cross( - result, - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "excluded", - }, - ).select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - "real_competing_est", - "real_censored_est", - "censoring_assumption", - "competing_assumption", - ] - ) - if censoring_assumption == "excluded" and competing_assumption == "excluded": - exploded = explode_data(reference_group_data) - censored = censored_count(exploded) - competing = competing_count(exploded) - non_censored_non_competing = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) - ) - aj_df = aj_estimates_per_horizon( - non_censored_non_competing, fixed_time_horizons - ) - result = competing.join(censored, on=["strata", "fixed_time_horizon"]).join( - aj_df, on=["strata", "fixed_time_horizon"] - ) - return aj_estimates_with_cross( - result, - { - "censoring_assumption": "excluded", - 
"competing_assumption": "excluded", - }, - ).select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - "real_competing_est", - "real_censored_est", - "censoring_assumption", - "competing_assumption", - ] - ) +# def create_aj_data( +# reference_group_data, +# censoring_assumption, +# competing_assumption, +# fixed_time_horizons, +# ): +# """ +# Create AJ estimates per strata based on censoring and competing assumptions. +# """ + +# def aj_estimates_with_cross(df, extra_cols): +# return df.join( +# pl.DataFrame(extra_cols), +# how="cross", +# ) + +# def explode_data(df): +# return df.with_columns(fixed_time_horizon=pl.lit(fixed_time_horizons)).explode( +# "fixed_time_horizon" +# ) + +# def censored_count(df): +# censored_count = ( +# df.with_columns( +# ( +# (pl.col("times") < pl.col("fixed_time_horizon")) +# & (pl.col("reals") == 0) +# ) +# .cast(pl.Float64) +# .alias("is_censored") +# ) +# .group_by(["strata", "fixed_time_horizon"]) +# .agg(pl.col("is_censored").sum().alias("real_censored_est")) +# ) + +# return censored_count + +# def competing_count(df): +# competing_count = ( +# df.with_columns( +# ( +# (pl.col("times") < pl.col("fixed_time_horizon")) +# & (pl.col("reals") == 2) +# ) +# .cast(pl.Float64) +# .alias("is_competing") +# ) +# .group_by(["strata", "fixed_time_horizon"]) +# .agg(pl.col("is_competing").sum().alias("real_competing_est")) +# ) + +# return competing_count + +# def aj_estimates_per_horizon(df, horizons): +# return pl.concat( +# [ +# df.filter(pl.col("fixed_time_horizon") == h) +# .group_by("strata") +# .map_groups(lambda group: extract_aj_estimate_for_strata(group, [h])) +# for h in horizons +# ], +# how="vertical", +# ) + +# if ( +# censoring_assumption == "adjusted" +# and competing_assumption == "adjusted_as_negative" +# ): +# aj_df = reference_group_data.group_by("strata").map_groups( +# lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) +# ) +# return aj_estimates_with_cross( +# aj_df, +# { +# "real_censored_est": 0.0, +# "censoring_assumption": "adjusted", +# "competing_assumption": "adjusted_as_negative", +# }, +# ) + +# if ( +# censoring_assumption == "excluded" +# and competing_assumption == "adjusted_as_negative" +# ): +# exploded = explode_data(reference_group_data) +# censored = censored_count(exploded) +# non_censored = exploded.filter( +# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) +# ) +# aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) + +# return aj_estimates_with_cross( +# aj_df.join(censored, on=["strata", "fixed_time_horizon"]), +# { +# "censoring_assumption": "excluded", +# "competing_assumption": "adjusted_as_negative", +# }, +# ) + +# if ( +# censoring_assumption == "adjusted" +# and competing_assumption == "adjusted_as_censored" +# ): +# adjusted = reference_group_data.with_columns( +# [ +# pl.when(pl.col("reals") == 2) +# .then(pl.lit(0)) +# .otherwise(pl.col("reals")) +# .alias("reals") +# ] +# ) +# aj_df = adjusted.group_by("strata").map_groups( +# lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) +# ) +# return aj_estimates_with_cross( +# aj_df, +# { +# "real_censored_est": 0.0, +# "censoring_assumption": "adjusted", +# "competing_assumption": "adjusted_as_censored", +# }, +# ) + +# if ( +# censoring_assumption == "excluded" +# and competing_assumption == "adjusted_as_censored" +# ): +# exploded = explode_data(reference_group_data) +# censored = censored_count(exploded) +# non_censored = exploded.filter( 
+# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) +# ).with_columns( +# [ +# pl.when(pl.col("reals") == 2) +# .then(pl.lit(0)) +# .otherwise(pl.col("reals")) +# .alias("reals") +# ] +# ) +# aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) +# return aj_estimates_with_cross( +# aj_df.join(censored, on=["strata", "fixed_time_horizon"]), +# { +# "censoring_assumption": "excluded", +# "competing_assumption": "adjusted_as_censored", +# }, +# ) + +# if ( +# censoring_assumption == "adjusted" +# and competing_assumption == "adjusted_as_composite" +# ): +# adjusted = reference_group_data.with_columns( +# [ +# pl.when(pl.col("reals") == 2) +# .then(pl.lit(1)) +# .otherwise(pl.col("reals")) +# .alias("reals") +# ] +# ) +# aj_df = adjusted.group_by("strata").map_groups( +# lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) +# ) +# return aj_estimates_with_cross( +# aj_df, +# { +# "real_censored_est": 0.0, +# "censoring_assumption": "adjusted", +# "competing_assumption": "adjusted_as_composite", +# }, +# ) + +# if ( +# censoring_assumption == "excluded" +# and competing_assumption == "adjusted_as_composite" +# ): +# exploded = explode_data(reference_group_data) +# censored = censored_count(exploded) +# non_censored = exploded.filter( +# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) +# ).with_columns( +# [ +# pl.when(pl.col("reals") == 2) +# .then(pl.lit(1)) +# .otherwise(pl.col("reals")) +# .alias("reals") +# ] +# ) +# aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) +# return aj_estimates_with_cross( +# aj_df.join(censored, on=["strata", "fixed_time_horizon"]), +# { +# "censoring_assumption": "excluded", +# "competing_assumption": "adjusted_as_composite", +# }, +# ) + +# if censoring_assumption == "adjusted" and competing_assumption == "excluded": +# exploded = explode_data(reference_group_data) +# competing = competing_count(exploded) + +# print(competing) + +# non_competing = exploded.filter( +# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) +# ).with_columns( +# [ +# pl.when(pl.col("reals") == 2) +# .then(pl.lit(0)) +# .otherwise(pl.col("reals")) +# .alias("reals") +# ] +# ) +# aj_df = aj_estimates_per_horizon(non_competing, fixed_time_horizons).select( +# pl.exclude("real_competing_est") +# ) +# result = competing.join(aj_df, on=["strata", "fixed_time_horizon"]) + +# print( +# aj_estimates_with_cross( +# result, +# { +# "real_censored_est": 0.0, +# "censoring_assumption": "adjusted", +# "competing_assumption": "excluded", +# }, +# ).select( +# [ +# "strata", +# "fixed_time_horizon", +# "real_negatives_est", +# "real_positives_est", +# "real_competing_est", +# "real_censored_est", +# "censoring_assumption", +# "competing_assumption", +# ] +# ) +# ) + +# return aj_estimates_with_cross( +# result, +# { +# "real_censored_est": 0.0, +# "censoring_assumption": "adjusted", +# "competing_assumption": "excluded", +# }, +# ).select( +# [ +# "strata", +# "fixed_time_horizon", +# "real_negatives_est", +# "real_positives_est", +# "real_competing_est", +# "real_censored_est", +# "censoring_assumption", +# "competing_assumption", +# ] +# ) + +# if censoring_assumption == "excluded" and competing_assumption == "excluded": +# exploded = explode_data(reference_group_data) +# censored = censored_count(exploded) +# competing = competing_count(exploded) +# non_censored_non_competing = exploded.filter( +# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) 
+# ) +# aj_df = aj_estimates_per_horizon( +# non_censored_non_competing, fixed_time_horizons +# ) +# result = competing.join(censored, on=["strata", "fixed_time_horizon"]).join( +# aj_df, on=["strata", "fixed_time_horizon"] +# ) +# return aj_estimates_with_cross( +# result, +# { +# "censoring_assumption": "excluded", +# "competing_assumption": "excluded", +# }, +# ).select( +# [ +# "strata", +# "fixed_time_horizon", +# "real_negatives_est", +# "real_positives_est", +# "real_competing_est", +# "real_censored_est", +# "censoring_assumption", +# "competing_assumption", +# ] +# ) def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: @@ -1004,3 +1049,142 @@ def cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data): how="left", ) return final_adjusted_data_polars + + +def _censored_count(df: pl.DataFrame) -> pl.DataFrame: + return ( + df.with_columns( + ((pl.col("times") < pl.col("fixed_time_horizon")) & (pl.col("reals") == 0)) + .cast(pl.Float64) + .alias("is_censored") + ) + .group_by(["strata", "fixed_time_horizon"]) + .agg(pl.col("is_censored").sum().alias("real_censored_est")) + ) + + +def _competing_count(df: pl.DataFrame) -> pl.DataFrame: + return ( + df.with_columns( + ((pl.col("times") < pl.col("fixed_time_horizon")) & (pl.col("reals") == 2)) + .cast(pl.Float64) + .alias("is_competing") + ) + .group_by(["strata", "fixed_time_horizon"]) + .agg(pl.col("is_competing").sum().alias("real_competing_est")) + ) + + +def _aj_estimates_per_horizon(df: pl.DataFrame, horizons: list[float]) -> pl.DataFrame: + return pl.concat( + [ + df.filter(pl.col("fixed_time_horizon") == h) + .group_by("strata") + .map_groups(lambda group: extract_aj_estimate_for_strata(group, [h])) + for h in horizons + ], + how="vertical", + ) + + +def _excluded_events_df( + exploded: pl.DataFrame, censoring: str, competing: str +) -> pl.DataFrame: + base = exploded.select(["strata", "fixed_time_horizon"]).unique() + censored = ( + _censored_count(exploded) + if censoring == "excluded" + else base.with_columns(pl.lit(0.0).alias("real_censored_est")) + ) + competing_df = ( + _competing_count(exploded) + if competing == "excluded" + else base.with_columns(pl.lit(0.0).alias("real_competing_est")) + ) + return ( + base.join(censored, on=["strata", "fixed_time_horizon"], how="left") + .join(competing_df, on=["strata", "fixed_time_horizon"], how="left") + .fill_null(0.0) + ) + + +def _aj_adjusted_events( + reference_group_data: pl.DataFrame, + exploded: pl.DataFrame, + censoring: str, + competing: str, + horizons: list[float], +) -> pl.DataFrame: + if censoring == "adjusted" and competing == "adjusted_as_negative": + return reference_group_data.group_by("strata").map_groups( + lambda group: extract_aj_estimate_for_strata(group, horizons) + ) + + if censoring == "excluded" and competing == "adjusted_as_negative": + non_censored = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + ) + return _aj_estimates_per_horizon(non_censored, horizons) + + if censoring == "adjusted" and competing == "adjusted_as_censored": + adjusted = reference_group_data.with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(0)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + return adjusted.group_by("strata").map_groups( + lambda group: extract_aj_estimate_for_strata(group, horizons) + ) + + if censoring == "excluded" and competing == "adjusted_as_censored": + non_censored = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + 
).with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(0)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + return _aj_estimates_per_horizon(non_censored, horizons) + + if censoring == "adjusted" and competing == "adjusted_as_composite": + adjusted = reference_group_data.with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(1)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + return adjusted.group_by("strata").map_groups( + lambda group: extract_aj_estimate_for_strata(group, horizons) + ) + + if censoring == "excluded" and competing == "adjusted_as_composite": + non_censored = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + ).with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(1)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + return _aj_estimates_per_horizon(non_censored, horizons) + + if censoring == "adjusted" and competing == "excluded": + non_competing = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) + ).with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(0)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + return _aj_estimates_per_horizon(non_competing, horizons).select( + pl.exclude("real_competing_est") + ) + + # censoring == "excluded" and competing == "excluded" + non_censored_non_competing = exploded.filter( + (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) + ) + return _aj_estimates_per_horizon(non_censored_non_competing, horizons) From 7c18199bb9eae617e81ca6b8598382ade78b4dc8 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 31 Jul 2025 12:46:15 +0300 Subject: [PATCH 24/51] fix: close #140 --- .../helpers/sandbox_observable_helpers.py | 28 +++++++++++++------ tests/test_rtichoke.py | 20 ++++++------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 13e66af..548f914 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -452,10 +452,16 @@ def aj_estimates_with_cross(df, extra_cols): exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) + print("exploded") + print(exploded) + excluded_df = _excluded_events_df( exploded, censoring_assumption, competing_assumption ) + print("excluded_df") + print(excluded_df) + aj_df = _aj_adjusted_events( reference_group_data, exploded, @@ -464,6 +470,9 @@ def aj_estimates_with_cross(df, extra_cols): fixed_time_horizons, ) + print("aj_df") + print(aj_df) + result = aj_df.join(excluded_df, on=["strata", "fixed_time_horizon"], how="left") return aj_estimates_with_cross( @@ -1054,7 +1063,7 @@ def cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data): def _censored_count(df: pl.DataFrame) -> pl.DataFrame: return ( df.with_columns( - ((pl.col("times") < pl.col("fixed_time_horizon")) & (pl.col("reals") == 0)) + ((pl.col("times") <= pl.col("fixed_time_horizon")) & (pl.col("reals") == 0)) .cast(pl.Float64) .alias("is_censored") ) @@ -1066,7 +1075,7 @@ def _censored_count(df: pl.DataFrame) -> pl.DataFrame: def _competing_count(df: pl.DataFrame) -> pl.DataFrame: return ( df.with_columns( - ((pl.col("times") < pl.col("fixed_time_horizon")) & (pl.col("reals") == 2)) + ((pl.col("times") <= pl.col("fixed_time_horizon")) & (pl.col("reals") == 2)) .cast(pl.Float64) .alias("is_competing") ) @@ -1122,7 +1131,7 @@ def _aj_adjusted_events( if censoring == 
"excluded" and competing == "adjusted_as_negative": non_censored = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) ) return _aj_estimates_per_horizon(non_censored, horizons) @@ -1139,7 +1148,7 @@ def _aj_adjusted_events( if censoring == "excluded" and competing == "adjusted_as_censored": non_censored = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) ).with_columns( pl.when(pl.col("reals") == 2) .then(pl.lit(0)) @@ -1161,7 +1170,7 @@ def _aj_adjusted_events( if censoring == "excluded" and competing == "adjusted_as_composite": non_censored = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) ).with_columns( pl.when(pl.col("reals") == 2) .then(pl.lit(1)) @@ -1172,7 +1181,7 @@ def _aj_adjusted_events( if censoring == "adjusted" and competing == "excluded": non_competing = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) ).with_columns( pl.when(pl.col("reals") == 2) .then(pl.lit(0)) @@ -1185,6 +1194,9 @@ def _aj_adjusted_events( # censoring == "excluded" and competing == "excluded" non_censored_non_competing = exploded.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) + ) + + return _aj_estimates_per_horizon(non_censored_non_competing, horizons).drop( + "real_competing_est" ) - return _aj_estimates_per_horizon(non_censored_non_competing, horizons) diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index 6b3f865..9c7c2a8 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -54,10 +54,10 @@ def _expected( "excluded", "adjusted_as_negative", _expected( - [4.0, 4.0, 2.0], + [4.0, 3.0, 2.0], [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], - [0.0, 0.0, 1.0], + [0.0, 1.0, 1.0], "excluded", "adjusted_as_negative", ), @@ -78,10 +78,10 @@ def _expected( "excluded", "adjusted_as_censored", _expected( - [5.0, 5.0, 8 / 3], + [5.0, 4.0, 8 / 3], [0.0, 0.0, 4 / 3], [0.0, 0.0, 0.0], - [0.0, 0.0, 1.0], + [0.0, 1.0, 1.0], "excluded", "adjusted_as_censored", ), @@ -102,10 +102,10 @@ def _expected( "excluded", "adjusted_as_composite", _expected( - [4.0, 4.0, 2.0], + [4.0, 3.0, 2.0], [1.0, 1.0, 2.0], [0.0, 0.0, 0.0], - [0.0, 0.0, 1.0], + [0.0, 1.0, 1.0], "excluded", "adjusted_as_composite", ), @@ -114,9 +114,9 @@ def _expected( "adjusted", "excluded", _expected( - [5.0, 4.0, 8 / 3], + [4.0, 4.0, 8 / 3], [0.0, 0.0, 4 / 3], - [0.0, 1.0, 1.0], + [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], "adjusted", "excluded", @@ -126,10 +126,10 @@ def _expected( "excluded", "excluded", _expected( - [4.0, 4.0, 2.0], + [4.0, 3.0, 2.0], [0.0, 0.0, 1.0], + [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], - [0.0, 0.0, 1.0], "excluded", "excluded", ), From 16352b324bd4a36cb999e1a43dbcab3a62d216d4 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 31 Jul 2025 15:11:00 +0300 Subject: [PATCH 25/51] feature: close #114 --- .../helpers/sandbox_observable_helpers.py | 52 +++++++++++++------ tests/test_rtichoke.py | 13 ++++- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py 
b/src/rtichoke/helpers/sandbox_observable_helpers.py index 548f914..3b89fe4 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -442,6 +442,7 @@ def create_aj_data( censoring_assumption, competing_assumption, fixed_time_horizons, + full_event_table: bool = False, ): """ Create AJ estimates per strata based on censoring and competing assumptions. @@ -468,6 +469,7 @@ def aj_estimates_with_cross(df, extra_cols): censoring_assumption, competing_assumption, fixed_time_horizons, + full_event_table, ) print("aj_df") @@ -475,6 +477,9 @@ def aj_estimates_with_cross(df, extra_cols): result = aj_df.join(excluded_df, on=["strata", "fixed_time_horizon"], how="left") + print("result") + print(result) + return aj_estimates_with_cross( result, { @@ -491,6 +496,7 @@ def aj_estimates_with_cross(df, extra_cols): "real_censored_est", "censoring_assumption", "competing_assumption", + "estimate_origin", ] ) @@ -826,13 +832,13 @@ def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: # return result_pandas -def extract_aj_estimate_for_strata(data_to_adjust, horizons): +def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: bool): n = data_to_adjust.height event_table = prepare_event_table(data_to_adjust) aj_estimate_for_strata_polars = predict_aj_estimates( - event_table, pl.Series(horizons) + event_table, pl.Series(horizons), full_event_table ) aj_estimate_for_strata_polars = aj_estimate_for_strata_polars.rename( @@ -854,6 +860,7 @@ def extract_aj_estimate_for_strata(data_to_adjust, horizons): "real_negatives_est", "real_positives_est", "real_competing_est", + pl.col("estimate_origin"), ] ) @@ -1084,12 +1091,18 @@ def _competing_count(df: pl.DataFrame) -> pl.DataFrame: ) -def _aj_estimates_per_horizon(df: pl.DataFrame, horizons: list[float]) -> pl.DataFrame: +def _aj_estimates_per_horizon( + df: pl.DataFrame, horizons: list[float], full_event_table: bool +) -> pl.DataFrame: return pl.concat( [ df.filter(pl.col("fixed_time_horizon") == h) .group_by("strata") - .map_groups(lambda group: extract_aj_estimate_for_strata(group, [h])) + .map_groups( + lambda group: extract_aj_estimate_for_strata( + group, [h], full_event_table + ) + ) for h in horizons ], how="vertical", @@ -1123,17 +1136,20 @@ def _aj_adjusted_events( censoring: str, competing: str, horizons: list[float], + full_event_table: bool = False, ) -> pl.DataFrame: if censoring == "adjusted" and competing == "adjusted_as_negative": return reference_group_data.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, horizons) + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) ) if censoring == "excluded" and competing == "adjusted_as_negative": non_censored = exploded.filter( (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) ) - return _aj_estimates_per_horizon(non_censored, horizons) + return _aj_estimates_per_horizon(non_censored, horizons, full_event_table) if censoring == "adjusted" and competing == "adjusted_as_censored": adjusted = reference_group_data.with_columns( @@ -1143,7 +1159,9 @@ def _aj_adjusted_events( .alias("reals") ) return adjusted.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, horizons) + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) ) if censoring == "excluded" and competing == "adjusted_as_censored": @@ -1155,7 +1173,7 @@ def _aj_adjusted_events( 
.otherwise(pl.col("reals")) .alias("reals") ) - return _aj_estimates_per_horizon(non_censored, horizons) + return _aj_estimates_per_horizon(non_censored, horizons, full_event_table) if censoring == "adjusted" and competing == "adjusted_as_composite": adjusted = reference_group_data.with_columns( @@ -1165,7 +1183,9 @@ def _aj_adjusted_events( .alias("reals") ) return adjusted.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, horizons) + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) ) if censoring == "excluded" and competing == "adjusted_as_composite": @@ -1177,7 +1197,7 @@ def _aj_adjusted_events( .otherwise(pl.col("reals")) .alias("reals") ) - return _aj_estimates_per_horizon(non_censored, horizons) + return _aj_estimates_per_horizon(non_censored, horizons, full_event_table) if censoring == "adjusted" and competing == "excluded": non_competing = exploded.filter( @@ -1188,15 +1208,15 @@ def _aj_adjusted_events( .otherwise(pl.col("reals")) .alias("reals") ) - return _aj_estimates_per_horizon(non_competing, horizons).select( - pl.exclude("real_competing_est") - ) + return _aj_estimates_per_horizon( + non_competing, horizons, full_event_table + ).select(pl.exclude("real_competing_est")) # censoring == "excluded" and competing == "excluded" non_censored_non_competing = exploded.filter( (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) ) - return _aj_estimates_per_horizon(non_censored_non_competing, horizons).drop( - "real_competing_est" - ) + return _aj_estimates_per_horizon( + non_censored_non_competing, horizons, full_event_table + ).drop("real_competing_est") diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index 9c7c2a8..710b3ed 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -21,6 +21,7 @@ def _expected( censoring: str, competing_assump: str, ) -> pl.DataFrame: + estimate_origin_enum = pl.Enum(["fixed_time_horizons", "event_table"]) return pl.DataFrame( { "strata": ["group1", "group1", "group1"], @@ -31,6 +32,9 @@ def _expected( "real_censored_est": censored, "censoring_assumption": [censoring] * 3, "competing_assumption": [competing_assump] * 3, + "estimate_origin": pl.Series( + ["fixed_time_horizons"] * 3, dtype=estimate_origin_enum + ), } ) @@ -169,8 +173,10 @@ def test_extract_aj_estimate_for_strata_basic() -> None: } ) horizons = [1.0, 2.0, 3.0] - - result = extract_aj_estimate_for_strata(df, horizons).sort("fixed_time_horizon") + estimate_origin_enum = pl.Enum(["fixed_time_horizons", "event_table"]) + result = extract_aj_estimate_for_strata(df, horizons, full_event_table=False).sort( + "fixed_time_horizon" + ) expected = pl.DataFrame( { @@ -179,6 +185,9 @@ def test_extract_aj_estimate_for_strata_basic() -> None: "real_negatives_est": [4.0, 4.0, 8 / 3], "real_positives_est": [0.0, 0.0, 4 / 3], "real_competing_est": [1.0, 1.0, 1.0], + "estimate_origin": pl.Series( + ["fixed_time_horizons"] * 3, dtype=estimate_origin_enum + ), } ) From 86a472446b615be36ee3f30620a5ebb425e7a552 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 3 Aug 2025 07:08:05 +0300 Subject: [PATCH 26/51] feature: close #145 --- .../helpers/sandbox_observable_helpers.py | 338 ++---------------- 1 file changed, 39 insertions(+), 299 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 3b89fe4..71ad4e8 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ 
b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -451,6 +451,9 @@ def create_aj_data( def aj_estimates_with_cross(df, extra_cols): return df.join(pl.DataFrame(extra_cols), how="cross") + print("reference_group_data") + print(reference_group_data) + exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) print("exploded") @@ -473,12 +476,12 @@ def aj_estimates_with_cross(df, extra_cols): ) print("aj_df") - print(aj_df) + print(aj_df.sort(pl.col("fixed_time_horizon"))) result = aj_df.join(excluded_df, on=["strata", "fixed_time_horizon"], how="left") print("result") - print(result) + print(result.sort(pl.col("fixed_time_horizon"))) return aj_estimates_with_cross( result, @@ -490,6 +493,7 @@ def aj_estimates_with_cross(df, extra_cols): [ "strata", "fixed_time_horizon", + "times", "real_negatives_est", "real_positives_est", "real_competing_est", @@ -501,299 +505,6 @@ def aj_estimates_with_cross(df, extra_cols): ) -# def create_aj_data( -# reference_group_data, -# censoring_assumption, -# competing_assumption, -# fixed_time_horizons, -# ): -# """ -# Create AJ estimates per strata based on censoring and competing assumptions. -# """ - -# def aj_estimates_with_cross(df, extra_cols): -# return df.join( -# pl.DataFrame(extra_cols), -# how="cross", -# ) - -# def explode_data(df): -# return df.with_columns(fixed_time_horizon=pl.lit(fixed_time_horizons)).explode( -# "fixed_time_horizon" -# ) - -# def censored_count(df): -# censored_count = ( -# df.with_columns( -# ( -# (pl.col("times") < pl.col("fixed_time_horizon")) -# & (pl.col("reals") == 0) -# ) -# .cast(pl.Float64) -# .alias("is_censored") -# ) -# .group_by(["strata", "fixed_time_horizon"]) -# .agg(pl.col("is_censored").sum().alias("real_censored_est")) -# ) - -# return censored_count - -# def competing_count(df): -# competing_count = ( -# df.with_columns( -# ( -# (pl.col("times") < pl.col("fixed_time_horizon")) -# & (pl.col("reals") == 2) -# ) -# .cast(pl.Float64) -# .alias("is_competing") -# ) -# .group_by(["strata", "fixed_time_horizon"]) -# .agg(pl.col("is_competing").sum().alias("real_competing_est")) -# ) - -# return competing_count - -# def aj_estimates_per_horizon(df, horizons): -# return pl.concat( -# [ -# df.filter(pl.col("fixed_time_horizon") == h) -# .group_by("strata") -# .map_groups(lambda group: extract_aj_estimate_for_strata(group, [h])) -# for h in horizons -# ], -# how="vertical", -# ) - -# if ( -# censoring_assumption == "adjusted" -# and competing_assumption == "adjusted_as_negative" -# ): -# aj_df = reference_group_data.group_by("strata").map_groups( -# lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) -# ) -# return aj_estimates_with_cross( -# aj_df, -# { -# "real_censored_est": 0.0, -# "censoring_assumption": "adjusted", -# "competing_assumption": "adjusted_as_negative", -# }, -# ) - -# if ( -# censoring_assumption == "excluded" -# and competing_assumption == "adjusted_as_negative" -# ): -# exploded = explode_data(reference_group_data) -# censored = censored_count(exploded) -# non_censored = exploded.filter( -# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) -# ) -# aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) - -# return aj_estimates_with_cross( -# aj_df.join(censored, on=["strata", "fixed_time_horizon"]), -# { -# "censoring_assumption": "excluded", -# "competing_assumption": "adjusted_as_negative", -# }, -# ) - -# if ( -# censoring_assumption == "adjusted" -# and competing_assumption == "adjusted_as_censored" -# ): -# 
adjusted = reference_group_data.with_columns( -# [ -# pl.when(pl.col("reals") == 2) -# .then(pl.lit(0)) -# .otherwise(pl.col("reals")) -# .alias("reals") -# ] -# ) -# aj_df = adjusted.group_by("strata").map_groups( -# lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) -# ) -# return aj_estimates_with_cross( -# aj_df, -# { -# "real_censored_est": 0.0, -# "censoring_assumption": "adjusted", -# "competing_assumption": "adjusted_as_censored", -# }, -# ) - -# if ( -# censoring_assumption == "excluded" -# and competing_assumption == "adjusted_as_censored" -# ): -# exploded = explode_data(reference_group_data) -# censored = censored_count(exploded) -# non_censored = exploded.filter( -# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) -# ).with_columns( -# [ -# pl.when(pl.col("reals") == 2) -# .then(pl.lit(0)) -# .otherwise(pl.col("reals")) -# .alias("reals") -# ] -# ) -# aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) -# return aj_estimates_with_cross( -# aj_df.join(censored, on=["strata", "fixed_time_horizon"]), -# { -# "censoring_assumption": "excluded", -# "competing_assumption": "adjusted_as_censored", -# }, -# ) - -# if ( -# censoring_assumption == "adjusted" -# and competing_assumption == "adjusted_as_composite" -# ): -# adjusted = reference_group_data.with_columns( -# [ -# pl.when(pl.col("reals") == 2) -# .then(pl.lit(1)) -# .otherwise(pl.col("reals")) -# .alias("reals") -# ] -# ) -# aj_df = adjusted.group_by("strata").map_groups( -# lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) -# ) -# return aj_estimates_with_cross( -# aj_df, -# { -# "real_censored_est": 0.0, -# "censoring_assumption": "adjusted", -# "competing_assumption": "adjusted_as_composite", -# }, -# ) - -# if ( -# censoring_assumption == "excluded" -# and competing_assumption == "adjusted_as_composite" -# ): -# exploded = explode_data(reference_group_data) -# censored = censored_count(exploded) -# non_censored = exploded.filter( -# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) -# ).with_columns( -# [ -# pl.when(pl.col("reals") == 2) -# .then(pl.lit(1)) -# .otherwise(pl.col("reals")) -# .alias("reals") -# ] -# ) -# aj_df = aj_estimates_per_horizon(non_censored, fixed_time_horizons) -# return aj_estimates_with_cross( -# aj_df.join(censored, on=["strata", "fixed_time_horizon"]), -# { -# "censoring_assumption": "excluded", -# "competing_assumption": "adjusted_as_composite", -# }, -# ) - -# if censoring_assumption == "adjusted" and competing_assumption == "excluded": -# exploded = explode_data(reference_group_data) -# competing = competing_count(exploded) - -# print(competing) - -# non_competing = exploded.filter( -# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) -# ).with_columns( -# [ -# pl.when(pl.col("reals") == 2) -# .then(pl.lit(0)) -# .otherwise(pl.col("reals")) -# .alias("reals") -# ] -# ) -# aj_df = aj_estimates_per_horizon(non_competing, fixed_time_horizons).select( -# pl.exclude("real_competing_est") -# ) -# result = competing.join(aj_df, on=["strata", "fixed_time_horizon"]) - -# print( -# aj_estimates_with_cross( -# result, -# { -# "real_censored_est": 0.0, -# "censoring_assumption": "adjusted", -# "competing_assumption": "excluded", -# }, -# ).select( -# [ -# "strata", -# "fixed_time_horizon", -# "real_negatives_est", -# "real_positives_est", -# "real_competing_est", -# "real_censored_est", -# "censoring_assumption", -# "competing_assumption", -# ] -# ) -# ) - -# return 
aj_estimates_with_cross( -# result, -# { -# "real_censored_est": 0.0, -# "censoring_assumption": "adjusted", -# "competing_assumption": "excluded", -# }, -# ).select( -# [ -# "strata", -# "fixed_time_horizon", -# "real_negatives_est", -# "real_positives_est", -# "real_competing_est", -# "real_censored_est", -# "censoring_assumption", -# "competing_assumption", -# ] -# ) - -# if censoring_assumption == "excluded" and competing_assumption == "excluded": -# exploded = explode_data(reference_group_data) -# censored = censored_count(exploded) -# competing = competing_count(exploded) -# non_censored_non_competing = exploded.filter( -# (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) -# ) -# aj_df = aj_estimates_per_horizon( -# non_censored_non_competing, fixed_time_horizons -# ) -# result = competing.join(censored, on=["strata", "fixed_time_horizon"]).join( -# aj_df, on=["strata", "fixed_time_horizon"] -# ) -# return aj_estimates_with_cross( -# result, -# { -# "censoring_assumption": "excluded", -# "competing_assumption": "excluded", -# }, -# ).select( -# [ -# "strata", -# "fixed_time_horizon", -# "real_negatives_est", -# "real_positives_est", -# "real_competing_est", -# "real_censored_est", -# "censoring_assumption", -# "competing_assumption", -# ] -# ) - - def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: all_combinations = data.select(["strata", "reals", "fixed_time_horizon"]).unique() @@ -841,9 +552,36 @@ def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: b event_table, pl.Series(horizons), full_event_table ) - aj_estimate_for_strata_polars = aj_estimate_for_strata_polars.rename( - {"times": "fixed_time_horizon"} - ) + print("horizons") + print(horizons) + + print("len(horizons)") + print(len(horizons)) + + if len(horizons) == 1: + aj_estimate_for_strata_polars = aj_estimate_for_strata_polars.with_columns( + pl.lit(horizons[0]).alias("fixed_time_horizon") + ) + + else: + fixed_df = aj_estimate_for_strata_polars.filter( + pl.col("estimate_origin") == "fixed_time_horizons" + ).with_columns([pl.col("times").alias("fixed_time_horizon")]) + + event_df = ( + aj_estimate_for_strata_polars.filter( + pl.col("estimate_origin") == "event_table" + ) + .with_columns([pl.lit(horizons).alias("fixed_time_horizon")]) + .explode("fixed_time_horizon") + ) + + aj_estimate_for_strata_polars = pl.concat( + [fixed_df, event_df], how="vertical" + ).sort("estimate_origin", "fixed_time_horizon", "times") + + print("aj_estimate_for_strata_polars") + print(aj_estimate_for_strata_polars) return aj_estimate_for_strata_polars.with_columns( [ @@ -856,6 +594,7 @@ def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: b ).select( [ "strata", + "times", "fixed_time_horizon", "real_negatives_est", "real_positives_est", @@ -1123,11 +862,12 @@ def _excluded_events_df( if competing == "excluded" else base.with_columns(pl.lit(0.0).alias("real_competing_est")) ) + return ( base.join(censored, on=["strata", "fixed_time_horizon"], how="left") .join(competing_df, on=["strata", "fixed_time_horizon"], how="left") .fill_null(0.0) - ) + ).with_columns([pl.col("fixed_time_horizon").alias("times")]) def _aj_adjusted_events( From ce6592ab8269bc376d00758a651d76a103166441 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 3 Aug 2025 07:10:46 +0300 Subject: [PATCH 27/51] tests: close #144 --- tests/test_rtichoke.py | 159 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/tests/test_rtichoke.py 
b/tests/test_rtichoke.py index 710b3ed..f38ee00 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -5,6 +5,9 @@ from rtichoke.helpers.sandbox_observable_helpers import ( create_aj_data, extract_aj_estimate_for_strata, + assign_and_explode_polars, + _aj_adjusted_events, + _excluded_events_df, ) # from rtichoke import rtichoke @@ -12,6 +15,10 @@ from polars.testing import assert_frame_equal import pytest +TIMES = [24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 4.3, 31.5] +REALS = [1, 1, 1, 1, 0, 2, 1, 2, 0, 1] +TIME_HORIZONS = [10.0, 30.0, 50.0] + def _expected( negatives: list[float], @@ -192,3 +199,155 @@ def test_extract_aj_estimate_for_strata_basic() -> None: ) assert_frame_equal(result, expected) + + +AJ_EXPECTED = { + ("adjusted", "adjusted_as_negative"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (5.555555555555555, 3.3333333333333335, 1.1111111111111112), + (0.0, 7.407407407407407, 2.5925925925925926), + ], + ("adjusted", "adjusted_as_censored"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (6.349206349206349, 3.6507936507936507, 0.0), + (0.0, 10.0, 0.0), + ], + ("adjusted", "adjusted_as_composite"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (5.555555555555555, 4.444444444444445, 0.0), + (0.0, 10.0, 0.0), + ], + ("adjusted", "excluded"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (5.625, 3.375, 0.0), + (0.0, 8.0, 0.0), + ], + ("excluded", "adjusted_as_negative"): [ + (8.0, 1.0, 0.0), + (5.0, 3.0, 1.0), + (0.0, 6.0, 2.0), + ], + ("excluded", "adjusted_as_censored"): [ + (8.0, 1.0, 0.0), + (5.714285714285714, 3.2857142857142856, 0.0), + (0.0, 8.0, 0.0), + ], + ("excluded", "adjusted_as_composite"): [ + (8.0, 1.0, 0.0), + (5.0, 4.0, 0.0), + (0.0, 8.0, 0.0), + ], + ("excluded", "excluded"): [ + (8.0, 1.0, 0.0), + (5.0, 3.0, 0.0), + (0.0, 6.0, 0.0), + ], +} + +EXCLUDED_EXPECTED = { + "adjusted": [0.0, 0.0, 0.0], + "excluded": [1.0, 1.0, 2.0], +} + +COMPETING_EXCLUDED = { + "excluded": [0.0, 1.0, 2.0], + "adjusted_as_negative": [0.0, 0.0, 0.0], + "adjusted_as_censored": [0.0, 0.0, 0.0], + "adjusted_as_composite": [0.0, 0.0, 0.0], +} + + +def _expected_aj_df(neg, pos, comp, include_comp=True): + estimate_origin_enum = pl.Enum(["fixed_time_horizons", "event_table"]) + + data = { + "strata": ["group1"] * 3, + "fixed_time_horizon": TIME_HORIZONS, + "real_negatives_est": [neg[0], neg[1], neg[2]], + "real_positives_est": [pos[0], pos[1], pos[2]], + "estimate_origin": pl.Series( + ["fixed_time_horizons"] * 3, dtype=estimate_origin_enum + ), + } + if include_comp: + data["real_competing_est"] = [comp[0], comp[1], comp[2]] + + cols = [ + "strata", + "fixed_time_horizon", + "real_negatives_est", + "real_positives_est", + ] + if include_comp: + cols.append("real_competing_est") + cols.append("estimate_origin") + + return pl.DataFrame(data)[cols] + + +def _expected_excluded_df(censoring, competing): + return pl.DataFrame( + { + "strata": ["group1"] * 3, + "fixed_time_horizon": TIME_HORIZONS, + "real_censored_est": EXCLUDED_EXPECTED[censoring], + "real_competing_est": COMPETING_EXCLUDED[competing], + } + ) + + +@pytest.mark.parametrize( + "censoring, competing", + [ + (c, cc) + for c in ["adjusted", "excluded"] + for cc in [ + "adjusted_as_negative", + "adjusted_as_censored", + "adjusted_as_composite", + "excluded", + ] + ], +) +def test_aj_adjusted_events(censoring: str, competing: str) -> None: + df = pl.DataFrame( + {"strata": ["group1"] * len(TIMES), "reals": REALS, "times": TIMES} + ) + exploded = assign_and_explode_polars(df, TIME_HORIZONS) + result 
= _aj_adjusted_events( + df, exploded, censoring, competing, TIME_HORIZONS, full_event_table=False + ).sort("fixed_time_horizon") + + neg = [v[0] for v in AJ_EXPECTED[(censoring, competing)]] + pos = [v[1] for v in AJ_EXPECTED[(censoring, competing)]] + comp_vals = [v[2] for v in AJ_EXPECTED[(censoring, competing)]] + include_comp = competing != "excluded" + expected = _expected_aj_df(neg, pos, comp_vals, include_comp) + assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "censoring, competing", + [ + (c, cc) + for c in ["adjusted", "excluded"] + for cc in [ + "adjusted_as_negative", + "adjusted_as_censored", + "adjusted_as_composite", + "excluded", + ] + ], +) +def test_excluded_events_df(censoring: str, competing: str) -> None: + df = pl.DataFrame( + {"strata": ["group1"] * len(TIMES), "reals": REALS, "times": TIMES} + ) + exploded = assign_and_explode_polars(df, TIME_HORIZONS) + + result = _excluded_events_df(exploded, censoring, competing).sort( + "fixed_time_horizon" + ) + + expected = _expected_excluded_df(censoring, competing) + assert_frame_equal(result, expected) From 3e5e68c6a1416eea6110acf6b2136d631fe2c07d Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 3 Aug 2025 07:30:15 +0300 Subject: [PATCH 28/51] tests: make test_rtichoke run --- tests/test_rtichoke.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index f38ee00..af78825 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -33,6 +33,7 @@ def _expected( { "strata": ["group1", "group1", "group1"], "fixed_time_horizon": [1.0, 2.0, 3.0], + "times": [1.0, 2.0, 3.0], "real_negatives_est": negatives, "real_positives_est": positives, "real_competing_est": competing, @@ -188,6 +189,7 @@ def test_extract_aj_estimate_for_strata_basic() -> None: expected = pl.DataFrame( { "strata": ["group1", "group1", "group1"], + "times": [1.0, 2.0, 3.0], "fixed_time_horizon": [1.0, 2.0, 3.0], "real_negatives_est": [4.0, 4.0, 8 / 3], "real_positives_est": [0.0, 0.0, 4 / 3], @@ -262,6 +264,7 @@ def _expected_aj_df(neg, pos, comp, include_comp=True): data = { "strata": ["group1"] * 3, + "times": TIME_HORIZONS, "fixed_time_horizon": TIME_HORIZONS, "real_negatives_est": [neg[0], neg[1], neg[2]], "real_positives_est": [pos[0], pos[1], pos[2]], @@ -274,6 +277,7 @@ def _expected_aj_df(neg, pos, comp, include_comp=True): cols = [ "strata", + "times", "fixed_time_horizon", "real_negatives_est", "real_positives_est", @@ -292,6 +296,7 @@ def _expected_excluded_df(censoring, competing): "fixed_time_horizon": TIME_HORIZONS, "real_censored_est": EXCLUDED_EXPECTED[censoring], "real_competing_est": COMPETING_EXCLUDED[competing], + "times": TIME_HORIZONS, } ) From 7d2a075bc6689f013530fa2b53d5a1f84106756d Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 3 Aug 2025 10:32:25 +0300 Subject: [PATCH 29/51] docs: close #143 --- docs/before_we_validate.qmd | 188 +++++++++++++++++++++++++++++++++--- 1 file changed, 175 insertions(+), 13 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index 1fae6d2..101a818 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -587,7 +587,7 @@ from rtichoke.helpers.sandbox_observable_helpers import ( create_adjusted_data ) -from polarstate import prepare_event_table +from polarstate import prepare_event_table, predict_aj_estimates times_and_reals = pl.DataFrame({ "times": times, @@ -596,32 +596,132 @@ times_and_reals = pl.DataFrame({ event_table = prepare_event_table(times_and_reals) 
-# event_table_data = event_table.to_dicts() - -long_df = event_table.select( +long_df = predict_aj_estimates( + event_table, + [10.0, 30.0, 50.0], + True +).select( pl.col("times"), - pl.col("overall_survival"), - pl.col("state_occupancy_probability_1_at_times"), - pl.col("state_occupancy_probability_2_at_times") + pl.col("state_occupancy_probability_0"), + pl.col("state_occupancy_probability_1"), + pl.col("state_occupancy_probability_2"), + pl.col("estimate_origin") ).unpivot( - index = "times", + index = ["times", "estimate_origin"], variable_name="state", value_name="aj_estimate" ).to_dicts() +# event_table_data = event_table.to_dicts() + +# long_df = event_table.select( +# pl.col("times"), +# pl.col("overall_survival"), +# pl.col("state_occupancy_probability_1_at_times"), +# pl.col("state_occupancy_probability_2_at_times") +# ).unpivot( +# index = ["times", "estimate_origin"], +# variable_name="state", +# value_name="aj_estimate" +# ).to_dicts() + ojs_define(long_df = long_df) ``` +```{python} +from rtichoke.helpers.sandbox_observable_helpers import ( + assign_and_explode_polars, _excluded_events_df, _aj_adjusted_events) + +censoring_assumption = "excluded" +competing_assumption = "adjusted_as_negative" +fixed_time_horizons = [10.0, 30.0, 50.0] + +reference_group_data = pl.DataFrame( + { + "strata": ["model"] * 10, + "reals": reals, + "times": times, + } +) + +exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) + +long_excluded_df = _excluded_events_df( + exploded, censoring_assumption, competing_assumption +).select( + pl.col("fixed_time_horizon"), + pl.col("real_censored_est"), + pl.col("real_competing_est") +).unpivot( + index = ["fixed_time_horizon"], + variable_name="state", + value_name="excluded_count" +).to_dicts() + +long_adjusted_df = _aj_adjusted_events( + reference_group_data, + exploded, + censoring_assumption, + competing_assumption, + fixed_time_horizons, + True, +).select( + pl.col("fixed_time_horizon"), + pl.col("real_negatives_est"), + pl.col("real_positives_est"), + pl.col("real_competing_est"), + pl.col("estimate_origin"), + pl.col("times") +).unpivot( + index = ["fixed_time_horizon", "times", "estimate_origin"], + variable_name="state", + value_name="aj_estimate" +).sort(pl.col("fixed_time_horizon")).to_dicts() + + + +ojs_define( + long_excluded_df = long_excluded_df, + long_adjusted_df = long_adjusted_df) + +``` + ``` {ojs} +//| echo: false + +long_excluded_df long_df +long_adjusted_df + +``` + +``` {ojs} +//| echo: false + +viewof timeHorizon_aj_estimate = Inputs.range([10, 50], { + step: 20, + value: 50, + label: "Time Horizon" +}) + +filteredLongAdjustedDf = long_adjusted_df.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) +filteredLongExcludedDf = long_excluded_df.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) + +filteredLongAdjustedDf +filteredLongExcludedDf + Plot.plot({ + style: { + background: "transparent" + }, x: { label: "Time", + domain: [0, 50] }, y: { label: "Aalen-Johansen Estimate", @@ -630,9 +730,12 @@ Plot.plot({ color: { legend: true, domain: [ - "state_occupancy_probability_1_at_times", - "state_occupancy_probability_2_at_times", - "overall_survival" + "real_positives_est", + "real_competing_est", + "real_negatives_est" + // "state_occupancy_probability_1", + // "state_occupancy_probability_2", + // "state_occupancy_probability_0" ], range: [ "#4C5454", @@ -641,22 +744,81 @@ Plot.plot({ ] }, marks: [ - Plot.lineY(long_df, { + Plot.lineY(filteredLongAdjustedDf.filter(d => 
d.estimate_origin === "event_table"), { x: "times", y: "aj_estimate", stroke: "state", curve: "step-after" }), - Plot.dot(long_df, { + Plot.dot(filteredLongAdjustedDf.filter(d => d.estimate_origin === "event_table"), { + x: "times", + y: "aj_estimate", + fill: "state", + r: 2.5, + tip: true + }), + Plot.dot(filteredLongAdjustedDf.filter(d => d.estimate_origin === "fixed_time_horizons"), { x: "times", y: "aj_estimate", fill: "state", + r: 10, + tip: true + }), + Plot.ruleY([0]) + ] +}) + + + +Plot.plot({ + style: { + background: "transparent" + }, + x: { + label: "Time", + domain: [0, 50] + }, + y: { + label: "Aalen-Johansen Estimate", + stack: false + }, + color: { + legend: true, + domain: [ + "real_competing_est", + "real_censored_est" + ], + range: [ + "#C880B7", + "#E3F09B", + ] + }, + marks: [ + Plot.lineY(long_excluded_df, { + x: "fixed_time_horizon", + y: "excluded_count", + stroke: "state", + curve: "step-after" + }), + Plot.dot(long_excluded_df, { + x: "fixed_time_horizon", + y: "excluded_count", + fill: "state", r: 2.5, tip: true }), + Plot.dot(filteredLongExcludedDf, { + x: "fixed_time_horizon", + y: "excluded_count", + fill: "state", + r: 10, + tip: true + }), Plot.ruleY([0]) ] }) ``` + + From 660997184d81b65c5e0b96c97a691eee0fa4c7cc Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 3 Aug 2025 11:24:11 +0300 Subject: [PATCH 30/51] docs: close #146 --- docs/before_we_validate.qmd | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index 101a818..ae5c87d 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -771,6 +771,7 @@ Plot.plot({ Plot.plot({ + height: 100, style: { background: "transparent" }, From e26bdc4031f76d8b4ec9715deb01e4e720a587ed Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 24 Aug 2025 20:56:25 +0300 Subject: [PATCH 31/51] docs: close #148 --- docs/before_we_validate.qmd | 115 ++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 39 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index ae5c87d..3a60dde 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -571,13 +571,6 @@ For each requried combination of reference_group x predictions_strata x fixed_ti The sum of the AJ estimates for each predictions_strata is equal to the overal AJ estimate. 
-``` {ojs} - -Inputs.radio(["Adjusted", "Excluded"], {label: "Censored Assumption"}) - -Inputs.radio(["Adjusted as Censored", "Adjusted as Negative", "Adjusted as Composite", "Excluded"], {label: "Competing Assumption"}) - -``` ```{python} import polars as pl @@ -612,19 +605,6 @@ long_df = predict_aj_estimates( value_name="aj_estimate" ).to_dicts() -# event_table_data = event_table.to_dicts() - -# long_df = event_table.select( -# pl.col("times"), -# pl.col("overall_survival"), -# pl.col("state_occupancy_probability_1_at_times"), -# pl.col("state_occupancy_probability_2_at_times") -# ).unpivot( -# index = ["times", "estimate_origin"], -# variable_name="state", -# value_name="aj_estimate" -# ).to_dicts() - ojs_define(long_df = long_df) @@ -635,8 +615,17 @@ ojs_define(long_df = long_df) from rtichoke.helpers.sandbox_observable_helpers import ( assign_and_explode_polars, _excluded_events_df, _aj_adjusted_events) -censoring_assumption = "excluded" -competing_assumption = "adjusted_as_negative" + +censoring_assumptions = ["excluded", "adjusted"] +competing_assumptions = [ + "excluded", + "adjusted_as_censored", + "adjusted_as_negative", + "adjusted_as_composite", +] + +censoring_assumption = "adjusted" #["excluded", "adjusted"] +competing_assumption = "adjusted_as_composite" fixed_time_horizons = [10.0, 30.0, 50.0] reference_group_data = pl.DataFrame( @@ -649,17 +638,34 @@ reference_group_data = pl.DataFrame( exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) -long_excluded_df = _excluded_events_df( - exploded, censoring_assumption, competing_assumption -).select( - pl.col("fixed_time_horizon"), - pl.col("real_censored_est"), - pl.col("real_competing_est") -).unpivot( - index = ["fixed_time_horizon"], - variable_name="state", - value_name="excluded_count" -).to_dicts() +def _long_excluded_for_combo(exploded: pl.DataFrame, ca: str, co: str) -> pl.DataFrame: + return ( + _excluded_events_df(exploded, ca, co) + .select("fixed_time_horizon", "real_censored_est", "real_competing_est") + .unpivot(index=["fixed_time_horizon"], variable_name="state", value_name="excluded_count") + .sort("fixed_time_horizon") + .with_columns([ + pl.lit(ca).alias("censoring_assumption"), + pl.lit(co).alias("competing_assumption"), + ]) + ) + + +excluded_frames = [ + _long_excluded_for_combo(exploded, ca, co) + for ca in censoring_assumptions + for co in competing_assumptions +] + +long_excluded_df = pl.concat(excluded_frames, how="vertical", rechunk=True).to_dicts() + + +print("long_excluded_df") +print(long_excluded_df) + + +censoring_assumption = "adjusted" #["excluded", "adjusted"] +competing_assumption = "adjusted_as_composite" long_adjusted_df = _aj_adjusted_events( reference_group_data, @@ -689,6 +695,8 @@ ojs_define( ``` + + ``` {ojs} //| echo: false @@ -700,17 +708,49 @@ long_adjusted_df ``` + + + ``` {ojs} //| echo: false +viewof censored_heuristics = Inputs.radio( + new Map([ + ["Adjusted", "adjusted"], + ["Excluded", "excluded"] + ]), + { + value: "adjusted", + label: "Censored Heuristic" + } +) + +viewof competing_heuristics = Inputs.radio( + new Map([ + ["Adjusted as Censored", "adjusted_as_censored"], + ["Adjusted as Negative", "adjusted_as_negative"], + ["Adjusted as Composite", "adjusted_as_composite"], + ["Excluded", "excluded"] + ]), + { + value: "adjusted_as_negative", + label: "Competing Heuristic" + } +) + viewof timeHorizon_aj_estimate = Inputs.range([10, 50], { step: 20, value: 50, label: "Time Horizon" }) +censored_heuristics +competing_heuristics + 
+long_excluded_df_heuristic_filtered = long_excluded_df.filter((d) => d.censoring_assumption == censored_heuristics && d.competing_assumption == competing_heuristics) + filteredLongAdjustedDf = long_adjusted_df.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) -filteredLongExcludedDf = long_excluded_df.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) +filteredLongExcludedDf = long_excluded_df_heuristic_filtered.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) filteredLongAdjustedDf filteredLongExcludedDf @@ -733,9 +773,6 @@ Plot.plot({ "real_positives_est", "real_competing_est", "real_negatives_est" - // "state_occupancy_probability_1", - // "state_occupancy_probability_2", - // "state_occupancy_probability_0" ], range: [ "#4C5454", @@ -795,13 +832,13 @@ Plot.plot({ ] }, marks: [ - Plot.lineY(long_excluded_df, { + Plot.lineY(long_excluded_df_heuristic_filtered, { x: "fixed_time_horizon", y: "excluded_count", stroke: "state", curve: "step-after" }), - Plot.dot(long_excluded_df, { + Plot.dot(long_excluded_df_heuristic_filtered, { x: "fixed_time_horizon", y: "excluded_count", fill: "state", From 48016b9a549a8bad896aae18304efb9e773d78a1 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 25 Aug 2025 18:30:36 +0300 Subject: [PATCH 32/51] docs: close #149 --- docs/before_we_validate.qmd | 164 ++++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 90 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index 3a60dde..cc5c5c1 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -6,13 +6,12 @@ format: toc: true --- -Ideally we would like to keep Performance Validation as agnostic as possible. -However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case. +Ideally we would like to keep Performance Validation as agnostic as possible. However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case. So before we validate performance, let us consider the underlying process. -✍️ The User Inputs -🪛 Internal Function +✍️ The User Inputs\ +🪛 Internal Function # ✍️ Declare reference groups @@ -20,26 +19,22 @@ The dimensions of the `probs` and the `reals` dictionaries imply the nature of th TODO: copy from rtichoke r README. -##### One Model, One Population: -- Just one reference group: "model". +##### One Model, One Population: + +- Just one reference group: "model". ##### Several Models, One Population: -Compare between different candidate models. -- Each model stand as a reference groups such as "thin" model, or a "full" model. +Compare between different candidate models. - Each model stands as a reference group, such as a "thin" model or a "full" model. ##### Several Models, Several Populations -Compare performance over different sub-populations. -- Internal Validation: "test", "val" and "train". -- External Validation: "Framingham", "Australia". -- Fairness: "Male", "Female". +Compare performance over different sub-populations. - Internal Validation: "test", "val" and "train". - External Validation: "Framingham", "Australia". - Fairness: "Male", "Female".
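A minimal sketch of the dictionary shapes these use cases imply. This is an editorial illustration, not the documented `rtichoke` API; the key names are taken from the prose above and the values are made up:

```python
# One model, one population: a single reference group.
probs = {"model": [0.10, 0.32, 0.90]}
reals = {"model": [0, 0, 1]}

# Several models, one population: one entry per candidate model.
probs = {"thin": [0.20, 0.45, 0.80], "full": [0.15, 0.55, 0.85]}
reals = {"thin": [0, 1, 1], "full": [0, 1, 1]}

# Several models, several populations: one entry per sub-population.
probs = {"train": [0.25, 0.60], "test": [0.35, 0.70]}
reals = {"train": [0, 1], "test": [1, 1]}
```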
# โœ๏ธ Declare how to stratify predictions โœ‚๏ธ The `stratified_by` argument is designed for the user to choose how to stratify predictions for decision-making, each method implies different problem: - ## Probability Threshold By choosing Probability Threshold as a cutoff the implied assumption is that you are concerned with individual harm or benefit. @@ -111,7 +106,6 @@ graph LR ``` - ### Baseline Strategy: Treat All ```{mermaid} @@ -179,14 +173,11 @@ graph LR ``` +*Regardless* of ranking each prediction is categorised to a bin: 0.32 -\> `[0.3, 0.4)`. -*Regardless* of ranking each prediction is categorised to a bin: 0.32 -> `[0.3, 0.4)`. - -1. Categorise Absolute Risk: 0.32 -> `[0.3, 0.4)` +1. Categorise Absolute Risk: 0.32 -\> `[0.3, 0.4)` - -References: -Pauker SG, Kassirer JP. Therapeutic decision making: a cost-benefit analysis. N Engl J Med. 1975;293(5):229-234. doi:10.1056/NEJM197507312930505 +References: Pauker SG, Kassirer JP. Therapeutic decision making: a cost-benefit analysis. N Engl J Med. 1975;293(5):229-234. doi:10.1056/NEJM197507312930505 ## PPCR @@ -223,38 +214,35 @@ graph LR ``` - By choosing PPCR as a cutoff the implied assumption is that you are concerned with resource constraint and assume no individual treatment harm. -*Regarding* the ranking each prediction is categorised to a bin: if the absolute probability 0.32 is the 18th highest predictions out of 100, it will be categorised to the second decile -> `0.18`. +*Regarding* the ranking each prediction is categorised to a bin: if the absolute probability 0.32 is the 18th highest predictions out of 100, it will be categorised to the second decile -\> `0.18`. -1. Calculate Risk-Quantile from Absolute Risk: 0.32 -> `0.18` +1. Calculate Risk-Quantile from Absolute Risk: 0.32 -\> `0.18` -References: -https://en.wikipedia.org/wiki/Precision_and_recall +References: https://en.wikipedia.org/wiki/Precision_and_recall - # โœ๏ธ Declare Fixed Time Horizons ๐ŸŒ… (๐Ÿ“…๐Ÿคฌ) The `fixed_time_horizons` argument is designed for the user to choose the set of time horizons to follow. -Different followups contain different distributions of observed outcomes: Declare fixed time horizons for the prediction model, such as [5, 10] years of prediction for CVD evet. - - +Different followups contain different distributions of observed outcomes: Declare fixed time horizons for the prediction model, such as \[5, 10\] years of prediction for CVD evet. ## ๐Ÿช› Update Administrative Censorng For cases with observed time-to-event is shorter than the prediction time horizon, the outcomes might change: -- `Real Positives` ๐Ÿคข should be considered as `Real Negatives` ๐Ÿคจ, the outcome of interest did not happen yet. -- Always included and Encoded as 0. +- `Real Positives` ๐Ÿคข should be considered as `Real Negatives` ๐Ÿคจ, the outcome of interest did not happen yet. + +- Always included and Encoded as 0. -- `Real Neagtives` ๐Ÿคจ should be considered as `Real Censored` ๐Ÿคฌ, the event of interest could have happened in the gap between the observed time and the fixed time horizon. -- If adjusted: encoded as 0. -- If excluded: counted with crude estimate. +- `Real Neagtives` ๐Ÿคจ should be considered as `Real Censored` ๐Ÿคฌ, the event of interest could have happened in the gap between the observed time and the fixed time horizon. +- If adjusted: encoded as 0. + +- If excluded: counted with crude estimate. 
```{python} #| echo: false import numpy as np times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 4.3, 31.5]) reals = np.array([1, 1, 1, 1, 0, 2, 1, 2, 0, 1]) time_horizons = [10, 30, 50] # Icons def get_icon(outcome, t, h): @@ -346,7 +334,6 @@ Plot.plot({ ``` - # Declare Heuristics Regarding ambiguous `reals` ## ✍️ Declare Heuristics Regarding Censored Events 📅🤬 @@ -404,8 +391,7 @@ graph LR All censored events to be excluded. -Underlying Assumption: Small amount of censored events. -Violation of the assumption leads to: Overestimation of the observed outcomes. +Underlying Assumption: Small amount of censored events. Violation of the assumption leads to: Overestimation of the observed outcomes. ### Adjust Censored as partially seen Non-Event Observed outcomes for each strata are estimated using the AJ-estimate (equivalen TODO: Link to article -Underlying Assumption: Independent Censoring. -Violation of the assumption leads to: Biased estimate for observed outcomes. +Underlying Assumption: Independent Censoring. Violation of the assumption leads to: Biased estimate for observed outcomes. # ✍️ Declare Heuristics Regarding Competing Events 📅💀 @@ -452,8 +437,7 @@ graph LR All competing events to be excluded. -Underlying Assumption: Small amount of competing events. -Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. +Underlying Assumption: Small amount of competing events. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. ### Adjust Competing Events as Censored (Partially seen Negatives) @@ -483,11 +467,9 @@ graph LR ``` - All competing events to be treated as censored. -Underlying Assumption: We consider a patient experiencing a competing event equivalent to independent censoring. -Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. +Underlying Assumption: We consider a patient experiencing a competing event equivalent to independent censoring. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. ### Adjust Competing Events as Negatives (Definite Non-Events) @@ -497,9 +479,7 @@ In a way, a patient experiencing a competing event is "more" of a "real-negative This is derived from the assumed state-convention -Beyond the horizon time the following transition is possible: -`Real Neagtives` 🤨 => `Real Positives` 🤢 -💀 2 +Beyond the horizon time the following transition is possible: `Real Negatives` 🤨 =\> `Real Positives` 🤢 💀 2 ```{mermaid} @@ -524,8 +504,7 @@ subgraph adj[Adjusted for Censoring] ``` -Underlying Assumption: We consider a patient experiencing a competing event as a definite non-event. -Violation of the assumption leads to Underestimation of the observed outcomes if a competing event can be considered as a different form of the primary event. +Underlying Assumption: We consider a patient experiencing a competing event as a definite non-event. Violation of the assumption leads to Underestimation of the observed outcomes if a competing event can be considered as a different form of the primary event. ### Adjust Competing Events as Composite (Positives) @@ -559,8 +538,6 @@ There is no need for an underlying assumption, the answer is explicit. This heuristic can be seen as a different underlying question - what is the probability of having any type of event, Primary or Competing? - - # What rtichoke from now on?
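Before moving on, a compact sketch of the `reals` recodings that the competing-event heuristics above imply. It mirrors the branches of `_aj_adjusted_events` added earlier in this series, with the per-horizon exclusion logic omitted for brevity:

```python
import polars as pl

df = pl.DataFrame({"reals": [0, 1, 2, 2], "times": [40.0, 20.0, 15.0, 45.0]})

recoded = {
    # competing events stay a separate state and the AJ estimate reports them
    "adjusted_as_negative": df,
    # competing events are recoded to 0, i.e. treated like censoring
    "adjusted_as_censored": df.with_columns(
        pl.when(pl.col("reals") == 2)
        .then(pl.lit(0))
        .otherwise(pl.col("reals"))
        .alias("reals")
    ),
    # competing events are recoded to 1, i.e. folded into a composite outcome
    "adjusted_as_composite": df.with_columns(
        pl.when(pl.col("reals") == 2)
        .then(pl.lit(1))
        .otherwise(pl.col("reals"))
        .alias("reals")
    ),
    # competing events are dropped and reported with the crude excluded counts
    "excluded": df.filter(pl.col("reals") != 2),
}

for name, frame in recoded.items():
    print(name, frame["reals"].to_list())
```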
## Render Predictions Histogram @@ -571,7 +548,6 @@ For each requried combination of reference_group x predictions_strata x fixed_ti The sum of the AJ estimates for each predictions_strata is equal to the overal AJ estimate. - ```{python} import polars as pl from rtichoke.helpers.sandbox_observable_helpers import ( @@ -610,7 +586,6 @@ ojs_define(long_df = long_df) ``` - ```{python} from rtichoke.helpers.sandbox_observable_helpers import ( assign_and_explode_polars, _excluded_events_df, _aj_adjusted_events) @@ -650,6 +625,42 @@ def _long_excluded_for_combo(exploded: pl.DataFrame, ca: str, co: str) -> pl.Dat ]) ) +def _long_adjusted_for_combo(reference_group_data: pl.DataFrame, exploded: pl.DataFrame, ca: str, co: str, fixed_time_horizons: list[float]) -> pl.DataFrame: + return ( + _aj_adjusted_events( + reference_group_data, + exploded, + ca, + co, + fixed_time_horizons, + True, + ) + .select( + pl.col("fixed_time_horizon"), + # pl.col("real_negatives_est"), + # pl.col("real_positives_est"), + # pl.col("real_competing_est"), + pl.col(r"^real_.*_est$"), + pl.col("estimate_origin"), + pl.col("times") + ) + .unpivot( + index = ["fixed_time_horizon", "times", "estimate_origin"], + variable_name="state", + value_name="aj_estimate" + ) + .sort(pl.col("fixed_time_horizon")) + .with_columns([ + pl.lit(ca).alias("censoring_assumption"), + pl.lit(co).alias("competing_assumption"), + ]) + ) + +adjusted_frames = [ + _long_adjusted_for_combo(reference_group_data, exploded, ca, co, fixed_time_horizons) + for ca in censoring_assumptions + for co in competing_assumptions +] excluded_frames = [ _long_excluded_for_combo(exploded, ca, co) @@ -659,45 +670,19 @@ excluded_frames = [ long_excluded_df = pl.concat(excluded_frames, how="vertical", rechunk=True).to_dicts() +long_adjusted_df = pl.concat(adjusted_frames, how="vertical", rechunk=True).to_dicts() print("long_excluded_df") print(long_excluded_df) -censoring_assumption = "adjusted" #["excluded", "adjusted"] -competing_assumption = "adjusted_as_composite" - -long_adjusted_df = _aj_adjusted_events( - reference_group_data, - exploded, - censoring_assumption, - competing_assumption, - fixed_time_horizons, - True, -).select( - pl.col("fixed_time_horizon"), - pl.col("real_negatives_est"), - pl.col("real_positives_est"), - pl.col("real_competing_est"), - pl.col("estimate_origin"), - pl.col("times") -).unpivot( - index = ["fixed_time_horizon", "times", "estimate_origin"], - variable_name="state", - value_name="aj_estimate" -).sort(pl.col("fixed_time_horizon")).to_dicts() - - - ojs_define( long_excluded_df = long_excluded_df, long_adjusted_df = long_adjusted_df) ``` - - -``` {ojs} +```{ojs} //| echo: false long_excluded_df @@ -708,10 +693,7 @@ long_adjusted_df ``` - - - -``` {ojs} +```{ojs} //| echo: false viewof censored_heuristics = Inputs.radio( @@ -749,7 +731,12 @@ competing_heuristics long_excluded_df_heuristic_filtered = long_excluded_df.filter((d) => d.censoring_assumption == censored_heuristics && d.competing_assumption == competing_heuristics) -filteredLongAdjustedDf = long_adjusted_df.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) +filteredLongAdjustedDf_heuristic = long_adjusted_df.filter( + (d) => d.censoring_assumption == censored_heuristics && d.competing_assumption == competing_heuristics +) + +filteredLongAdjustedDf = filteredLongAdjustedDf_heuristic.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) + filteredLongExcludedDf = long_excluded_df_heuristic_filtered.filter((d) => d.fixed_time_horizon == 
timeHorizon_aj_estimate) filteredLongAdjustedDf @@ -856,7 +843,4 @@ Plot.plot({ ] }) -``` - - - +``` \ No newline at end of file From 3b28ddb8fc399335b4ead1e1514bdc90e65e9ede Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Wed, 17 Sep 2025 16:38:02 +0300 Subject: [PATCH 33/51] refactor: close #153 --- .../helpers/sandbox_observable_helpers.py | 82 +++++++++++-------- tests/test_rtichoke.py | 32 +------- 2 files changed, 51 insertions(+), 63 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 71ad4e8..f7114a4 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -439,8 +439,8 @@ def update_administrative_censoring_polars(data: pl.DataFrame) -> pl.DataFrame: def create_aj_data( reference_group_data, - censoring_assumption, - competing_assumption, + censoring_heuristic, + competing_heuristic, fixed_time_horizons, full_event_table: bool = False, ): @@ -459,18 +459,22 @@ def aj_estimates_with_cross(df, extra_cols): print("exploded") print(exploded) - excluded_df = _excluded_events_df( - exploded, censoring_assumption, competing_assumption + event_table = prepare_event_table(reference_group_data) + + # TODO: solve strata in the pipeline + + excluded_events = _extract_excluded_events( + event_table, fixed_time_horizons, censoring_heuristic, competing_heuristic ) - print("excluded_df") - print(excluded_df) + print("excluded_events") + print(excluded_events) aj_df = _aj_adjusted_events( reference_group_data, exploded, - censoring_assumption, - competing_assumption, + censoring_heuristic, + competing_heuristic, fixed_time_horizons, full_event_table, ) @@ -478,7 +482,7 @@ def aj_estimates_with_cross(df, extra_cols): print("aj_df") print(aj_df.sort(pl.col("fixed_time_horizon"))) - result = aj_df.join(excluded_df, on=["strata", "fixed_time_horizon"], how="left") + result = aj_df.join(excluded_events, on=["fixed_time_horizon"], how="left") print("result") print(result.sort(pl.col("fixed_time_horizon"))) @@ -486,8 +490,8 @@ def aj_estimates_with_cross(df, extra_cols): return aj_estimates_with_cross( result, { - "censoring_assumption": censoring_assumption, - "competing_assumption": competing_assumption, + "censoring_assumption": censoring_heuristic, + "competing_assumption": competing_heuristic, }, ).select( [ @@ -505,6 +509,40 @@ def aj_estimates_with_cross(df, extra_cols): ) +def _extract_excluded_events( + event_table: pl.DataFrame, + fixed_time_horizons: list[float], + censoring_heuristic: str, + competing_heuristic: str, +) -> pl.DataFrame: + horizons_df = pl.DataFrame({"times": fixed_time_horizons}).sort("times") + + excluded_events = horizons_df.join_asof( + event_table.with_columns( + pl.col("count_0").cum_sum().cast(pl.Float64).alias("real_censored_est"), + pl.col("count_2").cum_sum().cast(pl.Float64).alias("real_competing_est"), + ).select( + pl.col("times"), + pl.col("real_censored_est"), + pl.col("real_competing_est"), + ), + left_on="times", + right_on="times", + ).with_columns([pl.col("times").alias("fixed_time_horizon")]) + + if censoring_heuristic != "excluded": + excluded_events = excluded_events.with_columns( + pl.lit(0.0).alias("real_censored_est") + ) + + if competing_heuristic != "excluded": + excluded_events = excluded_events.with_columns( + pl.lit(0.0).alias("real_competing_est") + ) + + return excluded_events + + def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: all_combinations = data.select(["strata", 
"reals", "fixed_time_horizon"]).unique() @@ -848,28 +886,6 @@ def _aj_estimates_per_horizon( ) -def _excluded_events_df( - exploded: pl.DataFrame, censoring: str, competing: str -) -> pl.DataFrame: - base = exploded.select(["strata", "fixed_time_horizon"]).unique() - censored = ( - _censored_count(exploded) - if censoring == "excluded" - else base.with_columns(pl.lit(0.0).alias("real_censored_est")) - ) - competing_df = ( - _competing_count(exploded) - if competing == "excluded" - else base.with_columns(pl.lit(0.0).alias("real_competing_est")) - ) - - return ( - base.join(censored, on=["strata", "fixed_time_horizon"], how="left") - .join(competing_df, on=["strata", "fixed_time_horizon"], how="left") - .fill_null(0.0) - ).with_columns([pl.col("fixed_time_horizon").alias("times")]) - - def _aj_adjusted_events( reference_group_data: pl.DataFrame, exploded: pl.DataFrame, diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index af78825..167ab10 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -7,7 +7,6 @@ extract_aj_estimate_for_strata, assign_and_explode_polars, _aj_adjusted_events, - _excluded_events_df, ) # from rtichoke import rtichoke @@ -164,8 +163,8 @@ def test_create_aj_data( result = create_aj_data( df, - censoring_assumption=censoring_assumption, - competing_assumption=competing_assumption, + censoring_heuristic=censoring_assumption, + competing_heuristic=competing_assumption, fixed_time_horizons=horizons, ).sort("fixed_time_horizon") @@ -329,30 +328,3 @@ def test_aj_adjusted_events(censoring: str, competing: str) -> None: include_comp = competing != "excluded" expected = _expected_aj_df(neg, pos, comp_vals, include_comp) assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "censoring, competing", - [ - (c, cc) - for c in ["adjusted", "excluded"] - for cc in [ - "adjusted_as_negative", - "adjusted_as_censored", - "adjusted_as_composite", - "excluded", - ] - ], -) -def test_excluded_events_df(censoring: str, competing: str) -> None: - df = pl.DataFrame( - {"strata": ["group1"] * len(TIMES), "reals": REALS, "times": TIMES} - ) - exploded = assign_and_explode_polars(df, TIME_HORIZONS) - - result = _excluded_events_df(exploded, censoring, competing).sort( - "fixed_time_horizon" - ) - - expected = _expected_excluded_df(censoring, competing) - assert_frame_equal(result, expected) From 54dbca2ce35230d1909d4805ee5e5c6590748ae5 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 18 Sep 2025 19:18:34 +0300 Subject: [PATCH 34/51] docs: close #150 --- docs/before_we_validate.qmd | 178 +++++++++++++++++++++++------------- 1 file changed, 113 insertions(+), 65 deletions(-) diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index cc5c5c1..ccfea6b 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -249,9 +249,9 @@ For cases with observed time-to-event is shorter than the prediction time horizo import numpy as np -times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 4.3, 31.5]) +times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3]) reals = np.array([1, 1, 1, 1, 0, 2, 1, 2, 0, 1]) -time_horizons = [10, 30, 50] +time_horizons = [10, 20, 30, 40, 50] # Icons def get_icon(outcome, t, h): @@ -542,7 +542,7 @@ This heuristic is can be seen as a different underlying question - what is the p ## Render Predictions Histogram -### Extract AJ Estimate by Assumptions +### Extract AJ Estimate by Assumptions For each requried combination of reference_group x predictions_strata x 
fixed_time_horizons x censored_heuristic x competing_heuristic a separate AJ estimated is calculated for the adjusted `reals` and a Crude estimate is calculated for the excluded `reals`. @@ -567,7 +567,7 @@ event_table = prepare_event_table(times_and_reals) long_df = predict_aj_estimates( event_table, - [10.0, 30.0, 50.0], + [10.0, 20.0, 30.0, 40.0, 50.0], True ).select( pl.col("times"), @@ -588,7 +588,7 @@ ojs_define(long_df = long_df) ```{python} from rtichoke.helpers.sandbox_observable_helpers import ( - assign_and_explode_polars, _excluded_events_df, _aj_adjusted_events) + assign_and_explode_polars, _extract_excluded_events, _aj_adjusted_events) censoring_assumptions = ["excluded", "adjusted"] @@ -601,7 +601,7 @@ competing_assumptions = [ censoring_assumption = "adjusted" #["excluded", "adjusted"] competing_assumption = "adjusted_as_composite" -fixed_time_horizons = [10.0, 30.0, 50.0] +fixed_time_horizons = [10.0, 20.0, 30.0, 40.0, 50.0] reference_group_data = pl.DataFrame( { @@ -613,9 +613,9 @@ reference_group_data = pl.DataFrame( exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) -def _long_excluded_for_combo(exploded: pl.DataFrame, ca: str, co: str) -> pl.DataFrame: +def _long_excluded_for_combo(event_table: pl.DataFrame, fixed_time_horizons, ca: str, co: str) -> pl.DataFrame: return ( - _excluded_events_df(exploded, ca, co) + _extract_excluded_events(event_table, fixed_time_horizons, ca, co) .select("fixed_time_horizon", "real_censored_est", "real_competing_est") .unpivot(index=["fixed_time_horizon"], variable_name="state", value_name="excluded_count") .sort("fixed_time_horizon") @@ -643,7 +643,7 @@ def _long_adjusted_for_combo(reference_group_data: pl.DataFrame, exploded: pl.Da pl.col(r"^real_.*_est$"), pl.col("estimate_origin"), pl.col("times") - ) + ).filter(pl.col("times")<=pl.col("fixed_time_horizon")) .unpivot( index = ["fixed_time_horizon", "times", "estimate_origin"], variable_name="state", @@ -663,7 +663,7 @@ adjusted_frames = [ ] excluded_frames = [ - _long_excluded_for_combo(exploded, ca, co) + _long_excluded_for_combo(event_table, fixed_time_horizons, ca, co) for ca in censoring_assumptions for co in competing_assumptions ] @@ -672,26 +672,12 @@ long_excluded_df = pl.concat(excluded_frames, how="vertical", rechunk=True).to_d long_adjusted_df = pl.concat(adjusted_frames, how="vertical", rechunk=True).to_dicts() -print("long_excluded_df") -print(long_excluded_df) - - ojs_define( long_excluded_df = long_excluded_df, long_adjusted_df = long_adjusted_df) ``` -```{ojs} -//| echo: false - -long_excluded_df - -long_df - -long_adjusted_df - -``` ```{ojs} //| echo: false @@ -721,7 +707,7 @@ viewof competing_heuristics = Inputs.radio( ) viewof timeHorizon_aj_estimate = Inputs.range([10, 50], { - step: 20, + step: 10, value: 50, label: "Time Horizon" }) @@ -739,8 +725,82 @@ filteredLongAdjustedDf = filteredLongAdjustedDf_heuristic.filter((d) => d.fixed_ filteredLongExcludedDf = long_excluded_df_heuristic_filtered.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate) -filteredLongAdjustedDf -filteredLongExcludedDf +//filteredLongAdjustedDf +//filteredLongExcludedDf + +//filteredLongAdjustedDf.filter(d => d.estimate_origin === "event_table") + +``` + + +::: {.panel-tabset} + +#### Stacked + +```{ojs} +//| echo: false + +Plot.plot({ + style: { background: "transparent" }, + x: { label: "Time", domain: [0, 50] }, + y: { label: "Aalen-Johansen Estimate", stack: true, domain: [0, 10] }, + color: { + legend: true, + domain: ["real_positives_est", 
"real_competing_est", "real_negatives_est"], + range: ["#4C5454", "#C880B7", "#E0E0E0"] + }, + marks: [ + Plot.areaY( + filteredLongAdjustedDf.filter(d => d.estimate_origin === "event_table"), + { + x: "times", + y: "aj_estimate", + fill: "state", + curve: "step-after", + tip: true + } + ), + + Plot.ruleY([0]) + ] +}) + + + + +Plot.plot({ + height: 220, + style: { background: "transparent" }, + x: { label: "Time", domain: [0, 50] }, // bars extend from x=0 to horizon + y: { label: "Excluded count", domain: [0, 4] }, + color: { + legend: true, + domain: ["real_competing_est", "real_censored_est"], + range: ["#C880B7", "#E3F09B"] + }, + marks: [ + Plot.rectX( + filteredLongExcludedDf, + Plot.stackY({ + x1: 0, // always start from 0 + x2: "fixed_time_horizon", // extend to horizon + y: "excluded_count", // stack along y + fill: "state", + tip: true + }) + ), + Plot.ruleX([0]) // reference at x=0 + ] +}) + +``` + +#### Non-Stacked + +```{ojs} +//| echo: false + +// TODO: Add start point 0 and end-point fixed time horizons Plot.plot({ style: { @@ -752,7 +812,8 @@ Plot.plot({ }, y: { label: "Aalen-Johansen Estimate", - stack: false + stack: false, + domain: [0, 10] }, color: { legend: true, @@ -793,54 +854,41 @@ Plot.plot({ }) - Plot.plot({ - height: 100, - style: { - background: "transparent" - }, - x: { - label: "Time", - domain: [0, 50] - }, - y: { - label: "Aalen-Johansen Estimate", - stack: false - }, + height: 120, + style: { background: "transparent" }, + x: { label: "Time", domain: [0, 50] }, + y: { label: "Aalen-Johansen Estimate", + domain: [0, 2] }, color: { - legend: true, - domain: [ - "real_competing_est", - "real_censored_est" - ], - range: [ - "#C880B7", - "#E3F09B", - ] + legend: true, + domain: ["real_competing_est", "real_censored_est"], + range: ["#C880B7", "#E3F09B"] }, marks: [ - Plot.lineY(long_excluded_df_heuristic_filtered, { - x: "fixed_time_horizon", - y: "excluded_count", - stroke: "state", - curve: "step-after" - }), - Plot.dot(long_excluded_df_heuristic_filtered, { - x: "fixed_time_horizon", - y: "excluded_count", - fill: "state", - r: 2.5, - tip: true - }), Plot.dot(filteredLongExcludedDf, { x: "fixed_time_horizon", y: "excluded_count", fill: "state", r: 10, + fillOpacity: 0.85, + strokeWidth: 1.5, tip: true }), - Plot.ruleY([0]) + + // horizontal lines from x=0 to x=fixed_time_horizon + Plot.ruleY(filteredLongExcludedDf, { + x1: 0, + x2: "fixed_time_horizon", + y: "excluded_count", + stroke: "state", + strokeOpacity: 0.6 + }), + Plot.ruleY([0]) ] }) -``` \ No newline at end of file +``` + + +::: \ No newline at end of file From 9a574bb5aaf05cc43aa18f34644836c6f9417e0f Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 21 Sep 2025 10:31:15 +0300 Subject: [PATCH 35/51] fix: drop estimate_origin and times columns --- .../helpers/sandbox_observable_helpers.py | 30 ++----------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index f7114a4..092f8ca 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -451,14 +451,8 @@ def create_aj_data( def aj_estimates_with_cross(df, extra_cols): return df.join(pl.DataFrame(extra_cols), how="cross") - print("reference_group_data") - print(reference_group_data) - exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) - print("exploded") - print(exploded) - event_table = prepare_event_table(reference_group_data) # 
TODO: solve strata in the pipeline @@ -467,9 +461,6 @@ def aj_estimates_with_cross(df, extra_cols): event_table, fixed_time_horizons, censoring_heuristic, competing_heuristic ) - print("excluded_events") - print(excluded_events) - aj_df = _aj_adjusted_events( reference_group_data, exploded, @@ -479,14 +470,8 @@ def aj_estimates_with_cross(df, extra_cols): full_event_table, ) - print("aj_df") - print(aj_df.sort(pl.col("fixed_time_horizon"))) - result = aj_df.join(excluded_events, on=["fixed_time_horizon"], how="left") - print("result") - print(result.sort(pl.col("fixed_time_horizon"))) - return aj_estimates_with_cross( result, { @@ -590,12 +575,6 @@ def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: b event_table, pl.Series(horizons), full_event_table ) - print("horizons") - print(horizons) - - print("len(horizons)") - print(len(horizons)) - if len(horizons) == 1: aj_estimate_for_strata_polars = aj_estimate_for_strata_polars.with_columns( pl.lit(horizons[0]).alias("fixed_time_horizon") @@ -618,9 +597,6 @@ def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: b [fixed_df, event_df], how="vertical" ).sort("estimate_origin", "fixed_time_horizon", "times") - print("aj_estimate_for_strata_polars") - print(aj_estimate_for_strata_polars) - return aj_estimate_for_strata_polars.with_columns( [ (pl.col("state_occupancy_probability_0") * n).alias("real_negatives_est"), @@ -737,12 +713,10 @@ def extract_aj_estimate_by_assumptions( pl.lit(competing).alias("competing_assumption"), ] ) - print( - f"Assumption: censoring={censoring}, competing={competing}, rows={aj_df.height}" - ) + aj_dfs.append(aj_df) - aj_estimates_data = pl.concat(aj_dfs) + aj_estimates_data = pl.concat(aj_dfs).drop(["estimate_origin", "times"]) aj_estimates_unpivoted = aj_estimates_data.unpivot( index=[ From d3846d23651ed61a6a78f30fe989b5d1f724bd57 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 25 Sep 2025 08:37:10 +0300 Subject: [PATCH 36/51] refactor: close #158 --- .../helpers/sandbox_observable_helpers.py | 159 ++++-------------- 1 file changed, 34 insertions(+), 125 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 092f8ca..3dc0cc0 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -1,11 +1,18 @@ from lifelines import AalenJohansenFitter import pandas as pd import numpy as np -import itertools import polars as pl from polarstate import predict_aj_estimates from polarstate import prepare_event_table from typing import Dict, Union +from collections.abc import Sequence + + +def _enum_dataframe(column_name: str, values: Sequence[str]) -> pl.DataFrame: + """Create a single-column DataFrame with an enum dtype.""" + enum_values = list(dict.fromkeys(values)) + enum_dtype = pl.Enum(enum_values) + return pl.DataFrame({column_name: pl.Series(values, dtype=enum_dtype)}) def extract_aj_estimate(data_to_adjust, fixed_time_horizons): @@ -142,7 +149,7 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame: return pl.concat(transformed_groups) -def create_strata_combinations_polars(stratified_by: str, by: float) -> pl.DataFrame: +def create_strata_combinations(stratified_by: str, by: float) -> pl.DataFrame: if stratified_by == "probability_threshold": breaks = create_breaks_values(None, "probability_threshold", by) @@ -213,59 +220,6 @@ def format_strata_interval( return f"{left}{lower:.3f}, {upper:.3f}{right}" -def 
create_strata_combinations(stratified_by, by): - if stratified_by == "probability_threshold": - upper_bound = create_breaks_values(None, "probability_threshold", by) - lower_bound = np.roll(upper_bound, 1) - lower_bound[0] = 0 - mid_point = upper_bound - by / 2 - include_lower_bound = lower_bound == 0 - include_upper_bound = upper_bound != 0 - strata = [ - f"{'[' if include_lower else '('}{lower}, {upper}{']' if include_upper else ')'}" - for include_lower, lower, upper, include_upper in zip( - include_lower_bound, lower_bound, upper_bound, include_upper_bound - ) - ] - chosen_cutoff = upper_bound - elif stratified_by == "ppcr": - strata = create_breaks_values(None, "probability_threshold", by)[1:] - lower_bound = strata - by - upper_bound = strata + by - mid_point = upper_bound - by / 2 - include_lower_bound = np.ones_like(strata, dtype=bool) - include_upper_bound = np.zeros_like(strata, dtype=bool) - chosen_cutoff = strata - return pd.DataFrame( - { - "strata": strata, - "lower_bound": lower_bound, - "upper_bound": upper_bound, - "mid_point": mid_point, - "include_lower_bound": include_lower_bound, - "include_upper_bound": include_upper_bound, - "chosen_cutoff": chosen_cutoff, - "stratified_by": stratified_by, - } - ) - - -def create_breaks_values_polars(probs_vec, stratified_by, by): - # Ensure probs_vec is a NumPy array (in case it's a Polars Series) - if hasattr(probs_vec, "to_numpy"): - probs_vec = probs_vec.to_numpy() - - if stratified_by != "probability_threshold": - # Quantile-based bin edges (descending) - breaks = np.quantile(probs_vec, np.linspace(1, 0, int(1 / by) + 1)) - else: - # Fixed-width bin edges (ascending) - decimal_places = len(str(by).split(".")[-1]) - breaks = np.round(np.arange(0, 1 + by, by), decimals=decimal_places) - - return breaks - - def create_breaks_values(probs_vec, stratified_by, by): if stratified_by != "probability_threshold": breaks = np.quantile(probs_vec, np.linspace(1, 0, int(1 / by) + 1)) @@ -277,19 +231,18 @@ def create_breaks_values(probs_vec, stratified_by, by): def create_aj_data_combinations( - reference_groups, fixed_time_horizons, stratified_by, by -): - # Create strata combinations using Polars - strata_combinations_list = [ - create_strata_combinations_polars(x, by) for x in stratified_by - ] - strata_combinations = pl.concat(strata_combinations_list, how="vertical") - - strata_labels = strata_combinations["strata"] - strata_enum = pl.Enum(strata_labels) + reference_groups: Sequence[str], + fixed_time_horizons: Sequence[float], + stratified_by: Sequence[str], + by: float, +) -> pl.DataFrame: + strata_combinations = pl.concat( + [create_strata_combinations(value, by) for value in stratified_by], + how="vertical", + ) - stratified_by_labels = ["probability_threshold", "ppcr"] - stratified_by_enum = pl.Enum(stratified_by_labels) + strata_enum = pl.Enum(strata_combinations["strata"]) + stratified_by_enum = pl.Enum(["probability_threshold", "ppcr"]) strata_combinations = strata_combinations.with_columns( [ @@ -305,25 +258,8 @@ def create_aj_data_combinations( "real_competing", "real_censored", ] - reals_enum = pl.Enum(reals_labels) - df_reals = pl.DataFrame({"reals_labels": pl.Series(reals_labels, dtype=reals_enum)}) - df_reference_groups = pl.DataFrame( - { - "reference_group": pl.Series( - reference_groups, dtype=pl.Enum(reference_groups) - ) - } - ) censoring_assumptions_labels = ["excluded", "adjusted"] - censoring_assumptions_enum = pl.Enum(censoring_assumptions_labels) - df_censoring_assumptions = pl.DataFrame( - { - 
"censoring_assumption": pl.Series( - censoring_assumptions_labels, dtype=censoring_assumptions_enum - ) - } - ) competing_assumptions_labels = [ "excluded", @@ -331,50 +267,23 @@ def create_aj_data_combinations( "adjusted_as_censored", "adjusted_as_composite", ] - competing_assumptions_enum = pl.Enum(competing_assumptions_labels) - df_competing_assumptions = pl.DataFrame( - { - "competing_assumption": pl.Series( - competing_assumptions_labels, dtype=competing_assumptions_enum - ) - } - ) - # Create all combinations - combinations = list( - itertools.product( - # reference_groups, - fixed_time_horizons, - # censoring_assumptions, - # competing_assumptions - ) - ) + combinations_frames: list[pl.DataFrame] = [ + _enum_dataframe("reference_group", reference_groups), + pl.DataFrame( + {"fixed_time_horizon": pl.Series(fixed_time_horizons, dtype=pl.Float64)} + ), + _enum_dataframe("censoring_assumption", censoring_assumptions_labels), + _enum_dataframe("competing_assumption", competing_assumptions_labels), + strata_combinations, + _enum_dataframe("reals_labels", reals_labels), + ] - df_combinations = pl.DataFrame( - combinations, - schema=[ - # "reference_group", # str - "fixed_time_horizon", # cast to Float64 - # "censoring_assumption", # str - # "competing_assumption" # str - ], - ).with_columns( - [ - pl.col("fixed_time_horizon").cast(pl.Float64), - # pl.col("censoring_assumption").cast(pl.String), - # pl.col("competing_assumption").cast(pl.String), - # pl.col("reference_group").cast(pl.String) - ] - ) + result = combinations_frames[0] + for frame in combinations_frames[1:]: + result = result.join(frame, how="cross") - # Cross join (cartesian product) with strata_combinations - return ( - df_reference_groups.join(df_combinations, how="cross") - .join(df_censoring_assumptions, how="cross") - .join(df_competing_assumptions, how="cross") - .join(strata_combinations, how="cross") - .join(df_reals, how="cross") - ) + return result def pivot_longer_strata(data: pl.DataFrame) -> pl.DataFrame: From 5fcab7683380787cbe56c46c68b5610c0a6d8ed9 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 25 Sep 2025 10:00:37 +0300 Subject: [PATCH 37/51] feat: close #160 --- .../helpers/sandbox_observable_helpers.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 3dc0cc0..e7bf645 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -150,16 +150,16 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame: def create_strata_combinations(stratified_by: str, by: float) -> pl.DataFrame: - if stratified_by == "probability_threshold": - breaks = create_breaks_values(None, "probability_threshold", by) + breaks = create_breaks_values(None, "probability_threshold", by) + if stratified_by == "probability_threshold": upper_bound = breaks[1:] # breaks lower_bound = breaks[:-1] # np.roll(upper_bound, 1) # lower_bound[0] = 0.0 mid_point = upper_bound - by / 2 include_lower_bound = lower_bound > -0.1 include_upper_bound = upper_bound == 1.0 # upper_bound != 0.0 - chosen_cutoff = upper_bound + # chosen_cutoff = upper_bound strata = format_strata_column( lower_bound=lower_bound, upper_bound=upper_bound, @@ -169,18 +169,18 @@ def create_strata_combinations(stratified_by: str, by: float) -> pl.DataFrame: ) elif stratified_by == "ppcr": - strata_mid = create_breaks_values(None, "probability_threshold", by)[1:] + 
strata_mid = breaks[1:] lower_bound = strata_mid - by upper_bound = strata_mid + by mid_point = upper_bound - by include_lower_bound = np.ones_like(strata_mid, dtype=bool) include_upper_bound = np.zeros_like(strata_mid, dtype=bool) - chosen_cutoff = strata_mid + # chosen_cutoff = strata_mid strata = np.round(mid_point, 3).astype(str) else: raise ValueError(f"Unsupported stratified_by: {stratified_by}") - return pl.DataFrame( + bins_df = pl.DataFrame( { "strata": pl.Series(strata), "lower_bound": lower_bound, @@ -188,11 +188,15 @@ def create_strata_combinations(stratified_by: str, by: float) -> pl.DataFrame: "mid_point": mid_point, "include_lower_bound": include_lower_bound, "include_upper_bound": include_upper_bound, - "chosen_cutoff": chosen_cutoff, + # "chosen_cutoff": chosen_cutoff, "stratified_by": [stratified_by] * len(strata), } ) + cutoffs_df = pl.DataFrame({"chosen_cutoff": breaks}) + + return bins_df.join(cutoffs_df, how="cross") + def format_strata_column( lower_bound: list[float], @@ -236,12 +240,18 @@ def create_aj_data_combinations( stratified_by: Sequence[str], by: float, ) -> pl.DataFrame: - strata_combinations = pl.concat( - [create_strata_combinations(value, by) for value in stratified_by], - how="vertical", + dfs = [create_strata_combinations(sb, by) for sb in stratified_by] + strata_combinations = pl.concat(dfs, how="vertical") + + # strata_enum = pl.Enum(strata_combinations["strata"]) + + strata_cats = ( + strata_combinations.select(pl.col("strata").unique(maintain_order=True)) + .to_series() + .to_list() ) - strata_enum = pl.Enum(strata_combinations["strata"]) + strata_enum = pl.Enum(strata_cats) stratified_by_enum = pl.Enum(["probability_threshold", "ppcr"]) strata_combinations = strata_combinations.with_columns( From 00078b979aa8dab7a2b58fc2141b320590fc5693 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sat, 27 Sep 2025 16:10:01 +0300 Subject: [PATCH 38/51] feature: close #161 --- .../helpers/sandbox_observable_helpers.py | 52 ++++++++++++++++--- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index e7bf645..acdfdc5 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -136,8 +136,12 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame: try: q = int(1 / by) quantile_edges = np.quantile(-probs, np.linspace(0, 1, q)) + strata_ppcr = np.digitize(-probs, quantile_edges, right=False) strata_ppcr = (strata_ppcr / (1 / by)).astype(str) + + columns_to_add.append(pl.Series("strata_ppcr", strata_ppcr)) + except ValueError: strata_ppcr = np.array(["1"] * len(probs)) # fallback for small group @@ -170,9 +174,9 @@ def create_strata_combinations(stratified_by: str, by: float) -> pl.DataFrame: elif stratified_by == "ppcr": strata_mid = breaks[1:] - lower_bound = strata_mid - by - upper_bound = strata_mid + by - mid_point = upper_bound - by + lower_bound = strata_mid - by / 2 + upper_bound = strata_mid + by / 2 + mid_point = breaks[1:] include_lower_bound = np.ones_like(strata_mid, dtype=bool) include_upper_bound = np.zeros_like(strata_mid, dtype=bool) # chosen_cutoff = strata_mid @@ -362,6 +366,7 @@ def create_aj_data( competing_heuristic, fixed_time_horizons, full_event_table: bool = False, + risk_set_scope: str = "within_stratum", ): """ Create AJ estimates per strata based on censoring and competing assumptions. 
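+
+    A minimal call sketch (argument values here are illustrative only, not a
+    prescribed configuration):
+
+        create_aj_data(
+            reference_group_data,
+            censoring_heuristic="adjusted",
+            competing_heuristic="adjusted_as_negative",
+            fixed_time_horizons=[10.0, 30.0, 50.0],
+        )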
@@ -387,6 +392,7 @@ def aj_estimates_with_cross(df, extra_cols): competing_heuristic, fixed_time_horizons, full_event_table, + risk_set_scope, ) result = aj_df.join(excluded_events, on=["fixed_time_horizon"], how="left") @@ -548,6 +554,7 @@ def assign_and_explode_polars( def create_list_data_to_adjust( + aj_data_combinations: pl.DataFrame, probs_dict: Dict[str, np.ndarray], reals_dict: Union[np.ndarray, Dict[str, np.ndarray]], times_dict: Union[np.ndarray, Dict[str, np.ndarray]], @@ -560,6 +567,8 @@ def create_list_data_to_adjust( reference_group_enum = pl.Enum(reference_group_labels) + strata_enum_dtype = aj_data_combinations.schema["strata"] + # Flatten and ensure list format data_to_adjust = pl.DataFrame( { @@ -576,8 +585,21 @@ def create_list_data_to_adjust( data_to_adjust = add_cutoff_strata( data_to_adjust, by=by, stratified_by=stratified_by ) + data_to_adjust = pivot_longer_strata(data_to_adjust) + data_to_adjust = ( + data_to_adjust.with_columns([pl.col("strata")]) + .with_columns(pl.col("strata").cast(strata_enum_dtype)) + .join( + aj_data_combinations.select( + pl.col("strata"), pl.col("stratified_by"), pl.col("upper_bound") + ).unique(), + how="left", + on=["strata", "stratified_by"], + ) + ) + reals_labels = [ "real_negatives", "real_positives", @@ -617,6 +639,7 @@ def extract_aj_estimate_by_assumptions( df: pl.DataFrame, assumptions_sets: list[dict], fixed_time_horizons: list[float], + risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: aj_dfs = [] @@ -625,7 +648,12 @@ def extract_aj_estimate_by_assumptions( competing = assumption["competing_assumption"] aj_df = create_aj_data( - df, censoring, competing, fixed_time_horizons + df, + censoring, + competing, + fixed_time_horizons, + full_event_table=False, + risk_set_scope=risk_set_scope, ).with_columns( [ pl.lit(censoring).alias("censoring_assumption"), @@ -655,6 +683,7 @@ def create_adjusted_data( list_data_to_adjust_polars: dict[str, pl.DataFrame], assumptions_sets: list[dict[str, str]], fixed_time_horizons: list[float], + risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: all_results = [] @@ -670,6 +699,7 @@ def create_adjusted_data( "adjusted_as_censored", "adjusted_as_composite", ] + competing_assumption_enum = pl.Enum(competing_assumption_labels) for reference_group, df in list_data_to_adjust_polars.items(): @@ -679,6 +709,7 @@ def create_adjusted_data( input_df, assumptions_sets=assumptions_sets, fixed_time_horizons=fixed_time_horizons, + risk_set_scope=risk_set_scope, ) aj_result_with_group = aj_result.with_columns( @@ -786,13 +817,18 @@ def _aj_adjusted_events( competing: str, horizons: list[float], full_event_table: bool = False, + risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: if censoring == "adjusted" and competing == "adjusted_as_negative": - return reference_group_data.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata( - group, horizons, full_event_table + if risk_set_scope == "within_stratum": + return reference_group_data.group_by("strata").map_groups( + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) ) - ) + + elif risk_set_scope == "pooled_by_cutoff": + pass if censoring == "excluded" and competing == "adjusted_as_negative": non_censored = exploded.filter( From 7cbc70acf28f5ec2c9ad56a3d8cf5895fb73297c Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sat, 27 Sep 2025 18:26:55 +0300 Subject: [PATCH 39/51] feature: close #162 --- src/rtichoke/helpers/sandbox_observable_helpers.py | 14 ++++++++++---- 
tests/test_rtichoke.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index acdfdc5..b97dc8d 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -153,9 +153,7 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame: return pl.concat(transformed_groups) -def create_strata_combinations(stratified_by: str, by: float) -> pl.DataFrame: - breaks = create_breaks_values(None, "probability_threshold", by) - +def create_strata_combinations(stratified_by: str, by: float, breaks) -> pl.DataFrame: if stratified_by == "probability_threshold": upper_bound = breaks[1:] # breaks lower_bound = breaks[:-1] # np.roll(upper_bound, 1) @@ -243,8 +241,9 @@ def create_aj_data_combinations( fixed_time_horizons: Sequence[float], stratified_by: Sequence[str], by: float, + breaks: Sequence[float], ) -> pl.DataFrame: - dfs = [create_strata_combinations(sb, by) for sb in stratified_by] + dfs = [create_strata_combinations(sb, by, breaks) for sb in stratified_by] strata_combinations = pl.concat(dfs, how="vertical") # strata_enum = pl.Enum(strata_combinations["strata"]) @@ -362,6 +361,7 @@ def update_administrative_censoring_polars(data: pl.DataFrame) -> pl.DataFrame: def create_aj_data( reference_group_data, + breaks, censoring_heuristic, competing_heuristic, fixed_time_horizons, @@ -387,6 +387,7 @@ def aj_estimates_with_cross(df, extra_cols): aj_df = _aj_adjusted_events( reference_group_data, + breaks, exploded, censoring_heuristic, competing_heuristic, @@ -637,6 +638,7 @@ def ensure_no_categorical(df: pd.DataFrame) -> pd.DataFrame: def extract_aj_estimate_by_assumptions( df: pl.DataFrame, + breaks: Sequence[float], assumptions_sets: list[dict], fixed_time_horizons: list[float], risk_set_scope: str = "within_stratum", @@ -649,6 +651,7 @@ def extract_aj_estimate_by_assumptions( aj_df = create_aj_data( df, + breaks, censoring, competing, fixed_time_horizons, @@ -683,6 +686,7 @@ def create_adjusted_data( list_data_to_adjust_polars: dict[str, pl.DataFrame], assumptions_sets: list[dict[str, str]], fixed_time_horizons: list[float], + breaks: Sequence[float], risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: all_results = [] @@ -707,6 +711,7 @@ def create_adjusted_data( aj_result = extract_aj_estimate_by_assumptions( input_df, + breaks, assumptions_sets=assumptions_sets, fixed_time_horizons=fixed_time_horizons, risk_set_scope=risk_set_scope, @@ -812,6 +817,7 @@ def _aj_estimates_per_horizon( def _aj_adjusted_events( reference_group_data: pl.DataFrame, + breaks: Sequence[float], exploded: pl.DataFrame, censoring: str, competing: str, diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index 167ab10..6529e02 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -17,6 +17,7 @@ TIMES = [24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 4.3, 31.5] REALS = [1, 1, 1, 1, 0, 2, 1, 2, 0, 1] TIME_HORIZONS = [10.0, 30.0, 50.0] +BREAKS: list[float] = [0.0, 0.5, 1.0] def _expected( @@ -160,9 +161,11 @@ def test_create_aj_data( } ) horizons = [1.0, 2.0, 3.0] + breaks = [0.0, 0.5, 1.0] result = create_aj_data( df, + breaks=breaks, censoring_heuristic=censoring_assumption, competing_heuristic=competing_assumption, fixed_time_horizons=horizons, @@ -319,7 +322,13 @@ def test_aj_adjusted_events(censoring: str, competing: str) -> None: ) exploded = assign_and_explode_polars(df, TIME_HORIZONS) result = 
_aj_adjusted_events(
-        df, exploded, censoring, competing, TIME_HORIZONS, full_event_table=False
+        df,
+        BREAKS,
+        exploded,
+        censoring,
+        competing,
+        TIME_HORIZONS,
+        full_event_table=False,
     ).sort("fixed_time_horizon")

     neg = [v[0] for v in AJ_EXPECTED[(censoring, competing)]]

From ea9a6674f084b5d5f88cfbd75d0e3e1c9e09a56a Mon Sep 17 00:00:00 2001
From: Uriah Finkel
Date: Sun, 26 Oct 2025 14:47:58 +0200
Subject: [PATCH 40/51] feat: close #163

---
 .../helpers/sandbox_observable_helpers.py | 649 +++++++++++++++---
 1 file changed, 559 insertions(+), 90 deletions(-)

diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py
index b97dc8d..39a07c2 100644
--- a/src/rtichoke/helpers/sandbox_observable_helpers.py
+++ b/src/rtichoke/helpers/sandbox_observable_helpers.py
@@ -105,12 +105,12 @@ def extract_aj_estimate(data_to_adjust, fixed_time_horizons):

 def add_cutoff_strata(data: pl.DataFrame, by: float, stratified_by) -> pl.DataFrame:
-    def transform_group(group: pl.DataFrame) -> pl.DataFrame:
+    def transform_group(group: pl.DataFrame, by: float) -> pl.DataFrame:
         probs = group["probs"].to_numpy()
         columns_to_add = []
+        breaks = create_breaks_values(probs, "probability_threshold", by)

         if "probability_threshold" in stratified_by:
-            breaks = create_breaks_values(probs, "probability_threshold", by)
             last_bin_index = len(breaks) - 2

             bin_indices = np.digitize(probs, bins=breaks, right=False) - 1
@@ -132,28 +132,54 @@ def transform_group(group: pl.DataFrame) -> pl.DataFrame:
         if "ppcr" in stratified_by:
-            # --- Compute strata_ppcr as quantiles on -probs ---
-            try:
-                q = int(1 / by)
-                quantile_edges = np.quantile(-probs, np.linspace(0, 1, q))
+            # --- Compute strata_ppcr as equal-frequency quantile bins by rank ---
+            by = float(by)
+            q = int(round(1 / by))  # e.g. 0.2 -> 5 bins

-                strata_ppcr = np.digitize(-probs, quantile_edges, right=False)
-                strata_ppcr = (strata_ppcr / (1 / by)).astype(str)
+            probs = np.asarray(probs, float)
+            n = probs.size
+            print(f"q = {q}, n = {n}")
+            print("probs:", probs)

-                columns_to_add.append(pl.Series("strata_ppcr", strata_ppcr))
+            edges = np.quantile(probs, np.linspace(0.0, 1.0, q + 1), method="linear")
+            print("edges before accumulating:", edges)

-            except ValueError:
-                strata_ppcr = np.array(["1"] * len(probs))  # fallback for small group
+            edges = np.maximum.accumulate(edges)
+            print("edges after accumulating:", edges)
+            edges[0] = 0.0
+            edges[-1] = 1.0
+
+            print("edges after setting 0 and 1:", edges)
+
+            bin_idx = np.digitize(probs, bins=edges[1:-1], right=True)
+            print("bin_idx:", bin_idx)
+
+            s = str(by)
+            decimals = len(s.split(".")[-1]) if "." in s else 0
+
+            labels = [f"{x:.{decimals}f}" for x in np.linspace(by, 1.0, q)]
+            print("bin_labels", labels)
+
+            strata_labels = np.array([labels[i] for i in bin_idx], dtype=object)
+            print("strata_labels:", strata_labels)
+
+            columns_to_add.append(
+                pl.Series("strata_ppcr", strata_labels).cast(pl.Enum(labels))
+            )

         return group.with_columns(columns_to_add)

     # Apply per-group transformation
     grouped = data.partition_by("reference_group", as_dict=True)
-    transformed_groups = [transform_group(group) for group in grouped.values()]
+    transformed_groups = [transform_group(group, by) for group in grouped.values()]
     return pl.concat(transformed_groups)

 def create_strata_combinations(stratified_by: str, by: float, breaks) -> pl.DataFrame:
+    s_by = str(by)
+    decimals = len(s_by.split(".")[-1]) if "." in s_by else 0
+    fmt = f"{{:.{decimals}f}}"
+
     if stratified_by == "probability_threshold":
         upper_bound = breaks[1:]  # breaks
         lower_bound = breaks[:-1]  # np.roll(upper_bound, 1)
@@ -178,7 +204,8 @@ def create_strata_combinations(stratified_by: str, by: float, breaks) -> pl.Data
         include_lower_bound = np.ones_like(strata_mid, dtype=bool)
         include_upper_bound = np.zeros_like(strata_mid, dtype=bool)
         # chosen_cutoff = strata_mid
-        strata = np.round(mid_point, 3).astype(str)
+        strata = np.array([fmt.format(x) for x in strata_mid], dtype=object)
+        print("strata", strata)
     else:
         raise ValueError(f"Unsupported stratified_by: {stratified_by}")

@@ -242,6 +269,7 @@ def create_aj_data_combinations(
     stratified_by: Sequence[str],
     by: float,
     breaks: Sequence[float],
+    risk_set_scope: Sequence[str] = ["within_stratum", "pooled_by_cutoff"],
 ) -> pl.DataFrame:
     dfs = [create_strata_combinations(sb, by, breaks) for sb in stratified_by]
     strata_combinations = pl.concat(dfs, how="vertical")
@@ -264,6 +292,14 @@ def create_aj_data_combinations(
         ]
     )

+    risk_set_scope_combinations = pl.DataFrame(
+        {
+            "risk_set_scope": pl.Series(risk_set_scope).cast(
+                pl.Enum(["within_stratum", "pooled_by_cutoff"])
+            )
+        }
+    )
+
     # Define values for Cartesian product
     reals_labels = [
         "real_negatives",
@@ -289,6 +325,7 @@ def create_aj_data_combinations(
         _enum_dataframe("censoring_assumption", censoring_assumptions_labels),
         _enum_dataframe("competing_assumption", competing_assumptions_labels),
         strata_combinations,
+        risk_set_scope_combinations,
         _enum_dataframe("reals_labels", reals_labels),
     ]

@@ -365,12 +402,15 @@ def create_aj_data(
     censoring_heuristic,
     competing_heuristic,
     fixed_time_horizons,
+    stratified_by: Sequence[str],
     full_event_table: bool = False,
-    risk_set_scope: str = "within_stratum",
+    risk_set_scope: Sequence[str] = ("within_stratum",),
 ):
     """
     Create AJ estimates per strata based on censoring and competing assumptions.
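+
+    risk_set_scope selects between two estimation modes (a summary of the
+    dispatch in _aj_adjusted_events, not an enforced contract):
+    "within_stratum" fits the Aalen-Johansen estimator separately inside each
+    stratum, while "pooled_by_cutoff" pools the observations on each side of
+    every chosen cutoff before fitting.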
""" + print("stratified_by", stratified_by) + print("Creating aj data") def aj_estimates_with_cross(df, extra_cols): return df.join(pl.DataFrame(extra_cols), how="cross") @@ -385,16 +425,50 @@ def aj_estimates_with_cross(df, extra_cols): event_table, fixed_time_horizons, censoring_heuristic, competing_heuristic ) - aj_df = _aj_adjusted_events( - reference_group_data, - breaks, - exploded, - censoring_heuristic, - competing_heuristic, - fixed_time_horizons, - full_event_table, - risk_set_scope, - ) + print("stratified_by before _aj_adjusted_events", stratified_by) + + aj_dfs = [] + for rscope in risk_set_scope: + aj_res = _aj_adjusted_events( + reference_group_data, + breaks, + exploded, + censoring_heuristic, + competing_heuristic, + fixed_time_horizons, + stratified_by, + full_event_table, + rscope, + ) + + print("aj_res before select", aj_res.columns) + print("aj_res", aj_res) + + aj_res = aj_res.select( + [ + "strata", + "times", + "chosen_cutoff", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "estimate_origin", + "fixed_time_horizon", + "risk_set_scope", + ] + ) + + print("aj_res columns", aj_res.columns) + print("aj_res", aj_res) + + aj_dfs.append(aj_res) + + aj_df = pl.concat(aj_dfs, how="vertical") + + print("aj_df columns", aj_df.columns) + + # print("aj_df") + # print(aj_df) result = aj_df.join(excluded_events, on=["fixed_time_horizon"], how="left") @@ -407,6 +481,7 @@ def aj_estimates_with_cross(df, extra_cols): ).select( [ "strata", + "chosen_cutoff", "fixed_time_horizon", "times", "real_negatives_est", @@ -416,6 +491,7 @@ def aj_estimates_with_cross(df, extra_cols): "censoring_assumption", "competing_assumption", "estimate_origin", + "risk_set_scope", ] ) @@ -492,6 +568,179 @@ def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: # return result_pandas +def extract_aj_estimate_by_cutoffs( + data_to_adjust, horizons, breaks, stratified_by, full_event_table: bool +): + # n = data_to_adjust.height + + counts_per_strata = ( + data_to_adjust.group_by(["strata", "stratified_by", "upper_bound"]) + .len(name="strata_count") + .with_columns(pl.col("strata_count").cast(pl.Float64)) + ) + + print("counts per strata") + print(counts_per_strata) + + print("data_to_adjust") + print(data_to_adjust) + + # TODO: iterate over predicted-positives / negatives + + aj_estimates_predicted_positives = pl.DataFrame() + aj_estimates_predicted_negatives = pl.DataFrame() + + for stratification_criteria in stratified_by: + for chosen_cutoff in breaks: + if stratification_criteria == "probability_threshold": + mask_predicted_positives = (pl.col("upper_bound") > chosen_cutoff) & ( + pl.col("stratified_by") == "probability_threshold" + ) + mask_predicted_negatives = (pl.col("upper_bound") <= chosen_cutoff) & ( + pl.col("stratified_by") == "probability_threshold" + ) + + elif stratification_criteria == "ppcr": + mask_predicted_positives = (pl.col("upper_bound") <= chosen_cutoff) & ( + pl.col("stratified_by") == "ppcr" + ) + mask_predicted_negatives = (pl.col("upper_bound") > chosen_cutoff) & ( + pl.col("stratified_by") == "ppcr" + ) + + predicted_positives = data_to_adjust.filter(mask_predicted_positives) + predicted_negatives = data_to_adjust.filter(mask_predicted_negatives) + + counts_per_strata_predicted_positives = counts_per_strata.filter( + mask_predicted_positives + ) + counts_per_strata_predicted_negatives = counts_per_strata.filter( + mask_predicted_negatives + ) + + event_table_predicted_positives = prepare_event_table(predicted_positives) + 
event_table_predicted_negatives = prepare_event_table(predicted_negatives) + + aj_estimate_predicted_positives = ( + ( + predict_aj_estimates( + event_table_predicted_positives, + pl.Series(horizons), + full_event_table, + ) + .with_columns( + pl.lit(chosen_cutoff).alias("chosen_cutoff"), + pl.lit(stratification_criteria) + .alias("stratified_by") + .cast(pl.Enum(["probability_threshold", "ppcr"])), + ) + .join( + counts_per_strata_predicted_positives, + on=["stratified_by"], + how="left", + ) + .with_columns( + [ + ( + pl.col("state_occupancy_probability_0") + * pl.col("strata_count") + ).alias("real_negatives_est"), + ( + pl.col("state_occupancy_probability_1") + * pl.col("strata_count") + ).alias("real_positives_est"), + ( + pl.col("state_occupancy_probability_2") + * pl.col("strata_count") + ).alias("real_competing_est"), + ] + ) + ) + .select( + [ + "strata", + # "stratified_by", + "times", + "chosen_cutoff", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "estimate_origin", + ] + ) + .with_columns([pl.col("times").alias("fixed_time_horizon")]) + ) + + aj_estimate_predicted_negatives = ( + ( + predict_aj_estimates( + event_table_predicted_negatives, + pl.Series(horizons), + full_event_table, + ) + .with_columns( + pl.lit(chosen_cutoff).alias("chosen_cutoff"), + pl.lit(stratification_criteria) + .alias("stratified_by") + .cast(pl.Enum(["probability_threshold", "ppcr"])), + ) + .join( + counts_per_strata_predicted_negatives, + on=["stratified_by"], + how="left", + ) + .with_columns( + [ + ( + pl.col("state_occupancy_probability_0") + * pl.col("strata_count") + ).alias("real_negatives_est"), + ( + pl.col("state_occupancy_probability_1") + * pl.col("strata_count") + ).alias("real_positives_est"), + ( + pl.col("state_occupancy_probability_2") + * pl.col("strata_count") + ).alias("real_competing_est"), + ] + ) + ) + .select( + [ + "strata", + # "stratified_by", + "times", + "chosen_cutoff", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "estimate_origin", + ] + ) + .with_columns([pl.col("times").alias("fixed_time_horizon")]) + ) + + aj_estimates_predicted_negatives = pl.concat( + [aj_estimates_predicted_negatives, aj_estimate_predicted_negatives], + how="vertical", + ) + + aj_estimates_predicted_positives = pl.concat( + [aj_estimates_predicted_positives, aj_estimate_predicted_positives], + how="vertical", + ) + + aj_estimate_by_cutoffs = pl.concat( + [aj_estimates_predicted_negatives, aj_estimates_predicted_positives], + how="vertical", + ) + + print("aj_estimate_by_cutoffs", aj_estimate_by_cutoffs) + + return aj_estimate_by_cutoffs + + def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: bool): n = data_to_adjust.height @@ -523,6 +772,9 @@ def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: b [fixed_df, event_df], how="vertical" ).sort("estimate_origin", "fixed_time_horizon", "times") + # print("aj_estimate_for_strata_polars") + # print(aj_estimate_for_strata_polars) + return aj_estimate_for_strata_polars.with_columns( [ (pl.col("state_occupancy_probability_0") * n).alias("real_negatives_est"), @@ -641,20 +893,30 @@ def extract_aj_estimate_by_assumptions( breaks: Sequence[float], assumptions_sets: list[dict], fixed_time_horizons: list[float], + stratified_by: Sequence[str], risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: aj_dfs = [] + print("stratified_by", stratified_by) + for assumption in assumptions_sets: censoring = assumption["censoring_assumption"] competing = 
assumption["competing_assumption"] + print("stratified_by", stratified_by) + + print("df before create_aj_data") + print(df.columns) + print(df.schema) + aj_df = create_aj_data( df, breaks, censoring, competing, fixed_time_horizons, + stratified_by=stratified_by, full_event_table=False, risk_set_scope=risk_set_scope, ).with_columns( @@ -666,19 +928,27 @@ def extract_aj_estimate_by_assumptions( aj_dfs.append(aj_df) + # print("aj_dfs", aj_dfs) + aj_estimates_data = pl.concat(aj_dfs).drop(["estimate_origin", "times"]) + print("aj_estimates_data", aj_estimates_data) + aj_estimates_unpivoted = aj_estimates_data.unpivot( index=[ "strata", + "chosen_cutoff", "fixed_time_horizon", "censoring_assumption", "competing_assumption", + "risk_set_scope", ], variable_name="reals_labels", value_name="reals_estimate", ) + print("aj_estimates_unpivoted", aj_estimates_unpivoted) + return aj_estimates_unpivoted @@ -687,6 +957,7 @@ def create_adjusted_data( assumptions_sets: list[dict[str, str]], fixed_time_horizons: list[float], breaks: Sequence[float], + stratified_by: Sequence[str], risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: all_results = [] @@ -707,13 +978,18 @@ def create_adjusted_data( competing_assumption_enum = pl.Enum(competing_assumption_labels) for reference_group, df in list_data_to_adjust_polars.items(): - input_df = df.select(["strata", "reals", "times"]) + input_df = df.select( + ["strata", "reals", "times", "upper_bound", "stratified_by"] + ) + + print("stratified_by", stratified_by) aj_result = extract_aj_estimate_by_assumptions( input_df, breaks, assumptions_sets=assumptions_sets, fixed_time_horizons=fixed_time_horizons, + stratified_by=stratified_by, risk_set_scope=risk_set_scope, ) @@ -727,6 +1003,8 @@ def create_adjusted_data( all_results.append(aj_result_with_group) + print("all_results", all_results) + reals_enum_dtype = pl.Enum( [ "real_negatives", @@ -756,19 +1034,86 @@ def cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data): pl.col("strata").cast(strata_enum_dtype) ) - final_adjusted_data_polars = aj_data_combinations.with_columns( - [pl.col("strata")] - ).join( - aj_estimates_data, - on=[ - "strata", - "fixed_time_horizon", - "censoring_assumption", - "competing_assumption", - "reals_labels", - "reference_group", - ], - how="left", + final_adjusted_data_polars = ( + aj_data_combinations.with_columns([pl.col("strata")]) + .join( + aj_estimates_data, + on=[ + "strata", + "fixed_time_horizon", + "censoring_assumption", + "competing_assumption", + "reals_labels", + "reference_group", + "chosen_cutoff", + "risk_set_scope", + ], + how="left", + ) + .with_columns( + pl.when( + ( + (pl.col("chosen_cutoff") >= pl.col("upper_bound")) + & (pl.col("stratified_by") == "probability_threshold") + ) + | ( + (pl.col("chosen_cutoff") < pl.col("upper_bound")) + & (pl.col("stratified_by") == "ppcr") + ) + ) + .then(pl.lit("predicted_negatives")) + .otherwise(pl.lit("predicted_positives")) + .cast(pl.Enum(["predicted_negatives", "predicted_positives"])) + .alias("prediction_label") + ) + .with_columns( + ( + pl.when( + (pl.col("prediction_label") == pl.lit("predicted_positives")) + & (pl.col("reals_labels") == pl.lit("real_positives")) + ) + .then(pl.lit("true_positives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_positives")) + & (pl.col("reals_labels") == pl.lit("real_negatives")) + ) + .then(pl.lit("false_positives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_negatives")) + & (pl.col("reals_labels") == pl.lit("real_negatives")) + ) + 
.then(pl.lit("true_negatives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_negatives")) + & (pl.col("reals_labels") == pl.lit("real_positives")) + ) + .then(pl.lit("false_negatives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_negatives")) + & (pl.col("reals_labels") == pl.lit("real_competing")) + & (pl.col("competing_assumption") == pl.lit("adjusted_as_negative")) + ) + .then(pl.lit("true_negatives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_positives")) + & (pl.col("reals_labels") == pl.lit("real_competing")) + & (pl.col("competing_assumption") == pl.lit("adjusted_as_negative")) + ) + .then(pl.lit("false_positives")) + .otherwise(pl.lit("excluded")) # or pl.lit(None) if you prefer nulls + .cast( + pl.Enum( + [ + "true_positives", + "false_positives", + "true_negatives", + "false_negatives", + "excluded", + ] + ) + ) + ).alias("classification_outcome") + ) ) return final_adjusted_data_polars @@ -797,6 +1142,27 @@ def _competing_count(df: pl.DataFrame) -> pl.DataFrame: ) +def _aj_estimates_by_cutoff_per_horizon( + df: pl.DataFrame, + horizons: list[float], + breaks: Sequence[float], + stratified_by: Sequence[str], +) -> pl.DataFrame: + return pl.concat( + [ + df.filter(pl.col("fixed_time_horizon") == h) + .group_by("strata") + .map_groups( + lambda group: extract_aj_estimate_by_cutoffs( + group, [h], breaks, stratified_by, full_event_table=False + ) + ) + for h in horizons + ], + how="vertical", + ) + + def _aj_estimates_per_horizon( df: pl.DataFrame, horizons: list[float], full_event_table: bool ) -> pl.DataFrame: @@ -822,92 +1188,195 @@ def _aj_adjusted_events( censoring: str, competing: str, horizons: list[float], + stratified_by: Sequence[str], full_event_table: bool = False, risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: + print("reference_group_data") + print(reference_group_data) + + strata_enum_dtype = reference_group_data.schema["strata"] + + # Special-case: adjusted censoring + competing adjusted_as_negative supports pooled_by_cutoff if censoring == "adjusted" and competing == "adjusted_as_negative": if risk_set_scope == "within_stratum": - return reference_group_data.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata( - group, horizons, full_event_table + print("reference_group_data", reference_group_data) + + adjusted = ( + reference_group_data.group_by("strata") + .map_groups( + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) ) + .join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") + ) + # preserve the original enum dtype for 'strata' coming from reference_group_data + + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] ) + return adjusted + elif risk_set_scope == "pooled_by_cutoff": - pass + print("reference_group_data", reference_group_data) - if censoring == "excluded" and competing == "adjusted_as_negative": - non_censored = exploded.filter( - (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + adjusted = extract_aj_estimate_by_cutoffs( + reference_group_data, horizons, breaks, stratified_by, full_event_table + ) + adjusted = adjusted.with_columns( + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope") + ) + return adjusted + + # Special-case: both excluded (faster branch in original) + if censoring == "excluded" and 
competing == "excluded": + non_censored_non_competing = exploded.filter( + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) + ) + + adjusted = _aj_estimates_per_horizon( + non_censored_non_competing, horizons, full_event_table ) - return _aj_estimates_per_horizon(non_censored, horizons, full_event_table) - if censoring == "adjusted" and competing == "adjusted_as_censored": - adjusted = reference_group_data.with_columns( + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] + ).join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") + + return adjusted + + # Special-case: competing excluded (handled by filtering out competing events) + if competing == "excluded": + print("running for censoring adjusted and competing excluded") + + # Use exploded to apply filters that depend on fixed_time_horizon consistently + non_competing = exploded.filter( + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) + ).with_columns( pl.when(pl.col("reals") == 2) .then(pl.lit(0)) .otherwise(pl.col("reals")) .alias("reals") ) - return adjusted.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata( - group, horizons, full_event_table + + print("non_competing data", non_competing) + + if risk_set_scope == "within_stratum": + adjusted = ( + _aj_estimates_per_horizon(non_competing, horizons, full_event_table) + # .select(pl.exclude("real_competing_est")) + .join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") ) + + elif risk_set_scope == "pooled_by_cutoff": + adjusted = extract_aj_estimate_by_cutoffs( + non_competing, horizons, breaks, stratified_by, full_event_table + ) + + print("adjusted after join cutoffs", adjusted) + + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] ) + return adjusted - if censoring == "excluded" and competing == "adjusted_as_censored": - non_censored = exploded.filter( + # For remaining cases, determine base dataframe depending on censoring rule: + # - "adjusted": use the full reference_group_data (events censored at horizon are kept/adjusted) + # - "excluded": remove administratively censored observations (use exploded with filter) + base_df = ( + exploded.filter( (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) - ).with_columns( + ) + if censoring == "excluded" + else reference_group_data + ) + + # Apply competing-event transformation if required + if competing == "adjusted_as_censored": + base_df = base_df.with_columns( pl.when(pl.col("reals") == 2) .then(pl.lit(0)) .otherwise(pl.col("reals")) .alias("reals") ) - return _aj_estimates_per_horizon(non_censored, horizons, full_event_table) - - if censoring == "adjusted" and competing == "adjusted_as_composite": - adjusted = reference_group_data.with_columns( + elif competing == "adjusted_as_composite": + base_df = base_df.with_columns( pl.when(pl.col("reals") == 2) .then(pl.lit(1)) .otherwise(pl.col("reals")) .alias("reals") ) - return adjusted.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata( - group, horizons, full_event_table + # competing == "adjusted_as_negative": keep reals as-is (no transform) + + # Finally choose aggregation strategy: per-stratum or horizon-wise + if censoring == "excluded": + # For excluded 
censoring we always evaluate per-horizon on the filtered (exploded) dataset + + if risk_set_scope == "within_stratum": + adjusted = _aj_estimates_per_horizon(base_df, horizons, full_event_table) + + adjusted = adjusted.join( + pl.DataFrame({"chosen_cutoff": breaks}), how="cross" ) - ) - if censoring == "excluded" and competing == "adjusted_as_composite": - non_censored = exploded.filter( - (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) - ).with_columns( - pl.when(pl.col("reals") == 2) - .then(pl.lit(1)) - .otherwise(pl.col("reals")) - .alias("reals") - ) - return _aj_estimates_per_horizon(non_censored, horizons, full_event_table) + print("adjusted after join", adjusted) - if censoring == "adjusted" and competing == "excluded": - non_competing = exploded.filter( - (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) - ).with_columns( - pl.when(pl.col("reals") == 2) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") + elif risk_set_scope == "pooled_by_cutoff": + adjusted = _aj_estimates_by_cutoff_per_horizon( + base_df, horizons, breaks, stratified_by + ) + + adjusted = adjusted.with_columns( + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope") ) - return _aj_estimates_per_horizon( - non_competing, horizons, full_event_table - ).select(pl.exclude("real_competing_est")) - # censoring == "excluded" and competing == "excluded" - non_censored_non_competing = exploded.filter( - (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) - ) + return adjusted.with_columns(pl.col("strata").cast(strata_enum_dtype)) + else: + # For adjusted censoring we aggregate within strata + + if risk_set_scope == "within_stratum": + adjusted = ( + base_df.group_by("strata") + .map_groups( + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) + ) + .join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") + ) + + elif risk_set_scope == "pooled_by_cutoff": + adjusted = extract_aj_estimate_by_cutoffs( + base_df, horizons, breaks, stratified_by, full_event_table + ) + + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] + ) - return _aj_estimates_per_horizon( - non_censored_non_competing, horizons, full_event_table - ).drop("real_competing_est") + return adjusted From 3da997bc11fc1b6d53245715216a3d2149925e1b Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sat, 1 Nov 2025 19:28:27 +0200 Subject: [PATCH 41/51] feat: close #98 --- .../helpers/sandbox_observable_helpers.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 39a07c2..e2334ac 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -1380,3 +1380,64 @@ def _aj_adjusted_events( ) return adjusted + + +def calculate_cumulative_aj_data(aj_data: pl.DataFrame) -> pl.DataFrame: + cumulative_aj_data = ( + aj_data.filter(pl.col("risk_set_scope") == "pooled_by_cutoff") + .group_by( + [ + "reference_group", + "fixed_time_horizon", + "censoring_assumption", + "competing_assumption", + "stratified_by", + "chosen_cutoff", + "classification_outcome", + ] + ) + .agg([pl.col("reals_estimate").sum()]) + .pivot(on="classification_outcome", values="reals_estimate") + 
.with_columns( + (pl.col("true_positives") + pl.col("false_positives")).alias( + "predicted_positives" + ), + (pl.col("true_negatives") + pl.col("false_negatives")).alias( + "predicted_negatives" + ), + (pl.col("true_positives") + pl.col("false_negatives")).alias( + "real_positives" + ), + (pl.col("false_positives") + pl.col("true_negatives")).alias( + "real_negatives" + ), + ( + pl.col("true_positives") + + pl.col("true_negatives") + + pl.col("false_positives") + + pl.col("false_negatives") + ).alias("n"), + ) + .with_columns( + (pl.col("true_positives") + pl.col("false_positives")).alias( + "predicted_positives" + ), + (pl.col("true_negatives") + pl.col("false_negatives")).alias( + "predicted_negatives" + ), + (pl.col("true_positives") + pl.col("false_negatives")).alias( + "real_positives" + ), + (pl.col("false_positives") + pl.col("true_negatives")).alias( + "real_negatives" + ), + ( + pl.col("true_positives") + + pl.col("true_negatives") + + pl.col("false_positives") + + pl.col("false_negatives") + ).alias("n"), + ) + ) + + return cumulative_aj_data From 94b633a56b19eb46c321d55af5a9ed16f2960293 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 2 Nov 2025 04:30:19 +0200 Subject: [PATCH 42/51] feat: close #99 --- .../helpers/sandbox_observable_helpers.py | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index e2334ac..87940b5 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -1382,7 +1382,7 @@ def _aj_adjusted_events( return adjusted -def calculate_cumulative_aj_data(aj_data: pl.DataFrame) -> pl.DataFrame: +def _calculate_cumulative_aj_data(aj_data: pl.DataFrame) -> pl.DataFrame: cumulative_aj_data = ( aj_data.filter(pl.col("risk_set_scope") == "pooled_by_cutoff") .group_by( @@ -1441,3 +1441,33 @@ def calculate_cumulative_aj_data(aj_data: pl.DataFrame) -> pl.DataFrame: ) return cumulative_aj_data + + +def _turn_cumulative_aj_to_performance_data( + cumulative_aj_data: pl.DataFrame, +) -> pl.DataFrame: + performance_data = cumulative_aj_data.with_columns( + (pl.col("true_positives") / pl.col("real_positives")).alias("sensitivity"), + (pl.col("true_negatives") / pl.col("real_negatives")).alias("specificity"), + (pl.col("true_positives") / pl.col("predicted_positives")).alias("ppv"), + (pl.col("true_negatives") / pl.col("predicted_negatives")).alias("npv"), + ( + (pl.col("true_positives") / pl.col("real_positives")) + / (pl.col("real_positives") / pl.col("n")) + ).alias("lift"), + pl.when(pl.col("stratified_by") == "probability_threshold") + .then( + (pl.col("true_positives") / pl.col("n")) + - (pl.col("false_positives") / pl.col("n")) + * pl.col("chosen_cutoff") + / (1 - pl.col("chosen_cutoff")) + ) + .otherwise(None) + .alias("net_benefit"), + pl.when(pl.col("stratified_by") == "probability_threshold") + .then(pl.col("predicted_positives") / pl.col("n")) + .otherwise(pl.col("chosen_cutoff")) + .alias("ppcr"), + ) + + return performance_data From 8a756c5756d4cab49ef87dc706d6ebfc666dbb35 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 2 Nov 2025 06:15:22 +0200 Subject: [PATCH 43/51] refactor: close #108 --- .../helpers/sandbox_observable_helpers.py | 93 ++++++++++--------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 87940b5..487bebc 100644 --- 
a/src/rtichoke/helpers/sandbox_observable_helpers.py
+++ b/src/rtichoke/helpers/sandbox_observable_helpers.py
@@ -265,6 +265,7 @@ def create_breaks_values(probs_vec, stratified_by, by):

 def create_aj_data_combinations(
     reference_groups: Sequence[str],
+    heuristics_sets: list[Dict],
     fixed_time_horizons: Sequence[float],
     stratified_by: Sequence[str],
     by: float,
@@ -308,22 +309,28 @@ def create_aj_data_combinations(
         "real_censored",
     ]

-    censoring_assumptions_labels = ["excluded", "adjusted"]
+    print("heuristics_sets", pl.DataFrame(heuristics_sets))

-    competing_assumptions_labels = [
-        "excluded",
-        "adjusted_as_negative",
-        "adjusted_as_censored",
-        "adjusted_as_composite",
-    ]
+    heuristics_combinations = pl.DataFrame(heuristics_sets)
+
+    censoring_heuristics_enum = pl.Enum(
+        heuristics_combinations["censoring_heuristic"].unique(maintain_order=True)
+    )
+    competing_heuristics_enum = pl.Enum(
+        heuristics_combinations["competing_heuristic"].unique(maintain_order=True)
+    )

     combinations_frames: list[pl.DataFrame] = [
         _enum_dataframe("reference_group", reference_groups),
         pl.DataFrame(
             {"fixed_time_horizon": pl.Series(fixed_time_horizons, dtype=pl.Float64)}
         ),
-        _enum_dataframe("censoring_assumption", censoring_assumptions_labels),
-        _enum_dataframe("competing_assumption", competing_assumptions_labels),
+        heuristics_combinations.with_columns(
+            [
+                pl.col("censoring_heuristic").cast(censoring_heuristics_enum),
+                pl.col("competing_heuristic").cast(competing_heuristics_enum),
+            ]
+        ),
         strata_combinations,
         risk_set_scope_combinations,
         _enum_dataframe("reals_labels", reals_labels),
@@ -407,7 +414,7 @@ def create_aj_data(
     risk_set_scope: Sequence[str] = "within_stratum",
 ):
     """
-    Create AJ estimates per strata based on censoring and competing assumptions.
+    Create AJ estimates per strata based on censoring and competing heuristics.
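+
+    A minimal sketch of the heuristic labels, assumed here from the
+    heuristics_sets examples used in this repo's docs (not an exhaustive API):
+
+        heuristic_set = {
+            "censoring_heuristic": "adjusted",  # or "excluded"
+            "competing_heuristic": "adjusted_as_negative",
+            # other competing options used in the docs:
+            # "adjusted_as_censored", "adjusted_as_composite", "excluded"
+        }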
""" print("stratified_by", stratified_by) print("Creating aj data") @@ -475,8 +482,8 @@ def aj_estimates_with_cross(df, extra_cols): return aj_estimates_with_cross( result, { - "censoring_assumption": censoring_heuristic, - "competing_assumption": competing_heuristic, + "censoring_heuristic": censoring_heuristic, + "competing_heuristic": competing_heuristic, }, ).select( [ @@ -488,8 +495,8 @@ def aj_estimates_with_cross(df, extra_cols): "real_positives_est", "real_competing_est", "real_censored_est", - "censoring_assumption", - "competing_assumption", + "censoring_heuristic", + "competing_heuristic", "estimate_origin", "risk_set_scope", ] @@ -888,10 +895,10 @@ def ensure_no_categorical(df: pd.DataFrame) -> pd.DataFrame: return df -def extract_aj_estimate_by_assumptions( +def extract_aj_estimate_by_heuristics( df: pl.DataFrame, breaks: Sequence[float], - assumptions_sets: list[dict], + heuristics_sets: list[dict], fixed_time_horizons: list[float], stratified_by: Sequence[str], risk_set_scope: str = "within_stratum", @@ -900,9 +907,9 @@ def extract_aj_estimate_by_assumptions( print("stratified_by", stratified_by) - for assumption in assumptions_sets: - censoring = assumption["censoring_assumption"] - competing = assumption["competing_assumption"] + for heuristic in heuristics_sets: + censoring = heuristic["censoring_heuristic"] + competing = heuristic["competing_heuristic"] print("stratified_by", stratified_by) @@ -921,8 +928,8 @@ def extract_aj_estimate_by_assumptions( risk_set_scope=risk_set_scope, ).with_columns( [ - pl.lit(censoring).alias("censoring_assumption"), - pl.lit(competing).alias("competing_assumption"), + pl.lit(censoring).alias("censoring_heuristic"), + pl.lit(competing).alias("competing_heuristic"), ] ) @@ -939,8 +946,8 @@ def extract_aj_estimate_by_assumptions( "strata", "chosen_cutoff", "fixed_time_horizon", - "censoring_assumption", - "competing_assumption", + "censoring_heuristic", + "competing_heuristic", "risk_set_scope", ], variable_name="reals_labels", @@ -954,7 +961,7 @@ def extract_aj_estimate_by_assumptions( def create_adjusted_data( list_data_to_adjust_polars: dict[str, pl.DataFrame], - assumptions_sets: list[dict[str, str]], + heuristics_sets: list[dict[str, str]], fixed_time_horizons: list[float], breaks: Sequence[float], stratified_by: Sequence[str], @@ -965,17 +972,13 @@ def create_adjusted_data( reference_groups = list(list_data_to_adjust_polars.keys()) reference_group_enum = pl.Enum(reference_groups) - censoring_assumption_labels = ["excluded", "adjusted"] - censoring_assumption_enum = pl.Enum(censoring_assumption_labels) - - competing_assumption_labels = [ - "excluded", - "adjusted_as_negative", - "adjusted_as_censored", - "adjusted_as_composite", - ] - - competing_assumption_enum = pl.Enum(competing_assumption_labels) + heuristics_df = pl.DataFrame(heuristics_sets) + censoring_heuristic_enum = pl.Enum( + heuristics_df["censoring_heuristic"].unique(maintain_order=True) + ) + competing_heuristic_enum = pl.Enum( + heuristics_df["competing_heuristic"].unique(maintain_order=True) + ) for reference_group, df in list_data_to_adjust_polars.items(): input_df = df.select( @@ -984,10 +987,10 @@ def create_adjusted_data( print("stratified_by", stratified_by) - aj_result = extract_aj_estimate_by_assumptions( + aj_result = extract_aj_estimate_by_heuristics( input_df, breaks, - assumptions_sets=assumptions_sets, + heuristics_sets=heuristics_sets, fixed_time_horizons=fixed_time_horizons, stratified_by=stratified_by, risk_set_scope=risk_set_scope, @@ -1020,8 +1023,8 @@ 
def create_adjusted_data( .with_columns( [ pl.col("reals_labels").str.replace(r"_est$", "").cast(reals_enum_dtype), - pl.col("censoring_assumption").cast(censoring_assumption_enum), - pl.col("competing_assumption").cast(competing_assumption_enum), + pl.col("censoring_heuristic").cast(censoring_heuristic_enum), + pl.col("competing_heuristic").cast(competing_heuristic_enum), ] ) ) @@ -1041,8 +1044,8 @@ def cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data): on=[ "strata", "fixed_time_horizon", - "censoring_assumption", - "competing_assumption", + "censoring_heuristic", + "competing_heuristic", "reals_labels", "reference_group", "chosen_cutoff", @@ -1091,13 +1094,13 @@ def cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data): .when( (pl.col("prediction_label") == pl.lit("predicted_negatives")) & (pl.col("reals_labels") == pl.lit("real_competing")) - & (pl.col("competing_assumption") == pl.lit("adjusted_as_negative")) + & (pl.col("competing_heuristic") == pl.lit("adjusted_as_negative")) ) .then(pl.lit("true_negatives")) .when( (pl.col("prediction_label") == pl.lit("predicted_positives")) & (pl.col("reals_labels") == pl.lit("real_competing")) - & (pl.col("competing_assumption") == pl.lit("adjusted_as_negative")) + & (pl.col("competing_heuristic") == pl.lit("adjusted_as_negative")) ) .then(pl.lit("false_positives")) .otherwise(pl.lit("excluded")) # or pl.lit(None) if you prefer nulls @@ -1389,8 +1392,8 @@ def _calculate_cumulative_aj_data(aj_data: pl.DataFrame) -> pl.DataFrame: [ "reference_group", "fixed_time_horizon", - "censoring_assumption", - "competing_assumption", + "censoring_heuristic", + "competing_heuristic", "stratified_by", "chosen_cutoff", "classification_outcome", From 1efd0ec80d67cc3413c1894a33033cdf50fa63fa Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 2 Nov 2025 14:07:33 +0200 Subject: [PATCH 44/51] fix: close #165 --- .../helpers/sandbox_observable_helpers.py | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 487bebc..730712f 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -581,19 +581,13 @@ def extract_aj_estimate_by_cutoffs( # n = data_to_adjust.height counts_per_strata = ( - data_to_adjust.group_by(["strata", "stratified_by", "upper_bound"]) + data_to_adjust.group_by( + ["strata", "stratified_by", "upper_bound", "lower_bound"] + ) .len(name="strata_count") .with_columns(pl.col("strata_count").cast(pl.Float64)) ) - print("counts per strata") - print(counts_per_strata) - - print("data_to_adjust") - print(data_to_adjust) - - # TODO: iterate over predicted-positives / negatives - aj_estimates_predicted_positives = pl.DataFrame() aj_estimates_predicted_negatives = pl.DataFrame() @@ -608,12 +602,12 @@ def extract_aj_estimate_by_cutoffs( ) elif stratification_criteria == "ppcr": - mask_predicted_positives = (pl.col("upper_bound") <= chosen_cutoff) & ( - pl.col("stratified_by") == "ppcr" - ) - mask_predicted_negatives = (pl.col("upper_bound") > chosen_cutoff) & ( - pl.col("stratified_by") == "ppcr" - ) + mask_predicted_positives = ( + pl.col("lower_bound") > 1 - chosen_cutoff + ) & (pl.col("stratified_by") == "ppcr") + mask_predicted_negatives = ( + pl.col("lower_bound") <= 1 - chosen_cutoff + ) & (pl.col("stratified_by") == "ppcr") predicted_positives = data_to_adjust.filter(mask_predicted_positives) predicted_negatives 
= data_to_adjust.filter(mask_predicted_negatives) @@ -853,7 +847,10 @@ def create_list_data_to_adjust( .with_columns(pl.col("strata").cast(strata_enum_dtype)) .join( aj_data_combinations.select( - pl.col("strata"), pl.col("stratified_by"), pl.col("upper_bound") + pl.col("strata"), + pl.col("stratified_by"), + pl.col("upper_bound"), + pl.col("lower_bound"), ).unique(), how="left", on=["strata", "stratified_by"], @@ -982,7 +979,7 @@ def create_adjusted_data( for reference_group, df in list_data_to_adjust_polars.items(): input_df = df.select( - ["strata", "reals", "times", "upper_bound", "stratified_by"] + ["strata", "reals", "times", "upper_bound", "lower_bound", "stratified_by"] ) print("stratified_by", stratified_by) @@ -1060,7 +1057,7 @@ def cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data): & (pl.col("stratified_by") == "probability_threshold") ) | ( - (pl.col("chosen_cutoff") < pl.col("upper_bound")) + ((1 - pl.col("chosen_cutoff")) >= pl.col("mid_point")) & (pl.col("stratified_by") == "ppcr") ) ) From b36f395ca2fbcaa4bbe89fb6643b381451a02b79 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 2 Nov 2025 16:31:09 +0200 Subject: [PATCH 45/51] docs: close #97 --- docs/_brand.yml | 6 +++++- docs/fonts/Fraunces9pt-Light.woff2 | Bin 0 -> 27876 bytes 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 docs/fonts/Fraunces9pt-Light.woff2 diff --git a/docs/_brand.yml b/docs/_brand.yml index 5c057de..f9f51aa 100644 --- a/docs/_brand.yml +++ b/docs/_brand.yml @@ -17,6 +17,10 @@ color: typography: fonts: + - source: file + family: Fraunces9pt-Light + files: + - fonts/Fraunces9pt-Light.woff2 - family: Fraunces source: google weight: [400, 700] @@ -31,7 +35,7 @@ typography: weight: 400 line-height: 1.5 headings: - family: Fraunces + family: Fraunces9pt-Light weight: 700 style: normal line-height: 1.2 diff --git a/docs/fonts/Fraunces9pt-Light.woff2 b/docs/fonts/Fraunces9pt-Light.woff2 new file mode 100644 index 0000000000000000000000000000000000000000..6e75fb983ec48b1064c0788e8d72a944ab82b052 GIT binary patch literal 27876 zcmV)6K*+y$Pew8T0RR910Bqy{4gdfE0Q;-}0Bnc=0RR9100000000000000000000 z0000Rl4u-)egzP|7V&)tbA49eWYCgNT}4TPNd~32Yny^M~-V|NlQP*%-qB^A1q8TKl&40y1`Ea_VH+Efqf;ADOi)3L#)!nP(vQ-A zdHQd^o_AOE={xfgmJg}0jRGLW=vep&A0Gv*qvBAqBYtxgotqJ6I}jxd>bW(gYD_%H z6XA7)6zwcUF!tL!;^ghTpP6cUAFX#;K+jc^G2FU$*DuQ}@iAuVvRDX(mQ z7R87G)KB*PX=!yqQpYP{#bBZVJd>k0v&WxvWND}ycHtL~=E|g;oe5rOa|D|M2317i z>i$o?tx$lhTUptn3bzytPkf7(Zf>XSeQ3?~A`CtX2Szz80iC$}USPo~QV3$qO+1?Y z{lpCMkfFh zHw}ygJ%(X4#v?S&01Z;B8N+DZdGErz%trez+$g^wnaehu^s@NE4dR8UPSm^E>G;}- zK3Ejio#Z;bHWFK`hd`V7zozMw3*=Uv0tn0MzkrH5k&d$fI0~$i^HbsgYBczBe%ULq zWZ?YC_Zd{c83KSqq3IJJ(y73zuZZ&yV8Cn)2X@4Cm-gg{$W5Im;1a{RCMOS6I9o&e z+-+YtH(WZSRzelFu{j@H@u!yQZ@+$vj)b$&=xHKGF*PGp`CR zx-|@>s5;^G#cK~~B(8`a+PZ7B-lmdjZ6L#IabSx?S<2lXW?spM0U@SEncmwHNG>D? 
z?|vBZ)4yBx&;J3_n_#n;C9^c{pk{eRYfNBP&Z^d00?q1K(`>~pfRpP{uSDN(9r`pL zI)lk#qnwH>=3l7*egQ!tVG&U=aS5z)QI(|VWlOtE+_+Qc=^;(q*Qvj3J8>rHo8+%8#r3>2s=zA!s#0sTI=!K4qseTs+UyRe%kA;{)QT>j5q0<4hlGq0 z4;4)*Z2@IsA6h?1J8fEWOOe`^=#Z7Jk?JjOvcG{e_WLJ4nZL}{~9hh#JZyKq7Ejcp0H zj_oi}g=~-ATY9ko&ZL2+UVDx^mOqlB%XhbEkN!IMD>Y1*O~(;ZB%tL=BXU@?*SN;0 z<0zfKbyaxR|JtEjz+c!|%KEq?$=>^EY3Y__d_ZeJSLFtdQRVBgosSZ>(1inzNKj5g zc)l)y>eyp71=o(b#d1su(WHf9TOT1*S(71Wn4Ny8@d0(MMhr{*LfB*`zM0=p$V-TF zHiKV@Nsd31BWr(wK8QRg`czNL6|0o5sB<5;kCni$HOY3F#CRZEYpdT;xu?tcabU7B?$FkSMBvc)o9#zN_vBuiLq0 zo2o3hlS9BlKD$rvtHq~Zj#GOpF}n_F<6_%0LA6V+@d;~Of7xr7bMCE+1*sHlc;KO3 z%gH@%F|T^_Gx4RLNYoA?%rS%>icEFTcFid_pi1jUcQ79_G?40Uo4>8)+{|UsMahH1 z)t8Ejb!g*vr%jhh# z)IbOX5+3^SG>ej8Ac=W(rCsZQ5*uC~*+XyaiR0WAAfn6;Jo~wL4A3>j0#`ZWl~gF{ z-aZUjU@ICbox<_pXy95i95D+dBNV|}-SyNVH&`j1#D84Mmm4x|u_1berCB;Z?s=fvzYC z^nF5GdWpjb1cCqu$gI{nZN*C0ptCIh%yx$?_?8gH`4lc(ADG%Lv&&fgw)^#~M7`y_ zn2UPBxh3su7klmgjzwbp{|&t~T+3HO@%pR(?NQ?SzYD$72i}K!%WyGg++01nz|-3x z(Xf3GA;Jq0fFGbErJ#^NA)o~T0Rj+o$Er*nDoH#+#>_I(N0B1E6bS@!3Iw)fWF#a+ zwxktmQPT*m6R+vHZ11$WR=S6A{8&1Sc$xsK`&eX88qkHl?-TFcuh5IGB=spZyeTg{9!kfASVX z!6|r@%_5*SqZH}+Jp5u6eH0fMEckaqKme$gM6feA`uvwk`!#HV3eXY|+z7#7bU26x TwUI~|oZS7@X{o+JR{#J2`%>6^ literal 0 HcmV?d00001 From 1f9154b4ac215af107338349a5d35963e0d0a3d1 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Sun, 2 Nov 2025 22:14:23 +0200 Subject: [PATCH 46/51] fix: run ruff --- docs/dcurves_example.py | 724 ++++++++++++++++++++++++++++++++ docs/small_data_example.py | 350 +++++++++++++++ docs/walkthrough_aj_estimate.py | 444 ++++++++++++++++---- tests/test_rtichoke.py | 179 -------- 4 files changed, 1427 insertions(+), 270 deletions(-) create mode 100644 docs/dcurves_example.py create mode 100644 docs/small_data_example.py diff --git a/docs/dcurves_example.py b/docs/dcurves_example.py new file mode 100644 index 0000000..e5c3a43 --- /dev/null +++ b/docs/dcurves_example.py @@ -0,0 +1,724 @@ +import marimo + +__generated_with = "0.14.7" +app = marimo.App(width="columns") + + +@app.cell(column=0) +def _(): + from dcurves import dca + import pandas as pd + import numpy as np + import lifelines + import plotly.express as px + import polars as pl + from rtichoke.helpers.sandbox_observable_helpers import ( + create_list_data_to_adjust, + create_adjusted_data, + create_aj_data_combinations, + cast_and_join_adjusted_data, + create_breaks_values, + ) + + df_time_to_cancer_dx = pd.read_csv( + "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" + ) + return ( + cast_and_join_adjusted_data, + create_adjusted_data, + create_aj_data_combinations, + create_breaks_values, + create_list_data_to_adjust, + dca, + df_time_to_cancer_dx, + lifelines, + np, + pl, + px, + ) + + +@app.cell +def _(df_time_to_cancer_dx, lifelines): + cph = lifelines.CoxPHFitter() + cph.fit( + df=df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula="age + famhistory + marker", + ) + + cph_pred_vals = cph.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + + df_time_to_cancer_dx["pr_failure18"] = [1 - val for val in cph_pred_vals.iloc[0, :]] + return + + +@app.cell +def _(df_time_to_cancer_dx): + (df_time_to_cancer_dx["pr_failure18"] >= 0.5).sum() + return + + +@app.cell +def _(df_time_to_cancer_dx): + df_time_to_cancer_dx + return + + +@app.cell +def _(): + outcome = "cancer" + time_to_outcome_col = "ttcancer" + prevalence = None + time = 1.5 + return outcome, prevalence, time, time_to_outcome_col + + +@app.cell +def 
_(df_time_to_cancer_dx): + (df_time_to_cancer_dx["pr_failure18"] >= 0.5).sum() + return + + +@app.cell +def _(df_time_to_cancer_dx, outcome, time_to_outcome_col): + from dcurves.risks import _create_risks_df + + risks_df = _create_risks_df( + data=df_time_to_cancer_dx, + outcome=outcome, + time=1.5, + time_to_outcome_col=time_to_outcome_col, + ) + + risks_df + return (risks_df,) + + +@app.cell +def _(risks_df): + risks_df["pr_failure18"].hist() + return + + +@app.cell +def _(risks_df): + (risks_df["pr_failure18"] >= 0.5).sum() + return + + +@app.cell +def _(df_time_to_cancer_dx, risks_df): + import plotly.graph_objects as go + + x = risks_df["pr_failure18"] + y = df_time_to_cancer_dx["pr_failure18"] + cancer = risks_df["cancer"] + + fig_test = go.Figure() + + # Cancer = 0 (circle) + fig_test.add_trace( + go.Scatter( + x=x[cancer == 0], + y=y[cancer == 0], + mode="markers", + marker=dict(symbol="circle", size=8, opacity=0.6), + name="Cancer = 0", + ) + ) + + # Cancer = 1 (square) + fig_test.add_trace( + go.Scatter( + x=x[cancer == 1], + y=y[cancer == 1], + mode="markers", + marker=dict(symbol="square", size=8, opacity=0.6), + name="Cancer = 1", + ) + ) + + fig_test.update_layout( + title="Comparison of pr_failure18 across DataFrames", + xaxis_title="risks_df['pr_failure18']", + yaxis_title="df_time_to_cancer_dx['pr_failure18']", + template="plotly_white", + ) + + fig_test.show() + return + + +@app.cell +def _(risks_df): + from dcurves.risks import _rectify_model_risk_boundaries + + modelnames = ["pr_failure18"] + + rectified_risks_df = _rectify_model_risk_boundaries( + risks_df=risks_df, modelnames=modelnames + ) + + rectified_risks_df + return modelnames, rectified_risks_df + + +@app.cell +def _(outcome, prevalence, rectified_risks_df, time, time_to_outcome_col): + from dcurves.prevalence import _calc_prevalence + + prevalence_value = _calc_prevalence( + risks_df=rectified_risks_df, + outcome=outcome, + prevalence=prevalence, + time=time, + time_to_outcome_col=time_to_outcome_col, + ) + + prevalence_value + return (prevalence_value,) + + +@app.cell +def _(modelnames, np, prevalence_value, rectified_risks_df): + from dcurves.dca import _create_initial_df + + thresholds = np.arange(0, 1, 0.5) + + initial_df = _create_initial_df( + thresholds=thresholds, + modelnames=modelnames, + input_df_rownum=len(rectified_risks_df.index), + prevalence_value=prevalence_value, + ) + + initial_df + return initial_df, thresholds + + +@app.cell +def _(outcome, risks_df, thresholds, time, time_to_outcome_col): + from dcurves.dca import _calc_risk_rate_among_test_pos + + risk_rate_among_test_pos = _calc_risk_rate_among_test_pos( + risks_df=risks_df, + outcome=outcome, + model="pr_failure18", + thresholds=thresholds, + time_to_outcome_col=time_to_outcome_col, + time=time, + ) + + risk_rate_among_test_pos + return + + +@app.cell +def _( + outcome, + prevalence_value, + risks_df, + thresholds, + time, + time_to_outcome_col, +): + from dcurves.dca import _calc_test_pos_rate, _calc_tp_rate + + test_pos_rate = _calc_test_pos_rate( + risks_df=risks_df, thresholds=thresholds, model="pr_failure18" + ) + + print("test positive rate:", test_pos_rate) + + tp_rate = _calc_tp_rate( + risks_df=risks_df, + thresholds=thresholds, + model="pr_failure18", + outcome=outcome, + time=time, + time_to_outcome_col=time_to_outcome_col, + test_pos_rate=test_pos_rate, + prevalence_value=prevalence_value, + ) + + print("true positive rate:", tp_rate) + return + + +@app.cell +def _( + initial_df, + outcome, + prevalence_value, + 
rectified_risks_df, + thresholds, + time, + time_to_outcome_col, +): + from dcurves.dca import _calc_initial_stats + + initial_stats_df = _calc_initial_stats( + initial_df=initial_df, + risks_df=rectified_risks_df, + thresholds=thresholds, + outcome=outcome, + prevalence_value=prevalence_value, + time=time, + time_to_outcome_col=time_to_outcome_col, + ) + + initial_stats_df + return + + +@app.cell +def _(rectified_risks_df): + rectified_risks_df + return + + +@app.cell +def _(df_time_to_cancer_dx): + probs_dict = {"full": df_time_to_cancer_dx["pr_failure18"]} + + reals_mapping = { + "censor": 0, + "diagnosed with cancer": 1, + "dead other causes": 2, + } + + reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping) + + times_dict = df_time_to_cancer_dx["ttcancer"] + + df_time_to_cancer_dx["cancer_enum"] = reals_dict + + df_time_to_cancer_dx + return probs_dict, reals_dict, times_dict + + +@app.cell +def _(dca, df_time_to_cancer_dx, np): + stdca_coxph_results_composite = dca( + data=df_time_to_cancer_dx, + outcome="cancer_enum", + modelnames=["pr_failure18"], + # thresholds=np.arange(0, 0.51, 0.1), + # thresholds=np.arange(0.5, 1, 0.1), + thresholds=np.arange(0, 1, 0.5), + time=1.5, + time_to_outcome_col="ttcancer", + ) + + stdca_coxph_results_composite + return + + +@app.cell +def _(create_aj_data_combinations, create_breaks_values, probs_dict): + stratified_by = ["probability_threshold"] + # stratified_by = ["probability_threshold"] + # stratified_by = ["ppcr"] + # stratified_by = ["probability_threshold"] + + by = 0.01 + breaks = create_breaks_values(None, "probability_threshold", by) + + heuristics_sets = [ + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_censored", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_composite", + }, + ] + + aj_data_combinations = create_aj_data_combinations( + list(probs_dict.keys()), + heuristics_sets, + fixed_time_horizons=[1.5], + stratified_by=stratified_by, + by=by, + breaks=breaks, + ) + + aj_data_combinations + return aj_data_combinations, breaks, by, heuristics_sets, stratified_by + + +@app.cell +def _( + aj_data_combinations, + by, + create_list_data_to_adjust, + probs_dict, + reals_dict, + stratified_by, + times_dict, +): + list_data_to_adjust_polars = create_list_data_to_adjust( + aj_data_combinations, + probs_dict, + reals_dict, + times_dict, + stratified_by=stratified_by, + by=by, + ) + list_data_to_adjust_polars + return (list_data_to_adjust_polars,) + + +@app.cell +def _( + breaks, + create_adjusted_data, + heuristics_sets, + list_data_to_adjust_polars, + stratified_by, +): + adjusted_data = create_adjusted_data( + list_data_to_adjust_polars, + heuristics_sets=heuristics_sets, + fixed_time_horizons=[1.5], + breaks=breaks, + stratified_by=stratified_by, + # risk_set_scope=["within_stratum"]#, # , , + # risk_set_scope=["pooled_by_cutoff"], # , # , , + risk_set_scope=["pooled_by_cutoff", "within_stratum"], # , , + ) + + adjusted_data + return (adjusted_data,) + + +@app.cell +def _(adjusted_data, aj_data_combinations, cast_and_join_adjusted_data): + final_adjusted_data_polars = cast_and_join_adjusted_data( + aj_data_combinations, adjusted_data + ) + + final_adjusted_data_polars + return (final_adjusted_data_polars,) + + +@app.cell +def _(final_adjusted_data_polars): + final_adjusted_data_polars + return + + +@app.cell +def _(final_adjusted_data_polars): + from 
rtichoke.helpers.sandbox_observable_helpers import ( + _calculate_cumulative_aj_data, + ) + + cumulative_aj_data = _calculate_cumulative_aj_data(final_adjusted_data_polars) + + cumulative_aj_data + return (cumulative_aj_data,) + + +@app.cell +def _(): + return + + +@app.cell +def _(cumulative_aj_data): + from rtichoke.helpers.sandbox_observable_helpers import ( + _turn_cumulative_aj_to_performance_data, + ) + + performance_data = _turn_cumulative_aj_to_performance_data(cumulative_aj_data) + + performance_data + return (performance_data,) + + +@app.cell +def _(performance_data): + from rtichoke.discrimination.gains import plot_gains_curve + + plot_gains_curve(performance_data) + return + + +@app.cell +def _(performance_data, pl): + performance_data_with_nb_calculated = ( + performance_data.with_columns( + ( + (pl.col("true_positives") / pl.col("n")) + - (pl.col("false_positives") / pl.col("n")) + * pl.col("chosen_cutoff") + / (1 - pl.col("chosen_cutoff")) + ).alias("net_benefit") + ) + .filter( + pl.col("censoring_heuristic") == "adjusted", + pl.col("competing_heuristic") == "adjusted_as_censored", + ) + .sort(pl.col("chosen_cutoff")) + ) + + performance_data_with_nb_calculated + return + + +@app.cell +def _(dca, df_time_to_cancer_dx, np): + stdca_coxph_results = dca( + data=df_time_to_cancer_dx, + outcome="cancer", + modelnames=["pr_failure18"], + thresholds=np.arange(0, 0.51, 0.01), + time=1.5, + time_to_outcome_col="ttcancer", + ) + + stdca_coxph_results + return (stdca_coxph_results,) + + +@app.cell +def _(px, stdca_coxph_results): + # Create plotly express figure + fig = px.line( + stdca_coxph_results, + x="threshold", + y="net_benefit", + color="model", + markers=True, + title="Decision Curve Analysis", + labels={ + "threshold": "Threshold Probability", + "net_benefit": "Net Benefit", + }, + ) + + # Update layout to match rtichoke look + fig.update_layout( + template="simple_white", + title_font_size=20, + title_x=0.5, + legend_title_text="", + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + margin=dict(l=40, r=40, t=60, b=40), + xaxis=dict(range=[-0.01, 0.23], showgrid=False, tickmode="linear", dtick=0.05), + yaxis=dict( + range=[-0.01, 0.23], + showgrid=False, + zeroline=True, + zerolinewidth=1, + zerolinecolor="gray", + ), + ) + + fig.show() + return + + +@app.cell(column=1, hide_code=True) +def _(): + import marimo as mo + + fill_color_radio = mo.ui.radio( + options=["classification_outcome", "reals_labels"], + value="classification_outcome", + label="Fill Colors", + ) + + fill_color_radio + return fill_color_radio, mo + + +@app.cell(hide_code=True) +def _(mo): + risk_set_scope_radio = mo.ui.radio( + options=["pooled_by_cutoff", "within_stratum"], + value="pooled_by_cutoff", + label="Risk Set Scope", + ) + + risk_set_scope_radio + return (risk_set_scope_radio,) + + +@app.cell(hide_code=True) +def _(mo): + stratified_by_radio = mo.ui.radio( + options=["probability_threshold"], + value="probability_threshold", + label="Stratified By", + ) + + stratified_by_radio + return (stratified_by_radio,) + + +@app.cell(hide_code=True) +def _(mo): + censoring_heuristic_radio = mo.ui.radio( + options=["adjusted"], + value="adjusted", + label="Censoring Heuristic", + ) + + censoring_heuristic_radio + return (censoring_heuristic_radio,) + + +@app.cell(hide_code=True) +def _(mo): + competing_heuristic_radio = mo.ui.radio( + options=[ + "adjusted_as_negative", + "adjusted_as_censored", + "adjusted_as_composite", + ], + value="adjusted_as_negative", + label="Competing 
Heuristic", + ) + + competing_heuristic_radio + return (competing_heuristic_radio,) + + +@app.cell(hide_code=True) +def _(by, mo): + slider_cutoff = mo.ui.slider(start=0, stop=1, step=by, label="Cutoff") + slider_cutoff + return (slider_cutoff,) + + +@app.cell(column=2, hide_code=True) +def _( + by, + censoring_heuristic_radio, + competing_heuristic_radio, + fill_color_radio, + final_adjusted_data_polars, + pl, + px, + risk_set_scope_radio, + slider_cutoff, + stratified_by_radio, +): + chosen_cutoff_data = final_adjusted_data_polars.filter( + pl.col("censoring_heuristic") == censoring_heuristic_radio.value, + pl.col("competing_heuristic") == competing_heuristic_radio.value, + pl.col("chosen_cutoff") == slider_cutoff.value, + pl.col("risk_set_scope") == risk_set_scope_radio.value, + pl.col("stratified_by") == stratified_by_radio.value, + ).sort(pl.col("strata")) + + color_discrete_map = { + "real_positives": "#4C5454", + "real_competing": "#C880B7", + "real_negatives": "#E0E0E0", + "real_censored": "#E3F09B", + "true_negatives": "#009e73", + "true_positives": "#009e73", + "false_negatives": "#FAC8CD", + "false_positives": "#FAC8CD", + } + + fig_new = px.bar( + chosen_cutoff_data, + x="mid_point", + y="reals_estimate", + color=fill_color_radio.value, + color_discrete_map=color_discrete_map, + # color="reals_labels", + # color_discrete_map=color_discrete_map, + category_orders={ + "reals_labels": list(color_discrete_map.keys()) + }, # fixes domain order + hover_data=chosen_cutoff_data.columns, # like tip: true + ) + + fig_new.update_layout( + barmode="stack", # stacked bars (use "group" for side-by-side) + plot_bgcolor="rgba(0,0,0,0)", # transparent background + paper_bgcolor="rgba(0,0,0,0)", + legend=dict(title=""), + ) + + if stratified_by_radio.value == "probability_threshold": + vertical_line = slider_cutoff.value + else: + vertical_line = 1 - slider_cutoff.value + by / 2 + + fig_new.add_vline( + x=vertical_line, + line=dict(color="red", width=2, dash="dash"), + annotation_text=f"Cutoff: {slider_cutoff.value}", + annotation_position="top right", + ) + + fig_new + return + + +@app.cell(hide_code=True) +def _( + censoring_heuristic_radio, + competing_heuristic_radio, + performance_data, + pl, + px, + stratified_by_radio, +): + chosen_performance_data = performance_data.filter( + pl.col("censoring_heuristic") == censoring_heuristic_radio.value, + pl.col("competing_heuristic") == competing_heuristic_radio.value, + pl.col("stratified_by") == stratified_by_radio.value, + ).sort(pl.col("chosen_cutoff")) + + # Create plotly express figure + fig_rtichoke = px.line( + chosen_performance_data, + x="chosen_cutoff", + y="net_benefit", + markers=True, + title="Decision Curve Analysis", + labels={ + "threshold": "Threshold Probability", + "net_benefit": "Net Benefit", + }, + ) + + # Update layout to match rtichoke look + fig_rtichoke.update_layout( + template="simple_white", + title_font_size=20, + title_x=0.5, + legend_title_text="", + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + margin=dict(l=40, r=40, t=60, b=40), + xaxis=dict(showgrid=False, tickmode="linear", dtick=0.05), + yaxis=dict( + showgrid=False, + zeroline=True, + zerolinewidth=1, + zerolinecolor="gray", + ), + ) + + fig_rtichoke.show() + return + + +if __name__ == "__main__": + app.run() diff --git a/docs/small_data_example.py b/docs/small_data_example.py new file mode 100644 index 0000000..50573a1 --- /dev/null +++ b/docs/small_data_example.py @@ -0,0 +1,350 @@ +import marimo + +__generated_with = 
"0.14.7" +app = marimo.App(width="columns") + + +@app.cell(column=0) +def _(): + import numpy as np + import polars as pl + import plotly.express as px + + from rtichoke.helpers.sandbox_observable_helpers import ( + create_breaks_values, + create_list_data_to_adjust, + create_adjusted_data, + create_aj_data_combinations, + cast_and_join_adjusted_data, + ) + + return ( + cast_and_join_adjusted_data, + create_adjusted_data, + create_aj_data_combinations, + create_breaks_values, + create_list_data_to_adjust, + np, + pl, + px, + ) + + +@app.cell +def _(np, pl): + probs_test = { + "small_data_set": np.array( + [0.9, 0.85, 0.95, 0.88, 0.6, 0.7, 0.51, 0.2, 0.1, 0.33] + ) + } + reals_dict_test = [1, 1, 1, 1, 0, 2, 1, 2, 0, 1] + times_dict_test = [24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3] + + data_to_adjust = pl.DataFrame( + { + "strata": np.repeat("small_data_test", 10), + # "probs": probs_test["test_data"], + "reals": reals_dict_test, + "times": times_dict_test, + } + ) + + data_to_adjust + return probs_test, reals_dict_test, times_dict_test + + +@app.cell +def _(create_aj_data_combinations, create_breaks_values): + by = 0.2 + breaks = create_breaks_values(None, "probability_threshold", by) + stratified_by = ["probability_threshold", "ppcr"] + # stratified_by = ["probability_threshold"] + + # stratified_by = ["ppcr"] + + heuristics_sets = [ + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_composite", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_censored", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_censored", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_composite", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "excluded", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "excluded", + }, + ] + + aj_data_combinations = create_aj_data_combinations( + ["small_data_set"], + heuristics_sets=heuristics_sets, + fixed_time_horizons=[10.0, 20.0, 30.0, 40.0, 50.0], + stratified_by=stratified_by, + by=by, + breaks=breaks, + risk_set_scope=["pooled_by_cutoff", "within_stratum"], + ) + + # aj_data_combinations + + aj_data_combinations + return aj_data_combinations, breaks, by, heuristics_sets, stratified_by + + +@app.cell +def _( + aj_data_combinations, + by, + create_list_data_to_adjust, + probs_test, + reals_dict_test, + stratified_by, + times_dict_test, +): + list_data_to_adjust_polars_probability_threshold = create_list_data_to_adjust( + aj_data_combinations, + probs_test, + reals_dict_test, + times_dict_test, + stratified_by=stratified_by, + by=by, + ) + + list_data_to_adjust_polars_probability_threshold + return (list_data_to_adjust_polars_probability_threshold,) + + +@app.cell +def _( + breaks, + create_adjusted_data, + heuristics_sets, + list_data_to_adjust_polars_probability_threshold, + stratified_by, +): + adjusted_data = create_adjusted_data( + list_data_to_adjust_polars_probability_threshold, + heuristics_sets=heuristics_sets, + fixed_time_horizons=[10.0, 20.0, 30.0, 40.0, 50.0], + breaks=breaks, + stratified_by=stratified_by, + # risk_set_scope = ["pooled_by_cutoff"] + risk_set_scope=["pooled_by_cutoff", "within_stratum"], + ) + + adjusted_data + return (adjusted_data,) + + +@app.cell +def 
_(adjusted_data, aj_data_combinations, cast_and_join_adjusted_data):
+    final_adjusted_data_polars = cast_and_join_adjusted_data(
+        aj_data_combinations, adjusted_data
+    )
+
+    final_adjusted_data_polars
+    return (final_adjusted_data_polars,)
+
+
+@app.cell
+def _(final_adjusted_data_polars):
+    from rtichoke.helpers.sandbox_observable_helpers import (
+        _calculate_cumulative_aj_data,
+    )
+
+    cumulative_aj_data = _calculate_cumulative_aj_data(final_adjusted_data_polars)
+
+    cumulative_aj_data
+    return (cumulative_aj_data,)
+
+
+@app.cell
+def _(cumulative_aj_data):
+    from rtichoke.helpers.sandbox_observable_helpers import (
+        _turn_cumulative_aj_to_performance_data,
+    )
+
+    performance_data = _turn_cumulative_aj_to_performance_data(cumulative_aj_data)
+
+    performance_data
+    return
+
+
+@app.cell(column=1, hide_code=True)
+def _(mo):
+    fill_color_radio = mo.ui.radio(
+        options=["classification_outcome", "reals_labels"],
+        value="classification_outcome",
+        label="Fill Colors",
+    )
+
+    fill_color_radio
+    return (fill_color_radio,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    risk_set_scope_radio = mo.ui.radio(
+        options=["pooled_by_cutoff", "within_stratum"],
+        value="pooled_by_cutoff",
+        label="Risk Set Scope",
+    )
+
+    risk_set_scope_radio
+    return (risk_set_scope_radio,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    stratified_by_radio = mo.ui.radio(
+        options=["probability_threshold", "ppcr"],
+        value="probability_threshold",
+        label="Stratified By",
+    )
+
+    stratified_by_radio
+    return (stratified_by_radio,)
+
+
+@app.cell(hide_code=True)
+def _(by):
+    import marimo as mo
+
+    slider_cutoff = mo.ui.slider(start=0, stop=1, step=by, label="Cutoff")
+    slider_cutoff
+    return mo, slider_cutoff
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    fixed_time_horizons_slider = mo.ui.slider(
+        start=10, stop=50, step=10, label="Fixed Time Horizon"
+    )
+    fixed_time_horizons_slider
+    return (fixed_time_horizons_slider,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    censoring_heuristic_radio = mo.ui.radio(
+        options=["adjusted", "excluded"],
+        value="adjusted",
+        label="Censoring Heuristic",
+    )
+
+    censoring_heuristic_radio
+    return (censoring_heuristic_radio,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    competing_heuristic_radio = mo.ui.radio(
+        options=[
+            "adjusted_as_negative",
+            "adjusted_as_censored",
+            "adjusted_as_composite",
+            "excluded",
+        ],
+        value="adjusted_as_negative",
+        label="Competing Heuristic",
+    )
+
+    competing_heuristic_radio
+    return (competing_heuristic_radio,)
+
+
+@app.cell(column=2, hide_code=True)
+def _(
+    by,
+    censoring_heuristic_radio,
+    competing_heuristic_radio,
+    fill_color_radio,
+    final_adjusted_data_polars,
+    fixed_time_horizons_slider,
+    pl,
+    px,
+    risk_set_scope_radio,
+    slider_cutoff,
+    stratified_by_radio,
+):
+    chosen_cutoff_data = final_adjusted_data_polars.filter(
+        pl.col("censoring_heuristic") == censoring_heuristic_radio.value,
+        pl.col("competing_heuristic") == competing_heuristic_radio.value,
+        pl.col("chosen_cutoff") == slider_cutoff.value,
+        pl.col("fixed_time_horizon") == fixed_time_horizons_slider.value,
+        pl.col("risk_set_scope") == risk_set_scope_radio.value,
+        pl.col("stratified_by") == stratified_by_radio.value,
+    ).sort(pl.col("strata"))
+
+    color_discrete_map = {
+        "real_positives": "#4C5454",
+        "real_competing": "#C880B7",
+        "real_negatives": "#E0E0E0",
+        "real_censored": "#E3F09B",
+        "true_negatives": "#009e73",
+        "true_positives": "#009e73",
+        "false_negatives": "#FAC8CD",
+        "false_positives": "#FAC8CD",
+    }
+
+    fig_new = px.bar(
chosen_cutoff_data, + x="mid_point", + y="reals_estimate", + color=fill_color_radio.value, + color_discrete_map=color_discrete_map, + # color="reals_labels", + # color_discrete_map=color_discrete_map, + category_orders={ + "reals_labels": list(color_discrete_map.keys()) + }, # fixes domain order + hover_data=chosen_cutoff_data.columns, # like tip: true + ) + + fig_new.update_layout( + barmode="stack", # stacked bars (use "group" for side-by-side) + plot_bgcolor="rgba(0,0,0,0)", # transparent background + paper_bgcolor="rgba(0,0,0,0)", + legend=dict(title=""), + ) + + if stratified_by_radio.value == "probability_threshold": + vertical_line = slider_cutoff.value + else: + vertical_line = 1 - slider_cutoff.value + by / 2 + + fig_new.add_vline( + x=vertical_line, + line=dict(color="red", width=2, dash="dash"), + annotation_text=f"Cutoff: {slider_cutoff.value}", + annotation_position="top right", + ) + + fig_new + return + + +if __name__ == "__main__": + app.run() diff --git a/docs/walkthrough_aj_estimate.py b/docs/walkthrough_aj_estimate.py index a950b4d..dc70c32 100644 --- a/docs/walkthrough_aj_estimate.py +++ b/docs/walkthrough_aj_estimate.py @@ -1,50 +1,104 @@ import marimo __generated_with = "0.14.7" -app = marimo.App(width="medium") +app = marimo.App(width="columns") -@app.cell +@app.cell(column=0) def _(): - return + import polars as pl + import pandas as pd + import plotly.express as px + from lifelines import CoxPHFitter, WeibullAFTFitter + + df_time_to_cancer_dx = pd.read_csv( + "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" + ) + return CoxPHFitter, WeibullAFTFitter, df_time_to_cancer_dx, pl, px @app.cell -def _(mo): - mo.md(r"""## Import data and Packages""") - return +def _(CoxPHFitter, WeibullAFTFitter, df_time_to_cancer_dx): + cph = CoxPHFitter() + thin_model = CoxPHFitter() + aft_model = WeibullAFTFitter() + + cox_formula = "age + famhistory + marker" + thin_formula = "age + marker" + aft_formula = "age + marker" + + cph.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=cox_formula, + ) + thin_model.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=thin_formula, + ) -@app.cell -def _(): - from lifelines import AalenJohansenFitter - import numpy as np - from itertools import product - import itertools - from lifelines import CoxPHFitter - from lifelines import WeibullAFTFitter - import polars as pl + aft_model.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=aft_formula, + ) - print("Polars version:", pl.__version__) + cph_pred_vals = ( + ( + 1 + - cph.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + ) + .iloc[0, :] + .values + ) - import pandas as pd - import pickle + thin_pred_vals = ( + ( + 1 + - thin_model.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + ) + .iloc[0, :] + .values + ) + + aft_pred_vals = ( + ( + 1 + - aft_model.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + ) + .iloc[0, :] + .values + ) - with open( - r"C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl", "rb" - ) as file: - probs_dict = pickle.load(file) + print(type(cph_pred_vals)) - with open( - r"C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl", "rb" - ) as file: - reals_dict = pickle.load(file) + probs_dict = { + "full": cph_pred_vals, + "thin": thin_pred_vals, + "aft": 
aft_pred_vals, + } - with open( - r"C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl", "rb" - ) as file: - times_dict = pickle.load(file) - return pl, probs_dict, reals_dict, times_dict + reals_mapping = { + "censor": 0, + "diagnosed with cancer": 1, + "dead other causes": 2, + } + + reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping) + + times_dict = df_time_to_cancer_dx["ttcancer"] + return probs_dict, reals_dict, times_dict @app.cell @@ -57,21 +111,66 @@ def _(): @app.cell def _(probs_dict): from rtichoke.helpers.sandbox_observable_helpers import ( - create_aj_data_combinations_polars, - extract_aj_estimate_for_strata, - create_aj_data_polars, + create_aj_data_combinations, + create_breaks_values, ) - fixed_time_horizons = [1.0, 3.0, 5.0] stratified_by = ["probability_threshold", "ppcr"] + + # stratified_by = ["probability_threshold"] + # stratified_by = ["ppcr"] + by = 0.1 + breaks = create_breaks_values(None, "probability_threshold", by) + # fixed_time_horizons = [1.0, 1.5, 3.0, 5.0] + fixed_time_horizons = [1.0, 3.0, 5.0] + stratified_by = stratified_by + + heuristics_sets = [ + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_negative", + }, + # { + # "censoring_assumption": "adjusted", + # "competing_assumption": "adjusted_as_censored", + # }, + # { + # "censoring_assumption": "excluded", + # "competing_assumption": "adjusted_as_censored", + # }, + # {"censoring_assumption": "adjusted", "competing_assumption": "excluded"}, + # {"censoring_assumption": "excluded", "competing_assumption": "excluded"}, + ] + + aj_data_combinations = create_aj_data_combinations( + list(probs_dict.keys()), + heuristics_sets, + fixed_time_horizons, + stratified_by, + by, + breaks, + ) - aj_data_combinations = create_aj_data_combinations_polars( - list(probs_dict.keys()), fixed_time_horizons, stratified_by, by + aj_data_combinations + return ( + aj_data_combinations, + breaks, + by, + fixed_time_horizons, + heuristics_sets, + stratified_by, ) - print(aj_data_combinations["strata"]) - return by, create_aj_data_polars, fixed_time_horizons, stratified_by + +@app.cell +def _(aj_data_combinations): + aj_data_combinations + return @app.cell @@ -81,98 +180,261 @@ def _(mo): @app.cell -def _(by, probs_dict, reals_dict, stratified_by, times_dict): +def _( + aj_data_combinations, + by, + probs_dict, + reals_dict, + stratified_by, + times_dict, +): from rtichoke.helpers.sandbox_observable_helpers import ( - create_list_data_to_adjust_polars, + create_list_data_to_adjust, + create_adjusted_data, + cast_and_join_adjusted_data, ) - list_data_to_adjust_polars = create_list_data_to_adjust_polars( - probs_dict, reals_dict, times_dict, stratified_by=stratified_by, by=by + list_data_to_adjust_polars = create_list_data_to_adjust( + aj_data_combinations, + probs_dict, + reals_dict, + times_dict, + stratified_by=stratified_by, + by=by, ) list_data_to_adjust_polars - return (list_data_to_adjust_polars,) + return ( + cast_and_join_adjusted_data, + create_adjusted_data, + list_data_to_adjust_polars, + ) @app.cell -def _(mo): - mo.md(r"""## create adjusted data list polars""") - return +def _( + breaks, + create_adjusted_data, + fixed_time_horizons, + heuristics_sets, + list_data_to_adjust_polars, + stratified_by, +): + adjusted_data = create_adjusted_data( + list_data_to_adjust_polars, + heuristics_sets=heuristics_sets, + fixed_time_horizons=fixed_time_horizons, + breaks=breaks, + 
stratified_by=stratified_by, + # stratified_by=["probability_threshold", "ppcr"] + risk_set_scope=["pooled_by_cutoff", "within_stratum"], + ) + + adjusted_data + return (adjusted_data,) @app.cell -def _(list_data_to_adjust_polars, pl): - example_polars_df = list_data_to_adjust_polars.get("full").select( - pl.col("strata"), pl.col("reals"), pl.col("times") +def _(adjusted_data, aj_data_combinations, cast_and_join_adjusted_data): + final_adjusted_data_polars = cast_and_join_adjusted_data( + aj_data_combinations, adjusted_data ) - example_polars_df - return (example_polars_df,) + final_adjusted_data_polars + return (final_adjusted_data_polars,) @app.cell -def _(mo): - mo.md(r"""## Create AJ estimates Data""") +def _(final_adjusted_data_polars): + final_adjusted_data_polars return @app.cell -def _(mo): - mo.md(r"""## Create aj_data""") - return +def _(final_adjusted_data_polars): + from rtichoke.helpers.sandbox_observable_helpers import ( + _calculate_cumulative_aj_data, + ) + + cumulative_aj_data = _calculate_cumulative_aj_data(final_adjusted_data_polars) + + cumulative_aj_data + return (cumulative_aj_data,) @app.cell -def _(create_aj_data_polars, example_polars_df, fixed_time_horizons, pl): - aj_estimates_per_strata_adj_adjneg = create_aj_data_polars( - example_polars_df, "adjusted", "adjusted_as_negative", fixed_time_horizons +def _(cumulative_aj_data): + from rtichoke.helpers.sandbox_observable_helpers import ( + _turn_cumulative_aj_to_performance_data, ) - aj_estimates_per_strata_excl_adjneg = create_aj_data_polars( - example_polars_df, "excluded", "adjusted_as_negative", fixed_time_horizons + performance_data = _turn_cumulative_aj_to_performance_data(cumulative_aj_data) + + performance_data + return + + +@app.cell(column=1, hide_code=True) +def _(mo): + reference_group_radio = mo.ui.radio( + options=["full", "thin", "aft"], value="full", label="Model" ) - aj_estimates_per_strata_adj_adjcens = create_aj_data_polars( - example_polars_df, "adjusted", "adjusted_as_censored", fixed_time_horizons + reference_group_radio + return (reference_group_radio,) + + +@app.cell(hide_code=True) +def _(mo): + fill_color_radio = mo.ui.radio( + options=["classification_outcome", "reals_labels"], + value="classification_outcome", + label="Fill Colors", ) - aj_estimates_per_strata_excl_adjcens = create_aj_data_polars( - example_polars_df, "excluded", "adjusted_as_censored", fixed_time_horizons + fill_color_radio + return (fill_color_radio,) + + +@app.cell(hide_code=True) +def _(mo): + risk_set_scope_radio = mo.ui.radio( + options=["pooled_by_cutoff", "within_stratum"], + value="pooled_by_cutoff", + label="Risk Set Scope", ) - aj_estimates_per_strata_adj_excl = create_aj_data_polars( - example_polars_df, "adjusted", "excluded", fixed_time_horizons + risk_set_scope_radio + return (risk_set_scope_radio,) + + +@app.cell(hide_code=True) +def _(mo): + stratified_by_radio = mo.ui.radio( + options=["probability_threshold", "ppcr"], + value="probability_threshold", + label="Stratified By", ) - aj_estimates_per_strata_excl_excl = create_aj_data_polars( - example_polars_df, "excluded", "excluded", fixed_time_horizons + stratified_by_radio + return (stratified_by_radio,) + + +@app.cell(hide_code=True) +def _(by, mo): + slider_cutoff = mo.ui.slider(start=0, stop=1, step=by, label="Cutoff") + slider_cutoff + return (slider_cutoff,) + + +@app.cell(hide_code=True) +def _(mo): + fixed_time_horizons_slider = mo.ui.slider( + start=1, stop=5, step=2, label="Fixed Time Horizon" ) + fixed_time_horizons_slider + return 
(fixed_time_horizons_slider,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    competing_heuristic_radio = mo.ui.radio(
+        options=[
+            "adjusted_as_negative",
+            "adjusted_as_censored",
+            "adjusted_as_composite",
+            "excluded",
+        ],
+        value="adjusted_as_negative",
+        label="Competing Heuristic",
+    )
+
+    competing_heuristic_radio
+    return (competing_heuristic_radio,)
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    censoring_heuristic_radio = mo.ui.radio(
+        options=["adjusted", "excluded"],
+        value="adjusted",
+        label="Censoring Heuristic",
+    )
+
+    censoring_heuristic_radio
+    return (censoring_heuristic_radio,)
+
+
+@app.cell(column=2, hide_code=True)
+def _(
+    by,
+    censoring_heuristic_radio,
+    competing_heuristic_radio,
+    fill_color_radio,
+    final_adjusted_data_polars,
+    fixed_time_horizons_slider,
+    pl,
+    px,
+    reference_group_radio,
+    risk_set_scope_radio,
+    slider_cutoff,
+    stratified_by_radio,
+):
+    chosen_cutoff_data = final_adjusted_data_polars.filter(
+        pl.col("chosen_cutoff") == slider_cutoff.value,
+        pl.col("fixed_time_horizon") == fixed_time_horizons_slider.value,
+        pl.col("reference_group") == reference_group_radio.value,
+        pl.col("risk_set_scope") == risk_set_scope_radio.value,
+        pl.col("stratified_by") == stratified_by_radio.value,
+        pl.col("censoring_heuristic") == censoring_heuristic_radio.value,
+        pl.col("competing_heuristic") == competing_heuristic_radio.value,
+    ).sort(pl.col("strata"))
+
+    color_discrete_map = {
+        "real_positives": "#4C5454",
+        "real_competing": "#C880B7",
+        "real_negatives": "#E0E0E0",
+        "real_censored": "#E3F09B",
+        "true_negatives": "#009e73",
+        "true_positives": "#009e73",
+        "false_negatives": "#FAC8CD",
+        "false_positives": "#FAC8CD",
+    }
+
+    fig_new = px.bar(
+        chosen_cutoff_data,
+        x="mid_point",
+        y="reals_estimate",
+        color=fill_color_radio.value,
+        color_discrete_map=color_discrete_map,
+        # color="reals_labels",
+        # color_discrete_map=color_discrete_map,
+        category_orders={
+            "reals_labels": list(color_discrete_map.keys())
+        },  # fixes domain order
+        hover_data=chosen_cutoff_data.columns,  # like tip: true
+    )
+
+    fig_new.update_layout(
+        barmode="stack",  # stacked bars (use "group" for side-by-side)
+        plot_bgcolor="rgba(0,0,0,0)",  # transparent background
+        paper_bgcolor="rgba(0,0,0,0)",
+        legend=dict(title=""),
+    )
+
+    if stratified_by_radio.value == "probability_threshold":
+        vertical_line = slider_cutoff.value
+    else:
+        vertical_line = 1 - slider_cutoff.value + by / 2
+
+    fig_new.add_vline(
+        x=vertical_line,
+        line=dict(color="red", width=2, dash="dash"),
+        annotation_text=f"Cutoff: {slider_cutoff.value}",
+        annotation_position="top right",
+    )
+
+    # fig_new
     return


diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py
index 6529e02..0ff1b91 100644
--- a/tests/test_rtichoke.py
+++ b/tests/test_rtichoke.py
@@ -3,16 +3,12 @@
 """
 
 from rtichoke.helpers.sandbox_observable_helpers import (
-    create_aj_data,
     extract_aj_estimate_for_strata,
-    assign_and_explode_polars,
-    _aj_adjusted_events,
 )
 
 # from rtichoke import rtichoke
 import polars as 
pl from polars.testing import assert_frame_equal -import pytest TIMES = [24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 4.3, 31.5] REALS = [1, 1, 1, 1, 0, 2, 1, 2, 0, 1] @@ -47,133 +43,6 @@ def _expected( ) -@pytest.mark.parametrize( - "censoring_assumption, competing_assumption, expected", - [ - ( - "adjusted", - "adjusted_as_negative", - _expected( - [4.0, 4.0, 8 / 3], - [0.0, 0.0, 4 / 3], - [1.0, 1.0, 1.0], - [0.0, 0.0, 0.0], - "adjusted", - "adjusted_as_negative", - ), - ), - ( - "excluded", - "adjusted_as_negative", - _expected( - [4.0, 3.0, 2.0], - [0.0, 0.0, 1.0], - [1.0, 1.0, 1.0], - [0.0, 1.0, 1.0], - "excluded", - "adjusted_as_negative", - ), - ), - ( - "adjusted", - "adjusted_as_censored", - _expected( - [5.0, 5.0, 10 / 3], - [0.0, 0.0, 5 / 3], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - "adjusted", - "adjusted_as_censored", - ), - ), - ( - "excluded", - "adjusted_as_censored", - _expected( - [5.0, 4.0, 8 / 3], - [0.0, 0.0, 4 / 3], - [0.0, 0.0, 0.0], - [0.0, 1.0, 1.0], - "excluded", - "adjusted_as_censored", - ), - ), - ( - "adjusted", - "adjusted_as_composite", - _expected( - [4.0, 4.0, 8 / 3], - [1.0, 1.0, 7 / 3], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - "adjusted", - "adjusted_as_composite", - ), - ), - ( - "excluded", - "adjusted_as_composite", - _expected( - [4.0, 3.0, 2.0], - [1.0, 1.0, 2.0], - [0.0, 0.0, 0.0], - [0.0, 1.0, 1.0], - "excluded", - "adjusted_as_composite", - ), - ), - ( - "adjusted", - "excluded", - _expected( - [4.0, 4.0, 8 / 3], - [0.0, 0.0, 4 / 3], - [1.0, 1.0, 1.0], - [0.0, 0.0, 0.0], - "adjusted", - "excluded", - ), - ), - ( - "excluded", - "excluded", - _expected( - [4.0, 3.0, 2.0], - [0.0, 0.0, 1.0], - [1.0, 1.0, 1.0], - [0.0, 1.0, 1.0], - "excluded", - "excluded", - ), - ), - ], -) -def test_create_aj_data( - censoring_assumption: str, - competing_assumption: str, - expected: pl.DataFrame, -) -> None: - df = pl.DataFrame( - { - "strata": ["group1"] * 5, - "reals": [0, 1, 2, 1, 0], - "times": [5.0, 3.0, 1.0, 4.0, 2.0], - } - ) - horizons = [1.0, 2.0, 3.0] - breaks = [0.0, 0.5, 1.0] - - result = create_aj_data( - df, - breaks=breaks, - censoring_heuristic=censoring_assumption, - competing_heuristic=competing_assumption, - fixed_time_horizons=horizons, - ).sort("fixed_time_horizon") - - assert_frame_equal(result, expected) - - def test_extract_aj_estimate_for_strata_basic() -> None: df = pl.DataFrame( { @@ -289,51 +158,3 @@ def _expected_aj_df(neg, pos, comp, include_comp=True): cols.append("estimate_origin") return pl.DataFrame(data)[cols] - - -def _expected_excluded_df(censoring, competing): - return pl.DataFrame( - { - "strata": ["group1"] * 3, - "fixed_time_horizon": TIME_HORIZONS, - "real_censored_est": EXCLUDED_EXPECTED[censoring], - "real_competing_est": COMPETING_EXCLUDED[competing], - "times": TIME_HORIZONS, - } - ) - - -@pytest.mark.parametrize( - "censoring, competing", - [ - (c, cc) - for c in ["adjusted", "excluded"] - for cc in [ - "adjusted_as_negative", - "adjusted_as_censored", - "adjusted_as_composite", - "excluded", - ] - ], -) -def test_aj_adjusted_events(censoring: str, competing: str) -> None: - df = pl.DataFrame( - {"strata": ["group1"] * len(TIMES), "reals": REALS, "times": TIMES} - ) - exploded = assign_and_explode_polars(df, TIME_HORIZONS) - result = _aj_adjusted_events( - df, - BREAKS, - exploded, - censoring, - competing, - TIME_HORIZONS, - full_event_table=False, - ).sort("fixed_time_horizon") - - neg = [v[0] for v in AJ_EXPECTED[(censoring, competing)]] - pos = [v[1] for v in AJ_EXPECTED[(censoring, competing)]] - comp_vals = 
[v[2] for v in AJ_EXPECTED[(censoring, competing)]] - include_comp = competing != "excluded" - expected = _expected_aj_df(neg, pos, comp_vals, include_comp) - assert_frame_equal(result, expected) From c36c4c2ccaba39191a573fe33433fdafa29d2e5d Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 3 Nov 2025 08:06:22 +0200 Subject: [PATCH 47/51] docs: dirty hacks in order to make the website work --- docs/_quarto.yml | 2 -- docs/before_we_validate.qmd | 35 +++++++++++++++++++---------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 31b406d..2b5849b 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -10,8 +10,6 @@ website: left: - href: reference.qmd text: Reference - - href: walkthrough_aj_estimate.qmd - text: Example - href: before_we_validate.qmd text: Before we Validate diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd index ccfea6b..4ab5e49 100644 --- a/docs/before_we_validate.qmd +++ b/docs/before_we_validate.qmd @@ -4,6 +4,7 @@ author: "Uriah Finkel" format: html: toc: true +mermaid-format: svg --- Ideally we would like to keep Performance Validation as agnostic as possible. However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case. @@ -634,6 +635,7 @@ def _long_adjusted_for_combo(reference_group_data: pl.DataFrame, exploded: pl.Da co, fixed_time_horizons, True, + ["probability_threshold", "ppcr"] ) .select( pl.col("fixed_time_horizon"), @@ -656,25 +658,25 @@ def _long_adjusted_for_combo(reference_group_data: pl.DataFrame, exploded: pl.Da ]) ) -adjusted_frames = [ - _long_adjusted_for_combo(reference_group_data, exploded, ca, co, fixed_time_horizons) - for ca in censoring_assumptions - for co in competing_assumptions -] +# adjusted_frames = [ +# _long_adjusted_for_combo(reference_group_data, exploded, ca, co, fixed_time_horizons) +# for ca in censoring_assumptions +# for co in competing_assumptions +# ] -excluded_frames = [ - _long_excluded_for_combo(event_table, fixed_time_horizons, ca, co) - for ca in censoring_assumptions - for co in competing_assumptions -] +# excluded_frames = [ +# _long_excluded_for_combo(event_table, fixed_time_horizons, ca, co) +# for ca in censoring_assumptions +# for co in competing_assumptions +# ] -long_excluded_df = pl.concat(excluded_frames, how="vertical", rechunk=True).to_dicts() +# long_excluded_df = pl.concat(excluded_frames, how="vertical", rechunk=True).to_dicts() -long_adjusted_df = pl.concat(adjusted_frames, how="vertical", rechunk=True).to_dicts() +# long_adjusted_df = pl.concat(adjusted_frames, how="vertical", rechunk=True).to_dicts() -ojs_define( - long_excluded_df = long_excluded_df, - long_adjusted_df = long_adjusted_df) +# ojs_define( +# long_excluded_df = long_excluded_df, +# long_adjusted_df = long_adjusted_df) ``` @@ -891,4 +893,5 @@ Plot.plot({ ``` -::: \ No newline at end of file +::: + From d3358d3bd582686c3282f7bb3d399fe1436cfe3f Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 3 Nov 2025 08:19:56 +0200 Subject: [PATCH 48/51] build: try to run actions only with python 3.10 --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 94e9257..0f6c886 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", 
"3.10"] + python-version: ["3.10"] permissions: id-token: write contents: write From 492edee7ed2e38178767424343a705a6912bdbe3 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 3 Nov 2025 08:50:40 +0200 Subject: [PATCH 49/51] build: remove unecessary qmd files --- docs/aj_estimate_summary_report.qmd | 52 ----------------------------- docs/render_summary.qmd | 42 ----------------------- 2 files changed, 94 deletions(-) delete mode 100644 docs/aj_estimate_summary_report.qmd delete mode 100644 docs/render_summary.qmd diff --git a/docs/aj_estimate_summary_report.qmd b/docs/aj_estimate_summary_report.qmd deleted file mode 100644 index 69a74f1..0000000 --- a/docs/aj_estimate_summary_report.qmd +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: "Summary Report" -format: html -echo: false -message: false -warning: false ---- - -```{python} -#| tags: [parameters] - -alpha = 0.1 -ratio = 0.1 - - -``` - -```{python} - -print(f"alpha: {alpha}") -print(f"ratio: {ratio}") -``` - - -```{python} -from lifelines import AalenJohansenFitter -import numpy as np -from itertools import product -import itertools -from rtichoke.helpers.sandbox_observable_helpers import * -from lifelines import CoxPHFitter -from lifelines import WeibullAFTFitter -import polars as pl -print("Polars version:", pl.__version__) - -import pandas as pd -import pickle - -print(2+2) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file: -# probs_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: -# reals_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: -# times_dict = pickle.load(file) - - -``` - diff --git a/docs/render_summary.qmd b/docs/render_summary.qmd deleted file mode 100644 index 4e2be27..0000000 --- a/docs/render_summary.qmd +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: "Summary Report" -format: html -author: "Your Name" -date: "`r Sys.Date()`" ---- - -# Introduction - -This is a summary report generated with Quarto. - - -```{python} - -# import pickle - - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file: -# probs_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: -# reals_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: -# times_dict = pickle.load(file) - -from rtichoke.summary_report.summary_report import * - -# create_data_for_summary_report() - -``` - -```{python} -from rtichoke.summary_report.summary_report import * - -# render_summary_report( ) -``` - - -## Section 2 - -Add more content or code here. 
From 248cff64350722d0b717f7bb4a2f57487be22cf3 Mon Sep 17 00:00:00 2001
From: Uriah Finkel
Date: Mon, 3 Nov 2025 08:56:18 +0200
Subject: [PATCH 50/51] try to update _quarto.yml in order to avoid loop

---
 docs/_quarto.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docs/_quarto.yml b/docs/_quarto.yml
index 2b5849b..57c7443 100644
--- a/docs/_quarto.yml
+++ b/docs/_quarto.yml
@@ -8,10 +8,8 @@ website:
   title: "rtichoke"
   navbar:
     left:
-      - href: reference.qmd
+      - href: reference/
         text: Reference
-      - href: before_we_validate.qmd
-        text: Before we Validate
 
 quartodoc:
   # the name used to import the package you want to create reference docs for

From 7a700aa3b836b27c3826947f6e5124592ec43617 Mon Sep 17 00:00:00 2001
From: Uriah Finkel
Date: Mon, 3 Nov 2025 09:03:26 +0200
Subject: [PATCH 51/51] build: remove qmd files from docs

---
 docs/before_we_validate.qmd      | 897 -------------------------------
 docs/walkthrough_aj_estimate.qmd | 316 -----------------
 2 files changed, 1213 deletions(-)
 delete mode 100644 docs/before_we_validate.qmd
 delete mode 100644 docs/walkthrough_aj_estimate.qmd

diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd
deleted file mode 100644
index 4ab5e49..0000000
--- a/docs/before_we_validate.qmd
+++ /dev/null
@@ -1,897 +0,0 @@
----
-title: "Before we Validate Performance"
-author: "Uriah Finkel"
-format:
-  html:
-    toc: true
-mermaid-format: svg
----
-
-Ideally we would like to keep Performance Validation as agnostic as possible. However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case.
-
-So before we validate performance, let us consider the underlying process.
-
-✍️ The User Inputs\
-🪛 Internal Function
-
-# ✍️ Declare reference groups
-
-The dimensions of the `probs` and the `reals` dictionaries imply the nature of the use case:
-
-TODO: copy from rtichoke r README.
-
-##### One Model, One Population:
-
--   Just one reference group: "model".
-
-##### Several Models, One Population:
-
-Compare between different candidate models.
-Each model stands as a reference group, such as a "thin" model or a "full" model.
-
-##### Several Models, Several Populations
-
-Compare performance over different sub-populations.
-Internal Validation: "test", "val" and "train".
-External Validation: "Framingham", "Australia".
-Fairness: "Male", "Female".
-
-# ✍️ Declare how to stratify predictions ✂️
-
-The `stratified_by` argument is designed for the user to choose how to stratify predictions for decision-making; each method implies a different problem:
-
-## Probability Threshold
-
-By choosing Probability Threshold as a cutoff the implied assumption is that you are concerned with individual harm or benefit.
-
-### Baseline Strategy: Treat None
-
-```{mermaid}
-
-graph LR
-    subgraph trt[Treatment Decision]
-        linkStyle default stroke:#000
-        A("😷") -->|"Treatment 💊"|B("Predicted<br>Positive<br><br>💊<br>😷")
-        A -->|"No Treatment"|C("Predicted<br>Negative<br><br>😷")
-    end
-
-    subgraph ut[Utility of the Decision]
-        subgraph pred[Prediction Model]
-            B -->|"Disease 🤢"| D["TP<br>💊<br>🤢"]
-            B -->|"No Disease 🤨"| E["FP<br>💊<br>🤨"]
-            C -->|"Disease 🤢"| F["FN<br>🤢"]
-            C -->|"No Disease 🤨"| G["TN<br>🤨"]
-        end
-        subgraph baselinestrategy[Baseline Strategy: Treat None]
-            Dnone["FN<br>🤢"]
-            Enone["TN<br>🤨"]
-            Fnone["FN<br>🤢"]
-            Gnone["TN<br>🤨"]
-
-            D---Dnone
-            E---Enone
-            F---Fnone
-            G---Gnone
-        end
-        subgraph nb[Net Benefit]
-            Dnb[1]
-            Enb["pt / (1-pt)"]
-            Fnb[0]
-            Gnb[0]
-            Dnone---Dnb
-            Enone---Enb
-            Fnone---Fnb
-            Gnone---Gnb
-        end
-    end
-
-
-
-    style A fill:#E8F4FF, stroke:black,color:black
-    style B fill:#E8F4FF, stroke:black,color:black
-    style C fill:#E8F4FF, stroke:black,color:black
-    style D fill:#C0FFC0,stroke:black,color:black
-    style Dnone fill:#FFCCE0,stroke:black,color:black
-    style Dnb fill: #C0FFC0,stroke:black,color:black
-    style E fill: #FFCCE0,stroke:black,color:black
-    style Enone fill: #C0FFC0,stroke:black,color:black
-    style Enb fill: #FFCCE0,stroke:black,color:black
-    style F fill:#FFCCE0,stroke:black,color:black
-    style Fnone fill: #FFCCE0,stroke:black,color:black
-    style Fnb fill: #E8F4FF,stroke:black,color:black
-    style G fill: #C0FFC0,stroke:black,color:black
-    style Gnone fill: #C0FFC0,stroke:black,color:black
-    style Gnb fill: #E8F4FF,stroke:black,color:black
-    style nb fill: #E8F4FF,stroke:black,color:black
-    style pred fill: #E8F4FF,stroke:black,color:black
-    style baselinestrategy fill: #E8F4FF,stroke:black,color:black
-
-    classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px
-    class trt,ut subgraphStyle
-
-```
-
-### Baseline Strategy: Treat All
-
-```{mermaid}
-
-graph LR
-    subgraph trt[Treatment Decision]
-        linkStyle default stroke:#000
-        A("😷") -->|"Treatment 💊"|B("Predicted<br>Positive<br><br>💊<br>😷")
-        A -->|"No Treatment"|C("Predicted<br>Negative<br><br>😷")
-    end
-
-    subgraph ut[Utility of the Decision]
-        subgraph pred[Prediction Model]
-            B -->|"Disease 🤢"| D["TP<br>💊<br>🤢"]
-            B -->|"No Disease 🤨"| E["FP<br>💊<br>🤨"]
-            C -->|"Disease 🤢"| F["FN<br>🤢"]
-            C -->|"No Disease 🤨"| G["TN<br>🤨"]
-        end
-        subgraph baselinestrategy[Baseline Strategy: Treat All]
-            Dall["TP<br>💊<br>🤢"]
-            Eall["FP<br>💊<br>🤨"]
-            Fall["TP<br>💊<br>🤢"]
-            Gall["FP<br>💊<br>🤨"]
-
-            D---Dall
-            E---Eall
-            F---Fall
-            G---Gall
-        end
-        subgraph nb[Net Benefit]
-            Dnb[0]
-            Enb[0]
-            Fnb[1]
-            Gnb["(1-pt) / pt"]
-            Dall---Dnb
-            Eall---Enb
-            Fall---Fnb
-            Gall---Gnb
-        end
-    end
-
-
-
-    style A fill:#E8F4FF, stroke:black,color:black
-    style B fill:#E8F4FF, stroke:black,color:black
-    style C fill:#E8F4FF, stroke:black,color:black
-    style D fill:#C0FFC0,stroke:black,color:black
-    style Dall fill:#C0FFC0,stroke:black,color:black
-    style Dnb fill:#E8F4FF,stroke:black,color:black
-    style E fill:#FFCCE0,stroke:black,color:black
-    style Eall fill:#FFCCE0,stroke:black,color:black
-    style Enb fill:#E8F4FF,stroke:black,color:black
-    style F fill:#FFCCE0,stroke:black,color:black
-    style Fall fill:#C0FFC0,stroke:black,color:black
-    style Fnb fill:#C0FFC0,stroke:black,color:black
-    style G fill:#C0FFC0,stroke:black,color:black
-    style Gall fill:#FFCCE0,stroke:black,color:black
-    style Gnb fill:#FFCCE0,stroke:black,color:black
-    style nb fill: #E8F4FF,stroke:black,color:black
-    style pred fill: #E8F4FF,stroke:black,color:black
-    style baselinestrategy fill: #E8F4FF,stroke:black,color:black
-
-    classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px
-    class trt,ut subgraphStyle
-
-```
-
-*Regardless* of ranking each prediction is categorised to a bin: 0.32 -\> `[0.3, 0.4)`.
-
-1. Categorise Absolute Risk: 0.32 -\> `[0.3, 0.4)`
-
-References: Pauker SG, Kassirer JP. Therapeutic decision making: a cost-benefit analysis. N Engl J Med. 1975;293(5):229-234. doi:10.1056/NEJM197507312930505
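Aside: the two diagrams above weight the same confusion matrix differently. A small numeric sketch of those weights in action, with toy numbers that are not package output (`pt` is the probability threshold; the treat-all line uses the standard decision-curve form):

```python
import numpy as np

# Toy validation set: probs from some model, reals as 0/1 outcomes.
probs = np.array([0.9, 0.7, 0.4, 0.2, 0.05])
reals = np.array([1, 1, 0, 0, 0])
pt = 0.3

preds = probs >= pt
n = len(reals)
tp = np.sum(preds & (reals == 1))  # 2
fp = np.sum(preds & (reals == 0))  # 1

# Against the treat-none baseline: a TP is worth 1, a FP costs pt / (1 - pt).
nb_model = tp / n - fp / n * pt / (1 - pt)

# The treat-all strategy at the same pt, for comparison.
prevalence = np.mean(reals == 1)
nb_treat_all = prevalence - (1 - prevalence) * pt / (1 - pt)

print(round(nb_model, 3), round(nb_treat_all, 3))  # 0.314 0.143
```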
-## PPCR
-
-![](line_ppcr_04.svg)
-
-```{mermaid}
-
-graph LR
-    subgraph trt[Treatment Allocation Decision]
-        linkStyle default stroke:#000
-        A("😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷") -->|"Treatment 💊💊💊💊"|B("Σ Predicted<br>Positives<br><br>💊💊💊💊<br>😷😷😷😷")
-        A -->|"No Treatment"|C("Σ Predicted<br>Negatives<br><br>😷😷😷😷😷😷")
-    end
-
-    subgraph ut[Utility of the Decision]
-        B -->|"Disease 🤢🤢🤢"| D["Σ TP<br>💊💊💊<br>🤢🤢🤢"]
-        B -->|"No Disease 🤨"| E["Σ FP<br>💊<br>🤨"]
-        C -->|"Disease 🤢"| F["Σ FN<br>🤢"]
-        C -->|"No Disease 🤨🤨🤨🤨🤨"| G["Σ TN<br>🤨🤨🤨🤨🤨"]
-    end
-
-
-
-    style A fill:#E8F4FF, stroke:black,color:black
-    style B fill:#E8F4FF, stroke:black,color:black
-    style C fill:#E8F4FF, stroke:black,color:black
-    style D fill:#C0FFC0,stroke:black,color:black
-    style E fill:#FFCCE0,stroke:black,color:black
-    style F fill:#FFCCE0,stroke:black,color:black
-    style G fill:#C0FFC0,stroke:black,color:black
-
-    classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px
-    class trt,ut subgraphStyle
-
-```
-
-By choosing PPCR as a cutoff the implied assumption is that you are concerned with a resource constraint and assume no individual treatment harm.
-
-*Regarding* the ranking each prediction is categorised to a bin: if the absolute probability 0.32 is the 18th highest prediction out of 100, it will be categorised to the second decile -\> `0.18`.
-
-1. Calculate Risk-Quantile from Absolute Risk: 0.32 -\> `0.18`
-
-References: https://en.wikipedia.org/wiki/Precision_and_recall
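Aside: operationally, a PPCR cutoff ranks the predictions and flags a fixed share as positive, ignoring absolute risk. A minimal sketch (the helper name is ours, not an rtichoke function):

```python
import numpy as np

def classify_by_ppcr(probs, ppcr):
    # Flag the top `ppcr` share of predictions as positive,
    # regardless of their absolute values.
    n_positives = int(np.ceil(ppcr * len(probs)))
    order = np.argsort(-probs)  # highest risk first
    preds = np.zeros(len(probs), dtype=bool)
    preds[order[:n_positives]] = True
    return preds

probs = np.array([0.32, 0.81, 0.10, 0.55, 0.47])
print(classify_by_ppcr(probs, ppcr=0.4))  # the two highest risks are flagged
```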
-# ✍️ Declare Fixed Time Horizons 🌅 (📅🤬)
-
-The `fixed_time_horizons` argument is designed for the user to choose the set of time horizons to follow.
-
-Different followups contain different distributions of observed outcomes: declare fixed time horizons for the prediction model, such as \[5, 10\] years of prediction for a CVD event.
-
-## 🪛 Update Administrative Censoring
-
-For cases where the observed time-to-event is shorter than the prediction time horizon, the outcomes might change:
-
--   `Real Positives` 🤢 should be considered as `Real Negatives` 🤨, the outcome of interest did not happen yet.
-
--   Always included and encoded as 0.
-
--   `Real Negatives` 🤨 should be considered as `Real Censored` 🤬, the event of interest could have happened in the gap between the observed time and the fixed time horizon.
-
--   If adjusted: encoded as 0.
-
--   If excluded: counted with crude estimate.
-
-```{python}
-#| echo: false
-
-import numpy as np
-
-times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3])
-reals = np.array([1, 1, 1, 1, 0, 2, 1, 2, 0, 1])
-time_horizons = [10, 20, 30, 40, 50]
-
-# Icons
-def get_icon(outcome, t, h):
-    if outcome == 0:
-        return "🤬" if t < h else "🤨"
-    elif outcome == 1:
-        return "🤢"
-    elif outcome == 2:
-        return "💀"
-
-# Displayed time
-def get_time(outcome, t, h):
-    if outcome == 0:
-        return t if t < h else h
-    else:
-        return t
-
-# Final output
-final_data = []
-
-for i in range(len(times)):
-    id_ = i + 1
-    t = times[i]
-    r = reals[i]
-
-    for h in time_horizons:
-        outcome = r if t <= h else 0  # override outcome after horizon
-        final_data.append({
-            "id": id_,
-            "time_horizon": h,
-            "time": get_time(outcome, t, h),
-            "real": get_icon(outcome, t, h)
-        })
-
-ojs_define(data = final_data)
-
-```
-
-```{ojs}
-//| echo: false
-
-filteredData = data.filter((d) => d.time_horizon == timeHorizon)
-
-viewof timeHorizon = Inputs.range([10, 50], {
-  step: 20,
-  value: 50,
-  label: "Time Horizon"
-})
-
-Plot.plot({
-  x: {
-    domain: [0, 50]
-  },
-  y: {
-    domain: [0, 11],
-    axis: false
-  },
-  marks: [
-    Plot.ruleX([timeHorizon], {
-      stroke: "#D9E8A3",
-      strokeWidth: 6,
-      strokeDasharray: "5,5",
-      y1: 0,
-      y2: 10 // Should match the y-domain max
-    }),
-    Plot.ruleY(filteredData, {
-      x: "time",
-      y: "id",
-      strokeWidth: 1.5
-    }),
-    Plot.text(filteredData, {
-      x: "time",
-      y: "id",
-      text: "real",
-      tip: true,
-      fontSize: 30
-    })
-  ]
-})
-
-```
-
-# Declare Heuristics Regarding ambiguous `reals`
-
-## ✍️ Declare Heuristics Regarding Censored Events 📅🤬
-
-```{mermaid}
-
-graph LR
-    S0["Non Event<br>0 🤨 / 🤬"] -->|"?"|S1["Primary Event<br>1 🤢"]
-    S0-->|"?"|S2["Competing Event<br>2 💀"]
-
-
-    classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black
-    classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white
-    classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black
-
-    class S0 nonEvent
-    class S1 primaryEvent
-    class S2 competingEvent
-    class S3 censoredEvent
-
-    linkStyle 0 stroke:#333,background:yellow
-
-```
-
-The `censored_heuristic` argument is designed for the user to choose how to interpret censored events.
-
-Performance Validation in the face of censored observations requires assumptions regarding the unobserved followup.
-
-TODO: add link to nan-van-geloven article
-
-### Exclude Censored Events
-
-```{mermaid}
-
-graph LR
-    S0["Non Event<br>0 🤨"] -->S1["Primary Event<br>1 🤢"]
-    S0-->S2["Competing Event<br>2 💀"]
-
-    S3["Censored<br>0 🤬"]
-
-
-    classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black
-    classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white
-    classDef censoredEvent fill:#E3F09B,stroke:#333,stroke-width:1px,color:black
-    classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black
-
-    class S0 nonEvent
-    class S1 primaryEvent
-    class S2 competingEvent
-    class S3 censoredEvent
-
-    linkStyle 0 stroke:#333,background:yellow
-
-```
-
-All censored events to be excluded.
-
-Underlying Assumption: Small amount of censored events. Violation of the assumption leads to: Overestimation of the observed outcomes.
-
-### Adjust Censored as partially seen Non-Event
-
-Observed outcomes for each stratum are estimated using the AJ-estimate (equivalent to CIF and KM): each censored observation is assumed to be similar to the ones who weren't censored.
-
-TODO: Link to article
-
-Underlying Assumption: Independent Censoring. Violation of the assumption leads to: Biased estimate for observed outcomes.
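Aside: the gap between excluding and adjusting can be made concrete with a Kaplan-Meier fit. A sketch under assumptions: the event indicator below is a hypothetical binary recoding of the `reals` used in the interactive figure above (competing events folded into the primary event), not an rtichoke computation:

```python
import numpy as np
from lifelines import KaplanMeierFitter

times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3])
events = np.array([1, 1, 1, 1, 0, 1, 1, 1, 0, 1])  # 1 = event, 0 = censored
horizon = 40.0

# Exclusion: drop subjects censored before the horizon, take the crude rate.
kept = ~((events == 0) & (times < horizon))
crude = np.mean((events[kept] == 1) & (times[kept] <= horizon))

# Adjustment: Kaplan-Meier treats the censored subjects as exchangeable
# with those still under follow-up.
km = KaplanMeierFitter().fit(times, events)
adjusted = 1 - km.survival_function_at_times(horizon).iloc[0]

print(crude, adjusted)  # 0.75 vs ~0.667: exclusion overestimates here
```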
-# ✍️ Declare Heuristics Regarding Competing Events 📅💀
-
-The `competing_heuristic` argument is designed for the user to choose how to interpret competing events.
-
-Performance Validation in the face of competing observations requires assumptions regarding the unobserved followup.
-
-TODO: add link to nan-van-geloven article
-
-### Exclude Competing Events
-
-```{mermaid}
-
-graph LR
-    subgraph adj[Adjusted for Censoring]
-        S0["Non Event<br>0 🤨 / 🤬"] -->S1["Primary Event<br>1 🤢"]
-    end
-    S0 -->S2["Competing Event<br>2 💀"]
-
-
-    classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black
-    classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white
-    classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black
-
-    class S0 nonEvent
-    class S1 primaryEvent
-    class S2 competingEvent
-
-    linkStyle 0 stroke:#333
-
-    style adj fill:#E3F09B,color:black
-
-
-```
-
-All competing events to be excluded.
-
-Underlying Assumption: Small amount of competing events. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen.
-
-### Adjust Competing Events as Censored (Partially seen Negatives)
-
-```{mermaid}
-
-graph LR
-    subgraph adj[Adjusted for Censoring]
-        direction LR
-        S0["Non Event<br>0 🤨 / 🤬<br><br>Competing Event<br>2 💀"] -->S1["Primary Event<br>1 🤢"]
-    end
-
-
-    classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black
-    classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white
-    classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black
-
-    class S0 nonEvent
-    class S1 primaryEvent
-    class S2 competingEvent
-
-    style adj fill:#E3F09B,color:black
-
-
-    linkStyle 0 stroke:#333
-
-```
-
-All competing events to be treated as censored.
-
-Underlying Assumption: We consider a patient experiencing a competing event equivalent to independent censoring. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen.
-
-### Adjust Competing Events as Negatives (Definite Non-Events)
-
-All competing events are kept as competing events to the primary event-of-interest.
-
-In a way, a patient experiencing a competing event is "more" of a "real-negative" than a conventional "real-negative".
-
-This is derived from the assumed state-convention.
-
-Beyond the horizon time the following transition is possible: `Real Negatives` 🤨 =\> `Real Positives` 🤢 💀 2
-
-```{mermaid}
-
-graph LR
-subgraph adj[Adjusted for Censoring]
-    direction LR
-    S0["Non Event<br>0 🤨"] -->S1["Primary Event<br>1 🤢"]
-    S0 -->S2["Competing Event<br>2 💀"]
-    end
-
-
-    classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black
-    classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white
-    classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black
-
-    class S0 nonEvent
-    class S1 primaryEvent
-    class S2 competingEvent
-
-    linkStyle 0 stroke:#333
-    style adj fill:#E3F09B,color:black
-
-```
-
-Underlying Assumption: We consider a patient experiencing a competing event as a definite non-event. Violation of the assumption leads to Underestimation of the observed outcomes if a competing event can be considered as a different form of the primary event.
-
-### Adjust Competing Events as Composite (Positives)
-
-All competing events to be treated as another case of the Primary Event.
-
-A patient experiencing a competing event might be seen as another case of "some event".
-
-```{mermaid}
-
-graph LR
-subgraph adj[Adjusted for Censoring]
-    direction LR
-    S0["Non Event<br>0 🤨"] -->S1["Any Event<br>1 🤢<br>Competing Event<br>2 💀"]
-    end
-
-
-    classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black
-    classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white
-    classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black
-
-    class S0 nonEvent
-    class S1 primaryEvent
-    class S2 competingEvent
-
-    linkStyle 0 stroke:#333
-    style adj fill:#E3F09B,color:black
-
-```
-
-There is no need for an underlying assumption; the answer is explicit.
-
-This heuristic can be seen as a different underlying question - what is the probability of having any type of event, Primary or Competing?
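Aside: the four competing heuristics above amount to a simple recoding of `reals` before estimation. A hedged sketch, assuming the 0/1/2 state coding used throughout (the helper name is ours, not part of rtichoke):

```python
import polars as pl

def apply_competing_heuristic(df: pl.DataFrame, heuristic: str) -> pl.DataFrame:
    # reals coding: 0 = non-event / censored, 1 = primary event, 2 = competing event
    if heuristic == "excluded":
        return df.filter(pl.col("reals") != 2)  # drop competing events
    if heuristic == "adjusted_as_censored":
        return df.with_columns(  # competing becomes censored at its event time
            pl.when(pl.col("reals") == 2).then(0).otherwise(pl.col("reals")).alias("reals")
        )
    if heuristic == "adjusted_as_composite":
        return df.with_columns(  # competing becomes another primary event
            pl.when(pl.col("reals") == 2).then(1).otherwise(pl.col("reals")).alias("reals")
        )
    # "adjusted_as_negative": keep state 2 so the AJ estimator treats it
    # as an absorbing state that is a definite non-event for the primary risk.
    return df

df = pl.DataFrame({"reals": [0, 1, 2, 1, 0], "times": [5.0, 3.0, 1.0, 4.0, 2.0]})
print(apply_competing_heuristic(df, "adjusted_as_composite"))
```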
-# What rtichoke from now on?
-
-## Render Predictions Histogram
-
-### Extract AJ Estimate by Assumptions
-
-For each required combination of reference_group x predictions_strata x fixed_time_horizons x censored_heuristic x competing_heuristic a separate AJ estimate is calculated for the adjusted `reals`, and a Crude estimate is calculated for the excluded `reals`.
-
-The sum of the AJ estimates for each predictions_strata is equal to the overall AJ estimate.
-
-```{python}
-import polars as pl
-from rtichoke.helpers.sandbox_observable_helpers import (
-    create_aj_data,
-    create_list_data_to_adjust,
-    create_adjusted_data
-)
-
-from polarstate import prepare_event_table, predict_aj_estimates
-
-times_and_reals = pl.DataFrame({
-    "times": times,
-    "reals": reals
-})
-
-event_table = prepare_event_table(times_and_reals)
-
-long_df = predict_aj_estimates(
-    event_table,
-    [10.0, 20.0, 30.0, 40.0, 50.0],
-    True
-).select(
-    pl.col("times"),
-    pl.col("state_occupancy_probability_0"),
-    pl.col("state_occupancy_probability_1"),
-    pl.col("state_occupancy_probability_2"),
-    pl.col("estimate_origin")
-).unpivot(
-    index = ["times", "estimate_origin"],
-    variable_name="state",
-    value_name="aj_estimate"
-).to_dicts()
-
-
-ojs_define(long_df = long_df)
-
-```
-
-```{python}
-from rtichoke.helpers.sandbox_observable_helpers import (
-    assign_and_explode_polars, _extract_excluded_events, _aj_adjusted_events)
-
-
-censoring_assumptions = ["excluded", "adjusted"]
-competing_assumptions = [
-    "excluded",
-    "adjusted_as_censored",
-    "adjusted_as_negative",
-    "adjusted_as_composite",
-]
-
-censoring_assumption = "adjusted" #["excluded", "adjusted"]
-competing_assumption = "adjusted_as_composite"
-fixed_time_horizons = [10.0, 20.0, 30.0, 40.0, 50.0]
-
-reference_group_data = pl.DataFrame(
-    {
-        "strata": ["model"] * 10,
-        "reals": reals,
-        "times": times,
-    }
-)
-
-exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons)
-
-def _long_excluded_for_combo(event_table: pl.DataFrame, fixed_time_horizons, ca: str, co: str) -> pl.DataFrame:
-    return (
-        _extract_excluded_events(event_table, fixed_time_horizons, ca, co)
-        .select("fixed_time_horizon", "real_censored_est", "real_competing_est")
-        .unpivot(index=["fixed_time_horizon"], variable_name="state", value_name="excluded_count")
-        .sort("fixed_time_horizon")
-        .with_columns([
-            pl.lit(ca).alias("censoring_assumption"),
-            pl.lit(co).alias("competing_assumption"),
-        ])
-    )
-
-def _long_adjusted_for_combo(reference_group_data: pl.DataFrame, exploded: pl.DataFrame, ca: str, co: str, fixed_time_horizons: list[float]) -> pl.DataFrame:
-    return (
-        _aj_adjusted_events(
-            reference_group_data,
-            exploded,
-            ca,
-            co,
-            fixed_time_horizons,
-            True,
-            ["probability_threshold", "ppcr"]
-        )
-        .select(
-            pl.col("fixed_time_horizon"),
-            # pl.col("real_negatives_est"),
-            # pl.col("real_positives_est"),
-            # pl.col("real_competing_est"),
-            pl.col(r"^real_.*_est$"),
-            pl.col("estimate_origin"),
-            pl.col("times")
-        ).filter(pl.col("times")<=pl.col("fixed_time_horizon"))
-        .unpivot(
-            index = ["fixed_time_horizon", "times", "estimate_origin"],
-            variable_name="state",
-            value_name="aj_estimate"
-        )
-        .sort(pl.col("fixed_time_horizon"))
-        .with_columns([
-            pl.lit(ca).alias("censoring_assumption"),
-            pl.lit(co).alias("competing_assumption"),
-        ])
-    )
-
-# adjusted_frames = [
-#     _long_adjusted_for_combo(reference_group_data, exploded, ca, co, fixed_time_horizons)
-#     for ca in censoring_assumptions
-#     for co in competing_assumptions
-# ]
-
-# excluded_frames = [
-#     _long_excluded_for_combo(event_table, fixed_time_horizons, ca, co)
-#     for ca in censoring_assumptions
-#     for co in competing_assumptions
-# ]
-
-# long_excluded_df = pl.concat(excluded_frames, how="vertical", rechunk=True).to_dicts()
-
-# long_adjusted_df = pl.concat(adjusted_frames, how="vertical", rechunk=True).to_dicts()
-
-# ojs_define(
-#     long_excluded_df = long_excluded_df,
-#     long_adjusted_df = long_adjusted_df)
-
-```
-
-
-```{ojs}
-//| echo: false
-
-viewof censored_heuristics = Inputs.radio(
-  new Map([
-    ["Adjusted", "adjusted"],
-    ["Excluded", "excluded"]
-  ]),
-  {
-    value: "adjusted",
-    label: "Censored Heuristic"
-  }
-)
-
-viewof competing_heuristics = Inputs.radio(
-  new Map([
-    ["Adjusted as Censored", "adjusted_as_censored"],
-    ["Adjusted as Negative", "adjusted_as_negative"],
-    ["Adjusted as Composite", "adjusted_as_composite"],
-    ["Excluded", "excluded"]
-  ]),
-  {
-    value: "adjusted_as_negative",
-    label: "Competing Heuristic"
-  }
-)
-
-viewof timeHorizon_aj_estimate = Inputs.range([10, 50], {
-  step: 10,
-  value: 50,
-  label: "Time Horizon"
-})
-
-censored_heuristics
-competing_heuristics
-
-long_excluded_df_heuristic_filtered = long_excluded_df.filter((d) => d.censoring_assumption == censored_heuristics && d.competing_assumption == competing_heuristics)
-
-filteredLongAdjustedDf_heuristic = long_adjusted_df.filter(
-  (d) => d.censoring_assumption == censored_heuristics && d.competing_assumption == competing_heuristics
-)
-
-filteredLongAdjustedDf = filteredLongAdjustedDf_heuristic.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate)
-
-filteredLongExcludedDf = long_excluded_df_heuristic_filtered.filter((d) => d.fixed_time_horizon == timeHorizon_aj_estimate)
-
-//filteredLongAdjustedDf
-//filteredLongExcludedDf
-
-//filteredLongAdjustedDf.filter(d => d.estimate_origin === "event_table")
-
-```
-
-
-::: {.panel-tabset}
-
-#### Stacked
-
-```{ojs}
-//| echo: false
-
-Plot.plot({
-  style: { background: "transparent" },
-  x: { label: "Time", domain: [0, 50] },
-  y: { label: "Aalen-Johansen Estimate", stack: true, domain: [0, 10] },
-  color: {
-    legend: true,
-    domain: ["real_positives_est", "real_competing_est", "real_negatives_est"],
-    range: ["#4C5454", "#C880B7", "#E0E0E0"]
-  },
-  marks: [
-    Plot.areaY(
-      filteredLongAdjustedDf.filter(d => d.estimate_origin === "event_table"),
-      {
-        x: "times",
-        y: "aj_estimate",
-        fill: "state",
-        curve: "step-after",
-        tip: true
-      }
-    ),
-
-    Plot.ruleY([0])
-  ]
-})
-
-
-
-
-Plot.plot({
-  height: 220,
-  style: { background: "transparent" },
-  x: { label: "Time", domain: [0, 50] }, // bars extend from x=0 to horizon
-  y: { label: "Excluded count", domain: [0, 4] },
-  color: {
-    legend: true,
-    domain: ["real_competing_est", "real_censored_est"],
-    range: ["#C880B7", "#E3F09B"]
-  },
-  marks: [
-    Plot.rectX(
-      filteredLongExcludedDf,
-      Plot.stackY({
-        x1: 0, // always start from 0
-        x2: "fixed_time_horizon", 
// extend to horizon - y: "excluded_count", // stack along y - fill: "state", - tip: true - }) - ), - Plot.ruleX([0]) // reference at x=0 - ] -}) - -``` - -#### Non-Stacked - -```{ojs} -//| echo: false - -// TODO: Add start point 0 and end-point fixed time horizons - -Plot.plot({ - style: { - background: "transparent" - }, - x: { - label: "Time", - domain: [0, 50] - }, - y: { - label: "Aalen-Johansen Estimate", - stack: false, - domain: [0, 10] - }, - color: { - legend: true, - domain: [ - "real_positives_est", - "real_competing_est", - "real_negatives_est" - ], - range: [ - "#4C5454", - "#C880B7", - "#E0E0E0", - ] - }, - marks: [ - Plot.lineY(filteredLongAdjustedDf.filter(d => d.estimate_origin === "event_table"), { - x: "times", - y: "aj_estimate", - stroke: "state", - curve: "step-after" - }), - Plot.dot(filteredLongAdjustedDf.filter(d => d.estimate_origin === "event_table"), { - x: "times", - y: "aj_estimate", - fill: "state", - r: 2.5, - tip: true - }), - Plot.dot(filteredLongAdjustedDf.filter(d => d.estimate_origin === "fixed_time_horizons"), { - x: "times", - y: "aj_estimate", - fill: "state", - r: 10, - tip: true - }), - Plot.ruleY([0]) - ] -}) - - -Plot.plot({ - height: 120, - style: { background: "transparent" }, - x: { label: "Time", domain: [0, 50] }, - y: { label: "Aalen-Johansen Estimate", - domain: [0, 2] }, - color: { - legend: true, - domain: ["real_competing_est", "real_censored_est"], - range: ["#C880B7", "#E3F09B"] - }, - marks: [ - Plot.dot(filteredLongExcludedDf, { - x: "fixed_time_horizon", - y: "excluded_count", - fill: "state", - r: 10, - fillOpacity: 0.85, - strokeWidth: 1.5, - tip: true - }), - - // horizontal lines from x=0 to x=fixed_time_horizon - Plot.ruleY(filteredLongExcludedDf, { - x1: 0, - x2: "fixed_time_horizon", - y: "excluded_count", - stroke: "state", - strokeOpacity: 0.6 - }), - Plot.ruleY([0]) - ] -}) - -``` - - -::: - diff --git a/docs/walkthrough_aj_estimate.qmd b/docs/walkthrough_aj_estimate.qmd deleted file mode 100644 index 106e596..0000000 --- a/docs/walkthrough_aj_estimate.qmd +++ /dev/null @@ -1,316 +0,0 @@ ---- -title: "Hello, Quarto" -format: html -echo: false -message: false -warning: false ---- - -```{python} -import polars as pl -import pandas as pd -import numpy as np -from lifelines import AalenJohansenFitter, CoxPHFitter, WeibullAFTFitter - -df_time_to_cancer_dx = pd.read_csv( - "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" -) -``` - - -```{python} - -import numpy as np -from itertools import product -import itertools -from rtichoke.helpers.sandbox_observable_helpers import * -import polars as pl -print("Polars version:", pl.__version__) - -import pandas as pd -import pickle - -cph = CoxPHFitter() -thin_model = CoxPHFitter() -aft_model = WeibullAFTFitter() - -cox_formula = "age + famhistory + marker" -thin_formula = "age + marker" -aft_formula = "age + marker" - -cph.fit( - df_time_to_cancer_dx, - duration_col="ttcancer", - event_col="cancer", - formula=cox_formula, -) - -thin_model.fit( - df_time_to_cancer_dx, - duration_col="ttcancer", - event_col="cancer", - formula=thin_formula, -) - -aft_model.fit( - df_time_to_cancer_dx, - duration_col="ttcancer", - event_col="cancer", - formula=aft_formula, -) - - - -cph_pred_vals = (1 - cph.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values - -thin_pred_vals = (1 - thin_model.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, 
:].values
-
-aft_pred_vals = (1 - aft_model.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values
-
-probs_dict = {"full": cph_pred_vals, "thin": thin_pred_vals, "aft": aft_pred_vals}
-
-reals_mapping = {"censor": 0, "diagnosed with cancer": 1, "dead other causes": 2}
-
-reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping)
-
-times_dict = df_time_to_cancer_dx["ttcancer"]
-
-```
-
-
-## polars
-
-```{python}
-
-
-
-fixed_time_horizons = [1.0, 3.0, 5.0]
-stratified_by = ["probability_threshold", "ppcr"]
-by=0.1
-
-aj_data_combinations = create_aj_data_combinations_polars(list(probs_dict.keys()), fixed_time_horizons, stratified_by, by)
-
-
-
-print(aj_data_combinations['strata'])
-```
-
-# try polars
-
-## create list data to adjust polars
-
-```{python}
-
-from rtichoke.helpers.sandbox_observable_helpers import *
-
-list_data_to_adjust_polars = create_list_data_to_adjust_polars(
-    probs_dict, reals_dict, times_dict, stratified_by=stratified_by, by=by
-)
-
-
-
-```
-
-
-## create adjusted data list polars
-
-### New extract aj estimate by assumptions polars
-
-## Create aj_estimates_data
-
-```{python}
-
-fixed_time_horizons = [1.0, 3.0, 5.0]
-
-assumption_sets = [
-    {
-        "censoring_assumption": "adjusted",
-        "competing_assumption": "adjusted_as_negative",
-    },
-    {
-        "censoring_assumption": "excluded",
-        "competing_assumption": "adjusted_as_negative",
-    },
-    {
-        "censoring_assumption": "adjusted",
-        "competing_assumption": "adjusted_as_censored",
-    },
-    {
-        "censoring_assumption": "excluded",
-        "competing_assumption": "adjusted_as_censored",
-    },
-    {"censoring_assumption": "adjusted", "competing_assumption": "excluded"},
-    {"censoring_assumption": "excluded", "competing_assumption": "excluded"},
-]
-
-# aj_estimates_data = extract_aj_estimate_by_assumptions(
-#     example_polars_df,
-#     assumption_sets=assumption_sets,
-#     fixed_time_horizons=fixed_time_horizons,
-# )
-
-
-aj_estimates_data = create_adjusted_data(
-    list_data_to_adjust_polars,
-    assumption_sets=assumption_sets,
-    fixed_time_horizons=fixed_time_horizons
-)
-
-```
-
-
-### Check strata values
-
-```{python}
-
-aj_data_combinations.select(pl.col('strata')).with_columns(
-    pl.col("strata").cast(str)
-).join(
-    aj_estimates_data.select(pl.col('strata')).unique(),
-    on = 'strata'
-)
-
-result = aj_data_combinations.select(pl.col('strata')).with_columns(
-    pl.col("strata").cast(str)
-).with_columns(
-    pl.col("strata").is_in(aj_estimates_data["strata"]).alias("is_in_df2")
-)
-
-print(result)
-
-
-result = aj_estimates_data.select(pl.col('strata')).with_columns(
-    pl.col("strata")
-).with_columns(
-    pl.col("strata").is_in(aj_data_combinations["strata"].cast(str)).alias("is_in_df2")
-)
-
-print(result.filter(pl.col("is_in_df2") == False))
-
-
-```
-
-### Cast variables with hacks
-
-```{python}
-
-final_adjusted_data_polars = cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data)
-
-```
-
-
-```{python}
-
-import pandas as pd
-
-
-reference_groups = list(probs_dict.keys())
-
-
-ojs_define(reference_groups_data = reference_groups)
-
-ojs_define(data = final_adjusted_data_polars.to_pandas())
-
-```
-
-## Observable stuff
-
-```{ojs}
-//| panel: input
-
-viewof time_horizon = Inputs.range(
-  [1, 5],
-  {value: 3, step: 2, label: "Time Horizon:"}
-)
-
-viewof reference_group = Inputs.radio(
-  reference_groups_data, {label: "Reference Group"}, {value: 'thin'}
-)
-
-viewof stratified_by = Inputs.radio(
-  ["probability_threshold", "ppcr"], {value: 
"probability_threshold", label: "Stratified By"} -) - -viewof censored_assumption = Inputs.radio( - ["excluded", "adjusted"], {value: "excluded", label: "Censored Assumption"} -) - -viewof competing_assumption = Inputs.radio( - ["excluded", "adjusted_as_negative", "adjusted_as_censored"], {value: "excluded", label: "Competing Assumption"} -) - -``` - -```{ojs} - -//cumulative_aj_data_filtered = transpose(cumulative_aj_data).filter(function(subset) { -// -// return time_horizon == subset.fixed_time_horizon && -// censored_assumption == subset.censored_assumption && -// competing_assumption == subset.competing_assumption && -// stratified_by == subset.stratified_by && -// reference_group === subset.reference_group; -//}) - -filtered = transpose(data).filter(function(subset) { - - return time_horizon == subset.fixed_time_horizon && - censored_assumption == subset.censoring_assumption && - competing_assumption == subset.competing_assumption && - stratified_by === subset.stratified_by && - reference_group === subset.reference_group; -}) - -filtered - - -``` - -```{ojs} - - -Plot.plot({ - marks: [ - Plot.barY(filtered, { - x: "strata", - y: "reals_estimate", - fill: "reals_labels", - tip: true - }) - ], - color: { - domain: ["real_positives", "real_competing", "real_negatives", "real_censored"], - range: ["#009e73", "#9DB4C0", "#FAC8CD", "#E3F09B"], - legend: true - }, - style: { - background: "none" - } -}) - -``` - -```{python} - -# combined_adjusted_data.dropna(subset=['reals_estimate']) -# # - -# Perform left join between aj_data_combinations and final_adjusted_data on 'strata' and 'reals_estimate' -# only when stratified_by == 'probability_threshold' for aj_data_combinations - -# aj_data_combinations_prob_threshold = aj_data_combinations[aj_data_combinations['stratified_by'] == 'probability_threshold'] - -# # Convert 'strata' columns to strings -# aj_data_combinations_prob_threshold['strata'] = aj_data_combinations_prob_threshold['strata'].astype(str) -# final_adjusted_data['strata'] = final_adjusted_data['strata'].astype(str) - -# combined_adjusted_data = aj_data_combinations_prob_threshold.merge( -# final_adjusted_data[['strata', 'reals', 'reals_estimate']], -# on=['strata', 'reals'], -# how='left' -# ) - - -# aj_data_combinations_prob_threshold[['strata']] -# final_adjusted_data[['strata']] -```