From c3c553042775635a29d0b781c98cfda6f8477209 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 3 Jul 2025 21:20:52 +0300 Subject: [PATCH 1/5] test: fix test_extract_aj_estimate_for_strata_basic --- tests/test_rtichoke.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index 5e62ac6..2822728 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -54,7 +54,7 @@ def test_extract_aj_estimate_for_strata_basic() -> None: { "strata": ["group1"] * 5, "reals": [0, 1, 2, 1, 0], - "times": [5, 3, 1, 4, 2], + "times": [5.0, 3.0, 1.0, 4.0, 2.0], } ) horizons = [1.0, 2.0, 3.0] From aed2ecf7c530c1777bffe78bedb8ca47298cfb39 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Thu, 3 Jul 2025 23:42:18 +0300 Subject: [PATCH 2/5] close #89 --- .github/workflows/python-package.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 9ceb6e5..204feee 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -39,7 +39,15 @@ jobs: - name: Show package version run: grep -r "version" pyproject.toml || grep -r "__version__" rtichoke/ || python -c "import rtichoke; print(rtichoke.__version__)" - + + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + + - name: Set Git identity for Quarto publishing + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + - name: Render and Publish working-directory: docs run: uv run quarto publish gh-pages --no-browser --token "${{ secrets.GITHUB_TOKEN }}" From 79b033d3930252d618939c2c6f59e601964d7960 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Fri, 4 Jul 2025 14:45:14 +0300 Subject: [PATCH 3/5] docs: close #91 --- docs/walkthrough_aj_estimate.qmd | 340 +++++++++---------------------- 1 file changed, 92 insertions(+), 248 deletions(-) diff --git a/docs/walkthrough_aj_estimate.qmd b/docs/walkthrough_aj_estimate.qmd index ba61d86..106e596 100644 --- a/docs/walkthrough_aj_estimate.qmd +++ b/docs/walkthrough_aj_estimate.qmd @@ -7,28 +7,73 @@ warning: false --- ```{python} -from lifelines import AalenJohansenFitter +import polars as pl +import pandas as pd +import numpy as np +from lifelines import AalenJohansenFitter, CoxPHFitter, WeibullAFTFitter + +df_time_to_cancer_dx = pd.read_csv( + "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" +) +``` + + +```{python} + import numpy as np from itertools import product import itertools from rtichoke.helpers.sandbox_observable_helpers import * -from lifelines import CoxPHFitter -from lifelines import WeibullAFTFitter import polars as pl print("Polars version:", pl.__version__) import pandas as pd import pickle -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file: - probs_dict = pickle.load(file) +cph = CoxPHFitter() +thin_model = CoxPHFitter() +aft_model = WeibullAFTFitter() -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: - reals_dict = pickle.load(file) +cox_formula = "age + famhistory + marker" +thin_formula = "age + marker" +aft_formula = "age + marker" -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: - times_dict = pickle.load(file) +cph.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=cox_formula, +) +thin_model.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=thin_formula, +) + +aft_model.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=aft_formula, +) + + + +cph_pred_vals = (1 - cph.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values + +thin_pred_vals = (1 - thin_model.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values + +aft_pred_vals = (1 - aft_model.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values + +probs_dict = {"full": cph_pred_vals, "thin": thin_pred_vals, "aft": aft_pred_vals} + +reals_mapping = {"censor": 0, "diagnosed with cancer": 1, "dead other causes": 2} + +reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping) + +times_dict = df_time_to_cancer_dx["ttcancer"] ``` @@ -39,7 +84,7 @@ with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') a -fixed_time_horizons = [1, 3, 5] +fixed_time_horizons = [1.0, 3.0, 5.0] stratified_by = ["probability_threshold", "ppcr"] by=0.1 @@ -71,222 +116,49 @@ list_data_to_adjust_polars = create_list_data_to_adjust_polars( ### New extract aj estimate by assumptions polars -#### One polars dataframe - -```{python} - -example_polars_df = list_data_to_adjust_polars.get('full').select(pl.col("strata"), pl.col("reals"), pl.col("times")) - -fixed_time_horizons = [1, 3, 5] - - -``` - - ## Create aj_estimates_data -## Create aj_data - -```{python} - -fixed_time_horizons = [2, 4] - -aj_estimates_per_strata_adj_adjneg = create_aj_data(example_polars_df, "adjusted", "adjusted_as_negative", fixed_time_horizons) - -aj_estimates_per_strata_excl_adjneg = create_aj_data(example_polars_df, "excluded", "adjusted_as_negative", fixed_time_horizons) - -aj_estimates_per_strata_adj_adjcens = create_aj_data(example_polars_df, "adjusted", "adjusted_as_censored", fixed_time_horizons) - - -``` - -## AJ estimates per assumptions - ```{python} -# 1 adjusted - adjusted_as_negative - -aj_estimates_per_strata_adj_adjneg = example_polars_df.group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons)).join(pl.DataFrame({"real_censored_est": 0.0, "censoring_assumption": "adjusted", "competing_assumption": "adjusted_as_negative"}), how = 'cross') - - - -# 2 excluded - adjusted as negative - -exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon") - -aj_estimates_per_strata_censored = exploded_data.filter((pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals")==0).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_censored_est"}).with_columns( - pl.col("real_censored_est").cast(pl.Float64) -) - -non_censored_data = exploded_data.filter((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")>0) - - -aj_estimates_per_strata_noncensored = pl.concat( - [ - non_censored_data - .filter(pl.col("fixed_time_horizon") == fixed_time_horizon) - .group_by("strata") - .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon)) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical" -) - -aj_estimates_per_strata_excl_adjneg = aj_estimates_per_strata_noncensored.join( - aj_estimates_per_strata_censored, - on = ['strata', 'fixed_time_horizon'] -).join(pl.DataFrame({"censoring_assumption": "excluded", "competing_assumption": "adjusted_as_negative"}), how = 'cross') - - -# 3 adjusted - adjusted as censored - - -aj_estimates_per_strata_adj_adjcens = example_polars_df.with_columns([ - pl.when( - (pl.col("reals") ==2) - ).then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ]).group_by("strata").map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons)).join(pl.DataFrame({"real_censored_est": 0.0, "censoring_assumption": "adjusted", "competing_assumption": "adjusted_as_censored"}), how = 'cross') - -# 4 excluded - adjusted as censored - -exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon") - -aj_estimates_per_strata_censored = exploded_data.filter((pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals")==0).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_censored_est"}).with_columns( - pl.col("real_censored_est").cast(pl.Float64) -) - -non_censored_data = exploded_data.filter((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")>0).with_columns([ - pl.when( - (pl.col("reals") ==2) - ).then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ]) - - -aj_estimates_per_strata_noncensored = pl.concat( - [ - non_censored_data - .filter(pl.col("fixed_time_horizon") == fixed_time_horizon) - .group_by("strata") - .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon)) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical" -) - -aj_estimates_per_strata_excl_adjcens = aj_estimates_per_strata_noncensored.join( - aj_estimates_per_strata_censored, - on = ['strata', 'fixed_time_horizon'] -).join(pl.DataFrame({"censoring_assumption": "excluded", "competing_assumption": "adjusted_as_negative"}), how = 'cross') - - - -## 5 adjusted - excluded - -exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon") - -aj_estimates_per_strata_competing = exploded_data.filter((pl.col("reals")==2) & (pl.col("times") < pl.col("fixed_time_horizon"))).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_competing_est"}).with_columns( - pl.col("real_competing_est").cast(pl.Float64) -) - -non_competing_data = exploded_data.filter((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")!=2).with_columns([ - pl.when( - (pl.col("reals") ==2) - ).then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ]) - - -aj_estimates_per_strata_noncompeting = pl.concat( - [ - non_competing_data - .filter(pl.col("fixed_time_horizon") == fixed_time_horizon) - .group_by("strata") - .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon)) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical" -).select(pl.exclude("real_competing_est")) - -aj_estimates_per_strata_adj_excl = aj_estimates_per_strata_competing.join( - aj_estimates_per_strata_noncompeting, - on = ['strata', 'fixed_time_horizon'] -).join(pl.DataFrame({"real_censored_est": 0.0, "censoring_assumption": "adjusted", "competing_assumption": "excluded"}), how = 'cross').select( - ['strata', - 'fixed_time_horizon', - 'real_negatives_est', - 'real_positives_est', - 'real_competing_est', - 'real_censored_est', - 'censoring_assumption', - 'competing_assumption'] -) - - -## 6 excluded - excluded - - -exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon") - -aj_estimates_per_strata_censored = exploded_data.filter((pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals")==0).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_censored_est"}).with_columns( - pl.col("real_censored_est").cast(pl.Float64) -) - -aj_estimates_per_strata_competing = exploded_data.filter((pl.col("reals")==2) & (pl.col("times") < pl.col("fixed_time_horizon"))).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_competing_est"}).with_columns( - pl.col("real_competing_est").cast(pl.Float64) -) - - -non_censored_non_competing_data = exploded_data.filter(((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")==1)) - +fixed_time_horizons = [1.0, 3.0, 5.0] + +assumption_sets = [ + { + "censoring_assumption": "adjusted", + "competing_assumption": "adjusted_as_negative", + }, + { + "censoring_assumption": "excluded", + "competing_assumption": "adjusted_as_negative", + }, + { + "censoring_assumption": "adjusted", + "competing_assumption": "adjusted_as_censored", + }, + { + "censoring_assumption": "excluded", + "competing_assumption": "adjusted_as_censored", + }, + {"censoring_assumption": "adjusted", "competing_assumption": "excluded"}, + {"censoring_assumption": "excluded", "competing_assumption": "excluded"}, +] + +# aj_estimates_data = extract_aj_estimate_by_assumptions( +# example_polars_df, +# assumption_sets=assumption_sets, +# fixed_time_horizons=fixed_time_horizons, +# ) -aj_estimates_per_strata_noncompeting_noncompeting = pl.concat( - [ - non_censored_non_competing_data - .filter(pl.col("fixed_time_horizon") == fixed_time_horizon) - .group_by("strata") - .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon)) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical" -) -aj_estimates_per_strata_excl_excl = aj_estimates_per_strata_competing.join(aj_estimates_per_strata_censored, on = ['strata', 'fixed_time_horizon']).join( - aj_estimates_per_strata_noncompeting, - on = ['strata', 'fixed_time_horizon'] -).join(pl.DataFrame({"censoring_assumption": "excluded", "competing_assumption": "excluded"}), how = 'cross').select( - ['strata', - 'fixed_time_horizon', - 'real_negatives_est', - 'real_positives_est', - 'real_competing_est', - 'real_censored_est', - 'censoring_assumption', - 'competing_assumption'] +aj_estimates_data = create_adjusted_data( + list_data_to_adjust_polars, + assumption_sets=assumption_sets, + fixed_time_horizons=fixed_time_horizons ) -## combine all - -aj_estimates_data = pl.concat( - [ - aj_estimates_per_strata_adj_adjneg, - aj_estimates_per_strata_adj_adjcens, - aj_estimates_per_strata_adj_excl, - aj_estimates_per_strata_excl_adjneg, - aj_estimates_per_strata_excl_adjcens, - aj_estimates_per_strata_excl_excl - ] -).unpivot( index = ["strata", "fixed_time_horizon", "censoring_assumption", "competing_assumption"] , variable_name = "reals_labels", value_name = "reals_estimate") - - ``` + ### Check strata values ```{python} @@ -322,38 +194,7 @@ print(result.filter(pl.col("is_in_df2") == False)) ```{python} -reals_enum_dtype = aj_data_combinations.schema["reals_labels"] -censoring_assumptions_enum_dtype = aj_data_combinations.schema["censoring_assumption"] -competing_assumptions_enum_dtype = aj_data_combinations.schema["competing_assumption"] - -strata_enum_dtype = aj_data_combinations.schema["strata"] - - -aj_estimates_data = aj_estimates_data.with_columns([ - pl.col("strata") -]).with_columns( - pl.col("reals_labels").str.replace(r"_est$", "").cast(reals_enum_dtype) -).with_columns( - pl.col("censoring_assumption").cast(censoring_assumptions_enum_dtype) -).with_columns( - pl.col("competing_assumption").cast(competing_assumptions_enum_dtype) -).with_columns( - pl.col("strata").cast(strata_enum_dtype) -) - -``` - -```{python} - - -final_adjusted_data_polars = aj_data_combinations.with_columns([ - pl.col("strata") -]).join( - aj_estimates_data, - on = ['strata', 'fixed_time_horizon', 'censoring_assumption', 'competing_assumption', 'reals_labels'], - how = 'left' -) - +final_adjusted_data_polars = cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data) ``` @@ -441,6 +282,9 @@ Plot.plot({ domain: ["real_positives", "real_competing", "real_negatives", "real_censored"], range: ["#009e73", "#9DB4C0", "#FAC8CD", "#E3F09B"], legend: true + }, + style: { + background: "none" } }) From f6610df093123a34ee05298610942a362b4b09b2 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Fri, 4 Jul 2025 14:51:22 +0300 Subject: [PATCH 4/5] fix: remove explicit call from an absolute path --- docs/render_summary.qmd | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/render_summary.qmd b/docs/render_summary.qmd index bbb5a06..4e2be27 100644 --- a/docs/render_summary.qmd +++ b/docs/render_summary.qmd @@ -12,28 +12,28 @@ This is a summary report generated with Quarto. ```{python} -import pickle +# import pickle -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file: - probs_dict = pickle.load(file) +# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file: +# probs_dict = pickle.load(file) -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: - reals_dict = pickle.load(file) +# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: +# reals_dict = pickle.load(file) -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: - times_dict = pickle.load(file) +# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: +# times_dict = pickle.load(file) from rtichoke.summary_report.summary_report import * -create_data_for_summary_report() +# create_data_for_summary_report() ``` ```{python} from rtichoke.summary_report.summary_report import * -render_summary_report( ) +# render_summary_report( ) ``` From 4534dd065d43c04cd3973a495e9132a1f1f7de30 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Fri, 4 Jul 2025 14:57:32 +0300 Subject: [PATCH 5/5] build: add contents write permissions to yml file --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 204feee..94e9257 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -19,6 +19,7 @@ jobs: python-version: ["3.9", "3.10"] permissions: id-token: write + contents: write steps: - uses: actions/checkout@v4