From c3c553042775635a29d0b781c98cfda6f8477209 Mon Sep 17 00:00:00 2001
From: Uriah Finkel <ufinkel@gmail.com>
Date: Thu, 3 Jul 2025 21:20:52 +0300
Subject: [PATCH 1/5] test: fix test_extract_aj_estimate_for_strata_basic

---
 tests/test_rtichoke.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py
index 5e62ac6..2822728 100644
--- a/tests/test_rtichoke.py
+++ b/tests/test_rtichoke.py
@@ -54,7 +54,7 @@ def test_extract_aj_estimate_for_strata_basic() -> None:
         {
             "strata": ["group1"] * 5,
             "reals": [0, 1, 2, 1, 0],
-            "times": [5, 3, 1, 4, 2],
+            "times": [5.0, 3.0, 1.0, 4.0, 2.0],
         }
     )
     horizons = [1.0, 2.0, 3.0]

From aed2ecf7c530c1777bffe78bedb8ca47298cfb39 Mon Sep 17 00:00:00 2001
From: Uriah Finkel <ufinkel@gmail.com>
Date: Thu, 3 Jul 2025 23:42:18 +0300
Subject: [PATCH 2/5] build: set up Quarto and Git identity before publishing docs (close #89)

---
 .github/workflows/python-package.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 9ceb6e5..204feee 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -39,7 +39,15 @@ jobs:
 
     - name: Show package version
       run: grep -r "version" pyproject.toml || grep -r "__version__" rtichoke/ || python -c "import rtichoke; print(rtichoke.__version__)"
-    
+
+    - name: Set up Quarto
+      uses: quarto-dev/quarto-actions/setup@v2
+
+    - name: Set Git identity for Quarto publishing
+      run: |
+        git config --global user.name "github-actions[bot]"
+        git config --global user.email "github-actions[bot]@users.noreply.github.com"
+
     - name: Render and Publish
       working-directory: docs
       run: uv run quarto publish gh-pages --no-browser --token "${{ secrets.GITHUB_TOKEN }}"

From 79b033d3930252d618939c2c6f59e601964d7960 Mon Sep 17 00:00:00 2001
From: Uriah Finkel <ufinkel@gmail.com>
Date: Fri, 4 Jul 2025 14:45:14 +0300
Subject: [PATCH 3/5] docs: build AJ walkthrough from public data, not local pickles (close #91)

---
 docs/walkthrough_aj_estimate.qmd | 340 +++++++++----------------------
 1 file changed, 92 insertions(+), 248 deletions(-)

diff --git a/docs/walkthrough_aj_estimate.qmd b/docs/walkthrough_aj_estimate.qmd
index ba61d86..106e596 100644
--- a/docs/walkthrough_aj_estimate.qmd
+++ b/docs/walkthrough_aj_estimate.qmd
@@ -7,28 +7,73 @@ warning: false
 ---
 
 ```{python}
-from lifelines import AalenJohansenFitter
+import polars as pl
+import pandas as pd
+import numpy as np
+from lifelines import AalenJohansenFitter, CoxPHFitter, WeibullAFTFitter
+
+df_time_to_cancer_dx = pd.read_csv(
+    "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv"
+)
+```
+
+
+```{python}
+
 import numpy as np
 from itertools import product
 import itertools
 from rtichoke.helpers.sandbox_observable_helpers import *
-from lifelines import CoxPHFitter
-from lifelines import WeibullAFTFitter
 import polars as pl
 print("Polars version:", pl.__version__)
 
 import pandas as pd
 import pickle  
 
-with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file:
-    probs_dict = pickle.load(file)
+cph = CoxPHFitter()
+thin_model = CoxPHFitter()
+aft_model = WeibullAFTFitter()
 
-with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file:
-    reals_dict = pickle.load(file)
+cox_formula = "age + famhistory + marker"
+thin_formula = "age + marker"
+aft_formula = "age + marker"
 
-with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file:
-    times_dict = pickle.load(file)
+cph.fit(
+    df_time_to_cancer_dx,
+    duration_col="ttcancer",
+    event_col="cancer",
+    formula=cox_formula,
+)
 
+thin_model.fit(
+    df_time_to_cancer_dx,
+    duration_col="ttcancer",
+    event_col="cancer",
+    formula=thin_formula,
+)
+
+aft_model.fit(
+    df_time_to_cancer_dx,
+    duration_col="ttcancer",
+    event_col="cancer",
+    formula=aft_formula,
+)
+
+# Predicted risk at t = 1.5 for every row: 1 - S(1.5 | covariates).
+# Each model is given only the covariates named in its own formula.
+cph_pred_vals = (1 - cph.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values
+
+thin_pred_vals = (1 - thin_model.predict_survival_function(df_time_to_cancer_dx[['age', 'marker']], times=[1.5])).iloc[0, :].values
+
+aft_pred_vals = (1 - aft_model.predict_survival_function(df_time_to_cancer_dx[['age', 'marker']], times=[1.5])).iloc[0, :].values
+
+probs_dict = {"full": cph_pred_vals, "thin": thin_pred_vals, "aft": aft_pred_vals}
+
+reals_mapping = {"censor": 0, "diagnosed with cancer": 1, "dead other causes": 2}
+
+reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping)
+
+times_dict = df_time_to_cancer_dx["ttcancer"]
 
 ```
 
@@ -39,7 +84,7 @@ with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') a
 
 
 
-fixed_time_horizons = [1, 3, 5]
+fixed_time_horizons = [1.0, 3.0, 5.0]
 stratified_by = ["probability_threshold", "ppcr"]
 by=0.1
 
@@ -71,222 +116,49 @@ list_data_to_adjust_polars = create_list_data_to_adjust_polars(
 
 ### New extract aj estimate by assumptions polars
 
-#### One polars dataframe
-
-```{python}
-
-example_polars_df = list_data_to_adjust_polars.get('full').select(pl.col("strata"), pl.col("reals"), pl.col("times"))
-
-fixed_time_horizons = [1, 3, 5]
-
-
-```
-
-
 ## Create aj_estimates_data
 
-## Create aj_data
-
-```{python}
-
-fixed_time_horizons = [2, 4]
-
-aj_estimates_per_strata_adj_adjneg = create_aj_data(example_polars_df, "adjusted", "adjusted_as_negative", fixed_time_horizons)
-
-aj_estimates_per_strata_excl_adjneg = create_aj_data(example_polars_df, "excluded", "adjusted_as_negative", fixed_time_horizons)
-
-aj_estimates_per_strata_adj_adjcens = create_aj_data(example_polars_df, "adjusted", "adjusted_as_censored", fixed_time_horizons)
-
-
-```
-
-## AJ estimates per assumptions
-
 ```{python}
 
-# 1 adjusted - adjusted_as_negative
-
-aj_estimates_per_strata_adj_adjneg = example_polars_df.group_by("strata").map_groups(
-  lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons)).join(pl.DataFrame({"real_censored_est": 0.0, "censoring_assumption": "adjusted", "competing_assumption": "adjusted_as_negative"}), how = 'cross')
-
-
-
-# 2 excluded - adjusted as negative
-
-exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon")
-
-aj_estimates_per_strata_censored = exploded_data.filter((pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals")==0).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_censored_est"}).with_columns(
-    pl.col("real_censored_est").cast(pl.Float64)
-)
-
-non_censored_data = exploded_data.filter((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")>0)
-
-
-aj_estimates_per_strata_noncensored = pl.concat(
-    [
-        non_censored_data
-        .filter(pl.col("fixed_time_horizon") == fixed_time_horizon)
-        .group_by("strata")
-        .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon))
-        for fixed_time_horizon in fixed_time_horizons
-    ],
-    how="vertical"
-)
-
-aj_estimates_per_strata_excl_adjneg = aj_estimates_per_strata_noncensored.join(
-  aj_estimates_per_strata_censored, 
-  on = ['strata', 'fixed_time_horizon']
-).join(pl.DataFrame({"censoring_assumption": "excluded", "competing_assumption": "adjusted_as_negative"}), how = 'cross')
-
-
-# 3 adjusted - adjusted as censored
-
-
-aj_estimates_per_strata_adj_adjcens = example_polars_df.with_columns([
-        pl.when(
-            (pl.col("reals") ==2)
-        ).then(pl.lit(0))
-            .otherwise(pl.col("reals"))
-         .alias("reals")
-    ]).group_by("strata").map_groups(
-  lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons)).join(pl.DataFrame({"real_censored_est": 0.0, "censoring_assumption": "adjusted", "competing_assumption": "adjusted_as_censored"}), how = 'cross')
-
-# 4 excluded - adjusted as censored
-
-exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon")
-
-aj_estimates_per_strata_censored = exploded_data.filter((pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals")==0).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_censored_est"}).with_columns(
-    pl.col("real_censored_est").cast(pl.Float64)
-)
-
-non_censored_data = exploded_data.filter((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")>0).with_columns([
-        pl.when(
-            (pl.col("reals") ==2)
-        ).then(pl.lit(0))
-            .otherwise(pl.col("reals"))
-         .alias("reals")
-    ])
-
-
-aj_estimates_per_strata_noncensored = pl.concat(
-    [
-        non_censored_data
-        .filter(pl.col("fixed_time_horizon") == fixed_time_horizon)
-        .group_by("strata")
-        .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon))
-        for fixed_time_horizon in fixed_time_horizons
-    ],
-    how="vertical"
-)
-
-aj_estimates_per_strata_excl_adjcens = aj_estimates_per_strata_noncensored.join(
-  aj_estimates_per_strata_censored, 
-  on = ['strata', 'fixed_time_horizon']
-).join(pl.DataFrame({"censoring_assumption": "excluded", "competing_assumption": "adjusted_as_negative"}), how = 'cross')
-
-
-
-## 5 adjusted - excluded
-
-exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon")
-
-aj_estimates_per_strata_competing = exploded_data.filter((pl.col("reals")==2) & (pl.col("times") < pl.col("fixed_time_horizon"))).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_competing_est"}).with_columns(
-    pl.col("real_competing_est").cast(pl.Float64)
-)
-
-non_competing_data = exploded_data.filter((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")!=2).with_columns([
-        pl.when(
-            (pl.col("reals") ==2)
-        ).then(pl.lit(0))
-            .otherwise(pl.col("reals"))
-         .alias("reals")
-    ])
-
-
-aj_estimates_per_strata_noncompeting = pl.concat(
-    [
-        non_competing_data
-        .filter(pl.col("fixed_time_horizon") == fixed_time_horizon)
-        .group_by("strata")
-        .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon))
-        for fixed_time_horizon in fixed_time_horizons
-    ],
-    how="vertical"
-).select(pl.exclude("real_competing_est"))
-
-aj_estimates_per_strata_adj_excl = aj_estimates_per_strata_competing.join(
-  aj_estimates_per_strata_noncompeting, 
-  on = ['strata', 'fixed_time_horizon']
-).join(pl.DataFrame({"real_censored_est": 0.0, "censoring_assumption": "adjusted", "competing_assumption": "excluded"}), how = 'cross').select(
-  ['strata',
- 'fixed_time_horizon',
- 'real_negatives_est',
- 'real_positives_est',
- 'real_competing_est',
- 'real_censored_est',
- 'censoring_assumption',
- 'competing_assumption']
-)
-
-
-## 6 excluded - excluded
-
-
-exploded_data = example_polars_df.with_columns(fixed_time_horizon = pl.lit([1,3,5])).explode("fixed_time_horizon")
-
-aj_estimates_per_strata_censored = exploded_data.filter((pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals")==0).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_censored_est"}).with_columns(
-    pl.col("real_censored_est").cast(pl.Float64)
-)
-
-aj_estimates_per_strata_competing = exploded_data.filter((pl.col("reals")==2) & (pl.col("times") < pl.col("fixed_time_horizon"))).group_by(["strata", "fixed_time_horizon"]).count().rename({"count": "real_competing_est"}).with_columns(
-    pl.col("real_competing_est").cast(pl.Float64)
-)
-
-
-non_censored_non_competing_data = exploded_data.filter(((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals")==1))
-
+fixed_time_horizons = [1.0, 3.0, 5.0]
+
+assumption_sets = [
+    {
+        "censoring_assumption": "adjusted",
+        "competing_assumption": "adjusted_as_negative",
+    },
+    {
+        "censoring_assumption": "excluded",
+        "competing_assumption": "adjusted_as_negative",
+    },
+    {
+        "censoring_assumption": "adjusted",
+        "competing_assumption": "adjusted_as_censored",
+    },
+    {
+        "censoring_assumption": "excluded",
+        "competing_assumption": "adjusted_as_censored",
+    },
+    {"censoring_assumption": "adjusted", "competing_assumption": "excluded"},
+    {"censoring_assumption": "excluded", "competing_assumption": "excluded"},
+]
+
+# aj_estimates_data = extract_aj_estimate_by_assumptions(
+#     example_polars_df,
+#     assumption_sets=assumption_sets,
+#     fixed_time_horizons=fixed_time_horizons,
+# )
 
-aj_estimates_per_strata_noncompeting_noncompeting = pl.concat(
-    [
-        non_censored_non_competing_data
-        .filter(pl.col("fixed_time_horizon") == fixed_time_horizon)
-        .group_by("strata")
-        .map_groups(lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizon))
-        for fixed_time_horizon in fixed_time_horizons
-    ],
-    how="vertical"
-)
 
-aj_estimates_per_strata_excl_excl = aj_estimates_per_strata_competing.join(aj_estimates_per_strata_censored, on = ['strata', 'fixed_time_horizon']).join(
-  aj_estimates_per_strata_noncompeting, 
-  on = ['strata', 'fixed_time_horizon']
-).join(pl.DataFrame({"censoring_assumption": "excluded", "competing_assumption": "excluded"}), how = 'cross').select(
-  ['strata',
- 'fixed_time_horizon',
- 'real_negatives_est',
- 'real_positives_est',
- 'real_competing_est',
- 'real_censored_est',
- 'censoring_assumption',
- 'competing_assumption']
+aj_estimates_data = create_adjusted_data(
+    list_data_to_adjust_polars,
+    assumption_sets=assumption_sets,
+    fixed_time_horizons=fixed_time_horizons
 )
 
-## combine all
-
-aj_estimates_data = pl.concat(
-  [
-    aj_estimates_per_strata_adj_adjneg,
-    aj_estimates_per_strata_adj_adjcens,
-    aj_estimates_per_strata_adj_excl,
-    aj_estimates_per_strata_excl_adjneg,
-    aj_estimates_per_strata_excl_adjcens,
-    aj_estimates_per_strata_excl_excl
-  ]
-).unpivot( index = ["strata", "fixed_time_horizon", "censoring_assumption", "competing_assumption"] , variable_name = "reals_labels", value_name = "reals_estimate")
-
-
 ```
 
+
 ### Check strata values
 
 ```{python}
@@ -322,38 +194,7 @@ print(result.filter(pl.col("is_in_df2") == False))
 
 ```{python}
 
-reals_enum_dtype = aj_data_combinations.schema["reals_labels"]
-censoring_assumptions_enum_dtype = aj_data_combinations.schema["censoring_assumption"]
-competing_assumptions_enum_dtype = aj_data_combinations.schema["competing_assumption"]
-
-strata_enum_dtype = aj_data_combinations.schema["strata"]
-
-
-aj_estimates_data = aj_estimates_data.with_columns([
-    pl.col("strata")
-]).with_columns(
-    pl.col("reals_labels").str.replace(r"_est$", "").cast(reals_enum_dtype)
-).with_columns(
-  pl.col("censoring_assumption").cast(censoring_assumptions_enum_dtype)
-).with_columns(
-  pl.col("competing_assumption").cast(competing_assumptions_enum_dtype)
-).with_columns(
-  pl.col("strata").cast(strata_enum_dtype)
-)
-
-```
-
-```{python}
-
-
-final_adjusted_data_polars = aj_data_combinations.with_columns([
-    pl.col("strata")
-]).join(
-  aj_estimates_data, 
-  on = ['strata', 'fixed_time_horizon', 'censoring_assumption', 'competing_assumption', 'reals_labels'],
-  how = 'left'
-)
-
+final_adjusted_data_polars = cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data)
 
 ```
 
@@ -441,6 +282,9 @@ Plot.plot({
     domain: ["real_positives", "real_competing", "real_negatives", "real_censored"], 
     range: ["#009e73", "#9DB4C0", "#FAC8CD", "#E3F09B"],
     legend: true
+  },
+  style: {
+    background: "none"
   }
 })
 

From f6610df093123a34ee05298610942a362b4b09b2 Mon Sep 17 00:00:00 2001
From: Uriah Finkel <ufinkel@gmail.com>
Date: Fri, 4 Jul 2025 14:51:22 +0300
Subject: [PATCH 4/5] fix: disable absolute-path pickle loads and summary report calls

---
 docs/render_summary.qmd | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/render_summary.qmd b/docs/render_summary.qmd
index bbb5a06..4e2be27 100644
--- a/docs/render_summary.qmd
+++ b/docs/render_summary.qmd
@@ -12,28 +12,28 @@ This is a summary report generated with Quarto.
 
 ```{python}
 
-import pickle  
+# import pickle  
 
 
-with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file:
-    probs_dict = pickle.load(file)
+# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file:
+#     probs_dict = pickle.load(file)
 
-with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file:
-    reals_dict = pickle.load(file)
+# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file:
+#     reals_dict = pickle.load(file)
 
-with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file:
-    times_dict = pickle.load(file)
+# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file:
+#     times_dict = pickle.load(file)
 
 from rtichoke.summary_report.summary_report import * 
 
-create_data_for_summary_report()
+# create_data_for_summary_report()
 
 ```
 
 ```{python}
 from rtichoke.summary_report.summary_report import * 
 
-render_summary_report(  )
+# render_summary_report(  )
 ```
 
 

From 4534dd065d43c04cd3973a495e9132a1f1f7de30 Mon Sep 17 00:00:00 2001
From: Uriah Finkel <ufinkel@gmail.com>
Date: Fri, 4 Jul 2025 14:57:32 +0300
Subject: [PATCH 5/5] build: add contents write permissions to yml file

---
 .github/workflows/python-package.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 204feee..94e9257 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -19,6 +19,7 @@ jobs:
         python-version: ["3.9", "3.10"]
     permissions:
       id-token: write
+      contents: write
 
     steps:
     - uses: actions/checkout@v4