From 4559b417c1b75e986652f5c7f3c00ca7c8e770fe Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 3 Nov 2025 09:32:05 +0200 Subject: [PATCH 1/3] docs: update before_we_validate.qmd --- docs/before_we_validate.qmd | 556 ++++++++++++++++++++++++++++++++++++ 1 file changed, 556 insertions(+) create mode 100644 docs/before_we_validate.qmd diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd new file mode 100644 index 0000000..2437c37 --- /dev/null +++ b/docs/before_we_validate.qmd @@ -0,0 +1,556 @@ +--- +title: "Before we Validate Performance" +author: "Uriah Finkel" +format: + html: + echo: false +mermaid-format: svg +--- + +Ideally we would like to keep Performance Validation as agnostic as possible. However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case. + +So before we validate performance, let us consider the underlying process. + +✍️ The User Inputs\ +🪛 Internal Function + +# ✍️ Declare reference groups + +The dimensions of the `probs` and the `reals` dictionaries imply the nature of the use case: + +TODO: copy from rtichoke r README. + +##### One Model, One Population: + +- Just one reference group: "model". + +##### Several Models, One Population: + +Compare between different candidate models. - Each model stands as a reference group such as "thin" model, or a "full" model. + +##### Several Models, Several Populations + +Compare performance over different sub-populations. - Internal Validation: "test", "val" and "train". - External Validation: "Framingham", "Australia". - Fairness: "Male", "Female". 
+ +# โœ๏ธ Declare how to stratify predictions โœ‚๏ธ + +The `stratified_by` argument is designed for the user to choose how to stratify predictions for decision-making, each method implies different problem: + +::: {.panel-tabset} + +## Probability Threshold + +::: {.panel-tabset} + +By choosing Probability Threshold as a cutoff the implied assumption is that you are concerned with individual harm or benefit. + +### Baseline Strategy: Treat None + +```{mermaid} + +graph LR + subgraph trt[Treatment Decision] + linkStyle default stroke:#000 + A("๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š"|B("Predicted
Positive

๐Ÿ’Š
๐Ÿ˜ท") + A -->|"No Treatment"|C("Predicted
Negative

๐Ÿ˜ท") + end + + subgraph ut[Utility of the Decision] + subgraph pred[Prediction Model] + B -->|"Disease ๐Ÿคข"| D["TP
๐Ÿ’Š
๐Ÿคข"] + B -->|"No Disease ๐Ÿคจ"| E["FP
๐Ÿ’Š
๐Ÿคจ"] + C -->|"Disease ๐Ÿคข"| F["FN
๐Ÿคข"] + C -->|"No Disease ๐Ÿคจ"| G["TN
๐Ÿคจ"] + end + subgraph baselinestrategy[Baseline Strategy: Treat None] + Dnone["FN
๐Ÿคข"] + Enone["TN
๐Ÿคจ"] + Fnone["FN
๐Ÿคข"] + Gnone["TN
๐Ÿคจ"] + + D---Dnone + E---Enone + F---Fnone + G---Gnone + end + subgraph nb[Net Benefit] + Dnb[1] + Enb["pt / (1-pt)"] + Fnb[0] + Gnb[0] + Dnone---Dnb + Enone---Enb + Fnone---Fnb + Gnone---Gnb + end + end + + + + style A fill:#E8F4FF, stroke:black,color:black + style B fill:#E8F4FF, stroke:black,color:black + style C fill:#E8F4FF, stroke:black,color:black + style D fill:#C0FFC0,stroke:black,color:black + style Dnone fill:#FFCCE0,stroke:black,color:black + style Dnb fill: #C0FFC0,stroke:black,color:black + style E fill: #FFCCE0,stroke:black,color:black + style Enone fill: #C0FFC0,stroke:black,color:black + style Enb fill: #FFCCE0,stroke:black,color:black + style F fill:#FFCCE0,stroke:black,color:black + style Fnone fill: #FFCCE0,stroke:black,color:black + style Fnb fill: #E8F4FF,stroke:black,color:black + style G fill: #C0FFC0,stroke:black,color:black + style Gnone fill: #C0FFC0,stroke:black,color:black + style Gnb fill: #E8F4FF,stroke:black,color:black + style nb fill: #E8F4FF,stroke:black,color:black + style pred fill: #E8F4FF,stroke:black,color:black + style baselinestrategy fill: #E8F4FF,stroke:black,color:black + + classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px + class trt,ut subgraphStyle + +``` + +### Baseline Strategy: Treat All + +```{mermaid} + +graph LR + subgraph trt[Treatment Decision] + linkStyle default stroke:#000 + A("๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š"|B("Predicted
Positive

๐Ÿ’Š
๐Ÿ˜ท") + A -->|"No Treatment"|C("Predicted
Negative

๐Ÿ˜ท") + end + + subgraph ut[Utility of the Decision] + subgraph pred[Prediction Model] + B -->|"Disease ๐Ÿคข"| D["TP
๐Ÿ’Š
๐Ÿคข"] + B -->|"No Disease ๐Ÿคจ"| E["FP
๐Ÿ’Š
๐Ÿคจ"] + C -->|"Disease ๐Ÿคข"| F["FN
๐Ÿคข"] + C -->|"No Disease ๐Ÿคจ"| G["TN
๐Ÿคจ"] + end + subgraph baselinestrategy[Baseline Strategy: Treat All] + Dall["TP
๐Ÿ’Š
๐Ÿคข"] + Eall["FP
๐Ÿ’Š
๐Ÿคจ"] + Fall["TP
๐Ÿ’Š
๐Ÿคข"] + Gall["FP
๐Ÿ’Š
๐Ÿคจ"] + + D---Dall + E---Eall + F---Fall + G---Gall + end + subgraph nb[Net Benefit] + Dnb[0] + Enb[0] + Fnb["(1-pt) / pt"] + Gnb["1"] + Dall---Dnb + Eall---Enb + Fall---Fnb + Gall---Gnb + end + end + + + + style A fill:#E8F4FF, stroke:black,color:black + style B fill:#E8F4FF, stroke:black,color:black + style C fill:#E8F4FF, stroke:black,color:black + style D fill:#C0FFC0,stroke:black,color:black + style Dall fill:#C0FFC0,stroke:black,color:black + style Dnb fill:#E8F4FF,stroke:black,color:black + style E fill:#FFCCE0,stroke:black,color:black + style Eall fill:#FFCCE0,stroke:black,color:black + style Enb fill:#E8F4FF,stroke:black,color:black + style F fill:#FFCCE0,stroke:black,color:black + style Fall fill:#C0FFC0,stroke:black,color:black + style Fnb fill:#FFCCE0,stroke:black,color:black + style G fill:#C0FFC0,stroke:black,color:black + style Gall fill:#FFCCE0,stroke:black,color:black + style Gnb fill:#C0FFC0,stroke:black,color:black + style nb fill: #E8F4FF,stroke:black,color:black + style pred fill: #E8F4FF,stroke:black,color:black + style baselinestrategy fill: #E8F4FF,stroke:black,color:black + + classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px + class trt,ut subgraphStyle + +``` + +*Regardless* of ranking each prediction is categorised to a bin: 0.32 -\> `[0.3, 0.4)`. + +1. Categorise Absolute Risk: 0.32 -\> `[0.3, 0.4)` + +References: Pauker SG, Kassirer JP. Therapeutic decision making: a cost-benefit analysis. N Engl J Med. 1975;293(5):229-234. doi:10.1056/NEJM197507312930505 + +::: + +## PPCR + +![](line_ppcr_04.svg) + +```{mermaid} + +graph LR + subgraph trt[Treatment Allocation Decision] + linkStyle default stroke:#000 + A("๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š"|B("ฮฃ Predicted
Positives

๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š
๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท") + A -->|"No Treatment"|C("ฮฃ Predicted
Negatives

๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท") + end + + subgraph ut[Utility of the Decision] + B -->|"Disease ๐Ÿคข๐Ÿคข๐Ÿคข"| D["ฮฃ TP
๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š
๐Ÿคข๐Ÿคข๐Ÿคข"] + B -->|"No Disease ๐Ÿคจ"| E["ฮฃ FP
๐Ÿ’Š
๐Ÿคจ"] + C -->|"Disease ๐Ÿคข"| F["ฮฃ FN
๐Ÿคข"] + C -->|"No Disease ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ"| G["ฮฃ TN
🤨🤨🤨🤨🤨"] + end + + + + style A fill:#E8F4FF, stroke:black,color:black + style B fill:#E8F4FF, stroke:black,color:black + style C fill:#E8F4FF, stroke:black,color:black + style D fill:#C0FFC0,stroke:black,color:black + style E fill:#FFCCE0,stroke:black,color:black + style F fill:#FFCCE0,stroke:black,color:black + style G fill:#C0FFC0,stroke:black,color:black + + classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px + class trt,ut subgraphStyle + +``` + +By choosing PPCR as a cutoff the implied assumption is that you are concerned with resource constraint and assume no individual treatment harm. + +*Regarding* the ranking each prediction is categorised to a bin: if the absolute probability 0.32 is the 18th highest prediction out of 100, it will be categorised to the second decile -\> `0.18`. + +1. Calculate Risk-Quantile from Absolute Risk: 0.32 -\> `0.18` + +References: https://en.wikipedia.org/wiki/Precision_and_recall + +::: + +# ✍️ Declare Fixed Time Horizons 🌅 (📅🤬) + +The `fixed_time_horizons` argument is designed for the user to choose the set of time horizons to follow. + +Different followups contain different distributions of observed outcomes: Declare fixed time horizons for the prediction model, such as \[5, 10\] years of prediction for CVD event. + +## 🪛 Update Administrative Censoring + +For cases where the observed time-to-event is shorter than the prediction time horizon, the outcomes might change: + +- `Real Positives` 🤢 should be considered as `Real Negatives` 🤨, the outcome of interest did not happen yet. + +- Always included and Encoded as 0. + +- `Real Negatives` 🤨 should be considered as `Real Censored` 🤬, the event of interest could have happened in the gap between the observed time and the fixed time horizon. + +- If adjusted: encoded as 0. + +- If excluded: counted with crude estimate. 
+ +```{python} + +import numpy as np + +times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3]) +reals = np.array([1, 1, 1, 1, 0, 2, 1, 2, 0, 1]) +time_horizons = [10, 20, 30, 40, 50] + +# Icons +def get_icon(outcome, t, h): + if outcome == 0: + return "๐Ÿคฌ" if t < h else "๐Ÿคจ" + elif outcome == 1: + return "๐Ÿคข" + elif outcome == 2: + return "๐Ÿ’€" + +# Displayed time +def get_time(outcome, t, h): + if outcome == 0: + return t if t < h else h + else: + return t + +# Final output +final_data = [] + +for i in range(len(times)): + id_ = i + 1 + t = times[i] + r = reals[i] + + for h in time_horizons: + outcome = r if t <= h else 0 # override outcome after horizon + final_data.append({ + "id": id_, + "time_horizon": h, + "time": get_time(outcome, t, h), + "real": get_icon(outcome, t, h) + }) + +ojs_define(data = final_data) + +``` + +```{ojs} + +filteredData = data.filter((d) => d.time_horizon == timeHorizon) + +viewof timeHorizon = Inputs.range([10, 50], { + step: 10, + value: 50, + label: "Time Horizon" +}) + +Plot.plot({ + x: { + domain: [0, 50] + }, + y: { + domain: [0, 11], + axis: false + }, + marks: [ + Plot.ruleX([timeHorizon], { + stroke: "#D9E8A3", + strokeWidth: 6, + strokeDasharray: "5,5", + y1: 0, + y2: 10 // Should match the y-domain max + }), + Plot.ruleY(filteredData, { + x: "time", + y: "id", + strokeWidth: 1.5 + }), + Plot.text(filteredData, { + x: "time", + y: "id", + text: "real", + tip: true, + fontSize: 30 + }) + ] +}) + +``` + +# Declare Heuristics Regarding ambigious `reals` + +::: {.panel-tabset} + +## โœ๏ธ Declare Heuristics Regarding Censored Events ๐Ÿ“…๐Ÿคฌ + +```{mermaid} + +graph LR + S0["Non Event
0 ๐Ÿคจ / ๐Ÿคฌ"] -->|"?"|S1["Primary Event
1 ๐Ÿคข"] + S0-->|"?"|S2["Competing Event
2 💀"] + + + classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black + classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white + classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black + + class S0 nonEvent + class S1 primaryEvent + class S2 competingEvent + class S3 censoredEvent + + linkStyle 0 stroke:#333,background:yellow + +``` + +The `censored_heuristic` argument is designed for the user to choose how to interpret censored events. + +Performance Validation in the face of censored observations requires assumptions regarding the unobserved followup. + +TODO: add link to nan-van-geloven article + +::: {.panel-tabset} + +### Exclude Censored Events + +```{mermaid} + +graph LR + S0["Non Event
0 ๐Ÿคจ"] -->S1["Primary Event
1 ๐Ÿคข"] + S0-->S2["Competing Event
2 ๐Ÿ’€"] + + S3["Censored
0 🤬"] + + + classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black + classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white + classDef censoredEvent fill:#E3F09B,stroke:#333,stroke-width:1px,color:black + classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black + + class S0 nonEvent + class S1 primaryEvent + class S2 competingEvent + class S3 censoredEvent + + linkStyle 0 stroke:#333,background:yellow + +``` + +All censored events are excluded. + +Underlying Assumption: Small amount of censored events. Violation of the assumption leads to: Overestimation of the observed outcomes. + +### Adjust Censored as partially seen Non-Event + +Observed outcomes for each stratum are estimated using the AJ-estimate (equivalent to CIF and KM): Each censored observation is assumed to be similar to the ones who weren't censored. + +TODO: Link to article + +Underlying Assumption: Independent Censoring. Violation of the assumption leads to: Biased estimate for observed outcomes. + +::: + +## ✍️ Declare Heuristics Regarding Competing Events 📅💀 + +The `competing_heuristic` argument is designed for the user to choose how to interpret competing events. + +Performance Validation in the face of competing observations requires assumptions regarding the unobserved followup. + +TODO: add link to nan-van-geloven article + +::: {.panel-tabset} + +### Exclude Competing Events + +```{mermaid} + +graph LR + subgraph adj[Adjusted for Censoring] + S0["Non Event
0 ๐Ÿคจ / ๐Ÿคฌ"] -->S1["Primary Event
1 ๐Ÿคข"] + end + S0 -->S2["Competing Event
2 ๐Ÿ’€"] + + + classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black + classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white + classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black + + class S0 nonEvent + class S1 primaryEvent + class S2 competingEvent + + linkStyle 0 stroke:#333 + + style adj fill:#E3F09B,color:black + + +``` + +All competing events to be excluded. + +Underlying Assumption: Small amount of competing events. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. + +### Adjust Competing Events as Censored (Partially seen Negatives) + +Check + +```{mermaid} + +graph LR + subgraph adj[Adjusted for Censoring] + direction LR + S0["Non Event
0 ๐Ÿคจ / ๐Ÿคฌ

Competing Event
2 ๐Ÿ’€"] -->S1["Primary Event
1 🤢"] + end + + + classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black + classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white + classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black + + class S0 nonEvent + class S1 primaryEvent + class S2 competingEvent + + style adj fill:#E3F09B,color:black + + + linkStyle 0 stroke:#333 + +``` + +All competing events to be treated as censored. + +Underlying Assumption: We consider a patient experiencing a competing event equivalent to independent censoring. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. + +### Adjust Competing Events as Negatives (Definite Non-Events) + +All competing events to be treated as Competing event to the primary event-of-interest. + +In a way, a patient experiencing a competing event is "more" of a "real-negative" than a conventional "real-negative". + +This is derived from the assumed state-convention. + +Beyond the horizon time the following transition is possible: `Real Negatives` 🤨 =\> `Real Positives` 🤢 💀 2 + +```{mermaid} + +graph LR +subgraph adj[Adjusted for Censoring] + direction LR + S0["Non Event
0 ๐Ÿคจ"] -->S1["Primary Event
1 ๐Ÿคข"] + S0 -->S2["Competing Event
2 💀"] + end + + + classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black + classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white + classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black + + class S0 nonEvent + class S1 primaryEvent + class S2 competingEvent + + linkStyle 0 stroke:#333 + style adj fill:#E3F09B,color:black + +``` + +Underlying Assumption: We consider a patient experiencing a competing event as a definite non-event. Violation of the assumption leads to Underestimation of the observed outcomes if a competing event can be considered as a different form of the primary event. + +### Adjust Competing Events as Composite (Positives) + +All competing events to be treated as another case of the Primary Event. + +A patient experiencing a competing event might be seen as another case of "some-event". + +```{mermaid} + +graph LR +subgraph adj[Adjusted for Censoring] + direction LR + S0["Non Event
0 ๐Ÿคจ"] -->S1["Any Event
1 ๐Ÿคข
Competing Event
2 💀"] + end + + + classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black + classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white + classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black + + class S0 nonEvent + class S1 primaryEvent + class S2 competingEvent + + linkStyle 0 stroke:#333 + style adj fill:#E3F09B,color:black + +``` + +There is no need for an underlying assumption, the answer is explicit. + +This heuristic can be seen as a different underlying question - what is the probability of having any type of event, Primary or Competing? + +::: + +::: From 461af198b467851f4b4bb930c73418e6be0d064c Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 3 Nov 2025 09:34:07 +0200 Subject: [PATCH 2/3] build: update rtichoke version --- pyproject.toml | 2 +- uv.lock | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8da0fee..ad99309 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "polarstate==0.1.8", ] name = "rtichoke" -version = "0.1.11" +version = "0.1.12" description = "interactive visualizations for performance of predictive models" readme = "README.md" diff --git a/uv.lock b/uv.lock index 33a00b3..d6392ca 100644 --- a/uv.lock +++ b/uv.lock @@ -4054,7 +4054,7 @@ wheels = [ [[package]] name = "rtichoke" -version = "0.1.11" +version = "0.1.12" source = { editable = "." 
} dependencies = [ { name = "importlib" }, @@ -4712,6 +4712,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/48/973da1ee8bc0743519759e74c3615b39acdc3faf00e0a0710f8c856d8c9d/statsmodels-0.14.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a085d47c8ef5387279a991633883d0e700de2b0acc812d7032d165888627bef", size = 10453538, upload-time = "2025-07-07T14:24:06.959Z" }, { url = "https://files.pythonhosted.org/packages/c7/d6/18903fb707afd31cf1edaec5201964dbdacb2bfae9a22558274647a7c88f/statsmodels-0.14.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9f866b2ebb2904b47c342d00def83c526ef2eb1df6a9a3c94ba5fe63d0005aec", size = 10681584, upload-time = "2025-07-07T14:24:21.038Z" }, { url = "https://files.pythonhosted.org/packages/44/d6/80df1bbbfcdc50bff4152f43274420fa9856d56e234d160d6206eb1f5827/statsmodels-0.14.5-cp313-cp313-win_amd64.whl", hash = "sha256:2a06bca03b7a492f88c8106103ab75f1a5ced25de90103a89f3a287518017939", size = 9604641, upload-time = "2025-07-07T12:08:36.23Z" }, + { url = "https://files.pythonhosted.org/packages/fd/6c/0fb40a89d715412160097c6f3387049ed88c9bd866c8838a8852c705ae2f/statsmodels-0.14.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:07c4dad25bbb15864a31b4917a820f6d104bdc24e5ddadcda59027390c3bed9e", size = 10211256, upload-time = "2025-10-30T13:46:58.591Z" }, + { url = "https://files.pythonhosted.org/packages/88/4a/e36fe8b19270ab3e80df357da924c6c029cab0fb9a0fbd28aaf49341707d/statsmodels-0.14.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:babb067c852e966c2c933b79dbb5d0240919d861941a2ef6c0e13321c255528d", size = 10110933, upload-time = "2025-10-30T13:47:11.774Z" }, + { url = "https://files.pythonhosted.org/packages/8a/bf/1b7e7b1a6c09a88a9c5c9e60622c050dfd08af11c2e6d4a42dbc71b32ee1/statsmodels-0.14.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:110194b137286173cc676d7bad0119a197778de6478fc6cbdc3b33571165ac1e", 
size = 10253981, upload-time = "2025-10-30T16:32:22.399Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d0/f95da95524bdd99613923ca61a3036d1308cee1290e5e8acb89f51736a8c/statsmodels-0.14.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c8a9c384a60c80731b278e7fd18764364c8817f4995b13a175d636f967823d1", size = 10460450, upload-time = "2025-10-30T16:32:44.985Z" }, + { url = "https://files.pythonhosted.org/packages/28/bb/59e7be0271be264b7b541baf3973f97747740950bfd5115de731f63da8ab/statsmodels-0.14.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:557df3a870a57248df744fdfcc444ecbc5bdbf1c042b8a8b5d8e3e797830dc2a", size = 10694060, upload-time = "2025-10-30T16:33:07.656Z" }, + { url = "https://files.pythonhosted.org/packages/8b/c0/b28d0fd0347ea38d3610052f479e4b922eb33bb8790817f93cd89e6e08ba/statsmodels-0.14.5-cp314-cp314-win_amd64.whl", hash = "sha256:95af7a9c4689d514f4341478b891f867766f3da297f514b8c4adf08f4fa61d03", size = 9648961, upload-time = "2025-10-30T13:47:24.303Z" }, { url = "https://files.pythonhosted.org/packages/39/2d/3ab5a8e736b94a91434a70dcbdc4363775711ef17c733e6bde5f24cb2f62/statsmodels-0.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b23b8f646dd78ef5e8d775d879208f8dc0a73418b41c16acac37361ff9ab7738", size = 10077385, upload-time = "2025-07-07T12:13:55.07Z" }, { url = "https://files.pythonhosted.org/packages/44/ec/091dc1e69bbc84139e3409e45ac26e285ef41eb67116d13e094cdde7804d/statsmodels-0.14.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e5e26b21d2920905764fb0860957d08b5ba2fae4466ef41b1f7c53ecf9fc7fa", size = 9752723, upload-time = "2025-07-07T12:08:52.238Z" }, { url = "https://files.pythonhosted.org/packages/72/0a/0ab3a900fc3245ebdaaca59018567b1e23bcab13c9eea2d7b3d8ffcbb82e/statsmodels-0.14.5-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a060c7e0841c549c8ce2825fd6687e6757e305d9c11c9a73f6c5a0ce849bb69", size = 10470566, 
upload-time = "2025-07-07T14:33:03.356Z" }, From 022437661bbe0eb7e5e792b7b2ec6e6922fa7b33 Mon Sep 17 00:00:00 2001 From: Uriah Finkel Date: Mon, 3 Nov 2025 09:45:18 +0200 Subject: [PATCH 3/3] build: give up before_we_validate.qmd for now --- docs/before_we_validate.qmd | 556 ------------------------------------ docs/sandbox.py | 75 ----- 2 files changed, 631 deletions(-) delete mode 100644 docs/before_we_validate.qmd delete mode 100644 docs/sandbox.py diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd deleted file mode 100644 index 2437c37..0000000 --- a/docs/before_we_validate.qmd +++ /dev/null @@ -1,556 +0,0 @@ ---- -title: "Before we Validate Performance" -author: "Uriah Finkel" -format: - html: - echo: false -mermaid-format: svg ---- - -Ideally we would like to keep Performance Validation as agnostic as possible. However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case. - -So before we validate performance, let us consider the underlying process. - -โœ๏ธ The User Inputs\ -๐Ÿช› Internal Function - -# โœ๏ธ Declare reference groups - -The dimentions of the `probs` and the `real` dictionaries imply the nature of the use case: - -TODO: copy from rtichoke r README. - -##### One Model, One Population: - -- Just one reference group: "model". - -##### Several Models, One Population: - -Compare between different candidate models. - Each model stand as a reference groups such as "thin" model, or a "full" model. - -##### Several Models, Several Populations - -Compare performance over different sub-populations. - Internal Validation: "test", "val" and "train". - External Validation: "Framingham", "Australia". - Fairness: "Male", "Female". 
- -# โœ๏ธ Declare how to stratify predictions โœ‚๏ธ - -The `stratified_by` argument is designed for the user to choose how to stratify predictions for decision-making, each method implies different problem: - -::: {.panel-tabset} - -## Probability Threshold - -::: {.panel-tabset} - -By choosing Probability Threshold as a cutoff the implied assumption is that you are concerned with individual harm or benefit. - -### Baseline Strategy: Treat None - -```{mermaid} - -graph LR - subgraph trt[Treatment Decision] - linkStyle default stroke:#000 - A("๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š"|B("Predicted
Positive

๐Ÿ’Š
๐Ÿ˜ท") - A -->|"No Treatment"|C("Predicted
Negative

๐Ÿ˜ท") - end - - subgraph ut[Utility of the Decision] - subgraph pred[Prediction Model] - B -->|"Disease ๐Ÿคข"| D["TP
๐Ÿ’Š
๐Ÿคข"] - B -->|"No Disease ๐Ÿคจ"| E["FP
๐Ÿ’Š
๐Ÿคจ"] - C -->|"Disease ๐Ÿคข"| F["FN
๐Ÿคข"] - C -->|"No Disease ๐Ÿคจ"| G["TN
๐Ÿคจ"] - end - subgraph baselinestrategy[Baseline Strategy: Treat None] - Dnone["FN
๐Ÿคข"] - Enone["TN
๐Ÿคจ"] - Fnone["FN
๐Ÿคข"] - Gnone["TN
๐Ÿคจ"] - - D---Dnone - E---Enone - F---Fnone - G---Gnone - end - subgraph nb[Net Benefit] - Dnb[1] - Enb["pt / (1-pt)"] - Fnb[0] - Gnb[0] - Dnone---Dnb - Enone---Enb - Fnone---Fnb - Gnone---Gnb - end - end - - - - style A fill:#E8F4FF, stroke:black,color:black - style B fill:#E8F4FF, stroke:black,color:black - style C fill:#E8F4FF, stroke:black,color:black - style D fill:#C0FFC0,stroke:black,color:black - style Dnone fill:#FFCCE0,stroke:black,color:black - style Dnb fill: #C0FFC0,stroke:black,color:black - style E fill: #FFCCE0,stroke:black,color:black - style Enone fill: #C0FFC0,stroke:black,color:black - style Enb fill: #FFCCE0,stroke:black,color:black - style F fill:#FFCCE0,stroke:black,color:black - style Fnone fill: #FFCCE0,stroke:black,color:black - style Fnb fill: #E8F4FF,stroke:black,color:black - style G fill: #C0FFC0,stroke:black,color:black - style Gnone fill: #C0FFC0,stroke:black,color:black - style Gnb fill: #E8F4FF,stroke:black,color:black - style nb fill: #E8F4FF,stroke:black,color:black - style pred fill: #E8F4FF,stroke:black,color:black - style baselinestrategy fill: #E8F4FF,stroke:black,color:black - - classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px - class trt,ut subgraphStyle - -``` - -### Baseline Strategy: Treat All - -```{mermaid} - -graph LR - subgraph trt[Treatment Decision] - linkStyle default stroke:#000 - A("๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š"|B("Predicted
Positive

๐Ÿ’Š
๐Ÿ˜ท") - A -->|"No Treatment"|C("Predicted
Negative

๐Ÿ˜ท") - end - - subgraph ut[Utility of the Decision] - subgraph pred[Prediction Model] - B -->|"Disease ๐Ÿคข"| D["TP
๐Ÿ’Š
๐Ÿคข"] - B -->|"No Disease ๐Ÿคจ"| E["FP
๐Ÿ’Š
๐Ÿคจ"] - C -->|"Disease ๐Ÿคข"| F["FN
๐Ÿคข"] - C -->|"No Disease ๐Ÿคจ"| G["TN
๐Ÿคจ"] - end - subgraph baselinestrategy[Baseline Strategy: Treat All] - Dall["TP
๐Ÿ’Š
๐Ÿคข"] - Eall["FP
๐Ÿ’Š
๐Ÿคจ"] - Fall["TP
๐Ÿ’Š
๐Ÿคข"] - Gall["FP
๐Ÿ’Š
๐Ÿคจ"] - - D---Dall - E---Eall - F---Fall - G---Gall - end - subgraph nb[Net Benefit] - Dnb[0] - Enb[0] - Fnb["(1-pt) / pt"] - Gnb["1"] - Dall---Dnb - Eall---Enb - Fall---Fnb - Gall---Gnb - end - end - - - - style A fill:#E8F4FF, stroke:black,color:black - style B fill:#E8F4FF, stroke:black,color:black - style C fill:#E8F4FF, stroke:black,color:black - style D fill:#C0FFC0,stroke:black,color:black - style Dall fill:#C0FFC0,stroke:black,color:black - style Dnb fill:#E8F4FF,stroke:black,color:black - style E fill:#FFCCE0,stroke:black,color:black - style Eall fill:#FFCCE0,stroke:black,color:black - style Enb fill:#E8F4FF,stroke:black,color:black - style F fill:#FFCCE0,stroke:black,color:black - style Fall fill:#C0FFC0,stroke:black,color:black - style Fnb fill:#FFCCE0,stroke:black,color:black - style G fill:#C0FFC0,stroke:black,color:black - style Gall fill:#FFCCE0,stroke:black,color:black - style Gnb fill:#C0FFC0,stroke:black,color:black - style nb fill: #E8F4FF,stroke:black,color:black - style pred fill: #E8F4FF,stroke:black,color:black - style baselinestrategy fill: #E8F4FF,stroke:black,color:black - - classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px - class trt,ut subgraphStyle - -``` - -*Regardless* of ranking each prediction is categorised to a bin: 0.32 -\> `[0.3, 0.4)`. - -1. Categorise Absolute Risk: 0.32 -\> `[0.3, 0.4)` - -References: Pauker SG, Kassirer JP. Therapeutic decision making: a cost-benefit analysis. N Engl J Med. 1975;293(5):229-234. doi:10.1056/NEJM197507312930505 - -::: - -## PPCR - -![](line_ppcr_04.svg) - -```{mermaid} - -graph LR - subgraph trt[Treatment Allocation Decision] - linkStyle default stroke:#000 - A("๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š"|B("ฮฃ Predicted
Positives

๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š
๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท") - A -->|"No Treatment"|C("ฮฃ Predicted
Negatives

๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท") - end - - subgraph ut[Utility of the Decision] - B -->|"Disease ๐Ÿคข๐Ÿคข๐Ÿคข"| D["ฮฃ TP
๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š
๐Ÿคข๐Ÿคข๐Ÿคข"] - B -->|"No Disease ๐Ÿคจ"| E["ฮฃ FP
๐Ÿ’Š
๐Ÿคจ"] - C -->|"Disease ๐Ÿคข"| F["ฮฃ FN
๐Ÿคข"] - C -->|"No Disease ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ"| G["ฮฃ TN
๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ"] - end - - - - style A fill:#E8F4FF, stroke:black,color:black - style B fill:#E8F4FF, stroke:black,color:black - style C fill:#E8F4FF, stroke:black,color:black - style D fill:#C0FFC0,stroke:black,color:black - style E fill:#FFCCE0,stroke:black,color:black - style F fill:#FFCCE0,stroke:black,color:black - style G fill:#C0FFC0,stroke:black,color:black - - classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px - class trt,ut subgraphStyle - -``` - -By choosing PPCR as a cutoff the implied assumption is that you are concerned with resource constraint and assume no individual treatment harm. - -*Regarding* the ranking each prediction is categorised to a bin: if the absolute probability 0.32 is the 18th highest predictions out of 100, it will be categorised to the second decile -\> `0.18`. - -1. Calculate Risk-Quantile from Absolute Risk: 0.32 -\> `0.18` - -References: https://en.wikipedia.org/wiki/Precision_and_recall - -::: - -# โœ๏ธ Declare Fixed Time Horizons ๐ŸŒ… (๐Ÿ“…๐Ÿคฌ) - -The `fixed_time_horizons` argument is designed for the user to choose the set of time horizons to follow. - -Different followups contain different distributions of observed outcomes: Declare fixed time horizons for the prediction model, such as \[5, 10\] years of prediction for CVD evet. - -## ๐Ÿช› Update Administrative Censorng - -For cases with observed time-to-event is shorter than the prediction time horizon, the outcomes might change: - -- `Real Positives` ๐Ÿคข should be considered as `Real Negatives` ๐Ÿคจ, the outcome of interest did not happen yet. - -- Always included and Encoded as 0. - -- `Real Neagtives` ๐Ÿคจ should be considered as `Real Censored` ๐Ÿคฌ, the event of interest could have happened in the gap between the observed time and the fixed time horizon. - -- If adjusted: encoded as 0. - -- If excluded: counted with crude estimate. 
- -```{python} - -import numpy as np - -times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3]) -reals = np.array([1, 1, 1, 1, 0, 2, 1, 2, 0, 1]) -time_horizons = [10, 20, 30, 40, 50] - -# Icons -def get_icon(outcome, t, h): - if outcome == 0: - return "๐Ÿคฌ" if t < h else "๐Ÿคจ" - elif outcome == 1: - return "๐Ÿคข" - elif outcome == 2: - return "๐Ÿ’€" - -# Displayed time -def get_time(outcome, t, h): - if outcome == 0: - return t if t < h else h - else: - return t - -# Final output -final_data = [] - -for i in range(len(times)): - id_ = i + 1 - t = times[i] - r = reals[i] - - for h in time_horizons: - outcome = r if t <= h else 0 # override outcome after horizon - final_data.append({ - "id": id_, - "time_horizon": h, - "time": get_time(outcome, t, h), - "real": get_icon(outcome, t, h) - }) - -ojs_define(data = final_data) - -``` - -```{ojs} - -filteredData = data.filter((d) => d.time_horizon == timeHorizon) - -viewof timeHorizon = Inputs.range([10, 50], { - step: 10, - value: 50, - label: "Time Horizon" -}) - -Plot.plot({ - x: { - domain: [0, 50] - }, - y: { - domain: [0, 11], - axis: false - }, - marks: [ - Plot.ruleX([timeHorizon], { - stroke: "#D9E8A3", - strokeWidth: 6, - strokeDasharray: "5,5", - y1: 0, - y2: 10 // Should match the y-domain max - }), - Plot.ruleY(filteredData, { - x: "time", - y: "id", - strokeWidth: 1.5 - }), - Plot.text(filteredData, { - x: "time", - y: "id", - text: "real", - tip: true, - fontSize: 30 - }) - ] -}) - -``` - -# Declare Heuristics Regarding ambigious `reals` - -::: {.panel-tabset} - -## โœ๏ธ Declare Heuristics Regarding Censored Events ๐Ÿ“…๐Ÿคฌ - -```{mermaid} - -graph LR - S0["Non Event
0 ๐Ÿคจ / ๐Ÿคฌ"] -->|"?"|S1["Primary Event
1 ๐Ÿคข"] - S0-->|"?"|S2["Competing Event
2 💀"] - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - class S3 censoredEvent - - linkStyle 0 stroke:#333,background:yellow - -``` - -The `censored_heuristic` argument is designed for the user to choose how to interpret censored events. - -Performance Validation in the face of censored observations requires assumptions regarding the unobserved followup. - -TODO: add link to nan-van-geloven article - -::: {.panel-tabset} - -### Exclude Censored Events - -```{mermaid} - -graph LR - S0["Non Event
0 🤨"] -->S1["Primary Event
1 🤢"] - S0-->S2["Competing Event
2 💀"] - - S3["Censored
0 🤬"] - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef censoredEvent fill:#E3F09B,stroke:#333,stroke-width:1px,color:black - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - class S3 censoredEvent - - linkStyle 0 stroke:#333,background:yellow - -``` - -All censored events to be excluded. - -Underlying Assumption: Small amount of censored events. Violation of the assumption leads to: Overestimation of the observed outcomes. - -### Adjust Censored as partially seen Non-Event - -Observed outcomes for each stratum are estimated using the AJ-estimate (equivalent to CIF and KM): Each censored observation is assumed to be similar to the ones who weren't censored. - -TODO: Link to article - -Underlying Assumption: Independent Censoring. Violation of the assumption leads to: Biased estimate for observed outcomes. - -::: - -## ✍️ Declare Heuristics Regarding Competing Events 📅💀 - -The `competing_heuristic` argument is designed for the user to choose how to interpret competing events. - -Performance Validation in the face of competing observations requires assumptions regarding the unobserved followup. - -TODO: add link to nan-van-geloven article - -::: {.panel-tabset} - -### Exclude Competing Events - -```{mermaid} - -graph LR - subgraph adj[Adjusted for Censoring] - S0["Non Event
0 🤨 / 🤬"] -->S1["Primary Event
1 🤢"] - end - S0 -->S2["Competing Event
2 💀"] - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - - linkStyle 0 stroke:#333 - - style adj fill:#E3F09B,color:black - - -``` - -All competing events to be excluded. - -Underlying Assumption: Small amount of competing events. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. - -### Adjust Competing Events as Censored (Partially seen Negatives) - -Check - -```{mermaid} - -graph LR - subgraph adj[Adjusted for Censoring] - direction LR - S0["Non Event
0 🤨 / 🤬

Competing Event
2 💀"] -->S1["Primary Event
1 🤢"] - end - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - - style adj fill:#E3F09B,color:black - - - linkStyle 0 stroke:#333 - -``` - -All competing events to be treated as censored. - -Underlying Assumption: We consider a patient experiencing a competing event equivalent to independent censoring. Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. - -### Adjust Competing Events as Negatives (Definite Non-Events) - -All competing events to be treated as Competing event to the primary event-of-interest. - -In a way, a patient experiencing a competing event is "more" of a "real-negative" than a conventional "real-negative". - -This is derived from the assumed state-convention - -Beyond the horizon time the following transition is possible: `Real Negatives` 🤨 =\> `Real Positives` 🤢 💀 2 - -```{mermaid} - -graph LR -subgraph adj[Adjusted for Censoring] - direction LR - S0["Non Event
0 🤨"] -->S1["Primary Event
1 🤢"] - S0 -->S2["Competing Event
2 💀"] - end - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - - linkStyle 0 stroke:#333 - style adj fill:#E3F09B,color:black - -``` - -Underlying Assumption: We consider a patient experiencing a competing event as a definite non-event. Violation of the assumption leads to Underestimation of the observed outcomes if a competing event can be considered as a different form of the primary event. - -### Adjust Competing Events as Composite (Positives) - -All competing events to be treated as another case of the Primary Event. - -A patient experiencing a competing event might be seen as another case of "some-event" - -```{mermaid} - -graph LR -subgraph adj[Adjusted for Censoring] - direction LR - S0["Non Event
0 🤨"] -->S1["Any Event
1 🤢
Competing Event
2 💀"] - end - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - - linkStyle 0 stroke:#333 - style adj fill:#E3F09B,color:black - -``` - -There is no need for an underlying assumption, the answer is explicit. - -This heuristic can be seen as a different underlying question - what is the probability of having any type of event, Primary or Competing? - -::: - -::: diff --git a/docs/sandbox.py b/docs/sandbox.py deleted file mode 100644 index 1d60638..0000000 --- a/docs/sandbox.py +++ /dev/null @@ -1,75 +0,0 @@ -from lifelines import CoxPHFitter -from lifelines import WeibullAFTFitter -import pandas as pd -import numpy as np -import pickle -import subprocess - -from rtichoke.summary_report.summary_report import create_data_for_summary_report - -df_time_to_cancer_dx = \ - pd.read_csv( - "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" - ) - - -cox_model = CoxPHFitter() -thin_model = CoxPHFitter() -aft_model = WeibullAFTFitter() - -cox_formula = 'age + famhistory + marker' -thin_formula = 'age + marker' -aft_formula = 'age + marker' - -cox_model.fit(df_time_to_cancer_dx, duration_col='ttcancer', event_col='cancer', formula=cox_formula) -thin_model.fit(df_time_to_cancer_dx, duration_col='ttcancer', event_col='cancer', formula=thin_formula) -aft_model.fit(df_time_to_cancer_dx, duration_col='ttcancer', event_col='cancer', formula=aft_formula) - - - -reals_mapping = { - "censor": 0, - "diagnosed with cancer": 1, - "dead other causes": 2 -} - -df_time_to_cancer_dx['reals'] = df_time_to_cancer_dx['cancer_cr'].map(reals_mapping) - - -new_data = df_time_to_cancer_dx.copy() -new_data['ttcancer'] = 1.5 - -preds_aft = 1 - np.exp(-aft_model.predict_expectation(new_data)) -pred_1_5 
= 1 - np.exp(-cox_model.predict_expectation(new_data)) -pred_thin = 1 - np.exp(-thin_model.predict_expectation(new_data)) - -probs_dict = { - "thin": pred_thin, - "full": pred_1_5, - "aft": preds_aft -} - - -# The import statement has been moved to the top of the file. - -with open('reals_dict.pkl', 'wb') as file: - pickle.dump(df_time_to_cancer_dx['reals'] -, file) - -with open('times_dict.pkl', 'wb') as file: - pickle.dump(df_time_to_cancer_dx['ttcancer'] -, file) - -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: - reals_dict = pickle.load(file) - -with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: - times_dict = pickle.load(file) - - - -create_data_for_summary_report(probs_dict, reals_dict, times_dict) - - -subprocess.run(["quarto", "preview", "aj_estimate_summary_report.qmd"]) -