Merge pull request #178 from uriahf/100-create-prepare_performance_data-and-prepare_performance_data_times-functions

uriahf · web-flow · commit 231b5a2a8bbe · 2025-11-03T14:56:23.000+02:00
100 create prepare performance data and prepare performance data times functions
diff --git a/docs/.gitignore b/docs/.gitignore
@@ -1,3 +1,4 @@
 /.quarto/
 
 **/*.quarto_ipynb
+_sidebar.yml
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -10,6 +10,8 @@ website:
     left:
       - href: reference/
         text: Reference
+      - href: before_we_validate.qmd
+        text: Before We Validate
 
 quartodoc:
   # the name used to import the package you want to create reference docs for
diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd
@@ -0,0 +1,341 @@
+---
+title: "Before we Validate Performance"
+author: "Uriah Finkel"
+format: 
+    html:
+        echo: false
+mermaid-format: svg
+---
+
+Ideally we would like to keep Performance Validation as agnostic as possible. However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case.
+
+So before we validate performance, let us consider the underlying process.
+
+✍️ The User Inputs\
+🪛 Internal Function
+
+# ✍️ Declare reference groups
+
+The dimensions of the `probs` and the `reals` dictionaries imply the nature of the use case:
+
+TODO: copy from rtichoke r README.
+
+##### One Model, One Population:
+
+-   Just one reference group: "model".
+
+##### Several Models, One Population:
+
+Compare between different candidate models. - Each model stands as a reference group, such as a "thin" model or a "full" model.
+
+##### Several Models, Several Populations
+
+Compare performance over different sub-populations. - Internal Validation: "test", "val" and "train". - External Validation: "Framingham", "Australia". - Fairness: "Male", "Female".
+
+# ✍️ Declare how to stratify predictions ✂️
+
+The `stratified_by` argument is designed for the user to choose how to stratify predictions for decision-making; each method implies a different problem:
+
+::: {.panel-tabset}
+
+## Probability Threshold 
+
+::: {.panel-tabset}
+
+By choosing Probability Threshold as a cutoff the implied assumption is that you are concerned with individual harm or benefit.
+
+### Baseline Strategy: Treat None
+
+```{mermaid}
+
+graph LR
+    subgraph trt[Treatment Decision]
+        linkStyle default stroke:#000
+        A("😷") -->|"Treatment 💊"|B("<B>Predicted<br>Positive</B><br>💊<br>😷")
+        A -->|"No Treatment"|C("<B>Predicted<br>Negative</B><br>😷")    
+    end
+
+    subgraph ut[Utility of the Decision]
+        subgraph pred[Prediction Model]
+            B -->|"Disease 🤢"| D["<B>TP</B><br>💊<br>🤢"]
+            B -->|"No Disease 🤨"| E["<B>FP</B><br>💊<br>🤨"]
+            C -->|"Disease 🤢"| F["<B>FN</B><br>🤢"]
+            C -->|"No Disease 🤨"| G["<B>TN</B><br>🤨"]
+        end
+        subgraph baselinestrategy[Baseline Strategy: Treat None]
+            Dnone["<B>FN</B><br>🤢"]
+            Enone["<B>TN</B><br>🤨"]
+            Fnone["<B>FN</B><br>🤢"]
+            Gnone["<B>TN</B><br>🤨"]
+        
+            D---Dnone
+            E---Enone
+            F---Fnone
+            G---Gnone
+        end
+        subgraph nb[Net Benefit]
+            Dnb[1]
+            Enb["pt / (1-pt)"]
+            Fnb[0]
+            Gnb[0]
+        Dnone---Dnb
+        Enone---Enb
+        Fnone---Fnb
+        Gnone---Gnb
+        end
+    end
+
+
+
+    style A fill:#E8F4FF, stroke:black,color:black
+    style B fill:#E8F4FF, stroke:black,color:black
+    style C fill:#E8F4FF, stroke:black,color:black
+    style D fill:#C0FFC0,stroke:black,color:black
+    style Dnone fill:#FFCCE0,stroke:black,color:black
+    style Dnb fill: #C0FFC0,stroke:black,color:black
+    style E fill: #FFCCE0,stroke:black,color:black
+    style Enone fill: #C0FFC0,stroke:black,color:black
+    style Enb fill: #FFCCE0,stroke:black,color:black
+    style F fill:#FFCCE0,stroke:black,color:black
+    style Fnone fill: #FFCCE0,stroke:black,color:black
+    style Fnb fill: #E8F4FF,stroke:black,color:black
+    style G fill: #C0FFC0,stroke:black,color:black
+    style Gnone fill: #C0FFC0,stroke:black,color:black
+    style Gnb fill: #E8F4FF,stroke:black,color:black
+    style nb fill: #E8F4FF,stroke:black,color:black 
+    style pred fill: #E8F4FF,stroke:black,color:black
+    style baselinestrategy fill: #E8F4FF,stroke:black,color:black
+
+    classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px
+    class trt,ut subgraphStyle
+
+```
+
+### Baseline Strategy: Treat All
+
+```{mermaid}
+
+graph LR
+    subgraph trt[Treatment Decision]
+        linkStyle default stroke:#000
+        A("😷") -->|"Treatment 💊"|B("<B>Predicted<br>Positive</B><br>💊<br>😷")
+        A -->|"No Treatment"|C("<B>Predicted<br>Negative</B><br>😷")    
+    end
+
+    subgraph ut[Utility of the Decision]
+        subgraph pred[Prediction Model]
+            B -->|"Disease 🤢"| D["<B>TP</B><br>💊<br>🤢"]
+            B -->|"No Disease 🤨"| E["<B>FP</B><br>💊<br>🤨"]
+            C -->|"Disease 🤢"| F["<B>FN</B><br>🤢"]
+            C -->|"No Disease 🤨"| G["<B>TN</B><br>🤨"]
+        end
+        subgraph baselinestrategy[Baseline Strategy: Treat All]
+            Dall["<B>TP</B><br>💊<br>🤢"]
+            Eall["<B>FP</B><br>💊<br>🤨"]
+            Fall["<B>TP</B><br>💊<br>🤢"]
+            Gall["<B>FP</B><br>💊<br>🤨"]
+        
+            D---Dall
+            E---Eall
+            F---Fall
+            G---Gall
+        end
+        subgraph nb[Net Benefit]
+            Dnb[0]
+            Enb[0]
+            Fnb["(1-pt) / pt"]
+            Gnb["1"]
+        Dall---Dnb
+        Eall---Enb
+        Fall---Fnb
+        Gall---Gnb
+        end
+    end
+
+
+
+    style A fill:#E8F4FF, stroke:black,color:black
+    style B fill:#E8F4FF, stroke:black,color:black
+    style C fill:#E8F4FF, stroke:black,color:black
+    style D fill:#C0FFC0,stroke:black,color:black
+    style Dall fill:#C0FFC0,stroke:black,color:black
+    style Dnb fill:#E8F4FF,stroke:black,color:black
+    style E fill:#FFCCE0,stroke:black,color:black
+    style Eall fill:#FFCCE0,stroke:black,color:black
+    style Enb fill:#E8F4FF,stroke:black,color:black
+    style F fill:#FFCCE0,stroke:black,color:black
+    style Fall fill:#C0FFC0,stroke:black,color:black
+    style Fnb fill:#FFCCE0,stroke:black,color:black
+    style G fill:#C0FFC0,stroke:black,color:black
+    style Gall fill:#FFCCE0,stroke:black,color:black
+    style Gnb fill:#C0FFC0,stroke:black,color:black
+    style nb fill: #E8F4FF,stroke:black,color:black 
+    style pred fill: #E8F4FF,stroke:black,color:black
+    style baselinestrategy fill: #E8F4FF,stroke:black,color:black
+
+    classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px
+    class trt,ut subgraphStyle
+
+```
+
+*Regardless* of ranking, each prediction is categorised into a bin: 0.32 -\> `[0.3, 0.4)`.
+
+1.  Categorise Absolute Risk: 0.32 -\> `[0.3, 0.4)`
+
+References: Pauker SG, Kassirer JP. Therapeutic decision making: a cost-benefit analysis. N Engl J Med. 1975;293(5):229-234. doi:10.1056/NEJM197507312930505
+
+:::
+
+## PPCR
+
+![](line_ppcr_04.svg)
+
+```{mermaid}
+
+graph LR
+    subgraph trt[Treatment Allocation Decision]
+        linkStyle default stroke:#000
+        A("😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷<br>😷") -->|"Treatment 💊💊💊💊"|B("<B>Σ Predicted<br>Positives</B><br>💊💊💊💊<br>😷😷😷😷")
+        A -->|"No Treatment"|C("<B>Σ Predicted<br>Negatives</B><br>😷😷😷😷😷😷")    
+    end
+
+    subgraph ut[Utility of the Decision]
+        B -->|"Disease 🤢🤢🤢"| D["<B>Σ TP</B><br>💊💊💊<br>🤢🤢🤢"]
+        B -->|"No Disease 🤨"| E["<B>Σ FP</B><br>💊<br>🤨"]
+        C -->|"Disease 🤢"| F["<B>Σ FN</B><br>🤢"]
+        C -->|"No Disease 🤨🤨🤨🤨🤨"| G["<B>Σ TN</B><br>🤨🤨🤨🤨🤨"]  
+    end
+
+
+
+    style A fill:#E8F4FF, stroke:black,color:black
+    style B fill:#E8F4FF, stroke:black,color:black
+    style C fill:#E8F4FF, stroke:black,color:black
+    style D fill:#C0FFC0,stroke:black,color:black
+    style E fill:#FFCCE0,stroke:black,color:black
+    style F fill:#FFCCE0,stroke:black,color:black
+    style G fill:#C0FFC0,stroke:black,color:black
+
+    classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px
+    class trt,ut subgraphStyle
+
+```
+
+By choosing PPCR as a cutoff, the implied assumption is that you are concerned with resource constraints and assume no individual treatment harm.
+
+*Regarding* the ranking, each prediction is categorised into a bin: if the absolute probability 0.32 is the 18th highest prediction out of 100, it will be categorised into the second decile -\> `0.18`.
+
+1.  Calculate Risk-Quantile from Absolute Risk: 0.32 -\> `0.18`
+
+References: https://en.wikipedia.org/wiki/Precision_and_recall
+
+:::
+
+# ✍️ Declare Fixed Time Horizons 🌅 (📅🤬)
+
+The `fixed_time_horizons` argument is designed for the user to choose the set of time horizons to follow.
+
+Different followups contain different distributions of observed outcomes: Declare fixed time horizons for the prediction model, such as \[5, 10\] years of prediction for a CVD event.
+
+## 🪛 Update Administrative Censoring
+
+For cases where the observed time-to-event is shorter than the prediction time horizon, the outcomes might change:
+
+-   `Real Positives` 🤢 should be considered as `Real Negatives` 🤨, the outcome of interest did not happen yet.
+
+-   Always included and Encoded as 0.
+
+-   `Real Negatives` 🤨 should be considered as `Real Censored` 🤬, the event of interest could have happened in the gap between the observed time and the fixed time horizon.
+
+-   If adjusted: encoded as 0.
+
+-   If excluded: counted with crude estimate.
+
+```{python}
+
+import numpy as np
+
+times = np.array([24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3])
+reals = np.array([1, 1, 1, 1, 0, 2, 1, 2, 0, 1])
+time_horizons = [10, 20, 30, 40, 50]
+
+# Icons
+def get_icon(outcome, t, h):
+    if outcome == 0:
+        return "🤬" if t < h else "🤨"
+    elif outcome == 1:
+        return "🤢"
+    elif outcome == 2:
+        return "💀"
+
+# Displayed time
+def get_time(outcome, t, h):
+    if outcome == 0:
+        return t if t < h else h
+    else:
+        return t
+
+# Final output
+final_data = []
+
+for i in range(len(times)):
+    id_ = i + 1
+    t = times[i]
+    r = reals[i]
+
+    for h in time_horizons:
+        outcome = r if t <= h else 0  # override outcome after horizon
+        final_data.append({
+            "id": id_,
+            "time_horizon": h,
+            "time": get_time(outcome, t, h),
+            "real": get_icon(outcome, t, h)
+        })
+
+ojs_define(data = final_data)
+
+```
+
+```{ojs}
+
+filteredData = data.filter((d) => d.time_horizon == timeHorizon)
+
+viewof timeHorizon = Inputs.range([10, 50], {
+  step: 10,
+  value: 50,
+  label: "Time Horizon"
+})
+
+Plot.plot({
+  x: {
+    domain: [0, 50]
+  },
+  y: {
+    domain: [0, 11],
+    axis: false
+  },
+  marks: [
+    Plot.ruleX([timeHorizon], {
+      stroke: "#D9E8A3",
+      strokeWidth: 6,
+      strokeDasharray: "5,5",
+      y1: 0,
+      y2: 10 // Should match the y-domain max
+    }),
+    Plot.ruleY(filteredData, {
+      x: "time",
+      y: "id",
+      strokeWidth: 1.5
+    }),
+    Plot.text(filteredData, {
+      x: "time",
+      y: "id",
+      text: "real",
+      tip: true,
+      fontSize: 30
+    })
+  ]
+})
+
+```
+

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
`1`	`1`	`/.quarto/`
`2`	`2`
`3`	`3`	`**/*.quarto_ipynb`
	`4`	`+_sidebar.yml`