diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 94e9257..0f6c886 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.10"] permissions: id-token: write contents: write diff --git a/docs/_brand.yml b/docs/_brand.yml index 5c057de..f9f51aa 100644 --- a/docs/_brand.yml +++ b/docs/_brand.yml @@ -17,6 +17,10 @@ color: typography: fonts: + - source: file + family: Fraunces9pt-Light + files: + - fonts/Fraunces9pt-Light.woff2 - family: Fraunces source: google weight: [400, 700] @@ -31,7 +35,7 @@ typography: weight: 400 line-height: 1.5 headings: - family: Fraunces + family: Fraunces9pt-Light weight: 700 style: normal line-height: 1.2 diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 31b406d..57c7443 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -8,12 +8,8 @@ website: title: "rtichoke" navbar: left: - - href: reference.qmd + - href: reference/ text: Reference - - href: walkthrough_aj_estimate.qmd - text: Example - - href: before_we_validate.qmd - text: Before we Validate quartodoc: # the name used to import the package you want to create reference docs for diff --git a/docs/aj_estimate_summary_report.qmd b/docs/aj_estimate_summary_report.qmd deleted file mode 100644 index 69a74f1..0000000 --- a/docs/aj_estimate_summary_report.qmd +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: "Summary Report" -format: html -echo: false -message: false -warning: false ---- - -```{python} -#| tags: [parameters] - -alpha = 0.1 -ratio = 0.1 - - -``` - -```{python} - -print(f"alpha: {alpha}") -print(f"ratio: {ratio}") -``` - - -```{python} -from lifelines import AalenJohansenFitter -import numpy as np -from itertools import product -import itertools -from rtichoke.helpers.sandbox_observable_helpers import * -from lifelines import CoxPHFitter -from lifelines import WeibullAFTFitter 
-import polars as pl -print("Polars version:", pl.__version__) - -import pandas as pd -import pickle - -print(2+2) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file: -# probs_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: -# reals_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: -# times_dict = pickle.load(file) - - -``` - diff --git a/docs/before_we_validate.qmd b/docs/before_we_validate.qmd deleted file mode 100644 index e448426..0000000 --- a/docs/before_we_validate.qmd +++ /dev/null @@ -1,525 +0,0 @@ ---- -title: "Before we Validate Performance" -author: "Uriah Finkel" -format: - html: - toc: true ---- - -Ideally we would like to keep Performance Validation as agnostic as possible. -However, the structure of the validation set (`probs`, `reals` and `times`) implies the nature of the related assumptions and the required use case. - -So before we validate performance, let us consider the underlying process. - -โœ๏ธ The User Inputs -๐Ÿช› Internal Function - -# โœ๏ธ Declare reference groups - -The dimentions of the `probs` and the `real` dictionaries imply the nature of the use case: - -TODO: copy from rtichoke r README. - -##### One Model, One Population: -- Just one reference group: "model". - -##### Several Models, One Population: - -Compare between different candidate models. -- Each model stand as a reference groups such as "thin" model, or a "full" model. - -##### Several Models, Several Populations - -Compare performance over different sub-populations. -- Internal Validation: "test", "val" and "train". -- External Validation: "Framingham", "Australia". -- Fairness: "Male", "Female". 
- -# โœ๏ธ Declare how to stratify predictions โœ‚๏ธ - -The `stratified_by` argument is designed for the user to choose how to stratify predictions for decision-making, each method implies different problem: - - -## Probability Threshold - -By choosing Probability Threshold as a cutoff the implied assumption is that you are concerned with individual harm or benefit. - -### Baseline Strategy: Treat None - -```{mermaid} - -graph LR - subgraph trt[Treatment Decision] - linkStyle default stroke:#000 - A("๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š"|B("Predicted
Positive

๐Ÿ’Š
๐Ÿ˜ท") - A -->|"No Treatment"|C("Predicted
Negative

๐Ÿ˜ท") - end - - subgraph ut[Utility of the Decision] - subgraph pred[Prediction Model] - B -->|"Disease ๐Ÿคข"| D["TP
๐Ÿ’Š
๐Ÿคข"] - B -->|"No Disease ๐Ÿคจ"| E["FP
๐Ÿ’Š
๐Ÿคจ"] - C -->|"Disease ๐Ÿคข"| F["FN
๐Ÿคข"] - C -->|"No Disease ๐Ÿคจ"| G["TN
๐Ÿคจ"] - end - subgraph baselinestrategy[Baseline Strategy: Treat None] - Dnone["FN
๐Ÿคข"] - Enone["TN
๐Ÿคจ"] - Fnone["FN
๐Ÿคข"] - Gnone["TN
๐Ÿคจ"] - - D---Dnone - E---Enone - F---Fnone - G---Gnone - end - subgraph nb[Net Benefit] - Dnb[1] - Enb["pt / (1-pt)"] - Fnb[0] - Gnb[0] - Dnone---Dnb - Enone---Enb - Fnone---Fnb - Gnone---Gnb - end - end - - - - style A fill:#E8F4FF, stroke:black,color:black - style B fill:#E8F4FF, stroke:black,color:black - style C fill:#E8F4FF, stroke:black,color:black - style D fill:#C0FFC0,stroke:black,color:black - style Dnone fill:#FFCCE0,stroke:black,color:black - style Dnb fill: #C0FFC0,stroke:black,color:black - style E fill: #FFCCE0,stroke:black,color:black - style Enone fill: #C0FFC0,stroke:black,color:black - style Enb fill: #FFCCE0,stroke:black,color:black - style F fill:#FFCCE0,stroke:black,color:black - style Fnone fill: #FFCCE0,stroke:black,color:black - style Fnb fill: #E8F4FF,stroke:black,color:black - style G fill: #C0FFC0,stroke:black,color:black - style Gnone fill: #C0FFC0,stroke:black,color:black - style Gnb fill: #E8F4FF,stroke:black,color:black - style nb fill: #E8F4FF,stroke:black,color:black - style pred fill: #E8F4FF,stroke:black,color:black - style baselinestrategy fill: #E8F4FF,stroke:black,color:black - - classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px - class trt,ut subgraphStyle - -``` - - -### Baseline Strategy: Treat All - -```{mermaid} - -graph LR - subgraph trt[Treatment Decision] - linkStyle default stroke:#000 - A("๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š"|B("Predicted
Positive

๐Ÿ’Š
๐Ÿ˜ท") - A -->|"No Treatment"|C("Predicted
Negative

๐Ÿ˜ท") - end - - subgraph ut[Utility of the Decision] - subgraph pred[Prediction Model] - B -->|"Disease ๐Ÿคข"| D["TP
๐Ÿ’Š
๐Ÿคข"] - B -->|"No Disease ๐Ÿคจ"| E["FP
๐Ÿ’Š
๐Ÿคจ"] - C -->|"Disease ๐Ÿคข"| F["FN
๐Ÿคข"] - C -->|"No Disease ๐Ÿคจ"| G["TN
๐Ÿคจ"] - end - subgraph baselinestrategy[Baseline Strategy: Treat All] - Dall["TP
๐Ÿ’Š
๐Ÿคข"] - Eall["FP
๐Ÿ’Š
๐Ÿคจ"] - Fall["TP
๐Ÿ’Š
๐Ÿคข"] - Gall["FP
๐Ÿ’Š
๐Ÿคจ"] - - D---Dall - E---Eall - F---Fall - G---Gall - end - subgraph nb[Net Benefit] - Dnb[0] - Enb[0] - Fnb[1] - Gnb["(1-pt) / pt"] - Dall---Dnb - Eall---Enb - Fall---Fnb - Gall---Gnb - end - end - - - - style A fill:#E8F4FF, stroke:black,color:black - style B fill:#E8F4FF, stroke:black,color:black - style C fill:#E8F4FF, stroke:black,color:black - style D fill:#C0FFC0,stroke:black,color:black - style Dall fill:#C0FFC0,stroke:black,color:black - style Dnb fill:#E8F4FF,stroke:black,color:black - style E fill:#FFCCE0,stroke:black,color:black - style Eall fill:#FFCCE0,stroke:black,color:black - style Enb fill:#E8F4FF,stroke:black,color:black - style F fill:#FFCCE0,stroke:black,color:black - style Fall fill:#C0FFC0,stroke:black,color:black - style Fnb fill:#C0FFC0,stroke:black,color:black - style G fill:#C0FFC0,stroke:black,color:black - style Gall fill:#FFCCE0,stroke:black,color:black - style Gnb fill:#FFCCE0,stroke:black,color:black - style nb fill: #E8F4FF,stroke:black,color:black - style pred fill: #E8F4FF,stroke:black,color:black - style baselinestrategy fill: #E8F4FF,stroke:black,color:black - - classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px - class trt,ut subgraphStyle - -``` - - -*Regardless* of ranking each prediction is categorised to a bin: 0.32 -> `[0.3, 0.4)`. - -1. Categorise Absolute Risk: 0.32 -> `[0.3, 0.4)` - - -References: -Pauker SG, Kassirer JP. Therapeutic decision making: a cost-benefit analysis. N Engl J Med. 1975;293(5):229-234. doi:10.1056/NEJM197507312930505 - -## PPCR - -![](line_ppcr_04.svg) - -```{mermaid} - -graph LR - subgraph trt[Treatment Allocation Decision] - linkStyle default stroke:#000 - A("๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท
๐Ÿ˜ท") -->|"Treatment ๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š"|B("ฮฃ Predicted
Positives

๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š
๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท") - A -->|"No Treatment"|C("ฮฃ Predicted
Negatives

๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท๐Ÿ˜ท") - end - - subgraph ut[Utility of the Decision] - B -->|"Disease ๐Ÿคข๐Ÿคข๐Ÿคข"| D["ฮฃ TP
๐Ÿ’Š๐Ÿ’Š๐Ÿ’Š
๐Ÿคข๐Ÿคข๐Ÿคข"] - B -->|"No Disease ๐Ÿคจ"| E["ฮฃ FP
๐Ÿ’Š
๐Ÿคจ"] - C -->|"Disease ๐Ÿคข"| F["ฮฃ FN
๐Ÿคข"] - C -->|"No Disease ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ"| G["ฮฃ TN
๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ๐Ÿคจ"] - end - - - - style A fill:#E8F4FF, stroke:black,color:black - style B fill:#E8F4FF, stroke:black,color:black - style C fill:#E8F4FF, stroke:black,color:black - style D fill:#C0FFC0,stroke:black,color:black - style E fill:#FFCCE0,stroke:black,color:black - style F fill:#FFCCE0,stroke:black,color:black - style G fill:#C0FFC0,stroke:black,color:black - - classDef subgraphStyle fill:#FAF6EC,stroke:#333,stroke-width:1px - class trt,ut subgraphStyle - -``` - - -By choosing PPCR as a cutoff the implied assumption is that you are concerned with resource constraint and assume no individual treatment harm. - -*Regarding* the ranking each prediction is categorised to a bin: if the absolute probability 0.32 is the 18th highest predictions out of 100, it will be categorised to the second decile -> `0.18`. - -1. Calculate Risk-Quantile from Absolute Risk: 0.32 -> `0.18` - -References: -https://en.wikipedia.org/wiki/Precision_and_recall - - - - -# โœ๏ธ Declare Fixed Time Horizons ๐ŸŒ… (๐Ÿ“…๐Ÿคฌ) - -The `fixed_time_horizons` argument is designed for the user to choose the set of time horizons to follow. - -Different followups contain different distributions of observed outcomes: Declare fixed time horizons for the prediction model, such as [5, 10] years of prediction for CVD evet. - - - -## ๐Ÿช› Update Administrative Censorng - -For cases with observed time-to-event is shorter than the prediction time horizon, the outcomes might change: - -- `Real Positives` ๐Ÿคข should be considered as `Real Negatives` ๐Ÿคจ, the outcome of interest did not happen yet. -- Always included and Encoded as 0. - -- `Real Neagtives` ๐Ÿคจ should be considered as `Real Censored` ๐Ÿคฌ, the event of interest could have happened in the gap between the observed time and the fixed time horizon. -- If adjusted: encoded as 0. -- If excluded: counted with crude estimate. 
- -```{ojs} -//| echo: false - -data = [ - { time: 1, real: "๐Ÿคจ", id: 1, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 1, time_horizon: 3 }, - { time: 5, real: "๐Ÿคจ", id: 1, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 2, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 2, time_horizon: 3 }, - { time: 5, real: "๐Ÿคจ", id: 2, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 3, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 3, time_horizon: 3 }, - { time: 4.6, real: "๐Ÿคฌ", id: 3, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 4, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 4, time_horizon: 3 }, - { time: 5, real: "๐Ÿคจ", id: 4, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 5, time_horizon: 1 }, - { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 3 }, - { time: 2.4, real: "๐Ÿคข", id: 5, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 6, time_horizon: 1 }, - { time: 3, real: "๐Ÿคจ", id: 6, time_horizon: 3 }, - { time: 4.4, real: "๐Ÿคฌ", id: 6, time_horizon: 5 }, - { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 1 }, - { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 3 }, - { time: 0.4, real: "๐Ÿคข", id: 7, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 8, time_horizon: 1 }, - { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 3 }, - { time: 1.6, real: "๐Ÿ’€", id: 8, time_horizon: 5 }, - { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 1 }, - { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 3 }, - { time: 0.8, real: "๐Ÿคข", id: 9, time_horizon: 5 }, - { time: 1, real: "๐Ÿคจ", id: 10, time_horizon: 1 }, - { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 3 }, - { time: 2.9, real: "๐Ÿคข", id: 10, time_horizon: 5 } -] - -filteredData = data.filter((d) => d.time_horizon == timeHorizon) - -viewof timeHorizon = Inputs.range([1, 5], { - step: 2, - value: 5, - label: "Time Horizon" -}) - -Plot.plot({ - x: { - domain: [0, 5] - }, - y: { - domain: [0, 11], - axis: false - }, - marks: [ - Plot.ruleX([timeHorizon], { - stroke: "#D9E8A3", - strokeWidth: 6, 
- strokeDasharray: "5,5", - y1: 0, - y2: 10 // Should match the y-domain max - }), - Plot.ruleY(filteredData, { - x: "time", - y: "id", - strokeWidth: 1.5 - }), - Plot.text(filteredData, { - x: "time", - y: "id", - text: "real", - tip: true, - fontSize: 30 - }) - ] -}) - -``` - - -# Declare Heuristics Regarding ambigious `reals` - -## โœ๏ธ Declare Heuristics Regarding Censored Events ๐Ÿ“…๐Ÿคฌ - -```{mermaid} - -graph LR - S0["Non Event
0 ๐Ÿคจ / ๐Ÿคฌ"] -->|"?"|S1["Primary Event
1 ๐Ÿคข"] - S0-->|"?"|S2["Competing Event
2 ๐Ÿ’€"] - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - class S3 censoredEvent - - linkStyle 0 stroke:#333,background:yellow - -``` - -The `censored_heuristic` argument is designed for the user to choose how interpret censored events. - -Performance Validation in the face of censored observations require assumptions regarding the unobserved followup. - -TODO: add link to nan-van-geloven article - -##### Exclude Censored Events - -```{mermaid} - -graph LR - S0["Non Event
0 ๐Ÿคจ"] -->S1["Primary Event
1 ๐Ÿคข"] - S0-->S2["Competing Event
2 ๐Ÿ’€"] - - S3["Censored
0 ๐Ÿคฌ"] - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef censoredEvent fill:#E3F09B,stroke:#333,stroke-width:1px,color:black - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - class S3 censoredEvent - - linkStyle 0 stroke:#333,background:yellow - -``` - -All censored events to be excluded. - -Underlying Assumption: Small amount of censored events. -Violation of the assumption leads to: Overestimation of the observed outcomes. - -##### Adjust Censored as partially seen Non-Event - -Observed outcomes for each strata are estimated using the AJ-estimate (equivalent to CIF and KM): Each censored observation is assumed to be similar to the ones who weren't censored. - -TODO: Link to article - -Underlying Assumption: Independent Censoring. -Violation of the assumption leads to: Biased estimate for observed outcomes. - -# โœ๏ธ Declare Heuristics Regarding Competing Events ๐Ÿ“…๐Ÿ’€ - -The `competing_heuristic` argument is designed for the user to choose how interpret censored events. - -Performance Validation in the face of competing observations require assumptions regarding the unobserved followup. - -TODO: add link to nan-van-geloven article - -##### Exclude Competing Events - -```{mermaid} - -graph LR - subgraph adj[Adjusted for Censoring] - S0["Non Event
0 ๐Ÿคจ / ๐Ÿคฌ"] -->S1["Primary Event
1 ๐Ÿคข"] - end - S0 -->S2["Competing Event
2 ๐Ÿ’€"] - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - - linkStyle 0 stroke:#333 - - style adj fill:#E3F09B,color:black - - -``` - -All competing events to be excluded. - -Underlying Assumption: Small amount of competing events. -Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. - -##### Adjust Competing Events as Censored (partially seen Non-Event) - -Check - -```{mermaid} - -graph LR - subgraph adj[Adjusted for Censoring] - direction LR - S0["Non Event
0 ๐Ÿคจ / ๐Ÿคฌ

Competing Event
2 ๐Ÿ’€"] -->S1["Primary Event
1 ๐Ÿคข"] - end - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - - style adj fill:#E3F09B,color:black - - - linkStyle 0 stroke:#333 - -``` - - -All competing events to be treated as censored. - -Underlying Assumption: We consider a patient experiencing a competing event equivalent to independent censoring. -Violation of the assumption leads to: Overestimation of the observed outcomes. A competing event means that the primary event cannot happen. - -##### Adjust Competing Events as Competing - -All competing events to be treated as Competing event to the primary event-of-interest. - -In a way, a patient experiencing a competing event is "more" of a "real-negative" than a conventional "real-negative". - -This is derived from the assumed state-covention - -Beyond the horizon time the following transition is possible: -`Real Neagtives` ๐Ÿคจ => `Real Positives` ๐Ÿคข -๐Ÿ’€ 2 - -```{mermaid} - -graph LR -subgraph adj[Adjusted for Censoring] - direction LR - S0["Non Event
0 ๐Ÿคจ"] -->S1["Primary Event
1 ๐Ÿคข"] - S0 -->S2["Competing Event
2 ๐Ÿ’€"] - end - - - classDef nonEvent fill:#E0E0E0,stroke:#333,stroke-width:1px,color:black - classDef primaryEvent fill:#808080,stroke:#333,stroke-width:1px,color:white - classDef competingEvent fill:#9DB4C0,stroke:#333,stroke-width:1px,color:black - - class S0 nonEvent - class S1 primaryEvent - class S2 competingEvent - - linkStyle 0 stroke:#333 - style adj fill:#E3F09B,color:black - -``` - -Underlying Assumption: We consider a patient experiencing a competing event as a definite non-event. -Violation of the assumption leads to Underestimation of the observed outcomes if a competing event can be considered as a different form of the primary event. - -# What rtichoke from now on? - -## Render Predictions Histogram - -### Extract AJ Estimate by Assumptions - -For each requried combination of reference_group x predictions_strata x fixed_time_horizons x censored_heuristic x competing_heuristic a separate AJ estimated is calculated for the adjusted `reals` and a Crude estimate is calculated for the excluded `reals`. - -The sum of the AJ estimates for each predictions_strata is equal to the overal AJ estimate. 
- diff --git a/docs/dcurves_example.py b/docs/dcurves_example.py new file mode 100644 index 0000000..e5c3a43 --- /dev/null +++ b/docs/dcurves_example.py @@ -0,0 +1,724 @@ +import marimo + +__generated_with = "0.14.7" +app = marimo.App(width="columns") + + +@app.cell(column=0) +def _(): + from dcurves import dca + import pandas as pd + import numpy as np + import lifelines + import plotly.express as px + import polars as pl + from rtichoke.helpers.sandbox_observable_helpers import ( + create_list_data_to_adjust, + create_adjusted_data, + create_aj_data_combinations, + cast_and_join_adjusted_data, + create_breaks_values, + ) + + df_time_to_cancer_dx = pd.read_csv( + "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" + ) + return ( + cast_and_join_adjusted_data, + create_adjusted_data, + create_aj_data_combinations, + create_breaks_values, + create_list_data_to_adjust, + dca, + df_time_to_cancer_dx, + lifelines, + np, + pl, + px, + ) + + +@app.cell +def _(df_time_to_cancer_dx, lifelines): + cph = lifelines.CoxPHFitter() + cph.fit( + df=df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula="age + famhistory + marker", + ) + + cph_pred_vals = cph.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + + df_time_to_cancer_dx["pr_failure18"] = [1 - val for val in cph_pred_vals.iloc[0, :]] + return + + +@app.cell +def _(df_time_to_cancer_dx): + (df_time_to_cancer_dx["pr_failure18"] >= 0.5).sum() + return + + +@app.cell +def _(df_time_to_cancer_dx): + df_time_to_cancer_dx + return + + +@app.cell +def _(): + outcome = "cancer" + time_to_outcome_col = "ttcancer" + prevalence = None + time = 1.5 + return outcome, prevalence, time, time_to_outcome_col + + +@app.cell +def _(df_time_to_cancer_dx): + (df_time_to_cancer_dx["pr_failure18"] >= 0.5).sum() + return + + +@app.cell +def _(df_time_to_cancer_dx, outcome, time_to_outcome_col): + from dcurves.risks import 
_create_risks_df + + risks_df = _create_risks_df( + data=df_time_to_cancer_dx, + outcome=outcome, + time=1.5, + time_to_outcome_col=time_to_outcome_col, + ) + + risks_df + return (risks_df,) + + +@app.cell +def _(risks_df): + risks_df["pr_failure18"].hist() + return + + +@app.cell +def _(risks_df): + (risks_df["pr_failure18"] >= 0.5).sum() + return + + +@app.cell +def _(df_time_to_cancer_dx, risks_df): + import plotly.graph_objects as go + + x = risks_df["pr_failure18"] + y = df_time_to_cancer_dx["pr_failure18"] + cancer = risks_df["cancer"] + + fig_test = go.Figure() + + # Cancer = 0 (circle) + fig_test.add_trace( + go.Scatter( + x=x[cancer == 0], + y=y[cancer == 0], + mode="markers", + marker=dict(symbol="circle", size=8, opacity=0.6), + name="Cancer = 0", + ) + ) + + # Cancer = 1 (square) + fig_test.add_trace( + go.Scatter( + x=x[cancer == 1], + y=y[cancer == 1], + mode="markers", + marker=dict(symbol="square", size=8, opacity=0.6), + name="Cancer = 1", + ) + ) + + fig_test.update_layout( + title="Comparison of pr_failure18 across DataFrames", + xaxis_title="risks_df['pr_failure18']", + yaxis_title="df_time_to_cancer_dx['pr_failure18']", + template="plotly_white", + ) + + fig_test.show() + return + + +@app.cell +def _(risks_df): + from dcurves.risks import _rectify_model_risk_boundaries + + modelnames = ["pr_failure18"] + + rectified_risks_df = _rectify_model_risk_boundaries( + risks_df=risks_df, modelnames=modelnames + ) + + rectified_risks_df + return modelnames, rectified_risks_df + + +@app.cell +def _(outcome, prevalence, rectified_risks_df, time, time_to_outcome_col): + from dcurves.prevalence import _calc_prevalence + + prevalence_value = _calc_prevalence( + risks_df=rectified_risks_df, + outcome=outcome, + prevalence=prevalence, + time=time, + time_to_outcome_col=time_to_outcome_col, + ) + + prevalence_value + return (prevalence_value,) + + +@app.cell +def _(modelnames, np, prevalence_value, rectified_risks_df): + from dcurves.dca import 
_create_initial_df + + thresholds = np.arange(0, 1, 0.5) + + initial_df = _create_initial_df( + thresholds=thresholds, + modelnames=modelnames, + input_df_rownum=len(rectified_risks_df.index), + prevalence_value=prevalence_value, + ) + + initial_df + return initial_df, thresholds + + +@app.cell +def _(outcome, risks_df, thresholds, time, time_to_outcome_col): + from dcurves.dca import _calc_risk_rate_among_test_pos + + risk_rate_among_test_pos = _calc_risk_rate_among_test_pos( + risks_df=risks_df, + outcome=outcome, + model="pr_failure18", + thresholds=thresholds, + time_to_outcome_col=time_to_outcome_col, + time=time, + ) + + risk_rate_among_test_pos + return + + +@app.cell +def _( + outcome, + prevalence_value, + risks_df, + thresholds, + time, + time_to_outcome_col, +): + from dcurves.dca import _calc_test_pos_rate, _calc_tp_rate + + test_pos_rate = _calc_test_pos_rate( + risks_df=risks_df, thresholds=thresholds, model="pr_failure18" + ) + + print("test positive rate:", test_pos_rate) + + tp_rate = _calc_tp_rate( + risks_df=risks_df, + thresholds=thresholds, + model="pr_failure18", + outcome=outcome, + time=time, + time_to_outcome_col=time_to_outcome_col, + test_pos_rate=test_pos_rate, + prevalence_value=prevalence_value, + ) + + print("true positive rate:", tp_rate) + return + + +@app.cell +def _( + initial_df, + outcome, + prevalence_value, + rectified_risks_df, + thresholds, + time, + time_to_outcome_col, +): + from dcurves.dca import _calc_initial_stats + + initial_stats_df = _calc_initial_stats( + initial_df=initial_df, + risks_df=rectified_risks_df, + thresholds=thresholds, + outcome=outcome, + prevalence_value=prevalence_value, + time=time, + time_to_outcome_col=time_to_outcome_col, + ) + + initial_stats_df + return + + +@app.cell +def _(rectified_risks_df): + rectified_risks_df + return + + +@app.cell +def _(df_time_to_cancer_dx): + probs_dict = {"full": df_time_to_cancer_dx["pr_failure18"]} + + reals_mapping = { + "censor": 0, + "diagnosed with cancer": 
1, + "dead other causes": 2, + } + + reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping) + + times_dict = df_time_to_cancer_dx["ttcancer"] + + df_time_to_cancer_dx["cancer_enum"] = reals_dict + + df_time_to_cancer_dx + return probs_dict, reals_dict, times_dict + + +@app.cell +def _(dca, df_time_to_cancer_dx, np): + stdca_coxph_results_composite = dca( + data=df_time_to_cancer_dx, + outcome="cancer_enum", + modelnames=["pr_failure18"], + # thresholds=np.arange(0, 0.51, 0.1), + # thresholds=np.arange(0.5, 1, 0.1), + thresholds=np.arange(0, 1, 0.5), + time=1.5, + time_to_outcome_col="ttcancer", + ) + + stdca_coxph_results_composite + return + + +@app.cell +def _(create_aj_data_combinations, create_breaks_values, probs_dict): + stratified_by = ["probability_threshold"] + # stratified_by = ["probability_threshold"] + # stratified_by = ["ppcr"] + # stratified_by = ["probability_threshold"] + + by = 0.01 + breaks = create_breaks_values(None, "probability_threshold", by) + + heuristics_sets = [ + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_censored", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_composite", + }, + ] + + aj_data_combinations = create_aj_data_combinations( + list(probs_dict.keys()), + heuristics_sets, + fixed_time_horizons=[1.5], + stratified_by=stratified_by, + by=by, + breaks=breaks, + ) + + aj_data_combinations + return aj_data_combinations, breaks, by, heuristics_sets, stratified_by + + +@app.cell +def _( + aj_data_combinations, + by, + create_list_data_to_adjust, + probs_dict, + reals_dict, + stratified_by, + times_dict, +): + list_data_to_adjust_polars = create_list_data_to_adjust( + aj_data_combinations, + probs_dict, + reals_dict, + times_dict, + stratified_by=stratified_by, + by=by, + ) + list_data_to_adjust_polars + return (list_data_to_adjust_polars,) + + +@app.cell +def 
_( + breaks, + create_adjusted_data, + heuristics_sets, + list_data_to_adjust_polars, + stratified_by, +): + adjusted_data = create_adjusted_data( + list_data_to_adjust_polars, + heuristics_sets=heuristics_sets, + fixed_time_horizons=[1.5], + breaks=breaks, + stratified_by=stratified_by, + # risk_set_scope=["within_stratum"]#, # , , + # risk_set_scope=["pooled_by_cutoff"], # , # , , + risk_set_scope=["pooled_by_cutoff", "within_stratum"], # , , + ) + + adjusted_data + return (adjusted_data,) + + +@app.cell +def _(adjusted_data, aj_data_combinations, cast_and_join_adjusted_data): + final_adjusted_data_polars = cast_and_join_adjusted_data( + aj_data_combinations, adjusted_data + ) + + final_adjusted_data_polars + return (final_adjusted_data_polars,) + + +@app.cell +def _(final_adjusted_data_polars): + final_adjusted_data_polars + return + + +@app.cell +def _(final_adjusted_data_polars): + from rtichoke.helpers.sandbox_observable_helpers import ( + _calculate_cumulative_aj_data, + ) + + cumulative_aj_data = _calculate_cumulative_aj_data(final_adjusted_data_polars) + + cumulative_aj_data + return (cumulative_aj_data,) + + +@app.cell +def _(): + return + + +@app.cell +def _(cumulative_aj_data): + from rtichoke.helpers.sandbox_observable_helpers import ( + _turn_cumulative_aj_to_performance_data, + ) + + performance_data = _turn_cumulative_aj_to_performance_data(cumulative_aj_data) + + performance_data + return (performance_data,) + + +@app.cell +def _(performance_data): + from rtichoke.discrimination.gains import plot_gains_curve + + plot_gains_curve(performance_data) + return + + +@app.cell +def _(performance_data, pl): + performance_data_with_nb_calculated = ( + performance_data.with_columns( + ( + (pl.col("true_positives") / pl.col("n")) + - (pl.col("false_positives") / pl.col("n")) + * pl.col("chosen_cutoff") + / (1 - pl.col("chosen_cutoff")) + ).alias("net_benefit") + ) + .filter( + pl.col("censoring_heuristic") == "adjusted", + pl.col("competing_heuristic") == 
"adjusted_as_censored", + ) + .sort(pl.col("chosen_cutoff")) + ) + + performance_data_with_nb_calculated + return + + +@app.cell +def _(dca, df_time_to_cancer_dx, np): + stdca_coxph_results = dca( + data=df_time_to_cancer_dx, + outcome="cancer", + modelnames=["pr_failure18"], + thresholds=np.arange(0, 0.51, 0.01), + time=1.5, + time_to_outcome_col="ttcancer", + ) + + stdca_coxph_results + return (stdca_coxph_results,) + + +@app.cell +def _(px, stdca_coxph_results): + # Create plotly express figure + fig = px.line( + stdca_coxph_results, + x="threshold", + y="net_benefit", + color="model", + markers=True, + title="Decision Curve Analysis", + labels={ + "threshold": "Threshold Probability", + "net_benefit": "Net Benefit", + }, + ) + + # Update layout to match rtichoke look + fig.update_layout( + template="simple_white", + title_font_size=20, + title_x=0.5, + legend_title_text="", + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + margin=dict(l=40, r=40, t=60, b=40), + xaxis=dict(range=[-0.01, 0.23], showgrid=False, tickmode="linear", dtick=0.05), + yaxis=dict( + range=[-0.01, 0.23], + showgrid=False, + zeroline=True, + zerolinewidth=1, + zerolinecolor="gray", + ), + ) + + fig.show() + return + + +@app.cell(column=1, hide_code=True) +def _(): + import marimo as mo + + fill_color_radio = mo.ui.radio( + options=["classification_outcome", "reals_labels"], + value="classification_outcome", + label="Fill Colors", + ) + + fill_color_radio + return fill_color_radio, mo + + +@app.cell(hide_code=True) +def _(mo): + risk_set_scope_radio = mo.ui.radio( + options=["pooled_by_cutoff", "within_stratum"], + value="pooled_by_cutoff", + label="Risk Set Scope", + ) + + risk_set_scope_radio + return (risk_set_scope_radio,) + + +@app.cell(hide_code=True) +def _(mo): + stratified_by_radio = mo.ui.radio( + options=["probability_threshold"], + value="probability_threshold", + label="Stratified By", + ) + + stratified_by_radio + return (stratified_by_radio,) + 
+ +@app.cell(hide_code=True) +def _(mo): + censoring_heuristic_radio = mo.ui.radio( + options=["adjusted"], + value="adjusted", + label="Censoring Heuristic", + ) + + censoring_heuristic_radio + return (censoring_heuristic_radio,) + + +@app.cell(hide_code=True) +def _(mo): + competing_heuristic_radio = mo.ui.radio( + options=[ + "adjusted_as_negative", + "adjusted_as_censored", + "adjusted_as_composite", + ], + value="adjusted_as_negative", + label="Competing Heuristic", + ) + + competing_heuristic_radio + return (competing_heuristic_radio,) + + +@app.cell(hide_code=True) +def _(by, mo): + slider_cutoff = mo.ui.slider(start=0, stop=1, step=by, label="Cutoff") + slider_cutoff + return (slider_cutoff,) + + +@app.cell(column=2, hide_code=True) +def _( + by, + censoring_heuristic_radio, + competing_heuristic_radio, + fill_color_radio, + final_adjusted_data_polars, + pl, + px, + risk_set_scope_radio, + slider_cutoff, + stratified_by_radio, +): + chosen_cutoff_data = final_adjusted_data_polars.filter( + pl.col("censoring_heuristic") == censoring_heuristic_radio.value, + pl.col("competing_heuristic") == competing_heuristic_radio.value, + pl.col("chosen_cutoff") == slider_cutoff.value, + pl.col("risk_set_scope") == risk_set_scope_radio.value, + pl.col("stratified_by") == stratified_by_radio.value, + ).sort(pl.col("strata")) + + color_discrete_map = { + "real_positives": "#4C5454", + "real_competing": "#C880B7", + "real_negatives": "#E0E0E0", + "real_censored": "#E3F09B", + "true_negatives": "#009e73", + "true_positives": "#009e73", + "false_negatives": "#FAC8CD", + "false_positives": "#FAC8CD", + } + + fig_new = px.bar( + chosen_cutoff_data, + x="mid_point", + y="reals_estimate", + color=fill_color_radio.value, + color_discrete_map=color_discrete_map, + # color="reals_labels", + # color_discrete_map=color_discrete_map, + category_orders={ + "reals_labels": list(color_discrete_map.keys()) + }, # fixes domain order + hover_data=chosen_cutoff_data.columns, # like tip: true + 
) + + fig_new.update_layout( + barmode="stack", # stacked bars (use "group" for side-by-side) + plot_bgcolor="rgba(0,0,0,0)", # transparent background + paper_bgcolor="rgba(0,0,0,0)", + legend=dict(title=""), + ) + + if stratified_by_radio.value == "probability_threshold": + vertical_line = slider_cutoff.value + else: + vertical_line = 1 - slider_cutoff.value + by / 2 + + fig_new.add_vline( + x=vertical_line, + line=dict(color="red", width=2, dash="dash"), + annotation_text=f"Cutoff: {slider_cutoff.value}", + annotation_position="top right", + ) + + fig_new + return + + +@app.cell(hide_code=True) +def _( + censoring_heuristic_radio, + competing_heuristic_radio, + performance_data, + pl, + px, + stratified_by_radio, +): + chosen_performance_data = performance_data.filter( + pl.col("censoring_heuristic") == censoring_heuristic_radio.value, + pl.col("competing_heuristic") == competing_heuristic_radio.value, + pl.col("stratified_by") == stratified_by_radio.value, + ).sort(pl.col("chosen_cutoff")) + + # Create plotly express figure + fig_rtichoke = px.line( + chosen_performance_data, + x="chosen_cutoff", + y="net_benefit", + markers=True, + title="Decision Curve Analysis", + labels={ + "threshold": "Threshold Probability", + "net_benefit": "Net Benefit", + }, + ) + + # Update layout to match rtichoke look + fig_rtichoke.update_layout( + template="simple_white", + title_font_size=20, + title_x=0.5, + legend_title_text="", + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + margin=dict(l=40, r=40, t=60, b=40), + xaxis=dict(showgrid=False, tickmode="linear", dtick=0.05), + yaxis=dict( + showgrid=False, + zeroline=True, + zerolinewidth=1, + zerolinecolor="gray", + ), + ) + + fig_rtichoke.show() + return + + +if __name__ == "__main__": + app.run() diff --git a/docs/fonts/Fraunces9pt-Light.woff2 b/docs/fonts/Fraunces9pt-Light.woff2 new file mode 100644 index 0000000..6e75fb9 Binary files /dev/null and b/docs/fonts/Fraunces9pt-Light.woff2 differ 
diff --git a/docs/render_summary.qmd b/docs/render_summary.qmd deleted file mode 100644 index 4e2be27..0000000 --- a/docs/render_summary.qmd +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: "Summary Report" -format: html -author: "Your Name" -date: "`r Sys.Date()`" ---- - -# Introduction - -This is a summary report generated with Quarto. - - -```{python} - -# import pickle - - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl', 'rb') as file: -# probs_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl', 'rb') as file: -# reals_dict = pickle.load(file) - -# with open(r'C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl', 'rb') as file: -# times_dict = pickle.load(file) - -from rtichoke.summary_report.summary_report import * - -# create_data_for_summary_report() - -``` - -```{python} -from rtichoke.summary_report.summary_report import * - -# render_summary_report( ) -``` - - -## Section 2 - -Add more content or code here. 
diff --git a/docs/small_data_example.py b/docs/small_data_example.py new file mode 100644 index 0000000..50573a1 --- /dev/null +++ b/docs/small_data_example.py @@ -0,0 +1,350 @@ +import marimo + +__generated_with = "0.14.7" +app = marimo.App(width="columns") + + +@app.cell(column=0) +def _(): + import numpy as np + import polars as pl + import plotly.express as px + + from rtichoke.helpers.sandbox_observable_helpers import ( + create_breaks_values, + create_list_data_to_adjust, + create_adjusted_data, + create_aj_data_combinations, + cast_and_join_adjusted_data, + ) + + return ( + cast_and_join_adjusted_data, + create_adjusted_data, + create_aj_data_combinations, + create_breaks_values, + create_list_data_to_adjust, + np, + pl, + px, + ) + + +@app.cell +def _(np, pl): + probs_test = { + "small_data_set": np.array( + [0.9, 0.85, 0.95, 0.88, 0.6, 0.7, 0.51, 0.2, 0.1, 0.33] + ) + } + reals_dict_test = [1, 1, 1, 1, 0, 2, 1, 2, 0, 1] + times_dict_test = [24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 31.5, 4.3] + + data_to_adjust = pl.DataFrame( + { + "strata": np.repeat("small_data_test", 10), + # "probs": probs_test["test_data"], + "reals": reals_dict_test, + "times": times_dict_test, + } + ) + + data_to_adjust + return probs_test, reals_dict_test, times_dict_test + + +@app.cell +def _(create_aj_data_combinations, create_breaks_values): + by = 0.2 + breaks = create_breaks_values(None, "probability_threshold", by) + stratified_by = ["probability_threshold", "ppcr"] + # stratified_by = ["probability_threshold"] + + # stratified_by = ["ppcr"] + + heuristics_sets = [ + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_composite", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_censored", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": 
"adjusted", + "competing_heuristic": "adjusted_as_censored", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_composite", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "excluded", + }, + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "excluded", + }, + ] + + aj_data_combinations = create_aj_data_combinations( + ["small_data_set"], + heuristics_sets=heuristics_sets, + fixed_time_horizons=[10.0, 20.0, 30.0, 40.0, 50.0], + stratified_by=stratified_by, + by=by, + breaks=breaks, + risk_set_scope=["pooled_by_cutoff", "within_stratum"], + ) + + # aj_data_combinations + + aj_data_combinations + return aj_data_combinations, breaks, by, heuristics_sets, stratified_by + + +@app.cell +def _( + aj_data_combinations, + by, + create_list_data_to_adjust, + probs_test, + reals_dict_test, + stratified_by, + times_dict_test, +): + list_data_to_adjust_polars_probability_threshold = create_list_data_to_adjust( + aj_data_combinations, + probs_test, + reals_dict_test, + times_dict_test, + stratified_by=stratified_by, + by=by, + ) + + list_data_to_adjust_polars_probability_threshold + return (list_data_to_adjust_polars_probability_threshold,) + + +@app.cell +def _( + breaks, + create_adjusted_data, + heuristics_sets, + list_data_to_adjust_polars_probability_threshold, + stratified_by, +): + adjusted_data = create_adjusted_data( + list_data_to_adjust_polars_probability_threshold, + heuristics_sets=heuristics_sets, + fixed_time_horizons=[10.0, 20.0, 30.0, 40.0, 50.0], + breaks=breaks, + stratified_by=stratified_by, + # risk_set_scope = ["pooled_by_cutoff"] + risk_set_scope=["pooled_by_cutoff", "within_stratum"], + ) + + adjusted_data + return (adjusted_data,) + + +@app.cell +def _(adjusted_data, aj_data_combinations, cast_and_join_adjusted_data): + final_adjusted_data_polars = cast_and_join_adjusted_data( + aj_data_combinations, adjusted_data + ) + + final_adjusted_data_polars + return (final_adjusted_data_polars,) 
+ + +@app.cell +def _(final_adjusted_data_polars): + from rtichoke.helpers.sandbox_observable_helpers import ( + _calculate_cumulative_aj_data, + ) + + cumulative_aj_data = _calculate_cumulative_aj_data(final_adjusted_data_polars) + + cumulative_aj_data + return (cumulative_aj_data,) + + +@app.cell +def _(cumulative_aj_data): + from rtichoke.helpers.sandbox_observable_helpers import ( + _turn_cumulative_aj_to_performance_data, + ) + + performance_data = _turn_cumulative_aj_to_performance_data(cumulative_aj_data) + + performance_data + return + + +@app.cell(column=1, hide_code=True) +def _(mo): + fill_color_radio = mo.ui.radio( + options=["classification_outcome", "reals_labels"], + value="classification_outcome", + label="Fill Colors", + ) + + fill_color_radio + return (fill_color_radio,) + + +@app.cell(hide_code=True) +def _(mo): + risk_set_scope_radio = mo.ui.radio( + options=["pooled_by_cutoff", "within_stratum"], + value="pooled_by_cutoff", + label="Risk Set Scope", + ) + + risk_set_scope_radio + return (risk_set_scope_radio,) + + +@app.cell(hide_code=True) +def _(mo): + stratified_by_radio = mo.ui.radio( + options=["probability_threshold", "ppcr"], + value="probability_threshold", + label="Stratified By", + ) + + stratified_by_radio + return (stratified_by_radio,) + + +@app.cell(hide_code=True) +def _(by): + import marimo as mo + + slider_cutoff = mo.ui.slider(start=0, stop=1, step=by, label="Cutoff") + slider_cutoff + return mo, slider_cutoff + + +@app.cell(hide_code=True) +def _(mo): + fixed_time_horizons_slider = mo.ui.slider( + start=10, stop=50, step=10, label="Fixed Time Horizon" + ) + fixed_time_horizons_slider + return (fixed_time_horizons_slider,) + + +@app.cell(hide_code=True) +def _(mo): + censoring_heuristic_radio = mo.ui.radio( + options=["adjusted", "excluded"], + value="adjusted", + label="Censoring Heuristic", + ) + + censoring_heuristic_radio + return (censoring_heuristic_radio,) + + +@app.cell(hide_code=True) +def _(mo): + 
competing_heuristic_radio = mo.ui.radio( + options=[ + "adjusted_as_negative", + "adjusted_as_censored", + "adjusted_as_composite", + "excluded", + ], + value="adjusted_as_negative", + label="Competing Heuristic", + ) + + competing_heuristic_radio + return (competing_heuristic_radio,) + + +@app.cell(column=2, hide_code=True) +def _( + by, + censoring_heuristic_radio, + competing_heuristic_radio, + fill_color_radio, + final_adjusted_data_polars, + fixed_time_horizons_slider, + pl, + px, + risk_set_scope_radio, + slider_cutoff, + stratified_by_radio, +): + chosen_cutoff_data = final_adjusted_data_polars.filter( + pl.col("censoring_heuristic") == censoring_heuristic_radio.value, + pl.col("competing_heuristic") == competing_heuristic_radio.value, + pl.col("chosen_cutoff") == slider_cutoff.value, + pl.col("fixed_time_horizon") == fixed_time_horizons_slider.value, + pl.col("risk_set_scope") == risk_set_scope_radio.value, + pl.col("stratified_by") == stratified_by_radio.value, + ).sort(pl.col("strata")) + + color_discrete_map = { + "real_positives": "#4C5454", + "real_competing": "#C880B7", + "real_negatives": "#E0E0E0", + "real_censored": "#E3F09B", + "true_negatives": "#009e73", + "true_positives": "#009e73", + "false_negatives": "#FAC8CD", + "false_positives": "#FAC8CD", + } + + fig_new = px.bar( + chosen_cutoff_data, + x="mid_point", + y="reals_estimate", + color=fill_color_radio.value, + color_discrete_map=color_discrete_map, + # color="reals_labels", + # color_discrete_map=color_discrete_map, + category_orders={ + "reals_labels": list(color_discrete_map.keys()) + }, # fixes domain order + hover_data=chosen_cutoff_data.columns, # like tip: true + ) + + fig_new.update_layout( + barmode="stack", # stacked bars (use "group" for side-by-side) + plot_bgcolor="rgba(0,0,0,0)", # transparent background + paper_bgcolor="rgba(0,0,0,0)", + legend=dict(title=""), + ) + + if stratified_by_radio.value == "probability_threshold": + vertical_line = slider_cutoff.value + else: + 
vertical_line = 1 - slider_cutoff.value + by / 2 + + fig_new.add_vline( + x=vertical_line, + line=dict(color="red", width=2, dash="dash"), + annotation_text=f"Cutoff: {slider_cutoff.value}", + annotation_position="top right", + ) + + fig_new + return + + +if __name__ == "__main__": + app.run() diff --git a/docs/walkthrough_aj_estimate.py b/docs/walkthrough_aj_estimate.py index a950b4d..dc70c32 100644 --- a/docs/walkthrough_aj_estimate.py +++ b/docs/walkthrough_aj_estimate.py @@ -1,50 +1,104 @@ import marimo __generated_with = "0.14.7" -app = marimo.App(width="medium") +app = marimo.App(width="columns") -@app.cell +@app.cell(column=0) def _(): - return + import polars as pl + import pandas as pd + import plotly.express as px + from lifelines import CoxPHFitter, WeibullAFTFitter + + df_time_to_cancer_dx = pd.read_csv( + "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" + ) + return CoxPHFitter, WeibullAFTFitter, df_time_to_cancer_dx, pl, px @app.cell -def _(mo): - mo.md(r"""## Import data and Packages""") - return +def _(CoxPHFitter, WeibullAFTFitter, df_time_to_cancer_dx): + cph = CoxPHFitter() + thin_model = CoxPHFitter() + aft_model = WeibullAFTFitter() + + cox_formula = "age + famhistory + marker" + thin_formula = "age + marker" + aft_formula = "age + marker" + + cph.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=cox_formula, + ) + thin_model.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=thin_formula, + ) -@app.cell -def _(): - from lifelines import AalenJohansenFitter - import numpy as np - from itertools import product - import itertools - from lifelines import CoxPHFitter - from lifelines import WeibullAFTFitter - import polars as pl + aft_model.fit( + df_time_to_cancer_dx, + duration_col="ttcancer", + event_col="cancer", + formula=aft_formula, + ) - print("Polars version:", pl.__version__) + cph_pred_vals = ( + ( + 1 + - 
cph.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + ) + .iloc[0, :] + .values + ) - import pandas as pd - import pickle + thin_pred_vals = ( + ( + 1 + - thin_model.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + ) + .iloc[0, :] + .values + ) + + aft_pred_vals = ( + ( + 1 + - aft_model.predict_survival_function( + df_time_to_cancer_dx[["age", "famhistory", "marker"]], times=[1.5] + ) + ) + .iloc[0, :] + .values + ) - with open( - r"C:\Users\I\Documents\GitHub\rtichoke_python\probs_dict.pkl", "rb" - ) as file: - probs_dict = pickle.load(file) + print(type(cph_pred_vals)) - with open( - r"C:\Users\I\Documents\GitHub\rtichoke_python\reals_dict.pkl", "rb" - ) as file: - reals_dict = pickle.load(file) + probs_dict = { + "full": cph_pred_vals, + "thin": thin_pred_vals, + "aft": aft_pred_vals, + } - with open( - r"C:\Users\I\Documents\GitHub\rtichoke_python\times_dict.pkl", "rb" - ) as file: - times_dict = pickle.load(file) - return pl, probs_dict, reals_dict, times_dict + reals_mapping = { + "censor": 0, + "diagnosed with cancer": 1, + "dead other causes": 2, + } + + reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping) + + times_dict = df_time_to_cancer_dx["ttcancer"] + return probs_dict, reals_dict, times_dict @app.cell @@ -57,21 +111,66 @@ def _(): @app.cell def _(probs_dict): from rtichoke.helpers.sandbox_observable_helpers import ( - create_aj_data_combinations_polars, - extract_aj_estimate_for_strata, - create_aj_data_polars, + create_aj_data_combinations, + create_breaks_values, ) - fixed_time_horizons = [1.0, 3.0, 5.0] stratified_by = ["probability_threshold", "ppcr"] + + # stratified_by = ["probability_threshold"] + # stratified_by = ["ppcr"] + by = 0.1 + breaks = create_breaks_values(None, "probability_threshold", by) + # fixed_time_horizons = [1.0, 1.5, 3.0, 5.0] + fixed_time_horizons = [1.0, 3.0, 5.0] + stratified_by = stratified_by + + 
heuristics_sets = [ + { + "censoring_heuristic": "adjusted", + "competing_heuristic": "adjusted_as_negative", + }, + { + "censoring_heuristic": "excluded", + "competing_heuristic": "adjusted_as_negative", + }, + # { + # "censoring_assumption": "adjusted", + # "competing_assumption": "adjusted_as_censored", + # }, + # { + # "censoring_assumption": "excluded", + # "competing_assumption": "adjusted_as_censored", + # }, + # {"censoring_assumption": "adjusted", "competing_assumption": "excluded"}, + # {"censoring_assumption": "excluded", "competing_assumption": "excluded"}, + ] + + aj_data_combinations = create_aj_data_combinations( + list(probs_dict.keys()), + heuristics_sets, + fixed_time_horizons, + stratified_by, + by, + breaks, + ) - aj_data_combinations = create_aj_data_combinations_polars( - list(probs_dict.keys()), fixed_time_horizons, stratified_by, by + aj_data_combinations + return ( + aj_data_combinations, + breaks, + by, + fixed_time_horizons, + heuristics_sets, + stratified_by, ) - print(aj_data_combinations["strata"]) - return by, create_aj_data_polars, fixed_time_horizons, stratified_by + +@app.cell +def _(aj_data_combinations): + aj_data_combinations + return @app.cell @@ -81,98 +180,261 @@ def _(mo): @app.cell -def _(by, probs_dict, reals_dict, stratified_by, times_dict): +def _( + aj_data_combinations, + by, + probs_dict, + reals_dict, + stratified_by, + times_dict, +): from rtichoke.helpers.sandbox_observable_helpers import ( - create_list_data_to_adjust_polars, + create_list_data_to_adjust, + create_adjusted_data, + cast_and_join_adjusted_data, ) - list_data_to_adjust_polars = create_list_data_to_adjust_polars( - probs_dict, reals_dict, times_dict, stratified_by=stratified_by, by=by + list_data_to_adjust_polars = create_list_data_to_adjust( + aj_data_combinations, + probs_dict, + reals_dict, + times_dict, + stratified_by=stratified_by, + by=by, ) list_data_to_adjust_polars - return (list_data_to_adjust_polars,) + return ( + 
cast_and_join_adjusted_data, + create_adjusted_data, + list_data_to_adjust_polars, + ) @app.cell -def _(mo): - mo.md(r"""## create adjusted data list polars""") - return +def _( + breaks, + create_adjusted_data, + fixed_time_horizons, + heuristics_sets, + list_data_to_adjust_polars, + stratified_by, +): + adjusted_data = create_adjusted_data( + list_data_to_adjust_polars, + heuristics_sets=heuristics_sets, + fixed_time_horizons=fixed_time_horizons, + breaks=breaks, + stratified_by=stratified_by, + # stratified_by=["probability_threshold", "ppcr"] + risk_set_scope=["pooled_by_cutoff", "within_stratum"], + ) + + adjusted_data + return (adjusted_data,) @app.cell -def _(list_data_to_adjust_polars, pl): - example_polars_df = list_data_to_adjust_polars.get("full").select( - pl.col("strata"), pl.col("reals"), pl.col("times") +def _(adjusted_data, aj_data_combinations, cast_and_join_adjusted_data): + final_adjusted_data_polars = cast_and_join_adjusted_data( + aj_data_combinations, adjusted_data ) - example_polars_df - return (example_polars_df,) + final_adjusted_data_polars + return (final_adjusted_data_polars,) @app.cell -def _(mo): - mo.md(r"""## Create AJ estimates Data""") +def _(final_adjusted_data_polars): + final_adjusted_data_polars return @app.cell -def _(mo): - mo.md(r"""## Create aj_data""") - return +def _(final_adjusted_data_polars): + from rtichoke.helpers.sandbox_observable_helpers import ( + _calculate_cumulative_aj_data, + ) + + cumulative_aj_data = _calculate_cumulative_aj_data(final_adjusted_data_polars) + + cumulative_aj_data + return (cumulative_aj_data,) @app.cell -def _(create_aj_data_polars, example_polars_df, fixed_time_horizons, pl): - aj_estimates_per_strata_adj_adjneg = create_aj_data_polars( - example_polars_df, "adjusted", "adjusted_as_negative", fixed_time_horizons +def _(cumulative_aj_data): + from rtichoke.helpers.sandbox_observable_helpers import ( + _turn_cumulative_aj_to_performance_data, ) - aj_estimates_per_strata_excl_adjneg = 
create_aj_data_polars( - example_polars_df, "excluded", "adjusted_as_negative", fixed_time_horizons + performance_data = _turn_cumulative_aj_to_performance_data(cumulative_aj_data) + + performance_data + return + + +@app.cell(column=1, hide_code=True) +def _(mo): + reference_group_radio = mo.ui.radio( + options=["full", "thin", "aft"], value="full", label="Model" ) - aj_estimates_per_strata_adj_adjcens = create_aj_data_polars( - example_polars_df, "adjusted", "adjusted_as_censored", fixed_time_horizons + reference_group_radio + return (reference_group_radio,) + + +@app.cell(hide_code=True) +def _(mo): + fill_color_radio = mo.ui.radio( + options=["classification_outcome", "reals_labels"], + value="classification_outcome", + label="Fill Colors", ) - aj_estimates_per_strata_excl_adjcens = create_aj_data_polars( - example_polars_df, "excluded", "adjusted_as_censored", fixed_time_horizons + fill_color_radio + return (fill_color_radio,) + + +@app.cell(hide_code=True) +def _(mo): + risk_set_scope_radio = mo.ui.radio( + options=["pooled_by_cutoff", "within_stratum"], + value="pooled_by_cutoff", + label="Risk Set Scope", ) - aj_estimates_per_strata_adj_excl = create_aj_data_polars( - example_polars_df, "adjusted", "excluded", fixed_time_horizons + risk_set_scope_radio + return (risk_set_scope_radio,) + + +@app.cell(hide_code=True) +def _(mo): + stratified_by_radio = mo.ui.radio( + options=["probability_threshold", "ppcr"], + value="probability_threshold", + label="Stratified By", ) - aj_estimates_per_strata_excl_excl = create_aj_data_polars( - example_polars_df, "excluded", "excluded", fixed_time_horizons + stratified_by_radio + return (stratified_by_radio,) + + +@app.cell(hide_code=True) +def _(by, mo): + slider_cutoff = mo.ui.slider(start=0, stop=1, step=by, label="Cutoff") + slider_cutoff + return (slider_cutoff,) + + +@app.cell(hide_code=True) +def _(mo): + fixed_time_horizons_slider = mo.ui.slider( + start=1, stop=5, step=2, label="Fixed Time Horizon" ) + 
fixed_time_horizons_slider + return (fixed_time_horizons_slider,) + - aj_estimates_data = pl.concat( - [ - aj_estimates_per_strata_adj_adjneg, - aj_estimates_per_strata_adj_adjcens, - aj_estimates_per_strata_adj_excl, - aj_estimates_per_strata_excl_adjneg, - aj_estimates_per_strata_excl_adjcens, - aj_estimates_per_strata_excl_excl, - ] - ).unpivot( - index=[ - "strata", - "fixed_time_horizon", - "censoring_assumption", - "competing_assumption", +@app.cell(hide_code=True) +def _(mo): + competing_heuristic_radio = mo.ui.radio( + options=[ + "adjusted_as_negative", + "adjusted_as_censored", + "adjusted_as_composite", + "excluded", ], - variable_name="reals_labels", - value_name="reals_estimate", + value="adjusted_as_negative", + label="Competing Heuristic", ) - return (aj_estimates_data,) + competing_heuristic_radio + return (competing_heuristic_radio,) -@app.cell -def _(aj_estimates_data): - aj_estimates_data + +@app.cell(hide_code=True) +def _(mo): + censoring_heuristic_radio = mo.ui.radio( + options=["adjusted", "excluded"], + value="adjusted", + label="Censoring Heuristic", + ) + + censoring_heuristic_radio + return (censoring_heuristic_radio,) + + +@app.cell(column=2, hide_code=True) +def _( + by, + censoring_heuristic_radio, + competing_heuristic_radio, + fill_color_radio, + final_adjusted_data_polars, + fixed_time_horizons_slider, + pl, + px, + reference_group_radio, + risk_set_scope_radio, + slider_cutoff, + stratified_by_radio, +): + chosen_cutoff_data = final_adjusted_data_polars.filter( + pl.col("chosen_cutoff") == slider_cutoff.value, + pl.col("fixed_time_horizon") == fixed_time_horizons_slider.value, + pl.col("reference_group") == reference_group_radio.value, + pl.col("risk_set_scope") == risk_set_scope_radio.value, + pl.col("stratified_by") == stratified_by_radio.value, + pl.col("censoring_heuristic") == censoring_heuristic_radio.value, + pl.col("competing_heuristic") == competing_heuristic_radio.value, + ).sort(pl.col("strata")) + + color_discrete_map = 
{ + "real_positives": "#4C5454", + "real_competing": "#C880B7", + "real_negatives": "#E0E0E0", + "real_censored": "#E3F09B", + "true_negatives": "#009e73", + "true_positives": "#009e73", + "false_negatives": "#FAC8CD", + "false_positives": "#FAC8CD", + } + + fig_new = px.bar( + chosen_cutoff_data, + x="mid_point", + y="reals_estimate", + color=fill_color_radio.value, + color_discrete_map=color_discrete_map, + # color="reals_labels", + # color_discrete_map=color_discrete_map, + category_orders={ + "reals_labels": list(color_discrete_map.keys()) + }, # fixes domain order + hover_data=chosen_cutoff_data.columns, # like tip: true + ) + + fig_new.update_layout( + barmode="stack", # stacked bars (use "group" for side-by-side) + plot_bgcolor="rgba(0,0,0,0)", # transparent background + paper_bgcolor="rgba(0,0,0,0)", + legend=dict(title=""), + ) + + if stratified_by_radio.value == "probability_threshold": + vertical_line = slider_cutoff.value + else: + vertical_line = 1 - slider_cutoff.value + by / 2 + + fig_new.add_vline( + x=vertical_line, + line=dict(color="red", width=2, dash="dash"), + annotation_text=f"Cutoff: {slider_cutoff.value}", + annotation_position="top right", + ) + + # fig_new return diff --git a/docs/walkthrough_aj_estimate.qmd b/docs/walkthrough_aj_estimate.qmd deleted file mode 100644 index 106e596..0000000 --- a/docs/walkthrough_aj_estimate.qmd +++ /dev/null @@ -1,316 +0,0 @@ ---- -title: "Hello, Quarto" -format: html -echo: false -message: false -warning: false ---- - -```{python} -import polars as pl -import pandas as pd -import numpy as np -from lifelines import AalenJohansenFitter, CoxPHFitter, WeibullAFTFitter - -df_time_to_cancer_dx = pd.read_csv( - "https://raw.githubusercontent.com/ddsjoberg/dca-tutorial/main/data/df_time_to_cancer_dx.csv" -) -``` - - -```{python} - -import numpy as np -from itertools import product -import itertools -from rtichoke.helpers.sandbox_observable_helpers import * -import polars as pl -print("Polars version:", 
pl.__version__) - -import pandas as pd -import pickle - -cph = CoxPHFitter() -thin_model = CoxPHFitter() -aft_model = WeibullAFTFitter() - -cox_formula = "age + famhistory + marker" -thin_formula = "age + marker" -aft_formula = "age + marker" - -cph.fit( - df_time_to_cancer_dx, - duration_col="ttcancer", - event_col="cancer", - formula=cox_formula, -) - -thin_model.fit( - df_time_to_cancer_dx, - duration_col="ttcancer", - event_col="cancer", - formula=thin_formula, -) - -aft_model.fit( - df_time_to_cancer_dx, - duration_col="ttcancer", - event_col="cancer", - formula=aft_formula, -) - - - -cph_pred_vals = (1 - cph.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values - -thin_pred_vals = (1 - thin_model.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values - -aft_pred_vals = (1 - aft_model.predict_survival_function(df_time_to_cancer_dx[['age', 'famhistory', 'marker']], times=[1.5])).iloc[0, :].values - -probs_dict = {"full": cph_pred_vals, "thin": thin_pred_vals, "aft": aft_pred_vals} - -reals_mapping = {"censor": 0, "diagnosed with cancer": 1, "dead other causes": 2} - -reals_dict = df_time_to_cancer_dx["cancer_cr"].map(reals_mapping) - -times_dict = df_time_to_cancer_dx["ttcancer"] - -``` - - -## polars - -```{python} - - - -fixed_time_horizons = [1.0, 3.0, 5.0] -stratified_by = ["probability_threshold", "ppcr"] -by=0.1 - -aj_data_combinations = create_aj_data_combinations_polars(list(probs_dict.keys()), fixed_time_horizons, stratified_by, by) - - - -print(aj_data_combinations['strata']) -``` - -# try polars - -## create list data to adjust polars - -```{python} - -from rtichoke.helpers.sandbox_observable_helpers import * - -list_data_to_adjust_polars = create_list_data_to_adjust_polars( - probs_dict, reals_dict, times_dict, stratified_by=stratified_by, by=by -) - - - -``` - - -## create adjusted data list polars - -### New extract aj estimate by 
assumptions polars - -## Create aj_estimates_data - -```{python} - -fixed_time_horizons = [1.0, 3.0, 5.0] - -assumption_sets = [ - { - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_negative", - }, - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_negative", - }, - { - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_censored", - }, - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_censored", - }, - {"censoring_assumption": "adjusted", "competing_assumption": "excluded"}, - {"censoring_assumption": "excluded", "competing_assumption": "excluded"}, -] - -# aj_estimates_data = extract_aj_estimate_by_assumptions( -# example_polars_df, -# assumption_sets=assumption_sets, -# fixed_time_horizons=fixed_time_horizons, -# ) - - -aj_estimates_data = create_adjusted_data( - list_data_to_adjust_polars, - assumption_sets=assumption_sets, - fixed_time_horizons=fixed_time_horizons -) - -``` - - -### Check strata values - -```{python} - -aj_data_combinations.select(pl.col('strata')).with_columns( - pl.col("strata").cast(str) -).join( - aj_estimates_data.select(pl.col('strata')).unique(), - on = 'strata' -) - -result = aj_data_combinations.select(pl.col('strata')).with_columns( - pl.col("strata").cast(str) -).with_columns( - pl.col("strata").is_in(aj_estimates_data["strata"]).alias("is_in_df2") -) - -print(result) - - -result = aj_estimates_data.select(pl.col('strata')).with_columns( - pl.col("strata") -).with_columns( - pl.col("strata").is_in(aj_data_combinations["strata"].cast(str)).alias("is_in_df2") -) - -print(result.filter(pl.col("is_in_df2") == False)) - - -``` - -### Cast varibles with hacks - -```{python} - -final_adjusted_data_polars = cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data) - -``` - - -```{python} - -import pandas as pd - - -reference_groups = list(probs_dict.keys()) - - -ojs_define(reference_groups_data = reference_groups) - 
-ojs_define(data = final_adjusted_data_polars.to_pandas()) - -``` - -## Observable stuff - -```{ojs} -//| panel: input - -viewof time_horizon = Inputs.range( - [1, 5], - {value: 3, step: 2, label: "Time Horizon:"} -) - -viewof reference_group = Inputs.radio( - reference_groups_data, {label: "Reference Group"}, {value: 'thin'} -) - -viewof stratified_by = Inputs.radio( - ["probability_threshold", "ppcr"], {value: "probability_threshold", label: "Stratified By"} -) - -viewof censored_assumption = Inputs.radio( - ["excluded", "adjusted"], {value: "excluded", label: "Censored Assumption"} -) - -viewof competing_assumption = Inputs.radio( - ["excluded", "adjusted_as_negative", "adjusted_as_censored"], {value: "excluded", label: "Competing Assumption"} -) - -``` - -```{ojs} - -//cumulative_aj_data_filtered = transpose(cumulative_aj_data).filter(function(subset) { -// -// return time_horizon == subset.fixed_time_horizon && -// censored_assumption == subset.censored_assumption && -// competing_assumption == subset.competing_assumption && -// stratified_by == subset.stratified_by && -// reference_group === subset.reference_group; -//}) - -filtered = transpose(data).filter(function(subset) { - - return time_horizon == subset.fixed_time_horizon && - censored_assumption == subset.censoring_assumption && - competing_assumption == subset.competing_assumption && - stratified_by === subset.stratified_by && - reference_group === subset.reference_group; -}) - -filtered - - -``` - -```{ojs} - - -Plot.plot({ - marks: [ - Plot.barY(filtered, { - x: "strata", - y: "reals_estimate", - fill: "reals_labels", - tip: true - }) - ], - color: { - domain: ["real_positives", "real_competing", "real_negatives", "real_censored"], - range: ["#009e73", "#9DB4C0", "#FAC8CD", "#E3F09B"], - legend: true - }, - style: { - background: "none" - } -}) - -``` - -```{python} - -# combined_adjusted_data.dropna(subset=['reals_estimate']) -# # - -# Perform left join between aj_data_combinations and 
final_adjusted_data on 'strata' and 'reals_estimate' -# only when stratified_by == 'probability_threshold' for aj_data_combinations - -# aj_data_combinations_prob_threshold = aj_data_combinations[aj_data_combinations['stratified_by'] == 'probability_threshold'] - -# # Convert 'strata' columns to strings -# aj_data_combinations_prob_threshold['strata'] = aj_data_combinations_prob_threshold['strata'].astype(str) -# final_adjusted_data['strata'] = final_adjusted_data['strata'].astype(str) - -# combined_adjusted_data = aj_data_combinations_prob_threshold.merge( -# final_adjusted_data[['strata', 'reals', 'reals_estimate']], -# on=['strata', 'reals'], -# how='left' -# ) - - -# aj_data_combinations_prob_threshold[['strata']] -# final_adjusted_data[['strata']] -``` diff --git a/monkeytype.sqlite3 b/monkeytype.sqlite3 deleted file mode 100644 index da60bb2..0000000 Binary files a/monkeytype.sqlite3 and /dev/null differ diff --git a/pyproject.toml b/pyproject.toml index 46065df..8da0fee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,8 @@ dependencies = [ "pyarrow>=20.0.0", "ty>=0.0.1a5", "pandas>=2.2.3", - "polarstate>=0.1.6", + "typing>=3.7.4.3", + "polarstate==0.1.8", ] name = "rtichoke" version = "0.1.11" @@ -28,9 +29,6 @@ readme = "README.md" dev = [ "jupyter<2.0.0,>=1.0.0", "myst-nb<1.0.0,>=0.17.1; python_version ~= \"3.9\"", - "sphinx-autoapi<3.0.0,>=2.1.0", - "sphinx-rtd-theme<2.0.0,>=1.2.0", - "mypy>=1.2.0,<2.0.0", "pytest-cov<5.0.0,>=4.0.0", "pytest<8.0.0,>=7.3.0", "pyzmq<27.0.0,>=26.3.0", @@ -38,14 +36,18 @@ dev = [ "ipykernel>=6.29.5", "lifelines>=0.30.0", "uv>=0.6.11", - "monkeytype>=23.3.0", "marimo>=0.14.7", "pre-commit>=4.2.0", + "dcurves>=1.1.5", + "plotly>=5.24.1", + "ty>=0.0.1a12", + "scikit-learn>=1.6.1", + "polarstate>=0.1.6", ] [tool.uv.workspace] members = ["rtichoke"] [build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" +requires = ["uv_build>=0.7.20,<0.8.0"] +build-backend = "uv_build" diff --git 
a/src/rtichoke/calibration/calibration.py b/src/rtichoke/calibration/calibration.py index 0c1f81d..d2a8820 100644 --- a/src/rtichoke/calibration/calibration.py +++ b/src/rtichoke/calibration/calibration.py @@ -15,7 +15,28 @@ def create_calibration_curve( reals: Dict[str, List[int]], calibration_type: str = "discrete", size: Optional[int] = None, - color_values: List[str] = None, + color_values: Optional[List[str]] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Creates Calibration Curve @@ -31,29 +52,6 @@ def create_calibration_curve( Returns: Figure: _description_ """ - if color_values is None: - color_values = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ] rtichoke_response = send_requests_to_rtichoke_r( dictionary_to_send={ diff --git a/src/rtichoke/discrimination/gains.py b/src/rtichoke/discrimination/gains.py index 06fc222..e2a552b 100644 --- a/src/rtichoke/discrimination/gains.py +++ b/src/rtichoke/discrimination/gains.py @@ -15,7 +15,28 @@ def create_gains_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Gains Curve @@ -48,7 +69,28 @@ 
def create_gains_curve( def plot_gains_curve( performance_data: DataFrame, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Gains Curve diff --git a/src/rtichoke/discrimination/lift.py b/src/rtichoke/discrimination/lift.py index 02d3c0e..a796c29 100644 --- a/src/rtichoke/discrimination/lift.py +++ b/src/rtichoke/discrimination/lift.py @@ -15,7 +15,28 @@ def create_lift_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Lift Curve @@ -48,7 +69,28 @@ def create_lift_curve( def plot_lift_curve( performance_data: DataFrame, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Lift Curve diff --git a/src/rtichoke/discrimination/precision_recall.py b/src/rtichoke/discrimination/precision_recall.py index 3c0723e..274e36e 100644 --- a/src/rtichoke/discrimination/precision_recall.py +++ 
b/src/rtichoke/discrimination/precision_recall.py @@ -15,7 +15,28 @@ def create_precision_recall_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Precision Recall Curve @@ -48,7 +69,28 @@ def create_precision_recall_curve( def plot_precision_recall_curve( performance_data: DataFrame, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Precision Recall Curve diff --git a/src/rtichoke/discrimination/roc.py b/src/rtichoke/discrimination/roc.py index 084232d..4c1c3bf 100644 --- a/src/rtichoke/discrimination/roc.py +++ b/src/rtichoke/discrimination/roc.py @@ -15,7 +15,28 @@ def create_roc_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create ROC Curve @@ -48,7 +69,28 @@ def create_roc_curve( def plot_roc_curve( performance_data: DataFrame, size: 
Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot ROC Curve diff --git a/src/rtichoke/helpers/sandbox_observable_helpers.py b/src/rtichoke/helpers/sandbox_observable_helpers.py index 1f808e6..730712f 100644 --- a/src/rtichoke/helpers/sandbox_observable_helpers.py +++ b/src/rtichoke/helpers/sandbox_observable_helpers.py @@ -1,10 +1,18 @@ from lifelines import AalenJohansenFitter import pandas as pd import numpy as np -import itertools import polars as pl from polarstate import predict_aj_estimates from polarstate import prepare_event_table +from typing import Dict, Union +from collections.abc import Sequence + + +def _enum_dataframe(column_name: str, values: Sequence[str]) -> pl.DataFrame: + """Create a single-column DataFrame with an enum dtype.""" + enum_values = list(dict.fromkeys(values)) + enum_dtype = pl.Enum(enum_values) + return pl.DataFrame({column_name: pl.Series(values, dtype=enum_dtype)}) def extract_aj_estimate(data_to_adjust, fixed_time_horizons): @@ -96,119 +104,90 @@ def extract_aj_estimate(data_to_adjust, fixed_time_horizons): return result_df -def extract_crude_estimate(data_to_adjust: pd.DataFrame) -> pd.DataFrame: - df = safe_pl_from_pandas(data_to_adjust) - - crude_estimate = df.group_by(["strata", "reals", "fixed_time_horizon"]).agg( - pl.count().alias("reals_estimate") - ) - - unique_strata = df.select("strata").unique().to_series().to_list() - unique_reals = df.select("reals").unique().to_series().to_list() - unique_horizons = df.select("fixed_time_horizon").unique().to_series().to_list() - - all_combinations = pl.DataFrame( - itertools.product(unique_strata, unique_reals, 
unique_horizons), - schema=["strata", "reals", "fixed_time_horizon"], - ) - - final = all_combinations.join( - crude_estimate, on=["strata", "reals", "fixed_time_horizon"], how="left" - ).fill_null(0) - - return final.to_pandas() - - -def add_cutoff_strata_polars(data: pl.DataFrame, by: float) -> pl.DataFrame: - def transform_group(group: pl.DataFrame) -> pl.DataFrame: - # Convert to NumPy for numeric ops +def add_cutoff_strata(data: pl.DataFrame, by: float, stratified_by) -> pl.DataFrame: + def transform_group(group: pl.DataFrame, by: float) -> pl.DataFrame: probs = group["probs"].to_numpy() + columns_to_add = [] - # --- Compute strata_probability_threshold --- breaks = create_breaks_values(probs, "probability_threshold", by) - # strata_prob = np.digitize(probs, breaks, right=False) - 1 - # Clamp indices to avoid out-of-bounds error when accessing breaks[i+1] - # strata_prob = np.clip(strata_prob, 0, len(breaks) - 2) - # strata_prob_labels = [ - # f"({breaks[i]:.3f}, {breaks[i+1]:.3f}]" for i in strata_prob - # ] + if "probability_threshold" in stratified_by: + last_bin_index = len(breaks) - 2 - last_bin_index = len(breaks) - 2 + bin_indices = np.digitize(probs, bins=breaks, right=False) - 1 + bin_indices = np.where(probs == 1.0, last_bin_index, bin_indices) - bin_indices = np.digitize(probs, bins=breaks, right=False) - 1 - bin_indices = np.where(probs == 1.0, last_bin_index, bin_indices) + lower_bounds = breaks[bin_indices] + upper_bounds = breaks[bin_indices + 1] - lower_bounds = breaks[bin_indices] - upper_bounds = breaks[bin_indices + 1] + include_upper_bounds = bin_indices == last_bin_index - include_upper_bounds = bin_indices == last_bin_index + strata_prob_labels = np.where( + include_upper_bounds, + [f"[{lo:.2f}, {hi:.2f}]" for lo, hi in zip(lower_bounds, upper_bounds)], + [f"[{lo:.2f}, {hi:.2f})" for lo, hi in zip(lower_bounds, upper_bounds)], + ).astype(str) - strata_prob_labels = np.where( - include_upper_bounds, - [f"[{lo:.2f}, {hi:.2f}]" for lo, hi in 
zip(lower_bounds, upper_bounds)], - [f"[{lo:.2f}, {hi:.2f})" for lo, hi in zip(lower_bounds, upper_bounds)], - ) + columns_to_add.append( + pl.Series("strata_probability_threshold", strata_prob_labels) + ) - # --- Compute strata_ppcr as quantiles on -probs --- - try: - q = int(1 / by) - quantile_edges = np.quantile(-probs, np.linspace(0, 1, q)) - strata_ppcr = np.digitize(-probs, quantile_edges, right=False) - strata_ppcr = (strata_ppcr / (1 / by)).astype(str) - except ValueError: - strata_ppcr = np.array(["1"] * len(probs)) # fallback for small group + if "ppcr" in stratified_by: + # --- Compute strata_ppcr as equal-frequency quantile bins by rank --- + by = float(by) + q = int(round(1 / by)) # e.g. 0.2 -> 5 bins - return group.with_columns( - [ - pl.Series("strata_probability_threshold", strata_prob_labels), - pl.Series("strata_ppcr", strata_ppcr), - ] - ) + probs = np.asarray(probs, float) + n = probs.size + print(f"q = {q}, n = {n}") + print("probs:", probs) - # Apply per-group transformation - grouped = data.partition_by("reference_group", as_dict=True) - transformed_groups = [transform_group(group) for group in grouped.values()] - return pl.concat(transformed_groups) + edges = np.quantile(probs, np.linspace(0.0, 1.0, q + 1), method="linear") + print("edges before accumulating:", edges) + edges = np.maximum.accumulate(edges) + print("edges after accumulating:", edges) -def add_cutoff_strata(data, by): - result = data.copy() + edges[0] = 0.0 + edges[-1] = 1.0 - grouped = result.groupby("reference_group") + print("edges after setting 0 and 1:", edges) - def transform_group(group): - group["strata_probability_threshold"] = pd.cut( - group["probs"], - bins=create_breaks_values(group["probs"], "probability_threshold", by), - include_lowest=True, - ) + bin_idx = np.digitize(probs, bins=edges[1:-1], right=True) + print("bin_idx:", bin_idx) - group["strata_ppcr"] = ( - pd.qcut(-group["probs"], q=int(1 / by), labels=False, duplicates="drop") + 1 - ) + s = str(by) + 
decimals = len(s.split(".")[-1]) if "." in s else 0 - group["strata_ppcr"] = (group["strata_ppcr"] / (1 / by)).astype(str) + labels = [f"{x:.{decimals}f}" for x in np.linspace(by, 1.0, q)] + print("bin_labels", labels) - return group + strata_labels = np.array([labels[i] for i in bin_idx], dtype=object) + print("strata_labels:", strata_labels) - result = grouped.apply(transform_group) + columns_to_add.append( + pl.Series("strata_ppcr", strata_labels).cast(pl.Enum(labels)) + ) + return group.with_columns(columns_to_add) - result = result.reset_index(drop=True) + # Apply per-group transformation + grouped = data.partition_by("reference_group", as_dict=True) + transformed_groups = [transform_group(group, by) for group in grouped.values()] + return pl.concat(transformed_groups) - return result +def create_strata_combinations(stratified_by: str, by: float, breaks) -> pl.DataFrame: + s_by = str(by) + decimals = len(s_by.split(".")[-1]) if "." in s_by else 0 + fmt = f"{{:.{decimals}f}}" -def create_strata_combinations_polars(stratified_by: str, by: float) -> pl.DataFrame: if stratified_by == "probability_threshold": - breaks = create_breaks_values(None, "probability_threshold", by) - upper_bound = breaks[1:] # breaks lower_bound = breaks[:-1] # np.roll(upper_bound, 1) # lower_bound[0] = 0.0 mid_point = upper_bound - by / 2 include_lower_bound = lower_bound > -0.1 include_upper_bound = upper_bound == 1.0 # upper_bound != 0.0 - chosen_cutoff = upper_bound + # chosen_cutoff = upper_bound strata = format_strata_column( lower_bound=lower_bound, upper_bound=upper_bound, @@ -218,18 +197,19 @@ def create_strata_combinations_polars(stratified_by: str, by: float) -> pl.DataF ) elif stratified_by == "ppcr": - strata_mid = create_breaks_values(None, "probability_threshold", by)[1:] - lower_bound = strata_mid - by - upper_bound = strata_mid + by - mid_point = upper_bound - by + strata_mid = breaks[1:] + lower_bound = strata_mid - by / 2 + upper_bound = strata_mid + by / 2 + mid_point 
= breaks[1:] include_lower_bound = np.ones_like(strata_mid, dtype=bool) include_upper_bound = np.zeros_like(strata_mid, dtype=bool) - chosen_cutoff = strata_mid - strata = np.round(mid_point, 3).astype(str) + # chosen_cutoff = strata_mid + strata = np.array([fmt.format(x) for x in strata_mid], dtype=object) + print("strata", strata) else: raise ValueError(f"Unsupported stratified_by: {stratified_by}") - return pl.DataFrame( + bins_df = pl.DataFrame( { "strata": pl.Series(strata), "lower_bound": lower_bound, @@ -237,11 +217,15 @@ def create_strata_combinations_polars(stratified_by: str, by: float) -> pl.DataF "mid_point": mid_point, "include_lower_bound": include_lower_bound, "include_upper_bound": include_upper_bound, - "chosen_cutoff": chosen_cutoff, + # "chosen_cutoff": chosen_cutoff, "stratified_by": [stratified_by] * len(strata), } ) + cutoffs_df = pl.DataFrame({"chosen_cutoff": breaks}) + + return bins_df.join(cutoffs_df, how="cross") + def format_strata_column( lower_bound: list[float], @@ -269,59 +253,6 @@ def format_strata_interval( return f"{left}{lower:.3f}, {upper:.3f}{right}" -def create_strata_combinations(stratified_by, by): - if stratified_by == "probability_threshold": - upper_bound = create_breaks_values(None, "probability_threshold", by) - lower_bound = np.roll(upper_bound, 1) - lower_bound[0] = 0 - mid_point = upper_bound - by / 2 - include_lower_bound = lower_bound == 0 - include_upper_bound = upper_bound != 0 - strata = [ - f"{'[' if include_lower else '('}{lower}, {upper}{']' if include_upper else ')'}" - for include_lower, lower, upper, include_upper in zip( - include_lower_bound, lower_bound, upper_bound, include_upper_bound - ) - ] - chosen_cutoff = upper_bound - elif stratified_by == "ppcr": - strata = create_breaks_values(None, "probability_threshold", by)[1:] - lower_bound = strata - by - upper_bound = strata + by - mid_point = upper_bound - by / 2 - include_lower_bound = np.ones_like(strata, dtype=bool) - include_upper_bound = 
np.zeros_like(strata, dtype=bool) - chosen_cutoff = strata - return pd.DataFrame( - { - "strata": strata, - "lower_bound": lower_bound, - "upper_bound": upper_bound, - "mid_point": mid_point, - "include_lower_bound": include_lower_bound, - "include_upper_bound": include_upper_bound, - "chosen_cutoff": chosen_cutoff, - "stratified_by": stratified_by, - } - ) - - -def create_breaks_values_polars(probs_vec, stratified_by, by): - # Ensure probs_vec is a NumPy array (in case it's a Polars Series) - if hasattr(probs_vec, "to_numpy"): - probs_vec = probs_vec.to_numpy() - - if stratified_by != "probability_threshold": - # Quantile-based bin edges (descending) - breaks = np.quantile(probs_vec, np.linspace(1, 0, int(1 / by) + 1)) - else: - # Fixed-width bin edges (ascending) - decimal_places = len(str(by).split(".")[-1]) - breaks = np.round(np.arange(0, 1 + by, by), decimals=decimal_places) - - return breaks - - def create_breaks_values(probs_vec, stratified_by, by): if stratified_by != "probability_threshold": breaks = np.quantile(probs_vec, np.linspace(1, 0, int(1 / by) + 1)) @@ -332,20 +263,28 @@ def create_breaks_values(probs_vec, stratified_by, by): return breaks -def create_aj_data_combinations_polars( - reference_groups, fixed_time_horizons, stratified_by, by -): - # Create strata combinations using Polars - strata_combinations_list = [ - create_strata_combinations_polars(x, by) for x in stratified_by - ] - strata_combinations = pl.concat(strata_combinations_list, how="vertical") +def create_aj_data_combinations( + reference_groups: Sequence[str], + heuristics_sets: list[Dict], + fixed_time_horizons: Sequence[float], + stratified_by: Sequence[str], + by: float, + breaks: Sequence[float], + risk_set_scope: Sequence[str] = ["within_stratum", "pooled_by_cutoff"], +) -> pl.DataFrame: + dfs = [create_strata_combinations(sb, by, breaks) for sb in stratified_by] + strata_combinations = pl.concat(dfs, how="vertical") - strata_labels = strata_combinations["strata"] - 
strata_enum = pl.Enum(strata_labels) + # strata_enum = pl.Enum(strata_combinations["strata"]) - stratified_by_labels = ["probability_threshold", "ppcr"] - stratified_by_enum = pl.Enum(stratified_by_labels) + strata_cats = ( + strata_combinations.select(pl.col("strata").unique(maintain_order=True)) + .to_series() + .to_list() + ) + + strata_enum = pl.Enum(strata_cats) + stratified_by_enum = pl.Enum(["probability_threshold", "ppcr"]) strata_combinations = strata_combinations.with_columns( [ @@ -354,6 +293,14 @@ def create_aj_data_combinations_polars( ] ) + risk_set_scope_combinations = pl.DataFrame( + { + "risk_set_scope": pl.Series(risk_set_scope).cast( + pl.Enum(["within_stratum", "pooled_by_cutoff"]) + ) + } + ) + # Define values for Cartesian product reals_labels = [ "real_negatives", @@ -361,120 +308,39 @@ def create_aj_data_combinations_polars( "real_competing", "real_censored", ] - reals_enum = pl.Enum(reals_labels) - df_reals = pl.DataFrame({"reals_labels": pl.Series(reals_labels, dtype=reals_enum)}) - df_reference_groups = pl.DataFrame( - { - "reference_group": pl.Series( - reference_groups, dtype=pl.Enum(reference_groups) - ) - } - ) - - censoring_assumptions_labels = ["excluded", "adjusted"] - censoring_assumptions_enum = pl.Enum(censoring_assumptions_labels) - df_censoring_assumptions = pl.DataFrame( - { - "censoring_assumption": pl.Series( - censoring_assumptions_labels, dtype=censoring_assumptions_enum - ) - } - ) - - competing_assumptions_labels = [ - "excluded", - "adjusted_as_negative", - "adjusted_as_censored", - ] - competing_assumptions_enum = pl.Enum(competing_assumptions_labels) - df_competing_assumptions = pl.DataFrame( - { - "competing_assumption": pl.Series( - competing_assumptions_labels, dtype=competing_assumptions_enum - ) - } - ) - - # Create all combinations - combinations = list( - itertools.product( - # reference_groups, - fixed_time_horizons, - # censoring_assumptions, - # competing_assumptions - ) - ) - - df_combinations = 
pl.DataFrame( - combinations, - schema=[ - # "reference_group", # str - "fixed_time_horizon", # cast to Float64 - # "censoring_assumption", # str - # "competing_assumption" # str - ], - ).with_columns( - [ - pl.col("fixed_time_horizon").cast(pl.Float64), - # pl.col("censoring_assumption").cast(pl.String), - # pl.col("competing_assumption").cast(pl.String), - # pl.col("reference_group").cast(pl.String) - ] - ) - # Cross join (cartesian product) with strata_combinations - return ( - df_reference_groups.join(df_combinations, how="cross") - .join(df_censoring_assumptions, how="cross") - .join(df_competing_assumptions, how="cross") - .join(strata_combinations, how="cross") - .join(df_reals, how="cross") - ) + print("heuristics_sets", pl.DataFrame(heuristics_sets)) + heuristics_combinations = pl.DataFrame(heuristics_sets) -def create_aj_data_combinations( - reference_groups, fixed_time_horizons, stratified_by, by -): - strata_combinations = pd.concat( - [create_strata_combinations(x, by) for x in stratified_by], ignore_index=True + censoring_heuristics_enum = pl.Enum( + heuristics_combinations["censoring_heuristic"].unique(maintain_order=True) ) - - reals = pd.Categorical( - ["real_negatives", "real_positives", "real_competing", "real_censored"], - categories=[ - "real_negatives", - "real_positives", - "real_competing", - "real_censored", - ], - ordered=True, + competing_heuristics_enum = pl.Enum( + heuristics_combinations["competing_heuristic"].unique(maintain_order=True) ) - censoring_assumptions = ["excluded", "adjusted"] - competing_assumptions = ["excluded", "adjusted_as_negative", "adjusted_as_censored"] - - combinations = list( - itertools.product( - reference_groups, - fixed_time_horizons, - reals, - censoring_assumptions, - competing_assumptions, - ) - ) + combinations_frames: list[pl.DataFrame] = [ + _enum_dataframe("reference_group", reference_groups), + pl.DataFrame( + {"fixed_time_horizon": pl.Series(fixed_time_horizons, dtype=pl.Float64)} + ), + 
heuristics_combinations.with_columns( + [ + pl.col("censoring_heuristic").cast(censoring_heuristics_enum), + pl.col("competing_heuristic").cast(competing_heuristics_enum), + ] + ), + strata_combinations, + risk_set_scope_combinations, + _enum_dataframe("reals_labels", reals_labels), + ] - df_combinations = pd.DataFrame( - combinations, - columns=[ - "reference_group", - "fixed_time_horizon", - "reals", - "censoring_assumption", - "competing_assumption", - ], - ) + result = combinations_frames[0] + for frame in combinations_frames[1:]: + result = result.join(frame, how="cross") - return df_combinations.merge(strata_combinations, how="cross") + return result def pivot_longer_strata(data: pl.DataFrame) -> pl.DataFrame: @@ -501,34 +367,6 @@ def pivot_longer_strata(data: pl.DataFrame) -> pl.DataFrame: return data_long -def update_administrative_censoring(data_to_adjust: pd.DataFrame) -> pd.DataFrame: - data_to_adjust = data_to_adjust.copy() - data_to_adjust["reals"] = data_to_adjust["reals"].astype(str) - - pl_data = safe_pl_from_pandas(data_to_adjust) - - # Define logic in Python and map it row-wise (this avoids any column reference issues) - def adjust(row): - t = row["times"] - h = row["fixed_time_horizon"] - r = row["reals"] - if t > h and r == "real_positives": - return "real_negatives" - if t < h and r == "real_negatives": - return "real_censored" - return r - - pl_data = pl_data.with_columns( - [ - pl.struct(["times", "fixed_time_horizon", "reals"]) - .map_elements(adjust) - .alias("reals") - ] - ) - - return pl_data.to_pandas() - - def map_reals_to_labels_polars(data: pl.DataFrame) -> pl.DataFrame: return data.with_columns( [ @@ -567,333 +405,136 @@ def update_administrative_censoring_polars(data: pl.DataFrame) -> pl.DataFrame: def create_aj_data( reference_group_data, - censoring_assumption, - competing_assumption, + breaks, + censoring_heuristic, + competing_heuristic, fixed_time_horizons, + stratified_by: Sequence[str], + full_event_table: bool = False, + 
risk_set_scope: Sequence[str] = "within_stratum", ): """ - Create AJ estimates per strata based on censoring and competing assumptions. + Create AJ estimates per strata based on censoring and competing heuristicss. """ - if ( - censoring_assumption == "adjusted" - and competing_assumption == "adjusted_as_negative" - ): - aj_estimates_per_strata_adj_adjneg = ( - reference_group_data.group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) - ) - .join( - pl.DataFrame( - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_negative", - } - ), - how="cross", - ) - ) - return aj_estimates_per_strata_adj_adjneg - - elif ( - censoring_assumption == "excluded" - and competing_assumption == "adjusted_as_negative" - ): - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=pl.lit(fixed_time_horizons) - ).explode("fixed_time_horizon") - - aj_estimates_per_strata_censored = ( - exploded_data.filter( - (pl.col("times") < pl.col("fixed_time_horizon")) - & (pl.col("reals") == 0) - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_censored_est"}) - .with_columns(pl.col("real_censored_est").cast(pl.Float64)) - ) + print("stratified_by", stratified_by) + print("Creating aj data") - non_censored_data = exploded_data.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) - ) + def aj_estimates_with_cross(df, extra_cols): + return df.join(pl.DataFrame(extra_cols), how="cross") - aj_estimates_per_strata_noncensored = pl.concat( - [ - non_censored_data.filter( - pl.col("fixed_time_horizon") == fixed_time_horizon - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical", - ) + exploded = assign_and_explode_polars(reference_group_data, fixed_time_horizons) - return 
aj_estimates_per_strata_noncensored.join( - aj_estimates_per_strata_censored, on=["strata", "fixed_time_horizon"] - ).join( - pl.DataFrame( - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_negative", - } - ), - how="cross", - ) + event_table = prepare_event_table(reference_group_data) - elif ( - censoring_assumption == "adjusted" - and competing_assumption == "adjusted_as_censored" - ): - aj_estimates_per_strata_adj_adjcens = ( - reference_group_data.with_columns( - [ - pl.when(pl.col("reals") == 2) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata(group, fixed_time_horizons) - ) - .join( - pl.DataFrame( - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "adjusted_as_censored", - } - ), - how="cross", - ) - ) - return aj_estimates_per_strata_adj_adjcens - - elif ( - censoring_assumption == "excluded" - and competing_assumption == "adjusted_as_censored" - ): - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=pl.lit(fixed_time_horizons) - ).explode("fixed_time_horizon") - - aj_estimates_per_strata_censored = ( - exploded_data.filter( - (pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals") == 0 - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_censored_est"}) - .with_columns(pl.col("real_censored_est").cast(pl.Float64)) - ) + # TODO: solve strata in the pipeline - non_censored_data = exploded_data.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals") > 0 - ).with_columns( - [ - pl.when((pl.col("reals") == 2)) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") - ] - ) + excluded_events = _extract_excluded_events( + event_table, fixed_time_horizons, censoring_heuristic, competing_heuristic + ) - aj_estimates_per_strata_noncensored = pl.concat( - [ - non_censored_data.filter( - 
pl.col("fixed_time_horizon") == fixed_time_horizon - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical", - ) + print("stratified_by before _aj_adjusted_events", stratified_by) - aj_estimates_per_strata_excl_adjcens = aj_estimates_per_strata_noncensored.join( - aj_estimates_per_strata_censored, on=["strata", "fixed_time_horizon"] - ).join( - pl.DataFrame( - { - "censoring_assumption": "excluded", - "competing_assumption": "adjusted_as_censored", - } - ), - how="cross", + aj_dfs = [] + for rscope in risk_set_scope: + aj_res = _aj_adjusted_events( + reference_group_data, + breaks, + exploded, + censoring_heuristic, + competing_heuristic, + fixed_time_horizons, + stratified_by, + full_event_table, + rscope, ) - return aj_estimates_per_strata_excl_adjcens - - elif censoring_assumption == "adjusted" and competing_assumption == "excluded": - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=fixed_time_horizons - ).explode("fixed_time_horizon") - - aj_estimates_per_strata_competing = ( - exploded_data.filter( - (pl.col("reals") == 2) - & (pl.col("times") < pl.col("fixed_time_horizon")) - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_competing_est"}) - .with_columns(pl.col("real_competing_est").cast(pl.Float64)) - ) + print("aj_res before select", aj_res.columns) + print("aj_res", aj_res) - non_competing_data = exploded_data.filter( - (pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals") != 2 - ).with_columns( + aj_res = aj_res.select( [ - pl.when((pl.col("reals") == 2)) - .then(pl.lit(0)) - .otherwise(pl.col("reals")) - .alias("reals") + "strata", + "times", + "chosen_cutoff", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "estimate_origin", + "fixed_time_horizon", + "risk_set_scope", ] ) - aj_estimates_per_strata_noncompeting = 
pl.concat( - [ - non_competing_data.filter( - pl.col("fixed_time_horizon") == fixed_time_horizon - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical", - ).select(pl.exclude("real_competing_est")) - - aj_estimates_per_strata_adj_excl = ( - aj_estimates_per_strata_competing.join( - aj_estimates_per_strata_noncompeting, - on=["strata", "fixed_time_horizon"], - ) - .join( - pl.DataFrame( - { - "real_censored_est": 0.0, - "censoring_assumption": "adjusted", - "competing_assumption": "excluded", - } - ), - how="cross", - ) - .select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - "real_competing_est", - "real_censored_est", - "censoring_assumption", - "competing_assumption", - ] - ) - ) - - return aj_estimates_per_strata_adj_excl + print("aj_res columns", aj_res.columns) + print("aj_res", aj_res) - elif censoring_assumption == "excluded" and competing_assumption == "excluded": - exploded_data = reference_group_data.with_columns( - fixed_time_horizon=pl.lit(fixed_time_horizons) - ).explode("fixed_time_horizon") + aj_dfs.append(aj_res) - print("Exploded data:", exploded_data) + aj_df = pl.concat(aj_dfs, how="vertical") - aj_estimates_per_strata_censored = ( - exploded_data.filter( - (pl.col("times") < pl.col("fixed_time_horizon")) & pl.col("reals") == 0 - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": "real_censored_est"}) - .with_columns(pl.col("real_censored_est").cast(pl.Float64)) - ) + print("aj_df columns", aj_df.columns) - print("AJ estimates per strata censored:", aj_estimates_per_strata_censored) + # print("aj_df") + # print(aj_df) - aj_estimates_per_strata_competing = ( - exploded_data.filter( - (pl.col("reals") == 2) - & (pl.col("times") < pl.col("fixed_time_horizon")) - ) - .group_by(["strata", "fixed_time_horizon"]) - .count() - .rename({"count": 
"real_competing_est"}) - .with_columns(pl.col("real_competing_est").cast(pl.Float64)) - ) + result = aj_df.join(excluded_events, on=["fixed_time_horizon"], how="left") - print("AJ estimates per strata competing:", aj_estimates_per_strata_competing) + return aj_estimates_with_cross( + result, + { + "censoring_heuristic": censoring_heuristic, + "competing_heuristic": competing_heuristic, + }, + ).select( + [ + "strata", + "chosen_cutoff", + "fixed_time_horizon", + "times", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "real_censored_est", + "censoring_heuristic", + "competing_heuristic", + "estimate_origin", + "risk_set_scope", + ] + ) - non_censored_non_competing_data = exploded_data.filter( - ((pl.col("times") >= pl.col("fixed_time_horizon")) | pl.col("reals") == 1) - ) - aj_estimates_per_strata_noncensored_noncompeting = pl.concat( - [ - non_censored_non_competing_data.filter( - pl.col("fixed_time_horizon") == fixed_time_horizon - ) - .group_by("strata") - .map_groups( - lambda group: extract_aj_estimate_for_strata( - group, [fixed_time_horizon] - ) - ) - for fixed_time_horizon in fixed_time_horizons - ], - how="vertical", +def _extract_excluded_events( + event_table: pl.DataFrame, + fixed_time_horizons: list[float], + censoring_heuristic: str, + competing_heuristic: str, +) -> pl.DataFrame: + horizons_df = pl.DataFrame({"times": fixed_time_horizons}).sort("times") + + excluded_events = horizons_df.join_asof( + event_table.with_columns( + pl.col("count_0").cum_sum().cast(pl.Float64).alias("real_censored_est"), + pl.col("count_2").cum_sum().cast(pl.Float64).alias("real_competing_est"), + ).select( + pl.col("times"), + pl.col("real_censored_est"), + pl.col("real_competing_est"), + ), + left_on="times", + right_on="times", + ).with_columns([pl.col("times").alias("fixed_time_horizon")]) + + if censoring_heuristic != "excluded": + excluded_events = excluded_events.with_columns( + pl.lit(0.0).alias("real_censored_est") ) - 
aj_estimates_per_strata_excl_excl = ( - aj_estimates_per_strata_competing.join( - aj_estimates_per_strata_censored, on=["strata", "fixed_time_horizon"] - ) - .join( - aj_estimates_per_strata_noncensored_noncompeting, - on=["strata", "fixed_time_horizon"], - ) - .join( - pl.DataFrame( - { - "censoring_assumption": "excluded", - "competing_assumption": "excluded", - } - ), - how="cross", - ) - .select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - "real_competing_est", - "real_censored_est", - "censoring_assumption", - "competing_assumption", - ] - ) + if competing_heuristic != "excluded": + excluded_events = excluded_events.with_columns( + pl.lit(0.0).alias("real_competing_est") ) - return aj_estimates_per_strata_excl_excl + return excluded_events def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: @@ -934,156 +575,245 @@ def extract_crude_estimate_polars(data: pl.DataFrame) -> pl.DataFrame: # return result_pandas -def extract_aj_estimate_for_strata(data_to_adjust, horizons): - n = data_to_adjust.height - event_table = prepare_event_table(data_to_adjust) - aj_estimate_for_strata_polars = predict_aj_estimates( - event_table, pl.Series(horizons) - ) - - aj_estimate_for_strata_polars = aj_estimate_for_strata_polars.rename( - {"fixed_time_horizons": "fixed_time_horizon"} - ) +def extract_aj_estimate_by_cutoffs( + data_to_adjust, horizons, breaks, stratified_by, full_event_table: bool +): + # n = data_to_adjust.height - return aj_estimate_for_strata_polars.with_columns( - [ - (pl.col("state_occupancy_probability_0") * n).alias("real_negatives_est"), - (pl.col("state_occupancy_probability_1") * n).alias("real_positives_est"), - (pl.col("state_occupancy_probability_2") * n).alias("real_competing_est"), - pl.col("fixed_time_horizon").cast(pl.Float64), - pl.lit(data_to_adjust["strata"][0]).alias("strata"), - ] - ).select( - [ - "strata", - "fixed_time_horizon", - "real_negatives_est", - "real_positives_est", - 
"real_competing_est", - ] + counts_per_strata = ( + data_to_adjust.group_by( + ["strata", "stratified_by", "upper_bound", "lower_bound"] + ) + .len(name="strata_count") + .with_columns(pl.col("strata_count").cast(pl.Float64)) ) + aj_estimates_predicted_positives = pl.DataFrame() + aj_estimates_predicted_negatives = pl.DataFrame() -def assign_and_explode(data: pd.DataFrame, fixed_time_horizons) -> pd.DataFrame: - # Ensure list type - if not isinstance(fixed_time_horizons, list): - fixed_time_horizons = [fixed_time_horizons] + for stratification_criteria in stratified_by: + for chosen_cutoff in breaks: + if stratification_criteria == "probability_threshold": + mask_predicted_positives = (pl.col("upper_bound") > chosen_cutoff) & ( + pl.col("stratified_by") == "probability_threshold" + ) + mask_predicted_negatives = (pl.col("upper_bound") <= chosen_cutoff) & ( + pl.col("stratified_by") == "probability_threshold" + ) - # Convert safely to Polars - df = safe_pl_from_pandas(data) + elif stratification_criteria == "ppcr": + mask_predicted_positives = ( + pl.col("lower_bound") > 1 - chosen_cutoff + ) & (pl.col("stratified_by") == "ppcr") + mask_predicted_negatives = ( + pl.col("lower_bound") <= 1 - chosen_cutoff + ) & (pl.col("stratified_by") == "ppcr") - # Add the repeated list to each row, then explode - df = df.with_columns( - pl.Series("fixed_time_horizon", [fixed_time_horizons] * df.height) - ).explode("fixed_time_horizon") + predicted_positives = data_to_adjust.filter(mask_predicted_positives) + predicted_negatives = data_to_adjust.filter(mask_predicted_negatives) - return df.to_pandas() + counts_per_strata_predicted_positives = counts_per_strata.filter( + mask_predicted_positives + ) + counts_per_strata_predicted_negatives = counts_per_strata.filter( + mask_predicted_negatives + ) + event_table_predicted_positives = prepare_event_table(predicted_positives) + event_table_predicted_negatives = prepare_event_table(predicted_negatives) -def assign_and_explode_polars( - 
data: pl.DataFrame, fixed_time_horizons: list[float] -) -> pl.DataFrame: - return ( - data.with_columns(pl.lit(fixed_time_horizons).alias("fixed_time_horizon")) - .explode("fixed_time_horizon") - .with_columns(pl.col("fixed_time_horizon").cast(pl.Float64)) - ) - + aj_estimate_predicted_positives = ( + ( + predict_aj_estimates( + event_table_predicted_positives, + pl.Series(horizons), + full_event_table, + ) + .with_columns( + pl.lit(chosen_cutoff).alias("chosen_cutoff"), + pl.lit(stratification_criteria) + .alias("stratified_by") + .cast(pl.Enum(["probability_threshold", "ppcr"])), + ) + .join( + counts_per_strata_predicted_positives, + on=["stratified_by"], + how="left", + ) + .with_columns( + [ + ( + pl.col("state_occupancy_probability_0") + * pl.col("strata_count") + ).alias("real_negatives_est"), + ( + pl.col("state_occupancy_probability_1") + * pl.col("strata_count") + ).alias("real_positives_est"), + ( + pl.col("state_occupancy_probability_2") + * pl.col("strata_count") + ).alias("real_competing_est"), + ] + ) + ) + .select( + [ + "strata", + # "stratified_by", + "times", + "chosen_cutoff", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "estimate_origin", + ] + ) + .with_columns([pl.col("times").alias("fixed_time_horizon")]) + ) -def extract_aj_estimate_by_assumptions_polars( - data_to_adjust: pl.DataFrame, - fixed_time_horizons: list[float], - censoring_assumption="excluded", - competing_assumption="excluded", -) -> pl.DataFrame: - def to_pd(df): - return df.to_pandas() + aj_estimate_predicted_negatives = ( + ( + predict_aj_estimates( + event_table_predicted_negatives, + pl.Series(horizons), + full_event_table, + ) + .with_columns( + pl.lit(chosen_cutoff).alias("chosen_cutoff"), + pl.lit(stratification_criteria) + .alias("stratified_by") + .cast(pl.Enum(["probability_threshold", "ppcr"])), + ) + .join( + counts_per_strata_predicted_negatives, + on=["stratified_by"], + how="left", + ) + .with_columns( + [ + ( + 
pl.col("state_occupancy_probability_0") + * pl.col("strata_count") + ).alias("real_negatives_est"), + ( + pl.col("state_occupancy_probability_1") + * pl.col("strata_count") + ).alias("real_positives_est"), + ( + pl.col("state_occupancy_probability_2") + * pl.col("strata_count") + ).alias("real_competing_est"), + ] + ) + ) + .select( + [ + "strata", + # "stratified_by", + "times", + "chosen_cutoff", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + "estimate_origin", + ] + ) + .with_columns([pl.col("times").alias("fixed_time_horizon")]) + ) - def to_pl(df): - return pl.from_pandas(df) + aj_estimates_predicted_negatives = pl.concat( + [aj_estimates_predicted_negatives, aj_estimate_predicted_negatives], + how="vertical", + ) - if censoring_assumption == "excluded" and competing_assumption == "excluded": - aj_estimate_data = ( - assign_and_explode_polars(data_to_adjust, fixed_time_horizons) - .pipe(update_administrative_censoring_polars) - .pipe(extract_crude_estimate_polars) - ) + aj_estimates_predicted_positives = pl.concat( + [aj_estimates_predicted_positives, aj_estimate_predicted_positives], + how="vertical", + ) - aj_estimate_data = aj_estimate_data.with_columns( - pl.col("reals_estimate").cast(pl.Float64).alias("reals_estimate") - ) + aj_estimate_by_cutoffs = pl.concat( + [aj_estimates_predicted_negatives, aj_estimates_predicted_positives], + how="vertical", + ) - aj_estimate_data = aj_estimate_data.with_columns( - pl.col("strata").cast(pl.Categorical).alias("strata") - ) + print("aj_estimate_by_cutoffs", aj_estimate_by_cutoffs) - aj_estimate_data = aj_estimate_data.with_columns( - pl.col("fixed_time_horizon").cast(pl.Int64).alias("fixed_time_horizon") - ) + return aj_estimate_by_cutoffs - if censoring_assumption == "adjusted" and competing_assumption == "excluded": - exploded = assign_and_explode_polars(data_to_adjust, fixed_time_horizons) - exploded = update_administrative_censoring_polars(exploded) - # Separate "real_competing" for 
crude estimation - real_competing_data = exploded.filter( - pl.col("reals_labels") == "real_competing" - ) - non_competing_data = exploded.filter(pl.col("reals_labels") != "real_competing") +def extract_aj_estimate_for_strata(data_to_adjust, horizons, full_event_table: bool): + n = data_to_adjust.height - # Crude estimate for "real_competing" using Polars - aj_estimate_competing = extract_crude_estimate_polars(real_competing_data) + event_table = prepare_event_table(data_to_adjust) - aj_estimate_competing = aj_estimate_competing.with_columns( - pl.col("strata").cast(pl.Categorical).alias("strata") - ) + aj_estimate_for_strata_polars = predict_aj_estimates( + event_table, pl.Series(horizons), full_event_table + ) - aj_estimate_competing = aj_estimate_competing.with_columns( - pl.col("fixed_time_horizon").cast(pl.Int64).alias("fixed_time_horizon") + if len(horizons) == 1: + aj_estimate_for_strata_polars = aj_estimate_for_strata_polars.with_columns( + pl.lit(horizons[0]).alias("fixed_time_horizon") ) - aj_estimate_competing = aj_estimate_competing.with_columns( - pl.col("reals_estimate").cast(pl.Float64).alias("reals_estimate") - ) + else: + fixed_df = aj_estimate_for_strata_polars.filter( + pl.col("estimate_origin") == "fixed_time_horizons" + ).with_columns([pl.col("times").alias("fixed_time_horizon")]) - # Aalen-Johansen estimate for non-competing using Lifelines (pandas) - aj_estimate_adjusted_list = [ - extract_aj_estimate( - to_pd(non_competing_data.filter(pl.col("fixed_time_horizon") == h)), - fixed_time_horizons=[h], + event_df = ( + aj_estimate_for_strata_polars.filter( + pl.col("estimate_origin") == "event_table" ) - for h in fixed_time_horizons - ] - - # Combine results - aj_estimate_adjusted = to_pl( - pd.concat(aj_estimate_adjusted_list, ignore_index=True) + .with_columns([pl.lit(horizons).alias("fixed_time_horizon")]) + .explode("fixed_time_horizon") ) - reals_labels = [ - "real_negatives", - "real_positives", - "real_competing", - "real_censored", - ] - 
reals_enum = pl.Enum(reals_labels) - - aj_estimate_adjusted = aj_estimate_adjusted.with_columns( - pl.col("reals").cast(reals_enum).alias("reals") - ) + aj_estimate_for_strata_polars = pl.concat( + [fixed_df, event_df], how="vertical" + ).sort("estimate_origin", "fixed_time_horizon", "times") - aj_estimate_data = pl.concat([aj_estimate_competing, aj_estimate_adjusted]) + # print("aj_estimate_for_strata_polars") + # print(aj_estimate_for_strata_polars) - return aj_estimate_data.with_columns( + return aj_estimate_for_strata_polars.with_columns( + [ + (pl.col("state_occupancy_probability_0") * n).alias("real_negatives_est"), + (pl.col("state_occupancy_probability_1") * n).alias("real_positives_est"), + (pl.col("state_occupancy_probability_2") * n).alias("real_competing_est"), + pl.col("fixed_time_horizon").cast(pl.Float64), + pl.lit(data_to_adjust["strata"][0]).alias("strata"), + ] + ).select( [ - pl.lit(censoring_assumption).alias("censoring_assumption"), - pl.lit(competing_assumption).alias("competing_assumption"), + "strata", + "times", + "fixed_time_horizon", + "real_negatives_est", + "real_positives_est", + "real_competing_est", + pl.col("estimate_origin"), ] ) -def create_list_data_to_adjust_polars( - probs_dict, reals_dict, times_dict, stratified_by, by +def assign_and_explode_polars( + data: pl.DataFrame, fixed_time_horizons: list[float] +) -> pl.DataFrame: + return ( + data.with_columns(pl.lit(fixed_time_horizons).alias("fixed_time_horizon")) + .explode("fixed_time_horizon") + .with_columns(pl.col("fixed_time_horizon").cast(pl.Float64)) + ) + + +def create_list_data_to_adjust( + aj_data_combinations: pl.DataFrame, + probs_dict: Dict[str, np.ndarray], + reals_dict: Union[np.ndarray, Dict[str, np.ndarray]], + times_dict: Union[np.ndarray, Dict[str, np.ndarray]], + stratified_by, + by, ): # reference_groups = list(probs_dict.keys()) reference_group_labels = list(probs_dict.keys()) @@ -1091,24 +821,42 @@ def create_list_data_to_adjust_polars( reference_group_enum 
= pl.Enum(reference_group_labels) + strata_enum_dtype = aj_data_combinations.schema["strata"] + # Flatten and ensure list format data_to_adjust = pl.DataFrame( { - "reference_group": sum( - [[group] * num_reals for group in reference_group_labels], [] - ), - "probs": sum( - [probs_dict[group].tolist() for group in reference_group_labels], [] + "reference_group": np.repeat(reference_group_labels, num_reals), + "probs": np.concatenate( + [probs_dict[group] for group in reference_group_labels] ), - "reals": list(reals_dict) * len(reference_group_labels), - "times": list(times_dict) * len(reference_group_labels), + "reals": np.tile(np.asarray(reals_dict), len(reference_group_labels)), + "times": np.tile(np.asarray(times_dict), len(reference_group_labels)), } ).with_columns(pl.col("reference_group").cast(reference_group_enum)) # Apply strata - data_to_adjust = add_cutoff_strata_polars(data_to_adjust, by=by) + data_to_adjust = add_cutoff_strata( + data_to_adjust, by=by, stratified_by=stratified_by + ) + data_to_adjust = pivot_longer_strata(data_to_adjust) + data_to_adjust = ( + data_to_adjust.with_columns([pl.col("strata")]) + .with_columns(pl.col("strata").cast(strata_enum_dtype)) + .join( + aj_data_combinations.select( + pl.col("strata"), + pl.col("stratified_by"), + pl.col("upper_bound"), + pl.col("lower_bound"), + ).unique(), + how="left", + on=["strata", "stratified_by"], + ) + ) + reals_labels = [ "real_negatives", "real_positives", @@ -1137,23 +885,6 @@ def create_list_data_to_adjust_polars( return list_data_to_adjust -def safe_pl_from_pandas(df: pd.DataFrame) -> pl.DataFrame: - df = df.copy() - for col in df.select_dtypes(include="category").columns: - df[col] = df[col].astype(str) - for col in df.columns: - if df[col].dtype == "object": - try: - if any( - isinstance(val, pd._libs.interval.Interval) - for val in df[col].dropna() - ): - df[col] = df[col].astype(str) - except Exception: - df[col] = df[col].astype(str) - return pl.from_pandas(df) - - def 
ensure_no_categorical(df: pd.DataFrame) -> pd.DataFrame: df = df.copy() for col in df.select_dtypes(include="category").columns: @@ -1161,98 +892,105 @@ def ensure_no_categorical(df: pd.DataFrame) -> pd.DataFrame: return df -def ensure_arrow_safe(df: pd.DataFrame) -> pd.DataFrame: - df = df.copy() - - # Convert all category columns to string - for col in df.select_dtypes(include="category").columns: - df[col] = df[col].astype(str) - - # Convert Interval and other Arrow-unsafe objects to string - for col in df.columns: - if df[col].dtype == "object": - try: - # Try to catch Interval or any other problematic type - if any( - isinstance(val, pd._libs.interval.Interval) - for val in df[col].dropna() - ): - df[col] = df[col].astype(str) - except Exception: - df[col] = df[col].astype( - str - ) # fallback: convert whole column to string - - return df - - -def extract_aj_estimate_by_assumptions( +def extract_aj_estimate_by_heuristics( df: pl.DataFrame, - assumption_sets: list[dict], - fixed_time_horizons: pl.Series, + breaks: Sequence[float], + heuristics_sets: list[dict], + fixed_time_horizons: list[float], + stratified_by: Sequence[str], + risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: aj_dfs = [] - for assumption in assumption_sets: - censoring = assumption["censoring_assumption"] - competing = assumption["competing_assumption"] + print("stratified_by", stratified_by) + + for heuristic in heuristics_sets: + censoring = heuristic["censoring_heuristic"] + competing = heuristic["competing_heuristic"] + + print("stratified_by", stratified_by) + + print("df before create_aj_data") + print(df.columns) + print(df.schema) aj_df = create_aj_data( - df, censoring, competing, fixed_time_horizons + df, + breaks, + censoring, + competing, + fixed_time_horizons, + stratified_by=stratified_by, + full_event_table=False, + risk_set_scope=risk_set_scope, ).with_columns( [ - pl.lit(censoring).alias("censoring_assumption"), - pl.lit(competing).alias("competing_assumption"), + 
pl.lit(censoring).alias("censoring_heuristic"), + pl.lit(competing).alias("competing_heuristic"), ] ) - print( - f"Assumption: censoring={censoring}, competing={competing}, rows={aj_df.height}" - ) + aj_dfs.append(aj_df) - aj_estimates_data = pl.concat(aj_dfs) + # print("aj_dfs", aj_dfs) + + aj_estimates_data = pl.concat(aj_dfs).drop(["estimate_origin", "times"]) + + print("aj_estimates_data", aj_estimates_data) aj_estimates_unpivoted = aj_estimates_data.unpivot( index=[ "strata", + "chosen_cutoff", "fixed_time_horizon", - "censoring_assumption", - "competing_assumption", + "censoring_heuristic", + "competing_heuristic", + "risk_set_scope", ], variable_name="reals_labels", value_name="reals_estimate", ) + print("aj_estimates_unpivoted", aj_estimates_unpivoted) + return aj_estimates_unpivoted def create_adjusted_data( list_data_to_adjust_polars: dict[str, pl.DataFrame], - assumption_sets: list[dict[str, str]], + heuristics_sets: list[dict[str, str]], fixed_time_horizons: list[float], + breaks: Sequence[float], + stratified_by: Sequence[str], + risk_set_scope: str = "within_stratum", ) -> pl.DataFrame: all_results = [] reference_groups = list(list_data_to_adjust_polars.keys()) reference_group_enum = pl.Enum(reference_groups) - censoring_assumption_labels = ["excluded", "adjusted"] - censoring_assumption_enum = pl.Enum(censoring_assumption_labels) - - competing_assumption_labels = [ - "excluded", - "adjusted_as_negative", - "adjusted_as_censored", - ] - competing_assumption_enum = pl.Enum(competing_assumption_labels) + heuristics_df = pl.DataFrame(heuristics_sets) + censoring_heuristic_enum = pl.Enum( + heuristics_df["censoring_heuristic"].unique(maintain_order=True) + ) + competing_heuristic_enum = pl.Enum( + heuristics_df["competing_heuristic"].unique(maintain_order=True) + ) for reference_group, df in list_data_to_adjust_polars.items(): - input_df = df.select(["strata", "reals", "times"]) + input_df = df.select( + ["strata", "reals", "times", "upper_bound", 
"lower_bound", "stratified_by"] + ) + + print("stratified_by", stratified_by) - aj_result = extract_aj_estimate_by_assumptions( + aj_result = extract_aj_estimate_by_heuristics( input_df, - assumption_sets=assumption_sets, + breaks, + heuristics_sets=heuristics_sets, fixed_time_horizons=fixed_time_horizons, + stratified_by=stratified_by, + risk_set_scope=risk_set_scope, ) aj_result_with_group = aj_result.with_columns( @@ -1265,6 +1003,8 @@ def create_adjusted_data( all_results.append(aj_result_with_group) + print("all_results", all_results) + reals_enum_dtype = pl.Enum( [ "real_negatives", @@ -1280,8 +1020,8 @@ def create_adjusted_data( .with_columns( [ pl.col("reals_labels").str.replace(r"_est$", "").cast(reals_enum_dtype), - pl.col("censoring_assumption").cast(censoring_assumption_enum), - pl.col("competing_assumption").cast(competing_assumption_enum), + pl.col("censoring_heuristic").cast(censoring_heuristic_enum), + pl.col("competing_heuristic").cast(competing_heuristic_enum), ] ) ) @@ -1294,18 +1034,440 @@ def cast_and_join_adjusted_data(aj_data_combinations, aj_estimates_data): pl.col("strata").cast(strata_enum_dtype) ) - final_adjusted_data_polars = aj_data_combinations.with_columns( - [pl.col("strata")] - ).join( - aj_estimates_data, - on=[ - "strata", - "fixed_time_horizon", - "censoring_assumption", - "competing_assumption", - "reals_labels", - "reference_group", - ], - how="left", + final_adjusted_data_polars = ( + aj_data_combinations.with_columns([pl.col("strata")]) + .join( + aj_estimates_data, + on=[ + "strata", + "fixed_time_horizon", + "censoring_heuristic", + "competing_heuristic", + "reals_labels", + "reference_group", + "chosen_cutoff", + "risk_set_scope", + ], + how="left", + ) + .with_columns( + pl.when( + ( + (pl.col("chosen_cutoff") >= pl.col("upper_bound")) + & (pl.col("stratified_by") == "probability_threshold") + ) + | ( + ((1 - pl.col("chosen_cutoff")) >= pl.col("mid_point")) + & (pl.col("stratified_by") == "ppcr") + ) + ) + 
.then(pl.lit("predicted_negatives")) + .otherwise(pl.lit("predicted_positives")) + .cast(pl.Enum(["predicted_negatives", "predicted_positives"])) + .alias("prediction_label") + ) + .with_columns( + ( + pl.when( + (pl.col("prediction_label") == pl.lit("predicted_positives")) + & (pl.col("reals_labels") == pl.lit("real_positives")) + ) + .then(pl.lit("true_positives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_positives")) + & (pl.col("reals_labels") == pl.lit("real_negatives")) + ) + .then(pl.lit("false_positives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_negatives")) + & (pl.col("reals_labels") == pl.lit("real_negatives")) + ) + .then(pl.lit("true_negatives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_negatives")) + & (pl.col("reals_labels") == pl.lit("real_positives")) + ) + .then(pl.lit("false_negatives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_negatives")) + & (pl.col("reals_labels") == pl.lit("real_competing")) + & (pl.col("competing_heuristic") == pl.lit("adjusted_as_negative")) + ) + .then(pl.lit("true_negatives")) + .when( + (pl.col("prediction_label") == pl.lit("predicted_positives")) + & (pl.col("reals_labels") == pl.lit("real_competing")) + & (pl.col("competing_heuristic") == pl.lit("adjusted_as_negative")) + ) + .then(pl.lit("false_positives")) + .otherwise(pl.lit("excluded")) # or pl.lit(None) if you prefer nulls + .cast( + pl.Enum( + [ + "true_positives", + "false_positives", + "true_negatives", + "false_negatives", + "excluded", + ] + ) + ) + ).alias("classification_outcome") + ) ) return final_adjusted_data_polars + + +def _censored_count(df: pl.DataFrame) -> pl.DataFrame: + return ( + df.with_columns( + ((pl.col("times") <= pl.col("fixed_time_horizon")) & (pl.col("reals") == 0)) + .cast(pl.Float64) + .alias("is_censored") + ) + .group_by(["strata", "fixed_time_horizon"]) + .agg(pl.col("is_censored").sum().alias("real_censored_est")) + ) + + +def _competing_count(df: 
pl.DataFrame) -> pl.DataFrame: + return ( + df.with_columns( + ((pl.col("times") <= pl.col("fixed_time_horizon")) & (pl.col("reals") == 2)) + .cast(pl.Float64) + .alias("is_competing") + ) + .group_by(["strata", "fixed_time_horizon"]) + .agg(pl.col("is_competing").sum().alias("real_competing_est")) + ) + + +def _aj_estimates_by_cutoff_per_horizon( + df: pl.DataFrame, + horizons: list[float], + breaks: Sequence[float], + stratified_by: Sequence[str], +) -> pl.DataFrame: + return pl.concat( + [ + df.filter(pl.col("fixed_time_horizon") == h) + .group_by("strata") + .map_groups( + lambda group: extract_aj_estimate_by_cutoffs( + group, [h], breaks, stratified_by, full_event_table=False + ) + ) + for h in horizons + ], + how="vertical", + ) + + +def _aj_estimates_per_horizon( + df: pl.DataFrame, horizons: list[float], full_event_table: bool +) -> pl.DataFrame: + return pl.concat( + [ + df.filter(pl.col("fixed_time_horizon") == h) + .group_by("strata") + .map_groups( + lambda group: extract_aj_estimate_for_strata( + group, [h], full_event_table + ) + ) + for h in horizons + ], + how="vertical", + ) + + +def _aj_adjusted_events( + reference_group_data: pl.DataFrame, + breaks: Sequence[float], + exploded: pl.DataFrame, + censoring: str, + competing: str, + horizons: list[float], + stratified_by: Sequence[str], + full_event_table: bool = False, + risk_set_scope: str = "within_stratum", +) -> pl.DataFrame: + print("reference_group_data") + print(reference_group_data) + + strata_enum_dtype = reference_group_data.schema["strata"] + + # Special-case: adjusted censoring + competing adjusted_as_negative supports pooled_by_cutoff + if censoring == "adjusted" and competing == "adjusted_as_negative": + if risk_set_scope == "within_stratum": + print("reference_group_data", reference_group_data) + + adjusted = ( + reference_group_data.group_by("strata") + .map_groups( + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) + ) + 
.join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") + ) + # preserve the original enum dtype for 'strata' coming from reference_group_data + + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] + ) + + return adjusted + + elif risk_set_scope == "pooled_by_cutoff": + print("reference_group_data", reference_group_data) + + adjusted = extract_aj_estimate_by_cutoffs( + reference_group_data, horizons, breaks, stratified_by, full_event_table + ) + adjusted = adjusted.with_columns( + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope") + ) + return adjusted + + # Special-case: both excluded (faster branch in original) + if censoring == "excluded" and competing == "excluded": + non_censored_non_competing = exploded.filter( + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") == 1) + ) + + adjusted = _aj_estimates_per_horizon( + non_censored_non_competing, horizons, full_event_table + ) + + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] + ).join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") + + return adjusted + + # Special-case: competing excluded (handled by filtering out competing events) + if competing == "excluded": + print("running for censoring adjusted and competing excluded") + + # Use exploded to apply filters that depend on fixed_time_horizon consistently + non_competing = exploded.filter( + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") != 2) + ).with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(0)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + + print("non_competing data", non_competing) + + if risk_set_scope == "within_stratum": + adjusted = ( + 
_aj_estimates_per_horizon(non_competing, horizons, full_event_table) + # .select(pl.exclude("real_competing_est")) + .join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") + ) + + elif risk_set_scope == "pooled_by_cutoff": + adjusted = extract_aj_estimate_by_cutoffs( + non_competing, horizons, breaks, stratified_by, full_event_table + ) + + print("adjusted after join cutoffs", adjusted) + + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] + ) + return adjusted + + # For remaining cases, determine base dataframe depending on censoring rule: + # - "adjusted": use the full reference_group_data (events censored at horizon are kept/adjusted) + # - "excluded": remove administratively censored observations (use exploded with filter) + base_df = ( + exploded.filter( + (pl.col("times") > pl.col("fixed_time_horizon")) | (pl.col("reals") > 0) + ) + if censoring == "excluded" + else reference_group_data + ) + + # Apply competing-event transformation if required + if competing == "adjusted_as_censored": + base_df = base_df.with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(0)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + elif competing == "adjusted_as_composite": + base_df = base_df.with_columns( + pl.when(pl.col("reals") == 2) + .then(pl.lit(1)) + .otherwise(pl.col("reals")) + .alias("reals") + ) + # competing == "adjusted_as_negative": keep reals as-is (no transform) + + # Finally choose aggregation strategy: per-stratum or horizon-wise + if censoring == "excluded": + # For excluded censoring we always evaluate per-horizon on the filtered (exploded) dataset + + if risk_set_scope == "within_stratum": + adjusted = _aj_estimates_per_horizon(base_df, horizons, full_event_table) + + adjusted = adjusted.join( + pl.DataFrame({"chosen_cutoff": breaks}), how="cross" + ) + + print("adjusted after join", adjusted) + + 
elif risk_set_scope == "pooled_by_cutoff": + adjusted = _aj_estimates_by_cutoff_per_horizon( + base_df, horizons, breaks, stratified_by + ) + + adjusted = adjusted.with_columns( + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope") + ) + + return adjusted.with_columns(pl.col("strata").cast(strata_enum_dtype)) + else: + # For adjusted censoring we aggregate within strata + + if risk_set_scope == "within_stratum": + adjusted = ( + base_df.group_by("strata") + .map_groups( + lambda group: extract_aj_estimate_for_strata( + group, horizons, full_event_table + ) + ) + .join(pl.DataFrame({"chosen_cutoff": breaks}), how="cross") + ) + + elif risk_set_scope == "pooled_by_cutoff": + adjusted = extract_aj_estimate_by_cutoffs( + base_df, horizons, breaks, stratified_by, full_event_table + ) + + adjusted = adjusted.with_columns( + [ + pl.col("strata").cast(strata_enum_dtype), + pl.lit(risk_set_scope) + .cast(pl.Enum(["within_stratum", "pooled_by_cutoff"])) + .alias("risk_set_scope"), + ] + ) + + return adjusted + + +def _calculate_cumulative_aj_data(aj_data: pl.DataFrame) -> pl.DataFrame: + cumulative_aj_data = ( + aj_data.filter(pl.col("risk_set_scope") == "pooled_by_cutoff") + .group_by( + [ + "reference_group", + "fixed_time_horizon", + "censoring_heuristic", + "competing_heuristic", + "stratified_by", + "chosen_cutoff", + "classification_outcome", + ] + ) + .agg([pl.col("reals_estimate").sum()]) + .pivot(on="classification_outcome", values="reals_estimate") + .with_columns( + (pl.col("true_positives") + pl.col("false_positives")).alias( + "predicted_positives" + ), + (pl.col("true_negatives") + pl.col("false_negatives")).alias( + "predicted_negatives" + ), + (pl.col("true_positives") + pl.col("false_negatives")).alias( + "real_positives" + ), + (pl.col("false_positives") + pl.col("true_negatives")).alias( + "real_negatives" + ), + ( + pl.col("true_positives") + + pl.col("true_negatives") + + pl.col("false_positives") + 
+ pl.col("false_negatives") + ).alias("n"), + ) + .with_columns( + (pl.col("true_positives") + pl.col("false_positives")).alias( + "predicted_positives" + ), + (pl.col("true_negatives") + pl.col("false_negatives")).alias( + "predicted_negatives" + ), + (pl.col("true_positives") + pl.col("false_negatives")).alias( + "real_positives" + ), + (pl.col("false_positives") + pl.col("true_negatives")).alias( + "real_negatives" + ), + ( + pl.col("true_positives") + + pl.col("true_negatives") + + pl.col("false_positives") + + pl.col("false_negatives") + ).alias("n"), + ) + ) + + return cumulative_aj_data + + +def _turn_cumulative_aj_to_performance_data( + cumulative_aj_data: pl.DataFrame, +) -> pl.DataFrame: + performance_data = cumulative_aj_data.with_columns( + (pl.col("true_positives") / pl.col("real_positives")).alias("sensitivity"), + (pl.col("true_negatives") / pl.col("real_negatives")).alias("specificity"), + (pl.col("true_positives") / pl.col("predicted_positives")).alias("ppv"), + (pl.col("true_negatives") / pl.col("predicted_negatives")).alias("npv"), + ( + (pl.col("true_positives") / pl.col("real_positives")) + / (pl.col("real_positives") / pl.col("n")) + ).alias("lift"), + pl.when(pl.col("stratified_by") == "probability_threshold") + .then( + (pl.col("true_positives") / pl.col("n")) + - (pl.col("false_positives") / pl.col("n")) + * pl.col("chosen_cutoff") + / (1 - pl.col("chosen_cutoff")) + ) + .otherwise(None) + .alias("net_benefit"), + pl.when(pl.col("stratified_by") == "probability_threshold") + .then(pl.col("predicted_positives") / pl.col("n")) + .otherwise(pl.col("chosen_cutoff")) + .alias("ppcr"), + ) + + return performance_data diff --git a/src/rtichoke/summary_report/summary_report.py b/src/rtichoke/summary_report/summary_report.py index 8e20dc8..9fc6fd3 100644 --- a/src/rtichoke/summary_report/summary_report.py +++ b/src/rtichoke/summary_report/summary_report.py @@ -4,7 +4,7 @@ from rtichoke.helpers.send_post_request_to_r_rtichoke import 
send_requests_to_rtichoke_r from rtichoke.helpers.sandbox_observable_helpers import ( - create_list_data_to_adjust_polars, + create_list_data_to_adjust, ) import subprocess @@ -67,7 +67,7 @@ def create_data_for_summary_report(probs, reals, times, fixed_time_horizons): stratified_by = ["probability_threshold", "ppcr"] by = 0.1 - list_data_to_adjust_polars = create_list_data_to_adjust_polars( + list_data_to_adjust_polars = create_list_data_to_adjust( probs, reals, times, stratified_by=stratified_by, by=by ) diff --git a/src/rtichoke/utility/decision.py b/src/rtichoke/utility/decision.py index 4de7d0e..62436e8 100644 --- a/src/rtichoke/utility/decision.py +++ b/src/rtichoke/utility/decision.py @@ -18,7 +18,28 @@ def create_decision_curve( by: float = 0.01, stratified_by: str = "probability_threshold", size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Create Decision Curve @@ -64,7 +85,28 @@ def plot_decision_curve( min_p_threshold: int = 0, max_p_threshold: int = 1, size: Optional[int] = None, - color_values: List[str] = None, + color_values: List[str] = [ + "#1b9e77", + "#d95f02", + "#7570b3", + "#e7298a", + "#07004D", + "#E6AB02", + "#FE5F55", + "#54494B", + "#006E90", + "#BC96E6", + "#52050A", + "#1F271B", + "#BE7C4D", + "#63768D", + "#08A045", + "#320A28", + "#82FF9E", + "#2176FF", + "#D1603D", + "#585123", + ], url_api: str = "http://localhost:4242/", ) -> Figure: """Plot Decision Curve diff --git a/tests/test_rtichoke.py b/tests/test_rtichoke.py index 2822728..0ff1b91 100644 --- a/tests/test_rtichoke.py +++ b/tests/test_rtichoke.py @@ -3,7 +3,6 @@ """ from rtichoke.helpers.sandbox_observable_helpers 
import ( - create_aj_data, extract_aj_estimate_for_strata, ) @@ -11,43 +10,38 @@ import polars as pl from polars.testing import assert_frame_equal +TIMES = [24.1, 9.7, 49.9, 18.6, 34.8, 14.2, 39.2, 46.0, 4.3, 31.5] +REALS = [1, 1, 1, 1, 0, 2, 1, 2, 0, 1] +TIME_HORIZONS = [10.0, 30.0, 50.0] +BREAKS: list[float] = [0.0, 0.5, 1.0] -def test_create_aj_data() -> None: - df = pl.DataFrame( - { - "strata": ["group1"] * 5, - "reals": [0, 1, 2, 1, 0], - "times": [5.0, 3.0, 1.0, 4.0, 2.0], - } - ) - horizons = [1.0, 2.0, 3.0] - - result = create_aj_data( - df, - censoring_assumption="adjusted", - competing_assumption="adjusted_as_negative", - fixed_time_horizons=horizons, - ).sort("fixed_time_horizon") - expected = pl.DataFrame( +def _expected( + negatives: list[float], + positives: list[float], + competing: list[float], + censored: list[float], + censoring: str, + competing_assump: str, +) -> pl.DataFrame: + estimate_origin_enum = pl.Enum(["fixed_time_horizons", "event_table"]) + return pl.DataFrame( { "strata": ["group1", "group1", "group1"], "fixed_time_horizon": [1.0, 2.0, 3.0], - "real_negatives_est": [4.0, 4.0, 8 / 3], - "real_positives_est": [0.0, 0.0, 4 / 3], - "real_competing_est": [1.0, 1.0, 1.0], - "real_censored_est": [0.0, 0.0, 0.0], - "censoring_assumption": ["adjusted", "adjusted", "adjusted"], - "competing_assumption": [ - "adjusted_as_negative", - "adjusted_as_negative", - "adjusted_as_negative", - ], + "times": [1.0, 2.0, 3.0], + "real_negatives_est": negatives, + "real_positives_est": positives, + "real_competing_est": competing, + "real_censored_est": censored, + "censoring_assumption": [censoring] * 3, + "competing_assumption": [competing_assump] * 3, + "estimate_origin": pl.Series( + ["fixed_time_horizons"] * 3, dtype=estimate_origin_enum + ), } ) - assert_frame_equal(result, expected) - def test_extract_aj_estimate_for_strata_basic() -> None: df = pl.DataFrame( @@ -58,17 +52,109 @@ def test_extract_aj_estimate_for_strata_basic() -> None: } ) horizons = 
[1.0, 2.0, 3.0] - - result = extract_aj_estimate_for_strata(df, horizons).sort("fixed_time_horizon") + estimate_origin_enum = pl.Enum(["fixed_time_horizons", "event_table"]) + result = extract_aj_estimate_for_strata(df, horizons, full_event_table=False).sort( + "fixed_time_horizon" + ) expected = pl.DataFrame( { "strata": ["group1", "group1", "group1"], + "times": [1.0, 2.0, 3.0], "fixed_time_horizon": [1.0, 2.0, 3.0], "real_negatives_est": [4.0, 4.0, 8 / 3], "real_positives_est": [0.0, 0.0, 4 / 3], "real_competing_est": [1.0, 1.0, 1.0], + "estimate_origin": pl.Series( + ["fixed_time_horizons"] * 3, dtype=estimate_origin_enum + ), } ) assert_frame_equal(result, expected) + + +AJ_EXPECTED = { + ("adjusted", "adjusted_as_negative"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (5.555555555555555, 3.3333333333333335, 1.1111111111111112), + (0.0, 7.407407407407407, 2.5925925925925926), + ], + ("adjusted", "adjusted_as_censored"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (6.349206349206349, 3.6507936507936507, 0.0), + (0.0, 10.0, 0.0), + ], + ("adjusted", "adjusted_as_composite"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (5.555555555555555, 4.444444444444445, 0.0), + (0.0, 10.0, 0.0), + ], + ("adjusted", "excluded"): [ + (8.88888888888889, 1.1111111111111112, 0.0), + (5.625, 3.375, 0.0), + (0.0, 8.0, 0.0), + ], + ("excluded", "adjusted_as_negative"): [ + (8.0, 1.0, 0.0), + (5.0, 3.0, 1.0), + (0.0, 6.0, 2.0), + ], + ("excluded", "adjusted_as_censored"): [ + (8.0, 1.0, 0.0), + (5.714285714285714, 3.2857142857142856, 0.0), + (0.0, 8.0, 0.0), + ], + ("excluded", "adjusted_as_composite"): [ + (8.0, 1.0, 0.0), + (5.0, 4.0, 0.0), + (0.0, 8.0, 0.0), + ], + ("excluded", "excluded"): [ + (8.0, 1.0, 0.0), + (5.0, 3.0, 0.0), + (0.0, 6.0, 0.0), + ], +} + +EXCLUDED_EXPECTED = { + "adjusted": [0.0, 0.0, 0.0], + "excluded": [1.0, 1.0, 2.0], +} + +COMPETING_EXCLUDED = { + "excluded": [0.0, 1.0, 2.0], + "adjusted_as_negative": [0.0, 0.0, 0.0], + 
"adjusted_as_censored": [0.0, 0.0, 0.0], + "adjusted_as_composite": [0.0, 0.0, 0.0], +} + + +def _expected_aj_df(neg, pos, comp, include_comp=True): + estimate_origin_enum = pl.Enum(["fixed_time_horizons", "event_table"]) + + data = { + "strata": ["group1"] * 3, + "times": TIME_HORIZONS, + "fixed_time_horizon": TIME_HORIZONS, + "real_negatives_est": [neg[0], neg[1], neg[2]], + "real_positives_est": [pos[0], pos[1], pos[2]], + "estimate_origin": pl.Series( + ["fixed_time_horizons"] * 3, dtype=estimate_origin_enum + ), + } + if include_comp: + data["real_competing_est"] = [comp[0], comp[1], comp[2]] + + cols = [ + "strata", + "times", + "fixed_time_horizon", + "real_negatives_est", + "real_positives_est", + ] + if include_comp: + cols.append("real_competing_est") + cols.append("estimate_origin") + + return pl.DataFrame(data)[cols] diff --git a/uv.lock b/uv.lock index a8f5377..33a00b3 100644 --- a/uv.lock +++ b/uv.lock @@ -848,6 +848,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "dcurves" +version = "1.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lifelines" }, + { name = "matplotlib", version = "3.9.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "matplotlib", version = "3.10.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pandas" }, + { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "scipy", version = 
"1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "setuptools" }, + { name = "statsmodels" }, + { name = "typing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/1d/1f1512680c305d0effabec5733676e1f4d2a8f2db246969de69d31c1007e/dcurves-1.1.5.tar.gz", hash = "sha256:08de2dd9a5c3e8917e1a6d483785b9e01c6c7ae2b5afdc08c328a7cd16cc4c12", size = 4903731, upload-time = "2025-07-07T16:46:38.73Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/2a/9d71fd5b8132067e442e9c5e7184d2db58987e076401254fa06261f85c85/dcurves-1.1.5-py3-none-any.whl", hash = "sha256:168689c2056471f43503803ef8177bf0e196f498d283e75f061f86c567a9c89b", size = 94024, upload-time = "2025-07-07T16:46:36.837Z" }, +] + [[package]] name = "debugpy" version = "1.8.14" @@ -1476,6 +1497,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "joblib" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/fe/0f5a938c54105553436dbff7a61dc4fed4b1b2c98852f8833beaf4d5968f/joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444", size = 330475, upload-time = "2025-05-23T12:04:37.097Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746, upload-time = "2025-05-23T12:04:35.124Z" }, +] + [[package]] name = "json5" version = "0.12.0" @@ -1955,79 +1985,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/3a/1d/50ad811d1c5dae091e4cf046beba925bcae0a610e79ae4c538f996f63ed5/kiwisolver-1.4.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:65ea09a5a3faadd59c2ce96dc7bf0f364986a315949dc6374f04396b0d60e09b", size = 71762, upload-time = "2024-12-24T18:30:48.903Z" }, ] -[[package]] -name = "libcst" -version = "1.8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyyaml", marker = "python_full_version < '3.13'" }, - { name = "pyyaml-ft", marker = "python_full_version >= '3.13'" }, - { name = "typing-extensions", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/89/aa/b52d195b167958fe1bd106a260f64cc80ec384f6ac2a9cda874d8803df06/libcst-1.8.2.tar.gz", hash = "sha256:66e82cedba95a6176194a817be4232c720312f8be6d2c8f3847f3317d95a0c7f", size = 881534, upload-time = "2025-06-13T20:56:37.915Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/2e/1d7f67d2ef6f875e9e8798c024f7cb3af3fe861e417bff485c69b655ac96/libcst-1.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:67d9720d91f507c87b3e5f070627ad640a00bc6cfdf5635f8c6ee9f2964cf71c", size = 2195106, upload-time = "2025-06-13T20:54:49.166Z" }, - { url = "https://files.pythonhosted.org/packages/82/d0/3d94fee2685f263fd8d85a83e2537fcc78b644eae450738bf2c72604f0df/libcst-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:94b7c032b72566077614a02baab1929739fd0af0cc1d46deaba4408b870faef2", size = 2080577, upload-time = "2025-06-13T20:54:51.518Z" }, - { url = "https://files.pythonhosted.org/packages/14/87/c9b49bebb9a930fdcb59bf841f1c45719d2a4a39c3eb7efacfd30a2bfb0a/libcst-1.8.2-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:11ea148902e3e1688afa392087c728ac3a843e54a87d334d1464d2097d3debb7", size = 2404076, upload-time = "2025-06-13T20:54:53.303Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/fa/9ca145aa9033f9a8362a5663ceb28dfb67082574de8118424b6b8e445e7a/libcst-1.8.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:22c9473a2cc53faabcc95a0ac6ca4e52d127017bf34ba9bc0f8e472e44f7b38e", size = 2219813, upload-time = "2025-06-13T20:54:55.351Z" }, - { url = "https://files.pythonhosted.org/packages/0c/25/496a025c09e96116437a57fd34abefe84c041d930f832c6e42d84d9e028c/libcst-1.8.2-cp310-cp310-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b5269b96367e65793a7714608f6d906418eb056d59eaac9bba980486aabddbed", size = 2189782, upload-time = "2025-06-13T20:54:57.013Z" }, - { url = "https://files.pythonhosted.org/packages/b3/75/826b5772192826d70480efe93bab3e4f0b4a24d31031f45547257ad5f9a8/libcst-1.8.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:d20e932ddd9a389da57b060c26e84a24118c96ff6fc5dcc7b784da24e823b694", size = 2312403, upload-time = "2025-06-13T20:54:58.996Z" }, - { url = "https://files.pythonhosted.org/packages/93/f4/316fa14ea6c61ea8755672d60e012558f0216300b3819e72bebc7864a507/libcst-1.8.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a553d452004e44b841788f6faa7231a02157527ddecc89dbbe5b689b74822226", size = 2280566, upload-time = "2025-06-13T20:55:00.707Z" }, - { url = "https://files.pythonhosted.org/packages/fc/52/74b69350db379b1646739288b88ffab2981b2ad48407faf03df3768d7d2f/libcst-1.8.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7fe762c4c390039b79b818cbc725d8663586b25351dc18a2704b0e357d69b924", size = 2388508, upload-time = "2025-06-13T20:55:02.769Z" }, - { url = "https://files.pythonhosted.org/packages/bc/c6/fa92699b537ed65e93c2869144e23bdf156ec81ae7b84b4f34cbc20d6048/libcst-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:5c513e64eff0f7bf2a908e2d987a98653eb33e1062ce2afd3a84af58159a24f9", size = 2093260, upload-time = "2025-06-13T20:55:04.771Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/ac/4ec4ae9da311f72cd97e930c325bb605e9ad0baaafcafadb0588e1dc5c4e/libcst-1.8.2-cp310-cp310-win_arm64.whl", hash = "sha256:41613fe08e647213546c7c59a5a1fc5484666e7d4cab6e80260c612acbb20e8c", size = 1985236, upload-time = "2025-06-13T20:55:06.317Z" }, - { url = "https://files.pythonhosted.org/packages/c5/73/f0a4d807bff6931e3d8c3180472cf43d63a121aa60be895425fba2ed4f3a/libcst-1.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:688a03bac4dfb9afc5078ec01d53c21556381282bdf1a804dd0dbafb5056de2a", size = 2195040, upload-time = "2025-06-13T20:55:08.117Z" }, - { url = "https://files.pythonhosted.org/packages/e5/fa/ede0cfc410e498e1279eb489603f31077d2ca112d84e1327b04b508c0cbe/libcst-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c34060ff2991707c710250463ae9f415ebb21653f2f5b013c61c9c376ff9b715", size = 2080304, upload-time = "2025-06-13T20:55:09.729Z" }, - { url = "https://files.pythonhosted.org/packages/39/8d/59f7c488dbedf96454c07038dea72ee2a38de13d52b4f796a875a1dc45a6/libcst-1.8.2-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f54f5c4176d60e7cd6b0880e18fb3fa8501ae046069151721cab457c7c538a3d", size = 2403816, upload-time = "2025-06-13T20:55:11.527Z" }, - { url = "https://files.pythonhosted.org/packages/b5/c2/af8d6cc0c6dcd1a5d0ed5cf846be242354513139a9358e005c63252c6ab7/libcst-1.8.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d11992561de0ad29ec2800230fbdcbef9efaa02805d5c633a73ab3cf2ba51bf1", size = 2219415, upload-time = "2025-06-13T20:55:13.144Z" }, - { url = "https://files.pythonhosted.org/packages/b6/b8/1638698d6c33bdb4397ee6f60e534e7504ef2cd1447b24104df65623dedb/libcst-1.8.2-cp311-cp311-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:fa3b807c2d2b34397c135d19ad6abb20c47a2ddb7bf65d90455f2040f7797e1e", size = 2189568, upload-time = "2025-06-13T20:55:15.119Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/16/51c1015dada47b8464c5fa0cbf70fecc5fce0facd07d05a5cb6e7eb68b88/libcst-1.8.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b0110140738be1287e3724080a101e7cec6ae708008b7650c9d8a1c1788ec03a", size = 2312018, upload-time = "2025-06-13T20:55:16.831Z" }, - { url = "https://files.pythonhosted.org/packages/d5/ea/8d24158f345ea2921d0d7ff49a6bf86fd4a08b0f05735f14a84ea9e28fa9/libcst-1.8.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a50618f4819a97ef897e055ac7aaf1cad5df84c206f33be35b0759d671574197", size = 2279875, upload-time = "2025-06-13T20:55:18.418Z" }, - { url = "https://files.pythonhosted.org/packages/73/fd/0441cc1bcf188300aaa41ca5d473919a00939cc7f4934b3b08b23c8740c1/libcst-1.8.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e9bb599c175dc34a4511f0e26d5b5374fbcc91ea338871701a519e95d52f3c28", size = 2388060, upload-time = "2025-06-13T20:55:20.304Z" }, - { url = "https://files.pythonhosted.org/packages/f8/fc/28f6380eefd58543f80589b77cab81eb038e7cc86f7c34a815a287dba82f/libcst-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:96e2363e1f6e44bd7256bbbf3a53140743f821b5133046e6185491e0d9183447", size = 2093117, upload-time = "2025-06-13T20:55:21.977Z" }, - { url = "https://files.pythonhosted.org/packages/ef/db/cdbd1531bca276c44bc485e40c3156e770e01020f8c1a737282bf884d69f/libcst-1.8.2-cp311-cp311-win_arm64.whl", hash = "sha256:f5391d71bd7e9e6c73dcb3ee8d8c63b09efc14ce6e4dad31568d4838afc9aae0", size = 1985285, upload-time = "2025-06-13T20:55:24.438Z" }, - { url = "https://files.pythonhosted.org/packages/31/2d/8726bf8ea8252e8fd1e48980753eef5449622c5f6cf731102bc43dcdc2c6/libcst-1.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2e8c1dfa854e700fcf6cd79b2796aa37d55697a74646daf5ea47c7c764bac31c", size = 2185942, upload-time = "2025-06-13T20:55:26.105Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/b3/565d24db8daed66eae7653c1fc1bc97793d49d5d3bcef530450ee8da882c/libcst-1.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b5c57a3c1976c365678eb0730bcb140d40510990cb77df9a91bb5c41d587ba6", size = 2072622, upload-time = "2025-06-13T20:55:27.548Z" }, - { url = "https://files.pythonhosted.org/packages/8c/d6/5a433e8a58eeb5c5d46635cfe958d0605f598d87977d4560484e3662d438/libcst-1.8.2-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:0f23409add2aaebbb6d8e881babab43c2d979f051b8bd8aed5fe779ea180a4e8", size = 2402738, upload-time = "2025-06-13T20:55:29.539Z" }, - { url = "https://files.pythonhosted.org/packages/85/e4/0dd752c1880b570118fa91ac127589e6cf577ddcb2eef1aaf8b81ecc3f79/libcst-1.8.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b88e9104c456590ad0ef0e82851d4fc03e9aa9d621fa8fdd4cd0907152a825ae", size = 2219932, upload-time = "2025-06-13T20:55:31.17Z" }, - { url = "https://files.pythonhosted.org/packages/42/bc/fceae243c6a329477ac6d4edb887bcaa2ae7a3686158d8d9b9abb3089c37/libcst-1.8.2-cp312-cp312-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5ba3ea570c8fb6fc44f71aa329edc7c668e2909311913123d0d7ab8c65fc357", size = 2191891, upload-time = "2025-06-13T20:55:33.066Z" }, - { url = "https://files.pythonhosted.org/packages/7d/7d/eb341bdc11f1147e7edeccffd0f2f785eff014e72134f5e46067472012b0/libcst-1.8.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:460fcf3562f078781e1504983cb11909eb27a1d46eaa99e65c4b0fafdc298298", size = 2311927, upload-time = "2025-06-13T20:55:34.614Z" }, - { url = "https://files.pythonhosted.org/packages/d8/19/78bfc7aa5a542574d2ab0768210d084901dec5fc373103ca119905408cf2/libcst-1.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c1381ddbd1066d543e05d580c15beacf671e1469a0b2adb6dba58fec311f4eed", size = 2281098, upload-time = "2025-06-13T20:55:36.089Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/37/a41788a72dc06ed3566606f7cf50349c9918cee846eeae45d1bac03d54c2/libcst-1.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a70e40ce7600e1b32e293bb9157e9de3b69170e2318ccb219102f1abb826c94a", size = 2387649, upload-time = "2025-06-13T20:55:37.797Z" }, - { url = "https://files.pythonhosted.org/packages/bb/df/7a49576c9fd55cdfd8bcfb725273aa4ee7dc41e87609f3451a4901d68057/libcst-1.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:3ece08ba778b6eeea74d9c705e9af2d1b4e915e9bc6de67ad173b962e575fcc0", size = 2094574, upload-time = "2025-06-13T20:55:39.833Z" }, - { url = "https://files.pythonhosted.org/packages/29/60/27381e194d2af08bfd0fed090c905b2732907b69da48d97d86c056d70790/libcst-1.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:5efd1bf6ee5840d1b0b82ec8e0b9c64f182fa5a7c8aad680fbd918c4fa3826e0", size = 1984568, upload-time = "2025-06-13T20:55:41.511Z" }, - { url = "https://files.pythonhosted.org/packages/11/9c/e3d4c7f1eb5c23907f905f84a4da271b60cd15b746ac794d42ea18bb105e/libcst-1.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08e9dca4ab6f8551794ce7ec146f86def6a82da41750cbed2c07551345fa10d3", size = 2185848, upload-time = "2025-06-13T20:55:43.653Z" }, - { url = "https://files.pythonhosted.org/packages/59/e0/635cbb205d42fd296c01ab5cd1ba485b0aee92bffe061de587890c81f1bf/libcst-1.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8310521f2ccb79b5c4345750d475b88afa37bad930ab5554735f85ad5e3add30", size = 2072510, upload-time = "2025-06-13T20:55:45.287Z" }, - { url = "https://files.pythonhosted.org/packages/fe/45/8911cfe9413fd690a024a1ff2c8975f060dd721160178679d3f6a21f939e/libcst-1.8.2-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:da2d8b008aff72acd5a4a588491abdda1b446f17508e700f26df9be80d8442ae", size = 2403226, upload-time = "2025-06-13T20:55:46.927Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/83/819d2b1b1fd870ad34ce4f34ec68704ca69bf48ef2d7665483115f267ec4/libcst-1.8.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:be821d874ce8b26cbadd7277fa251a9b37f6d2326f8b5682b6fc8966b50a3a59", size = 2220669, upload-time = "2025-06-13T20:55:48.597Z" }, - { url = "https://files.pythonhosted.org/packages/d4/2f/2c4742bf834f88a9803095915c4f41cafefb7b04bde66ea86f74668b4b7b/libcst-1.8.2-cp313-cp313-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f74b0bc7378ad5afcf25ac9d0367b4dbba50f6f6468faa41f5dfddcf8bf9c0f8", size = 2191919, upload-time = "2025-06-13T20:55:50.092Z" }, - { url = "https://files.pythonhosted.org/packages/64/f4/107e13815f1ee5aad642d4eb4671c0273ee737f3832e3dbca9603b39f8d9/libcst-1.8.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b68ea4a6018abfea1f68d50f74de7d399172684c264eb09809023e2c8696fc23", size = 2311965, upload-time = "2025-06-13T20:55:51.974Z" }, - { url = "https://files.pythonhosted.org/packages/03/63/2948b6e4be367ad375d273a8ad00df573029cffe5ac8f6c09398c250de5b/libcst-1.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2e264307ec49b2c72480422abafe80457f90b4e6e693b7ddf8a23d24b5c24001", size = 2281704, upload-time = "2025-06-13T20:55:54.036Z" }, - { url = "https://files.pythonhosted.org/packages/c8/d3/590cde9c8c386d5f4f05fdef3394c437ea51060478a5141ff4a1f289e747/libcst-1.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5d5519962ce7c72d81888fb0c09e58e308ba4c376e76bcd853b48151063d6a8", size = 2387511, upload-time = "2025-06-13T20:55:55.538Z" }, - { url = "https://files.pythonhosted.org/packages/96/3d/ba5e36c663028043fc607dc33e5c390c7f73136fb15a890fb3710ee9d158/libcst-1.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:b62aa11d6b74ed5545e58ac613d3f63095e5fd0254b3e0d1168fda991b9a6b41", size = 2094526, upload-time = "2025-06-13T20:55:57.486Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/34/530ca3b972dddad562f266c81190bea29376f8ba70054ea7b45b114504cd/libcst-1.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9c2bd4ac288a9cdb7ffc3229a9ce8027a66a3fd3f2ab9e13da60f5fbfe91f3b2", size = 1984627, upload-time = "2025-06-13T20:55:59.017Z" }, - { url = "https://files.pythonhosted.org/packages/19/9f/491f7b8d9d93444cd9bf711156ee1f122c38d25b903599e363d669acc8ab/libcst-1.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:08a8c7d9922ca6eed24e2c13a3c552b3c186af8fc78e5d4820b58487d780ec19", size = 2175415, upload-time = "2025-06-13T20:56:01.157Z" }, - { url = "https://files.pythonhosted.org/packages/2e/fe/4d13437f453f92687246aa7c5138e102ee5186fe96609ee4c598bb9f9ecb/libcst-1.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bba7c2b5063e8ada5a5477f9fa0c01710645426b5a8628ec50d558542a0a292e", size = 2063719, upload-time = "2025-06-13T20:56:02.787Z" }, - { url = "https://files.pythonhosted.org/packages/94/59/758ae142c6607f275269021362b731e0f22ff5c9aa7cc67b0ed3a6bc930f/libcst-1.8.2-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:d97c9fe13aacfbefded6861f5200dcb8e837da7391a9bdeb44ccb133705990af", size = 2380624, upload-time = "2025-06-13T20:56:04.909Z" }, - { url = "https://files.pythonhosted.org/packages/ac/c5/31d214a0bcb3523243a9b5643b597ff653d6ec9e1f3326cfcc16bcbf185d/libcst-1.8.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d2194ae959630aae4176a4b75bd320b3274c20bef2a5ca6b8d6fc96d3c608edf", size = 2208801, upload-time = "2025-06-13T20:56:06.983Z" }, - { url = "https://files.pythonhosted.org/packages/70/16/a53f852322b266c63b492836a5c4968f192ee70fb52795a79feb4924e9ed/libcst-1.8.2-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0be639f5b2e1999a4b4a82a0f4633969f97336f052d0c131627983589af52f56", size = 2179557, upload-time = "2025-06-13T20:56:09.09Z" }, - { url = 
"https://files.pythonhosted.org/packages/fa/49/12a5664c73107187ba3af14869d3878fca1fd4c37f6fbb9adb943cb7a791/libcst-1.8.2-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6753e50904e05c27915933da41518ecd7a8ca4dd3602112ba44920c6e353a455", size = 2302499, upload-time = "2025-06-13T20:56:10.751Z" }, - { url = "https://files.pythonhosted.org/packages/e9/46/2d62552a9346a040c045d6619b645d59bb707a586318121f099abd0cd5c4/libcst-1.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:706d07106af91c343150be86caeae1ea3851b74aa0730fcbbf8cd089e817f818", size = 2271070, upload-time = "2025-06-13T20:56:12.445Z" }, - { url = "https://files.pythonhosted.org/packages/af/67/b625fd6ae22575255aade0a24f45e1d430b7e7279729c9c51d4faac982d2/libcst-1.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd4310ea8ddc49cc8872e083737cf806299b17f93159a1f354d59aa08993e876", size = 2380767, upload-time = "2025-06-13T20:56:13.995Z" }, - { url = "https://files.pythonhosted.org/packages/e6/84/fb88f2ffdb045ff7323a6c05dd3d243a9eb3cb3517a6269dee43fbfb9990/libcst-1.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:51bbafdd847529e8a16d1965814ed17831af61452ee31943c414cb23451de926", size = 2083403, upload-time = "2025-06-13T20:56:15.959Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8f/da755d6d517eb8ec9664afae967b00a9b8dd567bbbb350e261359c1b47fc/libcst-1.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:4f14f5045766646ed9e8826b959c6d07194788babed1e0ba08c94ea4f39517e3", size = 1974355, upload-time = "2025-06-13T20:56:18.064Z" }, - { url = "https://files.pythonhosted.org/packages/2e/55/7c223ffc44fa623cc4c6c45e932d8e0724e31c8daede8a66d6a53ccd49a1/libcst-1.8.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:f69582e24667715e3860d80d663f1caeb2398110077e23cc0a1e0066a851f5ab", size = 2195291, upload-time = "2025-06-13T20:56:20.114Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/3a/dced5455963238f1ebedd28cf48bfd5e5d84c847132846a2567f5beaf7fc/libcst-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ba85f9e6a7f37ef998168aa3fd28d263d7f83016bd306a4508a2394e5e793b4", size = 2080544, upload-time = "2025-06-13T20:56:22.096Z" }, - { url = "https://files.pythonhosted.org/packages/da/ec/2bce80fb362961191e3ac67a38619780f9bd5203732ad95962458a3b71c0/libcst-1.8.2-cp39-cp39-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:43ccaa6c54daa1749cec53710c70d47150965574d4c6d4c4f2e3f87b9bf9f591", size = 2404396, upload-time = "2025-06-13T20:56:24.215Z" }, - { url = "https://files.pythonhosted.org/packages/6a/33/dd10a5ad783f3c1edc55fe97f5cbfe3924f6a7ce3556464538640a348e04/libcst-1.8.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8a81d816c2088d2055112af5ecd82fdfbe8ff277600e94255e2639b07de10234", size = 2219446, upload-time = "2025-06-13T20:56:25.84Z" }, - { url = "https://files.pythonhosted.org/packages/dd/66/e7a208e5208bbd37b5be989e22b7abd117c40866b7880e7c447f4fb8ee46/libcst-1.8.2-cp39-cp39-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:449f9ff8a5025dcd5c8d4ad28f6c291de5de89e4c044b0bda96b45bef8999b75", size = 2189946, upload-time = "2025-06-13T20:56:27.472Z" }, - { url = "https://files.pythonhosted.org/packages/08/6f/5ef938f947e7cdd83bdffb6929697e7f27b0ae4a6f84a7f30e044690ba1c/libcst-1.8.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:36d5ab95f39f855521585b0e819dc2d4d1b2a4080bad04c2f3de1e387a5d2233", size = 2312416, upload-time = "2025-06-13T20:56:29.49Z" }, - { url = "https://files.pythonhosted.org/packages/04/5b/2f965ae65ef12bc0800a35c5668df3eda26437f6a8bcc0f5520b02f3c3a5/libcst-1.8.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:207575dec2dae722acf6ab39b4b361151c65f8f895fd37edf9d384f5541562e1", size = 2280429, upload-time = "2025-06-13T20:56:30.995Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/1d/f67e6cb1146c0b546f095baf0d6ff6fa561bd61c1e1a5357e9557a16d501/libcst-1.8.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:52a1067cf31d9e9e4be514b253bea6276f1531dd7de6ab0917df8ce5b468a820", size = 2388615, upload-time = "2025-06-13T20:56:32.655Z" }, - { url = "https://files.pythonhosted.org/packages/b7/83/b4d659782e88f46c073ea5cbd9a4e99bf7ea17883632371795f91121b220/libcst-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:59e8f611c977206eba294c296c2d29a1c1b1b88206cb97cd0d4847c1a3d923e7", size = 2093194, upload-time = "2025-06-13T20:56:34.348Z" }, - { url = "https://files.pythonhosted.org/packages/01/4a/3614b732cb25a3bba93ffde84b9e006007c687a9c84d22e64add56dee5fd/libcst-1.8.2-cp39-cp39-win_arm64.whl", hash = "sha256:ae22376633cfa3db21c4eed2870d1c36b5419289975a41a45f34a085b2d9e6ea", size = 1985259, upload-time = "2025-06-13T20:56:36.337Z" }, -] - [[package]] name = "lifelines" version = "0.30.0" @@ -2453,19 +2410,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410, upload-time = "2025-03-19T14:27:23.451Z" }, ] -[[package]] -name = "monkeytype" -version = "23.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "libcst" }, - { name = "mypy-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/de/66/7006d51ed537648107c28086f8c390030b4b4c5524b77598a3bbb657d3ec/MonkeyType-23.3.0.tar.gz", hash = "sha256:f2595db34d57cdddbde5a990117a50a22f373dbb917a2a0fa91ffbe07dfe0313", size = 34847, upload-time = "2023-03-20T14:08:01.69Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/6d/de1fd4624ba300a98cc22f4db38f24bf89e660b6fc0be2740406347e5bca/MonkeyType-23.3.0-py3-none-any.whl", hash = "sha256:38ce8ad6568190f54c334b9fe835608af29b40a33ad448ecae749ae8790cdbf9", 
size = 40850, upload-time = "2023-03-20T14:07:58.815Z" }, -] - [[package]] name = "multidict" version = "6.6.3" @@ -2583,51 +2527,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d8/30/9aec301e9772b098c1f5c0ca0279237c9766d94b97802e9888010c64b0ed/multidict-6.6.3-py3-none-any.whl", hash = "sha256:8db10f29c7541fc5da4defd8cd697e1ca429db743fa716325f236079b96f775a", size = 12313, upload-time = "2025-06-30T15:53:45.437Z" }, ] -[[package]] -name = "mypy" -version = "1.16.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mypy-extensions" }, - { name = "pathspec" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/81/69/92c7fa98112e4d9eb075a239caa4ef4649ad7d441545ccffbd5e34607cbb/mypy-1.16.1.tar.gz", hash = "sha256:6bd00a0a2094841c5e47e7374bb42b83d64c527a502e3334e1173a0c24437bab", size = 3324747, upload-time = "2025-06-16T16:51:35.145Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/12/2bf23a80fcef5edb75de9a1e295d778e0f46ea89eb8b115818b663eff42b/mypy-1.16.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4f0fed1022a63c6fec38f28b7fc77fca47fd490445c69d0a66266c59dd0b88a", size = 10958644, upload-time = "2025-06-16T16:51:11.649Z" }, - { url = "https://files.pythonhosted.org/packages/08/50/bfe47b3b278eacf348291742fd5e6613bbc4b3434b72ce9361896417cfe5/mypy-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:86042bbf9f5a05ea000d3203cf87aa9d0ccf9a01f73f71c58979eb9249f46d72", size = 10087033, upload-time = "2025-06-16T16:35:30.089Z" }, - { url = "https://files.pythonhosted.org/packages/21/de/40307c12fe25675a0776aaa2cdd2879cf30d99eec91b898de00228dc3ab5/mypy-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ea7469ee5902c95542bea7ee545f7006508c65c8c54b06dc2c92676ce526f3ea", size = 11875645, upload-time = "2025-06-16T16:35:48.49Z" }, - { url = 
"https://files.pythonhosted.org/packages/a6/d8/85bdb59e4a98b7a31495bd8f1a4445d8ffc86cde4ab1f8c11d247c11aedc/mypy-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:352025753ef6a83cb9e7f2427319bb7875d1fdda8439d1e23de12ab164179574", size = 12616986, upload-time = "2025-06-16T16:48:39.526Z" }, - { url = "https://files.pythonhosted.org/packages/0e/d0/bb25731158fa8f8ee9e068d3e94fcceb4971fedf1424248496292512afe9/mypy-1.16.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff9fa5b16e4c1364eb89a4d16bcda9987f05d39604e1e6c35378a2987c1aac2d", size = 12878632, upload-time = "2025-06-16T16:36:08.195Z" }, - { url = "https://files.pythonhosted.org/packages/2d/11/822a9beb7a2b825c0cb06132ca0a5183f8327a5e23ef89717c9474ba0bc6/mypy-1.16.1-cp310-cp310-win_amd64.whl", hash = "sha256:1256688e284632382f8f3b9e2123df7d279f603c561f099758e66dd6ed4e8bd6", size = 9484391, upload-time = "2025-06-16T16:37:56.151Z" }, - { url = "https://files.pythonhosted.org/packages/9a/61/ec1245aa1c325cb7a6c0f8570a2eee3bfc40fa90d19b1267f8e50b5c8645/mypy-1.16.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:472e4e4c100062488ec643f6162dd0d5208e33e2f34544e1fc931372e806c0cc", size = 10890557, upload-time = "2025-06-16T16:37:21.421Z" }, - { url = "https://files.pythonhosted.org/packages/6b/bb/6eccc0ba0aa0c7a87df24e73f0ad34170514abd8162eb0c75fd7128171fb/mypy-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea16e2a7d2714277e349e24d19a782a663a34ed60864006e8585db08f8ad1782", size = 10012921, upload-time = "2025-06-16T16:51:28.659Z" }, - { url = "https://files.pythonhosted.org/packages/5f/80/b337a12e2006715f99f529e732c5f6a8c143bb58c92bb142d5ab380963a5/mypy-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08e850ea22adc4d8a4014651575567b0318ede51e8e9fe7a68f25391af699507", size = 11802887, upload-time = "2025-06-16T16:50:53.627Z" }, - { url = 
"https://files.pythonhosted.org/packages/d9/59/f7af072d09793d581a745a25737c7c0a945760036b16aeb620f658a017af/mypy-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22d76a63a42619bfb90122889b903519149879ddbf2ba4251834727944c8baca", size = 12531658, upload-time = "2025-06-16T16:33:55.002Z" }, - { url = "https://files.pythonhosted.org/packages/82/c4/607672f2d6c0254b94a646cfc45ad589dd71b04aa1f3d642b840f7cce06c/mypy-1.16.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2c7ce0662b6b9dc8f4ed86eb7a5d505ee3298c04b40ec13b30e572c0e5ae17c4", size = 12732486, upload-time = "2025-06-16T16:37:03.301Z" }, - { url = "https://files.pythonhosted.org/packages/b6/5e/136555ec1d80df877a707cebf9081bd3a9f397dedc1ab9750518d87489ec/mypy-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:211287e98e05352a2e1d4e8759c5490925a7c784ddc84207f4714822f8cf99b6", size = 9479482, upload-time = "2025-06-16T16:47:37.48Z" }, - { url = "https://files.pythonhosted.org/packages/b4/d6/39482e5fcc724c15bf6280ff5806548c7185e0c090712a3736ed4d07e8b7/mypy-1.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:af4792433f09575d9eeca5c63d7d90ca4aeceda9d8355e136f80f8967639183d", size = 11066493, upload-time = "2025-06-16T16:47:01.683Z" }, - { url = "https://files.pythonhosted.org/packages/e6/e5/26c347890efc6b757f4d5bb83f4a0cf5958b8cf49c938ac99b8b72b420a6/mypy-1.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66df38405fd8466ce3517eda1f6640611a0b8e70895e2a9462d1d4323c5eb4b9", size = 10081687, upload-time = "2025-06-16T16:48:19.367Z" }, - { url = "https://files.pythonhosted.org/packages/44/c7/b5cb264c97b86914487d6a24bd8688c0172e37ec0f43e93b9691cae9468b/mypy-1.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44e7acddb3c48bd2713994d098729494117803616e116032af192871aed80b79", size = 11839723, upload-time = "2025-06-16T16:49:20.912Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/f8/491997a9b8a554204f834ed4816bda813aefda31cf873bb099deee3c9a99/mypy-1.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ab5eca37b50188163fa7c1b73c685ac66c4e9bdee4a85c9adac0e91d8895e15", size = 12722980, upload-time = "2025-06-16T16:37:40.929Z" }, - { url = "https://files.pythonhosted.org/packages/df/f0/2bd41e174b5fd93bc9de9a28e4fb673113633b8a7f3a607fa4a73595e468/mypy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb6229b2c9086247e21a83c309754b9058b438704ad2f6807f0d8227f6ebdd", size = 12903328, upload-time = "2025-06-16T16:34:35.099Z" }, - { url = "https://files.pythonhosted.org/packages/61/81/5572108a7bec2c46b8aff7e9b524f371fe6ab5efb534d38d6b37b5490da8/mypy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:1f0435cf920e287ff68af3d10a118a73f212deb2ce087619eb4e648116d1fe9b", size = 9562321, upload-time = "2025-06-16T16:48:58.823Z" }, - { url = "https://files.pythonhosted.org/packages/28/e3/96964af4a75a949e67df4b95318fe2b7427ac8189bbc3ef28f92a1c5bc56/mypy-1.16.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ddc91eb318c8751c69ddb200a5937f1232ee8efb4e64e9f4bc475a33719de438", size = 11063480, upload-time = "2025-06-16T16:47:56.205Z" }, - { url = "https://files.pythonhosted.org/packages/f5/4d/cd1a42b8e5be278fab7010fb289d9307a63e07153f0ae1510a3d7b703193/mypy-1.16.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:87ff2c13d58bdc4bbe7dc0dedfe622c0f04e2cb2a492269f3b418df2de05c536", size = 10090538, upload-time = "2025-06-16T16:46:43.92Z" }, - { url = "https://files.pythonhosted.org/packages/c9/4f/c3c6b4b66374b5f68bab07c8cabd63a049ff69796b844bc759a0ca99bb2a/mypy-1.16.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a7cfb0fe29fe5a9841b7c8ee6dffb52382c45acdf68f032145b75620acfbd6f", size = 11836839, upload-time = "2025-06-16T16:36:28.039Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/7e/81ca3b074021ad9775e5cb97ebe0089c0f13684b066a750b7dc208438403/mypy-1.16.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:051e1677689c9d9578b9c7f4d206d763f9bbd95723cd1416fad50db49d52f359", size = 12715634, upload-time = "2025-06-16T16:50:34.441Z" }, - { url = "https://files.pythonhosted.org/packages/e9/95/bdd40c8be346fa4c70edb4081d727a54d0a05382d84966869738cfa8a497/mypy-1.16.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d5d2309511cc56c021b4b4e462907c2b12f669b2dbeb68300110ec27723971be", size = 12895584, upload-time = "2025-06-16T16:34:54.857Z" }, - { url = "https://files.pythonhosted.org/packages/5a/fd/d486a0827a1c597b3b48b1bdef47228a6e9ee8102ab8c28f944cb83b65dc/mypy-1.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:4f58ac32771341e38a853c5d0ec0dfe27e18e27da9cdb8bbc882d2249c71a3ee", size = 9573886, upload-time = "2025-06-16T16:36:43.589Z" }, - { url = "https://files.pythonhosted.org/packages/49/5e/ed1e6a7344005df11dfd58b0fdd59ce939a0ba9f7ed37754bf20670b74db/mypy-1.16.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7fc688329af6a287567f45cc1cefb9db662defeb14625213a5b7da6e692e2069", size = 10959511, upload-time = "2025-06-16T16:47:21.945Z" }, - { url = "https://files.pythonhosted.org/packages/30/88/a7cbc2541e91fe04f43d9e4577264b260fecedb9bccb64ffb1a34b7e6c22/mypy-1.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e198ab3f55924c03ead626ff424cad1732d0d391478dfbf7bb97b34602395da", size = 10075555, upload-time = "2025-06-16T16:50:14.084Z" }, - { url = "https://files.pythonhosted.org/packages/93/f7/c62b1e31a32fbd1546cca5e0a2e5f181be5761265ad1f2e94f2a306fa906/mypy-1.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09aa4f91ada245f0a45dbc47e548fd94e0dd5a8433e0114917dc3b526912a30c", size = 11874169, upload-time = "2025-06-16T16:49:42.276Z" }, - { url = 
"https://files.pythonhosted.org/packages/c8/15/db580a28034657fb6cb87af2f8996435a5b19d429ea4dcd6e1c73d418e60/mypy-1.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13c7cd5b1cb2909aa318a90fd1b7e31f17c50b242953e7dd58345b2a814f6383", size = 12610060, upload-time = "2025-06-16T16:34:15.215Z" }, - { url = "https://files.pythonhosted.org/packages/ec/78/c17f48f6843048fa92d1489d3095e99324f2a8c420f831a04ccc454e2e51/mypy-1.16.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:58e07fb958bc5d752a280da0e890c538f1515b79a65757bbdc54252ba82e0b40", size = 12875199, upload-time = "2025-06-16T16:35:14.448Z" }, - { url = "https://files.pythonhosted.org/packages/bc/d6/ed42167d0a42680381653fd251d877382351e1bd2c6dd8a818764be3beb1/mypy-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:f895078594d918f93337a505f8add9bd654d1a24962b4c6ed9390e12531eb31b", size = 9487033, upload-time = "2025-06-16T16:49:57.907Z" }, - { url = "https://files.pythonhosted.org/packages/cf/d3/53e684e78e07c1a2bf7105715e5edd09ce951fc3f47cf9ed095ec1b7a037/mypy-1.16.1-py3-none-any.whl", hash = "sha256:5fc2ac4027d0ef28d6ba69a0343737a23c4d1b83672bf38d1fe237bdc0643b37", size = 2265923, upload-time = "2025-06-16T16:48:02.366Z" }, -] - [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3095,6 +2994,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, ] +[[package]] +name = "patsy" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version 
== '3.10.*'" }, + { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/81/74f6a65b848ffd16c18f920620ce999fe45fe27f01ab3911260ce4ed85e4/patsy-1.0.1.tar.gz", hash = "sha256:e786a9391eec818c054e359b737bbce692f051aee4c661f4141cc88fb459c0c4", size = 396010, upload-time = "2024-11-12T14:10:54.642Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c", size = 232923, upload-time = "2024-11-12T14:10:52.85Z" }, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -3274,14 +3187,14 @@ wheels = [ [[package]] name = "polarstate" -version = "0.1.6" +version = "0.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "polars" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/02/f1c77f1d76846202cd0c885fca02882c261e49068bfdc7936078f515a810/polarstate-0.1.6.tar.gz", hash = "sha256:350cd3978a06bcd049f57136a2eb163e3c9702cc32aca7c3ac6f703c943509ab", size = 4222089, upload-time = "2025-07-03T17:54:06.682Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/09/107eb49e8fc33392d1f226c353adbaa2c345e9e77b82039bf0b9edbfadfc/polarstate-0.1.8.tar.gz", hash = "sha256:803e158f5961f234700aa0121e47ca754ff76796d01dc0230e5133f020cbe7bd", size = 3155, upload-time = "2025-07-29T02:24:27.829Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/0c/f35a4e67d5daa8554d74f6b5be8f8b4ee8ce287a364bc4c1539656f7567c/polarstate-0.1.6-py3-none-any.whl", hash = "sha256:2cfc6de578cc714da83d51ca8d798ed85fc713a4d46263e730c67f8490e1c455", size = 3988, upload-time = "2025-07-03T17:54:05.811Z" }, + { url = 
"https://files.pythonhosted.org/packages/d4/68/c80de613a28e48be2ce15e00118bf8a018b74229349fd1c26347b352dcee/polarstate-0.1.8-py3-none-any.whl", hash = "sha256:710d791e67ea09c46f79030ecd1253e67a9d2f100aceb848c25967d88314db6a", size = 4403, upload-time = "2025-07-29T02:24:27.122Z" }, ] [[package]] @@ -3836,30 +3749,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/87/5124b1c1f2412bb95c59ec481eaf936cd32f0fe2a7b16b97b81c4c017a6a/PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", size = 162312, upload-time = "2024-08-06T20:33:49.073Z" }, ] -[[package]] -name = "pyyaml-ft" -version = "8.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/eb/5a0d575de784f9a1f94e2b1288c6886f13f34185e13117ed530f32b6f8a8/pyyaml_ft-8.0.0.tar.gz", hash = "sha256:0c947dce03954c7b5d38869ed4878b2e6ff1d44b08a0d84dc83fdad205ae39ab", size = 141057, upload-time = "2025-06-10T15:32:15.613Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/68/ba/a067369fe61a2e57fb38732562927d5bae088c73cb9bb5438736a9555b29/pyyaml_ft-8.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8c1306282bc958bfda31237f900eb52c9bedf9b93a11f82e1aab004c9a5657a6", size = 187027, upload-time = "2025-06-10T15:31:48.722Z" }, - { url = "https://files.pythonhosted.org/packages/ad/c5/a3d2020ce5ccfc6aede0d45bcb870298652ac0cf199f67714d250e0cdf39/pyyaml_ft-8.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:30c5f1751625786c19de751e3130fc345ebcba6a86f6bddd6e1285342f4bbb69", size = 176146, upload-time = "2025-06-10T15:31:50.584Z" }, - { url = "https://files.pythonhosted.org/packages/e3/bb/23a9739291086ca0d3189eac7cd92b4d00e9fdc77d722ab610c35f9a82ba/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fa992481155ddda2e303fcc74c79c05eddcdbc907b888d3d9ce3ff3e2adcfb0", size = 746792, upload-time = "2025-06-10T15:31:52.304Z" }, - { 
url = "https://files.pythonhosted.org/packages/5f/c2/e8825f4ff725b7e560d62a3609e31d735318068e1079539ebfde397ea03e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cec6c92b4207004b62dfad1f0be321c9f04725e0f271c16247d8b39c3bf3ea42", size = 786772, upload-time = "2025-06-10T15:31:54.712Z" }, - { url = "https://files.pythonhosted.org/packages/35/be/58a4dcae8854f2fdca9b28d9495298fd5571a50d8430b1c3033ec95d2d0e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06237267dbcab70d4c0e9436d8f719f04a51123f0ca2694c00dd4b68c338e40b", size = 778723, upload-time = "2025-06-10T15:31:56.093Z" }, - { url = "https://files.pythonhosted.org/packages/86/ed/fed0da92b5d5d7340a082e3802d84c6dc9d5fa142954404c41a544c1cb92/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8a7f332bc565817644cdb38ffe4739e44c3e18c55793f75dddb87630f03fc254", size = 758478, upload-time = "2025-06-10T15:31:58.314Z" }, - { url = "https://files.pythonhosted.org/packages/f0/69/ac02afe286275980ecb2dcdc0156617389b7e0c0a3fcdedf155c67be2b80/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d10175a746be65f6feb86224df5d6bc5c049ebf52b89a88cf1cd78af5a367a8", size = 799159, upload-time = "2025-06-10T15:31:59.675Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ac/c492a9da2e39abdff4c3094ec54acac9747743f36428281fb186a03fab76/pyyaml_ft-8.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:58e1015098cf8d8aec82f360789c16283b88ca670fe4275ef6c48c5e30b22a96", size = 158779, upload-time = "2025-06-10T15:32:01.029Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9b/41998df3298960d7c67653669f37710fa2d568a5fc933ea24a6df60acaf6/pyyaml_ft-8.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5f3e2ceb790d50602b2fd4ec37abbd760a8c778e46354df647e7c5a4ebb", size = 191331, upload-time = "2025-06-10T15:32:02.602Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/16/2710c252ee04cbd74d9562ebba709e5a284faeb8ada88fcda548c9191b47/pyyaml_ft-8.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8d445bf6ea16bb93c37b42fdacfb2f94c8e92a79ba9e12768c96ecde867046d1", size = 182879, upload-time = "2025-06-10T15:32:04.466Z" }, - { url = "https://files.pythonhosted.org/packages/9a/40/ae8163519d937fa7bfa457b6f78439cc6831a7c2b170e4f612f7eda71815/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c56bb46b4fda34cbb92a9446a841da3982cdde6ea13de3fbd80db7eeeab8b49", size = 811277, upload-time = "2025-06-10T15:32:06.214Z" }, - { url = "https://files.pythonhosted.org/packages/f9/66/28d82dbff7f87b96f0eeac79b7d972a96b4980c1e445eb6a857ba91eda00/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dab0abb46eb1780da486f022dce034b952c8ae40753627b27a626d803926483b", size = 831650, upload-time = "2025-06-10T15:32:08.076Z" }, - { url = "https://files.pythonhosted.org/packages/e8/df/161c4566facac7d75a9e182295c223060373d4116dead9cc53a265de60b9/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd48d639cab5ca50ad957b6dd632c7dd3ac02a1abe0e8196a3c24a52f5db3f7a", size = 815755, upload-time = "2025-06-10T15:32:09.435Z" }, - { url = "https://files.pythonhosted.org/packages/05/10/f42c48fa5153204f42eaa945e8d1fd7c10d6296841dcb2447bf7da1be5c4/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:052561b89d5b2a8e1289f326d060e794c21fa068aa11255fe71d65baf18a632e", size = 810403, upload-time = "2025-06-10T15:32:11.051Z" }, - { url = "https://files.pythonhosted.org/packages/d5/d2/e369064aa51009eb9245399fd8ad2c562bd0bcd392a00be44b2a824ded7c/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3bb4b927929b0cb162fb1605392a321e3333e48ce616cdcfa04a839271373255", size = 835581, upload-time = "2025-06-10T15:32:12.897Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/28/26534bed77109632a956977f60d8519049f545abc39215d086e33a61f1f2/pyyaml_ft-8.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:de04cfe9439565e32f178106c51dd6ca61afaa2907d143835d501d84703d3793", size = 171579, upload-time = "2025-06-10T15:32:14.34Z" }, -] - [[package]] name = "pyzmq" version = "26.4.0" @@ -4182,24 +4071,27 @@ dependencies = [ { name = "sphinx-autoapi" }, { name = "sphinx-rtd-theme" }, { name = "ty" }, + { name = "typing" }, ] [package.dev-dependencies] dev = [ + { name = "dcurves" }, { name = "ipykernel" }, { name = "jupyter" }, { name = "lifelines" }, { name = "marimo" }, - { name = "monkeytype" }, - { name = "mypy" }, { name = "myst-nb", marker = "python_full_version < '4'" }, + { name = "plotly" }, + { name = "polarstate" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pyzmq" }, { name = "ruff" }, - { name = "sphinx-autoapi" }, - { name = "sphinx-rtd-theme" }, + { name = "scikit-learn", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scikit-learn", version = "1.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "ty" }, { name = "uv" }, ] @@ -4212,30 +4104,32 @@ requires-dist = [ { name = "papermill", specifier = ">=2.6.0" }, { name = "plotly", specifier = ">=5.13.1,<6.0.0" }, { name = "polars", specifier = ">=1.28.0" }, - { name = "polarstate", specifier = ">=0.1.6" }, + { name = "polarstate", specifier = "==0.1.8" }, { name = "pyarrow", specifier = ">=20.0.0" }, { name = "quartodoc", specifier = ">=0.9.1" }, { name = "sphinx-autoapi", specifier = ">=2.1.0,<3.0.0" }, { name = "sphinx-rtd-theme", specifier = ">=1.2.0,<2.0.0" }, { name = "ty", specifier = ">=0.0.1a5" }, + { name = "typing", specifier = ">=3.7.4.3" }, ] [package.metadata.requires-dev] dev = [ + { name = "dcurves", specifier = ">=1.1.5" }, { name = "ipykernel", specifier = 
">=6.29.5" }, { name = "jupyter", specifier = ">=1.0.0,<2.0.0" }, { name = "lifelines", specifier = ">=0.30.0" }, { name = "marimo", specifier = ">=0.14.7" }, - { name = "monkeytype", specifier = ">=23.3.0" }, - { name = "mypy", specifier = ">=1.2.0,<2.0.0" }, { name = "myst-nb", marker = "python_full_version >= '3.9' and python_full_version < '4'", specifier = ">=0.17.1,<1.0.0" }, + { name = "plotly", specifier = ">=5.24.1" }, + { name = "polarstate", specifier = ">=0.1.6" }, { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=7.3.0,<8.0.0" }, { name = "pytest-cov", specifier = ">=4.0.0,<5.0.0" }, { name = "pyzmq", specifier = ">=26.3.0,<27.0.0" }, { name = "ruff", specifier = ">=0.11.0" }, - { name = "sphinx-autoapi", specifier = ">=2.1.0,<3.0.0" }, - { name = "sphinx-rtd-theme", specifier = ">=1.2.0,<2.0.0" }, + { name = "scikit-learn", specifier = ">=1.6.1" }, + { name = "ty", specifier = ">=0.0.1a12" }, { name = "uv", specifier = ">=0.6.11" }, ] @@ -4264,6 +4158,98 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/33/4d3e79e4a84533d6cd526bfb42c020a23256ae5e4265d858bd1287831f7d/ruff-0.12.0-py3-none-win_arm64.whl", hash = "sha256:8cd24580405ad8c1cc64d61725bca091d6b6da7eb3d36f72cc605467069d7e8b", size = 10724946, upload-time = "2025-06-17T15:19:23.952Z" }, ] +[[package]] +name = "scikit-learn" +version = "1.6.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "threadpoolctl", marker = "python_full_version < '3.10'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312, upload-time = "2025-01-10T08:07:55.348Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/3a/f4597eb41049110b21ebcbb0bcb43e4035017545daa5eedcfeb45c08b9c5/scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e", size = 12067702, upload-time = "2025-01-10T08:05:56.515Z" }, + { url = "https://files.pythonhosted.org/packages/37/19/0423e5e1fd1c6ec5be2352ba05a537a473c1677f8188b9306097d684b327/scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36", size = 11112765, upload-time = "2025-01-10T08:06:00.272Z" }, + { url = "https://files.pythonhosted.org/packages/70/95/d5cb2297a835b0f5fc9a77042b0a2d029866379091ab8b3f52cc62277808/scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8634c4bd21a2a813e0a7e3900464e6d593162a29dd35d25bdf0103b3fce60ed5", size = 12643991, upload-time = "2025-01-10T08:06:04.813Z" }, + { url = "https://files.pythonhosted.org/packages/b7/91/ab3c697188f224d658969f678be86b0968ccc52774c8ab4a86a07be13c25/scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775da975a471c4f6f467725dff0ced5c7ac7bda5e9316b260225b48475279a1b", size = 13497182, upload-time = "2025-01-10T08:06:08.42Z" }, + { url = "https://files.pythonhosted.org/packages/17/04/d5d556b6c88886c092cc989433b2bab62488e0f0dafe616a1d5c9cb0efb1/scikit_learn-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:8a600c31592bd7dab31e1c61b9bbd6dea1b3433e67d264d17ce1017dbdce8002", size = 11125517, upload-time = "2025-01-10T08:06:12.783Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/2a/e291c29670795406a824567d1dfc91db7b699799a002fdaa452bceea8f6e/scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33", size = 12102620, upload-time = "2025-01-10T08:06:16.675Z" }, + { url = "https://files.pythonhosted.org/packages/25/92/ee1d7a00bb6b8c55755d4984fd82608603a3cc59959245068ce32e7fb808/scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d", size = 11116234, upload-time = "2025-01-10T08:06:21.83Z" }, + { url = "https://files.pythonhosted.org/packages/30/cd/ed4399485ef364bb25f388ab438e3724e60dc218c547a407b6e90ccccaef/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2", size = 12592155, upload-time = "2025-01-10T08:06:27.309Z" }, + { url = "https://files.pythonhosted.org/packages/a8/f3/62fc9a5a659bb58a03cdd7e258956a5824bdc9b4bb3c5d932f55880be569/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8", size = 13497069, upload-time = "2025-01-10T08:06:32.515Z" }, + { url = "https://files.pythonhosted.org/packages/a1/a6/c5b78606743a1f28eae8f11973de6613a5ee87366796583fb74c67d54939/scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415", size = 11139809, upload-time = "2025-01-10T08:06:35.514Z" }, + { url = "https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516, upload-time = "2025-01-10T08:06:40.009Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837, upload-time = "2025-01-10T08:06:43.305Z" }, + { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728, upload-time = "2025-01-10T08:06:47.618Z" }, + { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700, upload-time = "2025-01-10T08:06:50.888Z" }, + { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613, upload-time = "2025-01-10T08:06:54.115Z" }, + { url = "https://files.pythonhosted.org/packages/2e/59/8eb1872ca87009bdcdb7f3cdc679ad557b992c12f4b61f9250659e592c63/scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322", size = 12010001, upload-time = "2025-01-10T08:06:58.613Z" }, + { url = "https://files.pythonhosted.org/packages/9d/05/f2fc4effc5b32e525408524c982c468c29d22f828834f0625c5ef3d601be/scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1", size = 11096360, upload-time = "2025-01-10T08:07:01.556Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/e4/4195d52cf4f113573fb8ebc44ed5a81bd511a92c0228889125fac2f4c3d1/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348", size = 12209004, upload-time = "2025-01-10T08:07:06.931Z" }, + { url = "https://files.pythonhosted.org/packages/94/be/47e16cdd1e7fcf97d95b3cb08bde1abb13e627861af427a3651fcb80b517/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97", size = 13171776, upload-time = "2025-01-10T08:07:11.715Z" }, + { url = "https://files.pythonhosted.org/packages/34/b0/ca92b90859070a1487827dbc672f998da95ce83edce1270fc23f96f1f61a/scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb", size = 11071865, upload-time = "2025-01-10T08:07:16.088Z" }, + { url = "https://files.pythonhosted.org/packages/12/ae/993b0fb24a356e71e9a894e42b8a9eec528d4c70217353a1cd7a48bc25d4/scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236", size = 11955804, upload-time = "2025-01-10T08:07:20.385Z" }, + { url = "https://files.pythonhosted.org/packages/d6/54/32fa2ee591af44507eac86406fa6bba968d1eb22831494470d0a2e4a1eb1/scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35", size = 11100530, upload-time = "2025-01-10T08:07:23.675Z" }, + { url = "https://files.pythonhosted.org/packages/3f/58/55856da1adec655bdce77b502e94a267bf40a8c0b89f8622837f89503b5a/scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691", size = 12433852, upload-time = "2025-01-10T08:07:26.817Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/4f/c83853af13901a574f8f13b645467285a48940f185b690936bb700a50863/scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f", size = 11337256, upload-time = "2025-01-10T08:07:31.084Z" }, + { url = "https://files.pythonhosted.org/packages/d2/37/b305b759cc65829fe1b8853ff3e308b12cdd9d8884aa27840835560f2b42/scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6849dd3234e87f55dce1db34c89a810b489ead832aaf4d4550b7ea85628be6c1", size = 12101868, upload-time = "2025-01-10T08:07:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/83/74/f64379a4ed5879d9db744fe37cfe1978c07c66684d2439c3060d19a536d8/scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e", size = 11144062, upload-time = "2025-01-10T08:07:37.67Z" }, + { url = "https://files.pythonhosted.org/packages/fd/dc/d5457e03dc9c971ce2b0d750e33148dd060fefb8b7dc71acd6054e4bb51b/scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44a17798172df1d3c1065e8fcf9019183f06c87609b49a124ebdf57ae6cb0107", size = 12693173, upload-time = "2025-01-10T08:07:42.713Z" }, + { url = "https://files.pythonhosted.org/packages/79/35/b1d2188967c3204c78fa79c9263668cf1b98060e8e58d1a730fe5b2317bb/scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b7a3b86e411e4bce21186e1c180d792f3d99223dcfa3b4f597ecc92fa1a422", size = 13518605, upload-time = "2025-01-10T08:07:46.551Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d8/8d603bdd26601f4b07e2363032b8565ab82eb857f93d86d0f7956fcf4523/scikit_learn-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:7a73d457070e3318e32bdb3aa79a8d990474f19035464dfd8bede2883ab5dc3b", size = 11155078, upload-time = "2025-01-10T08:07:51.376Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.7.0" +source = { registry = 
"https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version >= '3.10'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "scipy", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/3b/29fa87e76b1d7b3b77cc1fcbe82e6e6b8cd704410705b008822de530277c/scikit_learn-1.7.0.tar.gz", hash = "sha256:c01e869b15aec88e2cdb73d27f15bdbe03bce8e2fb43afbe77c45d399e73a5a3", size = 7178217, upload-time = "2025-06-05T22:02:46.703Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/70/e725b1da11e7e833f558eb4d3ea8b7ed7100edda26101df074f1ae778235/scikit_learn-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9fe7f51435f49d97bd41d724bb3e11eeb939882af9c29c931a8002c357e8cdd5", size = 11728006, upload-time = "2025-06-05T22:01:43.007Z" }, + { url = "https://files.pythonhosted.org/packages/32/aa/43874d372e9dc51eb361f5c2f0a4462915c9454563b3abb0d9457c66b7e9/scikit_learn-1.7.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d0c93294e1e1acbee2d029b1f2a064f26bd928b284938d51d412c22e0c977eb3", size = 10726255, upload-time = "2025-06-05T22:01:46.082Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/1a/da73cc18e00f0b9ae89f7e4463a02fb6e0569778120aeab138d9554ecef0/scikit_learn-1.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf3755f25f145186ad8c403312f74fb90df82a4dfa1af19dc96ef35f57237a94", size = 12205657, upload-time = "2025-06-05T22:01:48.729Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f6/800cb3243dd0137ca6d98df8c9d539eb567ba0a0a39ecd245c33fab93510/scikit_learn-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2726c8787933add436fb66fb63ad18e8ef342dfb39bbbd19dc1e83e8f828a85a", size = 12877290, upload-time = "2025-06-05T22:01:51.073Z" }, + { url = "https://files.pythonhosted.org/packages/4c/bd/99c3ccb49946bd06318fe194a1c54fb7d57ac4fe1c2f4660d86b3a2adf64/scikit_learn-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:e2539bb58886a531b6e86a510c0348afaadd25005604ad35966a85c2ec378800", size = 10713211, upload-time = "2025-06-05T22:01:54.107Z" }, + { url = "https://files.pythonhosted.org/packages/5a/42/c6b41711c2bee01c4800ad8da2862c0b6d2956a399d23ce4d77f2ca7f0c7/scikit_learn-1.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ef09b1615e1ad04dc0d0054ad50634514818a8eb3ee3dee99af3bffc0ef5007", size = 11719657, upload-time = "2025-06-05T22:01:56.345Z" }, + { url = "https://files.pythonhosted.org/packages/a3/24/44acca76449e391b6b2522e67a63c0454b7c1f060531bdc6d0118fb40851/scikit_learn-1.7.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:7d7240c7b19edf6ed93403f43b0fcb0fe95b53bc0b17821f8fb88edab97085ef", size = 10712636, upload-time = "2025-06-05T22:01:59.093Z" }, + { url = "https://files.pythonhosted.org/packages/9f/1b/fcad1ccb29bdc9b96bcaa2ed8345d56afb77b16c0c47bafe392cc5d1d213/scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80bd3bd4e95381efc47073a720d4cbab485fc483966f1709f1fd559afac57ab8", size = 12242817, upload-time = "2025-06-05T22:02:01.43Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/38/48b75c3d8d268a3f19837cb8a89155ead6e97c6892bb64837183ea41db2b/scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dbe48d69aa38ecfc5a6cda6c5df5abef0c0ebdb2468e92437e2053f84abb8bc", size = 12873961, upload-time = "2025-06-05T22:02:03.951Z" }, + { url = "https://files.pythonhosted.org/packages/f4/5a/ba91b8c57aa37dbd80d5ff958576a9a8c14317b04b671ae7f0d09b00993a/scikit_learn-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:8fa979313b2ffdfa049ed07252dc94038def3ecd49ea2a814db5401c07f1ecfa", size = 10717277, upload-time = "2025-06-05T22:02:06.77Z" }, + { url = "https://files.pythonhosted.org/packages/70/3a/bffab14e974a665a3ee2d79766e7389572ffcaad941a246931c824afcdb2/scikit_learn-1.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c2c7243d34aaede0efca7a5a96d67fddaebb4ad7e14a70991b9abee9dc5c0379", size = 11646758, upload-time = "2025-06-05T22:02:09.51Z" }, + { url = "https://files.pythonhosted.org/packages/58/d8/f3249232fa79a70cb40595282813e61453c1e76da3e1a44b77a63dd8d0cb/scikit_learn-1.7.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f39f6a811bf3f15177b66c82cbe0d7b1ebad9f190737dcdef77cfca1ea3c19c", size = 10673971, upload-time = "2025-06-05T22:02:12.217Z" }, + { url = "https://files.pythonhosted.org/packages/67/93/eb14c50533bea2f77758abe7d60a10057e5f2e2cdcf0a75a14c6bc19c734/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63017a5f9a74963d24aac7590287149a8d0f1a0799bbe7173c0d8ba1523293c0", size = 11818428, upload-time = "2025-06-05T22:02:14.947Z" }, + { url = "https://files.pythonhosted.org/packages/08/17/804cc13b22a8663564bb0b55fb89e661a577e4e88a61a39740d58b909efe/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f8a0b1e73e9a08b7cc498bb2aeab36cdc1f571f8ab2b35c6e5d1c7115d97d", size = 12505887, upload-time = "2025-06-05T22:02:17.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/c7/4e956281a077f4835458c3f9656c666300282d5199039f26d9de1dabd9be/scikit_learn-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:34cc8d9d010d29fb2b7cbcd5ccc24ffdd80515f65fe9f1e4894ace36b267ce19", size = 10668129, upload-time = "2025-06-05T22:02:20.536Z" }, + { url = "https://files.pythonhosted.org/packages/9a/c3/a85dcccdaf1e807e6f067fa95788a6485b0491d9ea44fd4c812050d04f45/scikit_learn-1.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5b7974f1f32bc586c90145df51130e02267e4b7e77cab76165c76cf43faca0d9", size = 11559841, upload-time = "2025-06-05T22:02:23.308Z" }, + { url = "https://files.pythonhosted.org/packages/d8/57/eea0de1562cc52d3196eae51a68c5736a31949a465f0b6bb3579b2d80282/scikit_learn-1.7.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:014e07a23fe02e65f9392898143c542a50b6001dbe89cb867e19688e468d049b", size = 10616463, upload-time = "2025-06-05T22:02:26.068Z" }, + { url = "https://files.pythonhosted.org/packages/10/a4/39717ca669296dfc3a62928393168da88ac9d8cbec88b6321ffa62c6776f/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7e7ced20582d3a5516fb6f405fd1d254e1f5ce712bfef2589f51326af6346e8", size = 11766512, upload-time = "2025-06-05T22:02:28.689Z" }, + { url = "https://files.pythonhosted.org/packages/d5/cd/a19722241d5f7b51e08351e1e82453e0057aeb7621b17805f31fcb57bb6c/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1babf2511e6ffd695da7a983b4e4d6de45dce39577b26b721610711081850906", size = 12461075, upload-time = "2025-06-05T22:02:31.233Z" }, + { url = "https://files.pythonhosted.org/packages/f3/bc/282514272815c827a9acacbe5b99f4f1a4bc5961053719d319480aee0812/scikit_learn-1.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:5abd2acff939d5bd4701283f009b01496832d50ddafa83c90125a4e41c33e314", size = 10652517, upload-time = "2025-06-05T22:02:34.139Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/78/7357d12b2e4c6674175f9a09a3ba10498cde8340e622715bcc71e532981d/scikit_learn-1.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e39d95a929b112047c25b775035c8c234c5ca67e681ce60d12413afb501129f7", size = 12111822, upload-time = "2025-06-05T22:02:36.904Z" }, + { url = "https://files.pythonhosted.org/packages/d0/0c/9c3715393343f04232f9d81fe540eb3831d0b4ec351135a145855295110f/scikit_learn-1.7.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:0521cb460426c56fee7e07f9365b0f45ec8ca7b2d696534ac98bfb85e7ae4775", size = 11325286, upload-time = "2025-06-05T22:02:39.739Z" }, + { url = "https://files.pythonhosted.org/packages/64/e0/42282ad3dd70b7c1a5f65c412ac3841f6543502a8d6263cae7b466612dc9/scikit_learn-1.7.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:317ca9f83acbde2883bd6bb27116a741bfcb371369706b4f9973cf30e9a03b0d", size = 12380865, upload-time = "2025-06-05T22:02:42.137Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d0/3ef4ab2c6be4aa910445cd09c5ef0b44512e3de2cfb2112a88bb647d2cf7/scikit_learn-1.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:126c09740a6f016e815ab985b21e3a0656835414521c81fc1a8da78b679bdb75", size = 11549609, upload-time = "2025-06-05T22:02:44.483Z" }, +] + [[package]] name = "scipy" version = "1.13.1" @@ -4685,6 +4671,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/95/38ef0cd7fa11eaba6a99b3c4f5ac948d8bc6ff199aabd327a29cc000840c/starlette-0.47.1-py3-none-any.whl", hash = "sha256:5e11c9f5c7c3f24959edbf2dffdc01bba860228acf657129467d8a7468591527", size = 72747, upload-time = "2025-06-21T04:03:15.705Z" }, ] +[[package]] +name = "statsmodels" +version = "0.14.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" 
}, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "patsy" }, + { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "scipy", version = "1.16.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/cc/8c1bf59bf8203dea1bf2ea811cfe667d7bcc6909c83d8afb02b08e30f50b/statsmodels-0.14.5.tar.gz", hash = "sha256:de260e58cccfd2ceddf835b55a357233d6ca853a1aa4f90f7553a52cc71c6ddf", size = 20525016, upload-time = "2025-07-07T12:14:23.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2c/55b2a5d10c1a211ecab3f792021d2581bbe1c5ca0a1059f6715dddc6899d/statsmodels-0.14.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9fc2b5cdc0c95cba894849651fec1fa1511d365e3eb72b0cc75caac44077cd48", size = 10058241, upload-time = "2025-07-07T12:13:16.286Z" }, + { url = "https://files.pythonhosted.org/packages/66/d9/6967475805de06691e951072d05e40e3f1c71b6221bb92401193ee19bd2a/statsmodels-0.14.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b8d96b0bbaeabd3a557c35cc7249baa9cfbc6dd305c32a9f2cbdd7f46c037e7f", size = 9734017, upload-time = "2025-07-07T12:05:08.498Z" }, + { url = "https://files.pythonhosted.org/packages/df/a8/803c280419a7312e2472969fe72cf461c1210a27770a662cbe3b5cd7c6fe/statsmodels-0.14.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:145bc39b2cb201efb6c83cc3f2163c269e63b0d4809801853dec6f440bd3bc37", size = 10459677, upload-time = "2025-07-07T14:21:51.809Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/25/edf20acbd670934b02cd9344e29c9a03ce040122324b3491bb075ae76b2d/statsmodels-0.14.5-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7c14fb2617bb819fb2532e1424e1da2b98a3419a80e95f33365a72d437d474e", size = 10678631, upload-time = "2025-07-07T14:22:05.496Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/8b1e38310272e766abd6093607000a81827420a3348f09eff08a9e54cbaf/statsmodels-0.14.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1e9742d8a5ac38a3bfc4b7f4b0681903920f20cbbf466d72b1fd642033846108", size = 10699273, upload-time = "2025-07-07T14:22:19.487Z" }, + { url = "https://files.pythonhosted.org/packages/d1/6f/6de51f1077b7cef34611f1d6721392ea170153251b4d977efcf6d100f779/statsmodels-0.14.5-cp310-cp310-win_amd64.whl", hash = "sha256:1cab9e6fce97caf4239cdb2df375806937da5d0b7ba2699b13af33a07f438464", size = 9644785, upload-time = "2025-07-07T12:05:20.927Z" }, + { url = "https://files.pythonhosted.org/packages/14/30/fd49902b30416b828de763e161c0d6e2cc04d119ae4fbdd3f3b43dc8f1be/statsmodels-0.14.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4b7091a8442076c708c926de3603653a160955e80a2b6d931475b7bb8ddc02e5", size = 10053330, upload-time = "2025-07-07T12:07:39.689Z" }, + { url = "https://files.pythonhosted.org/packages/ca/c1/2654541ff6f5790d01d1e5ba36405fde873f4a854f473e90b4fe56b37333/statsmodels-0.14.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:128872be8f3208f4446d91ea9e4261823902fc7997fee7e1a983eb62fd3b7c6e", size = 9735555, upload-time = "2025-07-07T12:13:28.935Z" }, + { url = "https://files.pythonhosted.org/packages/ce/da/6ebb64d0db4e86c0d2d9cde89e03247702da0ab191789f7813d4f9a348da/statsmodels-0.14.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f2ad5aee04ae7196c429df2174df232c057e478c5fa63193d01c8ec9aae04d31", size = 10307522, upload-time = "2025-07-07T14:22:32.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/49/ac803ca093ec3845184a752a91cd84511245e1f97103b15cfe32794a3bb0/statsmodels-0.14.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f402fc793458dd6d96e099acb44cd1de1428565bf7ef3030878a8daff091f08a", size = 10474665, upload-time = "2025-07-07T14:22:46.011Z" }, + { url = "https://files.pythonhosted.org/packages/f0/c8/ae82feb00582f4814fac5d2cb3ec32f93866b413cf5878b2fe93688ec63c/statsmodels-0.14.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:26c028832730aebfbfd4e7501694e1f9ad31ec8536e776716673f4e7afd4059a", size = 10713120, upload-time = "2025-07-07T14:23:00.067Z" }, + { url = "https://files.pythonhosted.org/packages/05/ac/4276459ea71aa46e2967ea283fc88ee5631c11f29a06787e16cf4aece1b8/statsmodels-0.14.5-cp311-cp311-win_amd64.whl", hash = "sha256:ec56f771d9529cdc17ed2fb2a950d100b6e83a7c5372aae8ac5bb065c474b856", size = 9640980, upload-time = "2025-07-07T12:05:33.085Z" }, + { url = "https://files.pythonhosted.org/packages/5f/a5/fcc4f5f16355660ce7a1742e28a43e3a9391b492fc4ff29fdd6893e81c05/statsmodels-0.14.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:37e7364a39f9aa3b51d15a208c2868b90aadb8412f868530f5cba9197cb00eaa", size = 10042891, upload-time = "2025-07-07T12:13:41.671Z" }, + { url = "https://files.pythonhosted.org/packages/1c/6f/db0cf5efa48277ac6218d9b981c8fd5e63c4c43e0d9d65015fdc38eed0ef/statsmodels-0.14.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4263d7f4d0f1d5ac6eb4db22e1ee34264a14d634b9332c975c9d9109b6b46e12", size = 9698912, upload-time = "2025-07-07T12:07:54.674Z" }, + { url = "https://files.pythonhosted.org/packages/4a/93/4ddc3bc4a59c51e6a57c49df1b889882c40d9e141e855b3517f6a8de3232/statsmodels-0.14.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:86224f6e36f38486e471e75759d241fe2912d8bc25ab157d54ee074c6aedbf45", size = 10237801, upload-time = "2025-07-07T14:23:12.593Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/de/dc6bf2f6e8c8eb4c5815560ebdbdf2d69a767bc0f65fde34bc086cf5b36d/statsmodels-0.14.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c3dd760a6fa80cd5e0371685c697bb9c2c0e6e1f394d975e596a1e6d0bbb9372", size = 10424154, upload-time = "2025-07-07T14:23:25.365Z" }, + { url = "https://files.pythonhosted.org/packages/16/4f/2d5a8d14bebdf2b03b3ea89b8c6a2c837bb406ba5b7a41add8bd303bce29/statsmodels-0.14.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6264fb00e02f858b86bd01ef2dc05055a71d4a0cc7551b9976b07b0f0e6cf24f", size = 10652915, upload-time = "2025-07-07T14:23:39.337Z" }, + { url = "https://files.pythonhosted.org/packages/df/4c/2feda3a9f0e17444a84ba5398ada6a4d2e1b8f832760048f04e2b8ea0c41/statsmodels-0.14.5-cp312-cp312-win_amd64.whl", hash = "sha256:b2ed065bfbaf8bb214c7201656df840457c2c8c65e1689e3eb09dc7440f9c61c", size = 9611236, upload-time = "2025-07-07T12:08:06.794Z" }, + { url = "https://files.pythonhosted.org/packages/84/fd/4c374108cf108b3130240a5b45847a61f70ddf973429044a81a05189b046/statsmodels-0.14.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:906263134dd1a640e55ecb01fda4a9be7b9e08558dba9e4c4943a486fdb0c9c8", size = 10013958, upload-time = "2025-07-07T14:35:01.04Z" }, + { url = "https://files.pythonhosted.org/packages/5a/36/bf3d7f0e36acd3ba9ec0babd79ace25506b6872780cbd710fb7cd31f0fa2/statsmodels-0.14.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9118f76344f77cffbb3a9cbcff8682b325be5eed54a4b3253e09da77a74263d3", size = 9674243, upload-time = "2025-07-07T12:08:22.571Z" }, + { url = "https://files.pythonhosted.org/packages/90/ce/a55a6f37b5277683ceccd965a5828b24672bbc427db6b3969ae0b0fc29fb/statsmodels-0.14.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9dc4ee159070557c9a6c000625d85f653de437772fe7086857cff68f501afe45", size = 10219521, upload-time = "2025-07-07T14:23:52.646Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/48/973da1ee8bc0743519759e74c3615b39acdc3faf00e0a0710f8c856d8c9d/statsmodels-0.14.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a085d47c8ef5387279a991633883d0e700de2b0acc812d7032d165888627bef", size = 10453538, upload-time = "2025-07-07T14:24:06.959Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d6/18903fb707afd31cf1edaec5201964dbdacb2bfae9a22558274647a7c88f/statsmodels-0.14.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9f866b2ebb2904b47c342d00def83c526ef2eb1df6a9a3c94ba5fe63d0005aec", size = 10681584, upload-time = "2025-07-07T14:24:21.038Z" }, + { url = "https://files.pythonhosted.org/packages/44/d6/80df1bbbfcdc50bff4152f43274420fa9856d56e234d160d6206eb1f5827/statsmodels-0.14.5-cp313-cp313-win_amd64.whl", hash = "sha256:2a06bca03b7a492f88c8106103ab75f1a5ced25de90103a89f3a287518017939", size = 9604641, upload-time = "2025-07-07T12:08:36.23Z" }, + { url = "https://files.pythonhosted.org/packages/39/2d/3ab5a8e736b94a91434a70dcbdc4363775711ef17c733e6bde5f24cb2f62/statsmodels-0.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b23b8f646dd78ef5e8d775d879208f8dc0a73418b41c16acac37361ff9ab7738", size = 10077385, upload-time = "2025-07-07T12:13:55.07Z" }, + { url = "https://files.pythonhosted.org/packages/44/ec/091dc1e69bbc84139e3409e45ac26e285ef41eb67116d13e094cdde7804d/statsmodels-0.14.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e5e26b21d2920905764fb0860957d08b5ba2fae4466ef41b1f7c53ecf9fc7fa", size = 9752723, upload-time = "2025-07-07T12:08:52.238Z" }, + { url = "https://files.pythonhosted.org/packages/72/0a/0ab3a900fc3245ebdaaca59018567b1e23bcab13c9eea2d7b3d8ffcbb82e/statsmodels-0.14.5-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a060c7e0841c549c8ce2825fd6687e6757e305d9c11c9a73f6c5a0ce849bb69", size = 10470566, upload-time = "2025-07-07T14:33:03.356Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/58/08e21dda0d52e4119b0e1eab8e865ce3e9c6bf59f0f879a9448deb827e83/statsmodels-0.14.5-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56da20def5350d676388213a330fd40ed15d0e8dd0bb1b92c0e4b0f2a65d3ad2", size = 10678264, upload-time = "2025-07-07T14:33:17.141Z" }, + { url = "https://files.pythonhosted.org/packages/fe/7d/3608f14237daccc0f3116b006ee3a42ca0e4dbe296496950624934138171/statsmodels-0.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:afb37ca1d70d99b5fd876e8574ea46372298ae0f0a8b17e4cf0a9afd2373ae62", size = 9658081, upload-time = "2025-07-07T12:09:04.856Z" }, +] + [[package]] name = "tabulate" version = "0.9.0" @@ -4717,6 +4751,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154, upload-time = "2024-03-12T14:34:36.569Z" }, ] +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + [[package]] name = "tinycss2" version = "1.4.0" @@ -4851,6 +4894,12 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/c5/3f/b0e8db149896005adc938a1e7f371d6d7e9eca4053a29b108978ed15e0c2/types_python_dateutil-2.9.0.20250516-py3-none-any.whl", hash = "sha256:2b2b3f57f9c6a61fba26a9c0ffb9ea5681c9b83e69cd897c6b5f668d9c0cab93", size = 14356, upload-time = "2025-05-16T03:06:57.249Z" }, ] +[[package]] +name = "typing" +version = "3.7.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/d9/6eebe19d46bd05360c9a9aae822e67a80f9242aabbfc58b641b957546607/typing-3.7.4.3.tar.gz", hash = "sha256:1187fb9c82fd670d10aa07bbb6cfcfe4bdda42d6fab8d5134f04e8c4d0b71cc9", size = 78592, upload-time = "2020-07-13T23:19:43.758Z" } + [[package]] name = "typing-extensions" version = "4.14.0"