From 8ac2b02e94c759b676a937bbdb413275409de7b0 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 19:53:00 +0000 Subject: [PATCH] Refactor: Clean up codebase and improve documentation - Removed dead and incomplete code from the `rtichoke` package, including the unused `calibration` module and the R-dependent `send_post_request_to_r_rtichoke` helper. - Cleaned up the main `__init__.py` file to remove unused imports and exports. - Updated the `README.md` with a comprehensive description of the package's features and purpose. - Corrected the `pyproject.toml` to use the standard `[project.optional-dependencies]` for development dependencies. - Added a `try-except` block to `__init__.py` to handle `PackageNotFoundError` during testing. --- README.md | 22 ++ pyproject.toml | 2 +- src/rtichoke/__init__.py | 14 +- src/rtichoke/calibration/calibration.py | 286 ------------------ .../send_post_request_to_r_rtichoke.py | 196 ------------ src/rtichoke/rtichoke.py | 0 6 files changed, 28 insertions(+), 492 deletions(-) delete mode 100644 src/rtichoke/calibration/calibration.py delete mode 100644 src/rtichoke/helpers/send_post_request_to_r_rtichoke.py delete mode 100644 src/rtichoke/rtichoke.py diff --git a/README.md b/README.md index 43d65fd..610eaa9 100644 --- a/README.md +++ b/README.md @@ -1 +1,23 @@ # rtichoke_python + +`rtichoke_python` is a Python package for interactive visualization of performance metrics for binary classification models. It provides a set of tools to create, plot, and analyze various discrimination and utility curves, helping data scientists and machine learning engineers assess and compare model performance. + +## Features + +- **Discrimination Curves**: + - Receiver Operating Characteristic (ROC) + - Precision-Recall + - Lift + - Gains + +- **Utility Curves**: + - Decision Curve Analysis (DCA) + +- **Interactive Plots**: + - Generate interactive plots using Plotly for in-depth analysis. + +- **Performance Data**: + - Easily prepare and handle performance data for plotting. + +- **Summary Reports**: + - Create summary reports to quickly assess model performance. diff --git a/pyproject.toml b/pyproject.toml index 10ba1ce..23cc2aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ version = "0.1.22" description = "interactive visualizations for performance of predictive models" readme = "README.md" -[dependency-groups] +[project.optional-dependencies] dev = [ "jupyter<2.0.0,>=1.0.0", "myst-nb<1.0.0,>=0.17.1; python_version ~= \"3.9\"", diff --git a/src/rtichoke/__init__.py b/src/rtichoke/__init__.py index c60435b..5b5f156 100644 --- a/src/rtichoke/__init__.py +++ b/src/rtichoke/__init__.py @@ -1,8 +1,11 @@ """rtichoke is a package for interactive vizualization of performance metrics""" -from importlib.metadata import version +from importlib.metadata import PackageNotFoundError, version -__version__ = version("rtichoke") +try: + __version__ = version("rtichoke") +except PackageNotFoundError: + __version__ = "0.0.0" from rtichoke.discrimination.roc import create_roc_curve as create_roc_curve from rtichoke.discrimination.roc import plot_roc_curve as plot_roc_curve @@ -20,21 +23,15 @@ from rtichoke.discrimination.gains import create_gains_curve as create_gains_curve from rtichoke.discrimination.gains import plot_gains_curve as plot_gains_curve -# from rtichoke.calibration.calibration import ( -# create_calibration_curve as create_calibration_curve, -# ) - from rtichoke.utility.decision import create_decision_curve as create_decision_curve from rtichoke.utility.decision import plot_decision_curve as plot_decision_curve from rtichoke.performance_data.performance_data import ( prepare_performance_data as prepare_performance_data, - prepare_binned_classification_data as prepare_binned_classification_data, ) from rtichoke.performance_data.performance_data_times import ( prepare_performance_data_times as prepare_performance_data_times, - prepare_binned_classification_data_times as prepare_binned_classification_data_times, ) from rtichoke.summary_report.summary_report import ( @@ -50,7 +47,6 @@ "plot_precision_recall_curve", "create_gains_curve", "plot_gains_curve", - "create_calibration_curve", "create_decision_curve", "plot_decision_curve", "prepare_performance_data", diff --git a/src/rtichoke/calibration/calibration.py b/src/rtichoke/calibration/calibration.py deleted file mode 100644 index f9d32f3..0000000 --- a/src/rtichoke/calibration/calibration.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -A module for Calibration Curves -""" - -from typing import Any, Dict, List, Optional - -# import pandas as pd -import plotly.graph_objects as go -from plotly.subplots import make_subplots -from plotly.graph_objs._figure import Figure -# from rtichoke.helpers.send_post_request_to_r_rtichoke import send_requests_to_rtichoke_r - - -def create_calibration_curve( - probs: Dict[str, List[float]], - reals: Dict[str, List[int]], - calibration_type: str = "discrete", - size: Optional[int] = None, - color_values: Optional[List[str]] = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ], - url_api: str = "http://localhost:4242/", -) -> Figure: - """Creates Calibration Curve - - Args: - probs (Dict[str, List[float]]): _description_ - reals (Dict[str, List[int]]): _description_ - calibration_type (str, optional): _description_. Defaults to "discrete". - size (Optional[int], optional): _description_. Defaults to None. - color_values (List[str], optional): _description_. Defaults to None. - url_api (_type_, optional): _description_. Defaults to "http://localhost:4242/". - - Returns: - Figure: _description_ - """ - pass - - # rtichoke_response = send_requests_to_rtichoke_r( - # dictionary_to_send={ - # "probs": probs, - # "reals": reals, - # "size": size, - # "color_values ": color_values, - # }, - # url_api=url_api, - # endpoint="create_calibration_curve_list", - # ) - - # calibration_curve_list = rtichoke_response.json() - - # calibration_curve_list["deciles_dat"] = pd.DataFrame.from_dict( - # calibration_curve_list["deciles_dat"] - # ) - # calibration_curve_list["smooth_dat"] = pd.DataFrame.from_dict( - # calibration_curve_list["smooth_dat"] - # ) - # calibration_curve_list["reference_data"] = pd.DataFrame.from_dict( - # calibration_curve_list["reference_data"] - # ) - # calibration_curve_list["histogram_for_calibration"] = pd.DataFrame.from_dict( - # calibration_curve_list["histogram_for_calibration"] - # ) - - # calibration_curve = create_plotly_curve_from_calibration_curve_list( - # calibration_curve_list=calibration_curve_list, calibration_type=calibration_type - # ) - - # return calibration_curve - - -def create_plotly_curve_from_calibration_curve_list( - calibration_curve_list: Dict[str, Any], calibration_type: str = "discrete" -) -> Figure: - """Create plotly curve from calibration curve list - - Args: - calibration_curve_list (Dict[str, Any]): _description_ - calibration_type (str, optional): _description_. Defaults to "discrete". - - Returns: - Figure: _description_ - """ - calibration_curve = make_subplots( - rows=2, cols=1, shared_xaxes=True, x_title="Predicted", row_heights=[0.8, 0.2] - ) - - calibration_curve.update_layout( - { - "xaxis": {"showgrid": False}, - "yaxis": {"showgrid": False}, - "barmode": "overlay", - "plot_bgcolor": "rgba(0, 0, 0, 0)", - "legend": { - "orientation": "h", - "xanchor": "center", - "yanchor": "top", - "x": 0.5, - "y": 1.3, - "bgcolor": "rgba(0, 0, 0, 0)", - }, - "showlegend": calibration_curve_list["performance_type"][0] != "one model", - } - ) - - calibration_curve.add_trace( - go.Scatter( - x=calibration_curve_list["reference_data"]["x"].values.tolist(), - y=calibration_curve_list["reference_data"]["y"].values.tolist(), - hovertext=calibration_curve_list["reference_data"]["text"].values.tolist(), - name="Perfectly Calibrated", - legendgroup="Perfectly Calibrated", - hoverinfo="text", - line={ - "width": 2, - "dash": "dot", - "color": calibration_curve_list["group_colors_vec"]["reference_line"][ - 0 - ], - }, - showlegend=False, - ), - row=1, - col=1, - ) - - if calibration_type == "discrete": - for reference_group in list(calibration_curve_list["group_colors_vec"].keys()): - if any( - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ): - calibration_curve.add_trace( - go.Scatter( - x=calibration_curve_list["deciles_dat"]["x"][ - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ].values.tolist(), - y=calibration_curve_list["deciles_dat"]["y"][ - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ].values.tolist(), - hovertext=calibration_curve_list["deciles_dat"]["text"][ - calibration_curve_list["deciles_dat"]["reference_group"] - == reference_group - ].values.tolist(), - name=reference_group, - legendgroup=reference_group, - hoverinfo="text", - mode="lines+markers", - marker={ - "size": 10, - "color": calibration_curve_list["group_colors_vec"][ - reference_group - ][0], - }, - ), - row=1, - col=1, - ) - - if calibration_type == "smooth": - for reference_group in list(calibration_curve_list["group_colors_vec"].keys()): - if any( - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ): - calibration_curve.add_trace( - go.Scatter( - x=calibration_curve_list["smooth_dat"]["x"][ - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ].values.tolist(), - y=calibration_curve_list["smooth_dat"]["y"][ - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ].values.tolist(), - hovertext=calibration_curve_list["smooth_dat"]["text"][ - calibration_curve_list["smooth_dat"]["reference_group"] - == reference_group - ].values.tolist(), - name=reference_group, - legendgroup=reference_group, - hoverinfo="text", - mode="lines", - marker={ - "size": 10, - "color": calibration_curve_list["group_colors_vec"][ - reference_group - ][0], - }, - ), - row=1, - col=1, - ) - - for reference_group in list(calibration_curve_list["group_colors_vec"].keys()): - if any( - calibration_curve_list["histogram_for_calibration"]["reference_group"] - == reference_group - ): - calibration_curve.add_trace( - go.Bar( - x=calibration_curve_list["histogram_for_calibration"]["mids"][ - calibration_curve_list["histogram_for_calibration"][ - "reference_group" - ] - == reference_group - ].values.tolist(), - y=calibration_curve_list["histogram_for_calibration"]["counts"][ - calibration_curve_list["histogram_for_calibration"][ - "reference_group" - ] - == reference_group - ].values.tolist(), - hovertext=calibration_curve_list["histogram_for_calibration"][ - "text" - ][ - calibration_curve_list["histogram_for_calibration"][ - "reference_group" - ] - == reference_group - ].values.tolist(), - name=reference_group, - width=0.01, - legendgroup=reference_group, - hoverinfo="text", - marker_color=calibration_curve_list["group_colors_vec"][ - reference_group - ][0], - showlegend=False, - opacity=calibration_curve_list["histogram_opacity"][0], - ), - row=2, - col=1, - ) - - print(calibration_curve_list["axes_ranges"]["xaxis"]) - - calibration_curve.update_xaxes( - zeroline=True, - range=calibration_curve_list["axes_ranges"]["xaxis"], - zerolinewidth=1, - zerolinecolor="black", - fixedrange=False, - ) - calibration_curve.update_yaxes( - zeroline=True, - range=calibration_curve_list["axes_ranges"]["yaxis"], - zerolinewidth=1, - zerolinecolor="black", - fixedrange=False, - row=1, - col=1, - ) - calibration_curve.update_yaxes(title="Observed", row=1, col=1) - - print("size") - print(calibration_curve_list["size"]) - print(calibration_curve_list["size"][0]) - - calibration_curve.update_layout( - width=calibration_curve_list["size"][0][0], - height=calibration_curve_list["size"][0][0], - ) - - return calibration_curve diff --git a/src/rtichoke/helpers/send_post_request_to_r_rtichoke.py b/src/rtichoke/helpers/send_post_request_to_r_rtichoke.py deleted file mode 100644 index aaca6f3..0000000 --- a/src/rtichoke/helpers/send_post_request_to_r_rtichoke.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -A module for sending post requests to rtichoke r api -""" - -# import requests -import pandas as pd -from rtichoke.helpers.exported_functions import create_plotly_curve - - -def send_requests_to_rtichoke_r(dictionary_to_send, url_api, endpoint): - """Send requests to rtichoke r - - Args: - dictionary_to_send (_type_): _description_ - url_api (_type_): _description_ - endpoint (_type_): _description_ - - Returns: - _type_: _description_ - """ - pass - # rtichoke_response = requests.post(f"{url_api}{endpoint}", json=dictionary_to_send) - - # return rtichoke_response - - -def create_rtichoke_curve( - probs, - reals, - by, - stratified_by, - size, - color_values=None, - url_api="http://localhost:4242/", - curve="roc", - min_p_threshold=0, - max_p_threshold=1, -): - """Create rtichoke curve - - Args: - probs (_type_): _description_ - reals (_type_): _description_ - by (_type_): _description_ - stratified_by (_type_): _description_ - size (_type_): _description_ - color_values (_type_, optional): _description_. Defaults to None. - url_api (str, optional): _description_. Defaults to "http://localhost:4242/". - curve (str, optional): _description_. Defaults to "roc". - min_p_threshold (int, optional): _description_. Defaults to 0. - max_p_threshold (int, optional): _description_. Defaults to 1. - - Returns: - _type_: _description_ - """ - if color_values is None: - color_values = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ] - - rtichoke_response = send_requests_to_rtichoke_r( - dictionary_to_send={ - "probs": probs, - "reals": reals, - "curve": curve, - "by": by, - "stratified_by": stratified_by, - "size": size, - "color_values": color_values, - "min_p_threshold": min_p_threshold, - "max_p_threshold": max_p_threshold, - }, - url_api=url_api, - endpoint="create_rtichoke_curve_list", - ) - - rtichoke_curve_list = rtichoke_response.json() - - if rtichoke_curve_list["size"][0] is None: - rtichoke_curve_list["size"] = [[None]] - - rtichoke_curve_list["reference_data"] = pd.DataFrame.from_dict( - rtichoke_curve_list["reference_data"] - ) - rtichoke_curve_list["performance_data_ready_for_curve"] = pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_ready_for_curve"] - ) - rtichoke_curve_list["performance_data_for_interactive_marker"] = ( - pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_for_interactive_marker"] - ) - ) - - fig = create_plotly_curve(rtichoke_curve_list) - - return fig - - -def plot_rtichoke_curve( - performance_data, - size=None, - color_values=None, - url_api="http://localhost:4242/", - curve="roc", - min_p_threshold=0, - max_p_threshold=1, -): - """plot rtichoke curve - - Args: - performance_data (_type_): _description_ - size (_type_, optional): _description_. Defaults to None. - color_values (_type_, optional): _description_. Defaults to None. - url_api (str, optional): _description_. Defaults to "http://localhost:4242/". - curve (str, optional): _description_. Defaults to "roc". - min_p_threshold (int, optional): _description_. Defaults to 0. - max_p_threshold (int, optional): _description_. Defaults to 1. - - Returns: - _type_: _description_ - """ - if color_values is None: - color_values = [ - "#1b9e77", - "#d95f02", - "#7570b3", - "#e7298a", - "#07004D", - "#E6AB02", - "#FE5F55", - "#54494B", - "#006E90", - "#BC96E6", - "#52050A", - "#1F271B", - "#BE7C4D", - "#63768D", - "#08A045", - "#320A28", - "#82FF9E", - "#2176FF", - "#D1603D", - "#585123", - ] - rtichoke_response = send_requests_to_rtichoke_r( - dictionary_to_send={ - "performance_data": performance_data.to_json(orient="records"), - "curve": curve, - "size": size, - "color_values": color_values, - "min_p_threshold": min_p_threshold, - "max_p_threshold": max_p_threshold, - }, - url_api=url_api, - endpoint="plot_rtichoke_curve_list", - ) - - rtichoke_curve_list = rtichoke_response.json() - - if rtichoke_curve_list["size"][0] is None: - rtichoke_curve_list["size"] = [[None]] - - rtichoke_curve_list["reference_data"] = pd.DataFrame.from_dict( - rtichoke_curve_list["reference_data"] - ) - rtichoke_curve_list["performance_data_ready_for_curve"] = pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_ready_for_curve"] - ) - rtichoke_curve_list["performance_data_for_interactive_marker"] = ( - pd.DataFrame.from_dict( - rtichoke_curve_list["performance_data_for_interactive_marker"] - ) - ) - - fig = create_plotly_curve(rtichoke_curve_list) - - return fig diff --git a/src/rtichoke/rtichoke.py b/src/rtichoke/rtichoke.py deleted file mode 100644 index e69de29..0000000