Commit 9f6c8f3

[RAPTOR-12324] Using logging as best practice in model_templates (#1407)
* [RAPTOR-12324] Using logging as best practice in model_templates
* [RAPTOR-12324] Updated log formatting
1 parent 26b64dd commit 9f6c8f3

8 files changed: +69 -39 lines changed

model_templates/gpu_nim_textgen/custom.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@


 def load_model(code_dir: str):
-    # print(f"Downloading model to {MODEL_DIR}...")
+    # logger.info("Downloading model to %s...", MODEL_DIR)

     # Here is where you can put code that downloads the model artifacts
     # from an internal source. See the official documentation for more details:

model_templates/gpu_vllm_textgen/custom.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@


 def load_model(code_dir: str):
-    # print(f"Downloading model to {MODEL_DIR}...")
+    # logger.info("Downloading model to %s...", MODEL_DIR)

     # Add custom code to download supported OSS LLM here, otherwise we will
     # download the weights from the HuggingFace Hub based on the model name
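
Both textgen templates change only a commented-out example line, so the new comment presumes a module-level logger and a MODEL_DIR constant defined elsewhere in the template. A minimal sketch of the pattern the comment points at (the MODEL_DIR value here is a hypothetical stand-in, not from this commit):

import logging

logger = logging.getLogger(__name__)
MODEL_DIR = "/tmp/model-weights"  # hypothetical download target, for illustration only


def load_model(code_dir: str):
    # With %s placeholders, interpolation happens only if the record is
    # actually emitted; the old f-string was built before print() even ran.
    logger.info("Downloading model to %s...", MODEL_DIR)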

model_templates/python3_pytorch_multiclass/custom.py

Lines changed: 9 additions & 4 deletions
@@ -8,6 +8,7 @@
 """
 This example shows how to create a multiclass neural net with pytorch
 """
+import logging
 import os
 import pickle
 from typing import List, Optional, Any, Dict
@@ -20,6 +21,10 @@
 from preprocessing import dense_preprocessing_pipeline
 from model_utils import build_classifier, train_classifier, save_torch_model

+
+logger = logging.getLogger(__name__)
+
+
 preprocessor = None


@@ -76,12 +81,12 @@ def fit(
     so that the trained object can be used during scoring.
     """

-    print("Fitting Preprocessing pipeline")
+    logger.info("Fitting Preprocessing pipeline")
     preprocessor = dense_preprocessing_pipeline.fit(X)
     lb = LabelEncoder().fit(y)

     # write out the class labels file
-    print("Serializing preprocessor and class labels")
+    logger.info("Serializing preprocessor and class labels")
     with open(os.path.join(output_dir, "class_labels.txt"), mode="w") as f:
         f.write("\n".join(str(label) for label in lb.classes_))

@@ -93,15 +98,15 @@ def fit(
     with open(os.path.join(output_dir, "preprocessor.pkl"), mode="wb") as f:
         pickle.dump(preprocessor, f)

-    print("Transforming input data")
+    logger.info("Transforming input data")
     X = preprocessor.transform(X)
     y = lb.transform(y)

     # For reproducible results
     torch.manual_seed(0)

     estimator, optimizer, criterion = build_classifier(X, len(lb.classes_))
-    print("Training classifier")
+    logger.info("Training classifier")
     train_classifier(X, y, estimator, optimizer, criterion)
     artifact_name = "artifact.pth"
     save_torch_model(estimator, output_dir, artifact_name)
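
A logger obtained via logging.getLogger(__name__) emits nothing below WARNING until a handler is configured, and whether the DRUM runtime wires one up for template code is environment-dependent. A minimal sketch of enabling the new INFO messages when exercising the template standalone (the format string is an assumption, not part of this commit):

import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
)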

model_templates/python3_sklearn_runtime_params/custom.py

Lines changed: 21 additions & 13 deletions
@@ -4,47 +4,55 @@
 This is proprietary source code of DataRobot, Inc. and its affiliates.
 Released under the terms of DataRobot Tool and Utility Agreement.
 """
+import logging
+
 # Use this helper class to access the runtime parameter values in your model
 from datarobot_drum import RuntimeParameters


+logger = logging.getLogger(__name__)
+
+
 # This is a naive function so as to not dump the full credential values
 # during this demonstration.
 def mask(value, visible=3):
     return value[:visible] + ("*" * len(value[visible:]))


 def transform(data, model):
-    print("=" * 40)
-    print("Loading the following Runtime Parameters:")
+    logger.info("=" * 40)
     option1 = RuntimeParameters.get("option1")
-    print(f"\toption1: {option1}")
     option2 = RuntimeParameters.get("option2")
-    print(f"\toption2: {option2}")
     option3 = RuntimeParameters.get("option3")
-    print(f"\toption3: {option3}")
+    logger.info(
+        "Loading the following Runtime Parameters: "
+        f"option1: {option1}, option2: {option2}, option3: {option3}",
+    )

     credential = RuntimeParameters.get("encryption_key")
     if credential is not None:
         credential_type = credential.pop("credentialType")
-        print(
-            f"\tapi_key(type={credential_type}): "
-            + str({k: mask(v) for k, v in credential.items()})
+        logger.info(
+            "Using credentials api_key: ",
+            extra={
+                "credential_type": credential_type,
+                "api_key": str({k: mask(v) for k, v in credential.items()}),
+            },
         )
     else:
-        print("No credential data set")
+        logger.info("No credential data set")

     # boolean runtime param
     bool_var = RuntimeParameters.get("bool_var")
-    print(f"\tbool_var: {bool_var}")
+    logger.info("\tbool_var: %s", bool_var)

     # numeric runtime param
     number1 = RuntimeParameters.get("number1")
-    print(f"\tnumber1: {number1}")
+    logger.info("\tnumber1: %s", number1)
     number2 = RuntimeParameters.get("number2")
-    print(f"\tnumber2: {number2}")
+    logger.info("\tnumber2: %s", number2)

-    print("=" * 40)
+    logger.info("=" * 40)

     # This transform function is just for illustrative purposes so just
     # return the data back unaltered.
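
One caveat about the new extra={...} call: fields passed through extra become attributes on the LogRecord and are silently dropped by the default formatter, so credential_type and api_key only reach the output if a formatter looks for them. A minimal sketch of one way to surface such fields (this formatter is illustrative, not part of the commit):

import logging


class ExtraFormatter(logging.Formatter):
    # Attribute names present on every LogRecord, captured from a throwaway record.
    _STANDARD = set(vars(logging.LogRecord("", 0, "", 0, "", (), None)))

    def format(self, record):
        base = super().format(record)
        extras = {
            k: v
            for k, v in vars(record).items()
            if k not in self._STANDARD and k not in ("message", "asctime")
        }
        return f"{base} {extras}" if extras else base


handler = logging.StreamHandler()
handler.setFormatter(ExtraFormatter("%(levelname)s %(name)s: %(message)s"))
logging.getLogger().addHandler(handler)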

model_templates/python3_unstructured/custom.py

Lines changed: 11 additions & 7 deletions
@@ -4,27 +4,31 @@
 This is proprietary source code of DataRobot, Inc. and its affiliates.
 Released under the terms of DataRobot Tool and Utility Agreement.
 """
+import logging

 from io import BytesIO
 import werkzeug


+logger = logging.getLogger(__name__)
+
+
 def load_model(input_dir):
     return "dummy"


 def score_unstructured(model, data, query, **kwargs):
-    print("Model: ", model)
-    print("Incoming content type params: ", kwargs)
-    print("Incoming data type: ", type(data))
-    print("Incoming data: ", data)
+    logger.info("Running scoring for unstructured model: %s", model)
+    logger.info("Incoming content type params: %s", kwargs)
+    logger.info("Incoming data type: %s", type(data))
+    logger.info("Incoming data: %s", data)

     mlops = kwargs.get("mlops")
-    print(f"MLOps supported: {mlops is not None}")
+    logger.info("MLOps supported: %s", mlops is not None)

     headers = kwargs.get("headers")
-    print("Incoming request headers: ", headers)
-    print("Incoming query params: ", query)
+    logger.info("Incoming request headers: %s", headers)
+    logger.info("Incoming query params: %s", query)

     if headers and "multipart/form-data" in headers.get("Content-Type"):
         # For more information refer to:
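
The switch from f-strings to %s placeholders is the lazy-formatting half of the best practice in the commit title: logging stringifies the arguments only if the record clears the logger's level, whereas an f-string is built unconditionally. A small self-contained illustration (the Expensive class is hypothetical):

import logging

logger = logging.getLogger(__name__)


class Expensive:
    def __str__(self):
        # Imagine serializing a large request payload here.
        return "<big payload>"


# __str__ runs only if INFO is enabled for this logger; an f-string
# would have built the full message string either way.
logger.info("Incoming data: %s", Expensive())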

model_templates/python3_unstructured_with_mlops_reporting/custom.py

Lines changed: 10 additions & 7 deletions
@@ -9,27 +9,30 @@
 the usage of the `mlops` instance.
 The
 """
+import logging

 import pickle
 import time
 from pathlib import Path
-import sys

 import pandas as pd
 import tempfile


+logger = logging.getLogger(__name__)
+
+
 def load_model(input_dir):
     model_path = str(Path(input_dir) / "model.pkl")
-    print(f"Loading model: {model_path}")
+    logger.info("Loading model: %s", model_path)
     return pickle.load(open(model_path, "rb"))


 def score_unstructured(model, data, query, **kwargs):
-    print(f"Model: {model} ", flush=True)
-    print(f"Incoming data type: {type(data)}", flush=True)
-    print(f"Incoming kwargs: {kwargs}", flush=True)
-    print(f"Incoming query params: {query}", flush=True)
+    logger.info("Running scoring for unstructured model: %s", model)
+    logger.info("Incoming data type: %s", type(data))
+    logger.info("Incoming kwargs: %s", kwargs)
+    logger.info("Incoming query params: %s", query)

     # The 'mlops' instance is available only when the 'MLOPS_REPORTING_FROM_UNSTRUCTURED_MODELS'
     # feature-flag is enabled.
@@ -51,7 +54,7 @@ def score_unstructured(model, data, query, **kwargs):
             (end_time - start_time) * 1000,  # Prediction execution's time
         )
     else:
-        print("Skip mlops reporting because mlops is not enabled.", flush=True)
+        logger.info("Skip mlops reporting because mlops is not enabled.")

     reporting_predictions = _prepare_reporting_predictions(predictions_array)
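
The dropped flush=True arguments need no logging-side replacement: StreamHandler.emit() calls flush() after writing each record, so log lines are not held back by stream buffering the way buffered print() output can be. A minimal sketch:

import logging
import sys

# emit() flushes after every record, making an explicit flush redundant.
handler = logging.StreamHandler(sys.stderr)
logging.getLogger(__name__).addHandler(handler)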

model_templates/python_multi_codejen_reg_class/custom.py

Lines changed: 6 additions & 1 deletion
@@ -4,9 +4,14 @@
 This is proprietary source code of DataRobot, Inc. and its affiliates.
 Released under the terms of DataRobot Tool and Utility Agreement.
 """
+import logging
+
 from datarobot_drum.drum.language_predictors.java_predictor.java_predictor import JavaPredictor


+logger = logging.getLogger(__name__)
+
+
 class ScoringCodePredictor(JavaPredictor):
     def __init__(
         self,
@@ -65,7 +70,7 @@ def score(data, model, **kwargs):
         pred_df2 = model[1].predict(data)
         print(pred_df2)
     except Exception as e:
-        print(e)
+        logger.error(e, exc_info=True)

     # Predict and print on regression predictor
     pred_df3 = model[2].predict(data)
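
logger.error(e, exc_info=True) records the message together with the full traceback; inside an except block, logger.exception() is the stdlib shorthand for the same call. A minimal sketch (the ValueError is illustrative):

import logging

logger = logging.getLogger(__name__)

try:
    raise ValueError("scoring failed")
except Exception:
    # Logs at ERROR level with exc_info=True, i.e. message plus traceback.
    logger.exception("scoring failed")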

model_templates/triton_onnx_unstructured/client/datarobot-predict.py

Lines changed: 10 additions & 5 deletions
@@ -7,6 +7,7 @@
 We highly recommend that you update SSL certificates with:
     pip install -U "urllib3[secure]" certifi
 """
+import logging
 import sys
 from json import JSONDecoder

@@ -17,6 +18,10 @@
 import requests
 import json

+
+logger = logging.getLogger(__name__)
+
+
 # See README.md on how to set up those keys
 API_URL = "<DATAROBOT_API_URL>"
 API_KEY = "<DATAROBOT_API_KEY>"
@@ -135,18 +140,18 @@ def main(filename, deployment_id, mimetype, charset):

     data_size = sys.getsizeof(data)
     if data_size >= MAX_PREDICTION_FILE_SIZE_BYTES:
-        print(
-            ("Input file is too large: {} bytes. " "Max allowed size is: {} bytes.").format(
-                data_size, MAX_PREDICTION_FILE_SIZE_BYTES
-            )
+        logger.warning(
+            "Input file is too large: %s bytes. Max allowed size is: %s bytes.",
+            data_size,
+            MAX_PREDICTION_FILE_SIZE_BYTES,
         )
         return 1
     try:
         response = make_datarobot_deployment_unstructured_predictions(
             data, deployment_id, mimetype, charset
         )
     except DataRobotPredictionError as exc:
-        print(exc)
+        logger.error(exc, exc_info=True)
         return 1

     predictions = binary_response_as_numpy(response)
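
Because datarobot-predict.py is a standalone client script, only WARNING and above reach stderr by default (via logging's last-resort handler), so the new logger.warning and logger.error calls stay visible without any setup, but INFO output would not. A minimal sketch of wiring up a handler at the entry point, under that assumption (the original argument handling is elided):

import logging

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
    # sys.exit(main(filename, deployment_id, mimetype, charset))  # original entry point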
