Commit 121a0c3

[RAPTOR-13895] Implement inline predictor based on DRUM score (#1504)
* Demo
* Add moderations as dep to tests
* type annotation
* Move moderations to a separate file and bump resources for harness
* No moderation test for now, it triples the duration of the test suite
* Proper skip
1 parent 03f9d14 commit 121a0c3

File tree

9 files changed: +336 -30 lines changed


custom_model_runner/datarobot_drum/drum/common.py

Lines changed: 12 additions & 0 deletions
@@ -4,6 +4,7 @@
 This is proprietary source code of DataRobot, Inc. and its affiliates.
 Released under the terms of DataRobot Tool and Utility Agreement.
 """
+
 import logging
 import os
 import sys
@@ -21,6 +22,8 @@
     PayloadFormat,
 )
 from datarobot_drum.drum.exceptions import DrumCommonException
+from datarobot_drum.drum.lazy_loading.lazy_loading_handler import LazyLoadingHandler
+from datarobot_drum.runtime_parameters.runtime_parameters import RuntimeParametersLoader
 from opentelemetry import trace, context
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
 from opentelemetry.sdk.resources import Resource
@@ -229,3 +232,12 @@ def extract_chat_response_attributes(response):
         # last completion wins
         attributes["gen_ai.completion"] = m.get("content")
     return attributes
+
+
+def setup_required_environment_variables(options):
+    if "runtime_params_file" in options and options.runtime_params_file:
+        loader = RuntimeParametersLoader(options.runtime_params_file, options.code_dir)
+        loader.setup_environment_variables()
+
+    if "lazy_loading_file" in options and options.lazy_loading_file:
+        LazyLoadingHandler.setup_environment_variables_from_values_file(options.lazy_loading_file)
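
As a quick illustration of the relocated helper (not part of the diff): a minimal sketch of calling it with a parsed-options object. The file paths and the use of argparse.Namespace are assumptions made for this example.

# Illustrative sketch only -- not part of this commit.
# argparse.Namespace supports the `"key" in options` membership checks used by the helper.
import argparse

from datarobot_drum.drum.common import setup_required_environment_variables

options = argparse.Namespace(
    runtime_params_file="runtime_params.yaml",  # hypothetical runtime-parameter values file
    code_dir="/path/to/custom_model",           # hypothetical custom model code directory
    lazy_loading_file=None,                     # skip lazy loading in this sketch
)
setup_required_environment_variables(options)   # exports runtime params as environment variables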

custom_model_runner/datarobot_drum/drum/drum.py

Lines changed: 7 additions & 5 deletions
@@ -803,10 +803,7 @@ def _prepare_prediction_server_or_batch_pipeline(self, run_language):
 
         return DrumUtils.render_file(functional_pipeline_filepath, replace_data)
 
-    def _run_predictions(self, stats_collector: Optional[StatsCollector] = None):
-        if self.run_mode not in [RunMode.SCORE, RunMode.SERVER]:
-            raise NotImplemented(f"The given run mode is supported here: {self.run_mode}")
-
+    def get_predictor_params(self):
         run_language = self._check_artifacts_and_get_run_language()
         infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(run_language)
 
@@ -815,12 +812,17 @@ def _run_predictions(self, stats_collector: Optional[StatsCollector] = None):
             raise DrumCommonException("Pipeline is empty")
         if "arguments" not in pipeline["pipe"][0]:
             raise DrumCommonException("Arguments are missing in the pipeline")
+        return pipeline["pipe"][0]["arguments"]
+
+    def _run_predictions(self, stats_collector: Optional[StatsCollector] = None):
+        if self.run_mode not in [RunMode.SCORE, RunMode.SERVER]:
+            raise NotImplemented(f"The given run mode is supported here: {self.run_mode}")
 
         self.logger.info(
             f">>> Start {ArgumentsOptions.MAIN_COMMAND} in the {self.run_mode.value} mode"
         )
 
-        params = pipeline["pipe"][0]["arguments"]
+        params = self.get_predictor_params()
         predictor = None
         try:
             if stats_collector:
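
The point of splitting out get_predictor_params: callers outside the CLI score/server flow can reuse the same pipeline arguments to build a predictor, which is what the new inline predictor module below does. A hedged sketch (the runtime object is assumed to be an already-initialized DrumRuntime with parsed options):

# Illustrative sketch only -- mirrors how the inline predictor below reuses the refactored method.
from datarobot_drum.drum.drum import CMRunner
from datarobot_drum.drum.root_predictors.generic_predictor import GenericPredictorComponent

cm_runner = CMRunner(runtime)                  # runtime: assumed pre-built DrumRuntime with options set
params = cm_runner.get_predictor_params()      # the pipeline["pipe"][0]["arguments"] dict
component = GenericPredictorComponent(params)  # builds and exposes the language predictor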

custom_model_runner/datarobot_drum/drum/main.py

Lines changed: 12 additions & 22 deletions
@@ -4,6 +4,7 @@
 This is proprietary source code of DataRobot, Inc. and its affiliates.
 Released under the terms of DataRobot Tool and Utility Agreement.
 """
+
 from datarobot_drum.drum.lazy_loading.lazy_loading_handler import LazyLoadingHandler
 
 #!/usr/bin/env python3
@@ -43,12 +44,15 @@
 import sys
 
 from datarobot_drum.drum.args_parser import CMRunnerArgsRegistry
-from datarobot_drum.drum.common import config_logging, setup_tracer
+from datarobot_drum.drum.common import (
+    config_logging,
+    setup_tracer,
+    setup_required_environment_variables,
+)
 from datarobot_drum.drum.enum import RunMode
 from datarobot_drum.drum.enum import ExitCodes
 from datarobot_drum.drum.exceptions import DrumSchemaValidationException
 from datarobot_drum.drum.runtime import DrumRuntime
-from datarobot_drum.runtime_parameters.exceptions import RuntimeParameterException
 from datarobot_drum.runtime_parameters.runtime_parameters import (
     RuntimeParametersLoader,
     RuntimeParameters,
@@ -92,7 +96,12 @@ def signal_handler(sig, frame):
 
         options = arg_parser.parse_args()
         CMRunnerArgsRegistry.verify_options(options)
-        _setup_required_environment_variables(options)
+
+        try:
+            setup_required_environment_variables(options)
+        except Exception as exc:
+            print(str(exc))
+            exit(255)
 
         if RuntimeParameters.has("CUSTOM_MODEL_WORKERS"):
             options.max_workers = RuntimeParameters.get("CUSTOM_MODEL_WORKERS")
@@ -112,24 +121,5 @@ def signal_handler(sig, frame):
             sys.exit(ExitCodes.SCHEMA_VALIDATION_ERROR.value)
 
 
-def _setup_required_environment_variables(options):
-    if "runtime_params_file" in options and options.runtime_params_file:
-        try:
-            loader = RuntimeParametersLoader(options.runtime_params_file, options.code_dir)
-            loader.setup_environment_variables()
-        except RuntimeParameterException as exc:
-            print(str(exc))
-            exit(255)
-
-    if "lazy_loading_file" in options and options.lazy_loading_file:
-        try:
-            LazyLoadingHandler.setup_environment_variables_from_values_file(
-                options.lazy_loading_file
-            )
-        except Exception as exc:
-            print(str(exc))
-            exit(255)
-
-
 if __name__ == "__main__":
     main()
Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
+"""
+Copyright 2025 DataRobot, Inc. and its affiliates.
+All rights reserved.
+This is proprietary source code of DataRobot, Inc. and its affiliates.
+Released under the terms of DataRobot Tool and Utility Agreement.
+
+Example:
+
+    import json
+
+    payload = json.loads(open("input.json", "r").read())
+    code_dir = (
+        '/datarobot-user-models/model_templates/python3_dummy_chat'
+    )
+
+    with drum_inline_predictor(target_type=TargetType.AGENTIC_WORKFLOW.value, custom_model_dir=code_dir,
+                               target_name='response') as predictor:
+        result = predictor.chat(payload)
+        print(result)
+
+"""
+
+import contextlib
+import os
+import tempfile
+from typing import Generator, List
+
+from datarobot_drum.drum.args_parser import CMRunnerArgsRegistry
+from datarobot_drum.drum.common import setup_required_environment_variables, setup_tracer
+from datarobot_drum.drum.drum import CMRunner
+from datarobot_drum.drum.language_predictors.base_language_predictor import BaseLanguagePredictor
+from datarobot_drum.drum.runtime import DrumRuntime
+from datarobot_drum.drum.root_predictors.generic_predictor import GenericPredictorComponent
+from datarobot_drum.runtime_parameters.runtime_parameters import RuntimeParameters
+
+
+@contextlib.contextmanager
+def drum_inline_predictor(
+    target_type: str, custom_model_dir: str, target_name: str, *cmd_args: List[str]
+) -> Generator[BaseLanguagePredictor, None, None]:
+    """
+    Drum run for a custom model code definition. Yields a predictor, ready to work with.
+    Caller can work with the predictor directly.
+
+    :param target_type: Target type.
+    :param custom_model_dir: Directory where the custom model code artifacts are located.
+    :param target_name: Name of the target
+    :param cmd_args: Extra command line arguments
+    :return:
+    """
+    with DrumRuntime() as runtime, tempfile.NamedTemporaryFile(mode="wb") as tf:
+        # setup
+
+        os.environ["TARGET_NAME"] = target_name
+        arg_parser = CMRunnerArgsRegistry.get_arg_parser()
+        CMRunnerArgsRegistry.extend_sys_argv_with_env_vars()
+        args = [
+            "score",
+            "--code-dir",
+            custom_model_dir,
+            # regular score is actually a CLI thing, so it expects input/output;
+            # we can ignore these as we hand over the predictor directly to the caller to do I/O.
+            "--input",
+            tf.name,
+            "--output",
+            tf.name,
+            "--target-type",
+            target_type,
+            *cmd_args,
+        ]
+        options = arg_parser.parse_args(args)
+        CMRunnerArgsRegistry.verify_options(options)
+        setup_required_environment_variables(options)
+
+        runtime.options = options
+        setup_tracer(RuntimeParameters, options)
+        runtime.cm_runner = CMRunner(runtime)
+        params = runtime.cm_runner.get_predictor_params()
+        predictor = GenericPredictorComponent(params)
+
+        yield predictor.predictor
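
Beyond the docstring example above, a hedged end-to-end sketch of driving the new context manager against a chat-capable model such as the echo custom.py added later in this commit. The import path for drum_inline_predictor is assumed (the new file's path is not shown here), and the code directory and messages are placeholders.

# Illustrative sketch only -- import path, code_dir, and payload are assumptions.
from datarobot_drum.drum.enum import TargetType
from datarobot_drum.drum.root_predictors.drum_inline_utils import drum_inline_predictor  # assumed module path

code_dir = "/path/to/python3_dummy_chat"  # placeholder custom model directory
payload = {
    "model": "datarobot_llm_id",
    "messages": [{"role": "user", "content": "Hello, DRUM"}],
}

with drum_inline_predictor(
    target_type=TargetType.TEXT_GENERATION.value,
    custom_model_dir=code_dir,
    target_name="response",
) as predictor:
    completion = predictor.chat(payload)  # assumes the hook returns a non-streaming ChatCompletion
    print(completion.choices[0].message.content)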

custom_model_runner/datarobot_drum/drum/root_predictors/generic_predictor.py

Lines changed: 4 additions & 2 deletions
@@ -5,7 +5,6 @@
 Released under the terms of DataRobot Tool and Utility Agreement.
 """
 import urllib
-from typing import Optional
 
 import werkzeug
 from datarobot_drum.drum.adapters.cli.drum_score_adapter import DrumScoreAdapter
@@ -39,6 +38,10 @@ def __init__(self, params: dict):
         )
         self._predictor = self._setup_predictor()
 
+    @property
+    def predictor(self):
+        return self._predictor
+
     def _setup_predictor(self):
         if self._run_language == RunLanguage.PYTHON:
             from datarobot_drum.drum.language_predictors.python_predictor.python_predictor import (
@@ -92,7 +95,6 @@ def _setup_predictor(self):
 
     def materialize(self):
         output_filename = self._params.get("output_filename")
-
         if self.cli_adapter.target_type == TargetType.UNSTRUCTURED:
             # TODO: add support to use cli_adapter for unstructured
             return self._materialize_unstructured(

requirements_test.txt

Lines changed: 2 additions & 1 deletion
@@ -9,5 +9,6 @@ retry
 scikit-learn==1.3.2
 scipy>=1.1,<2
 urllib3>=1.25.0,<2.0.0
+openai>=1.55.3
 # strictly not needed for testing, but used when updating environment
-bson
+bson
Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+"""
+Copyright 2025 DataRobot, Inc. and its affiliates.
+All rights reserved.
+This is proprietary source code of DataRobot, Inc. and its affiliates.
+Released under the terms of DataRobot Tool and Utility Agreement.
+"""
+import calendar
+import time
+from typing import Iterator
+
+from openai.types.chat import ChatCompletion
+from openai.types.chat import ChatCompletionChunk
+from openai.types.chat import ChatCompletionMessage
+from openai.types.chat import CompletionCreateParams
+from openai.types.chat.chat_completion import Choice
+from openai.types.model import Model
+
+from datarobot_drum import RuntimeParameters
+
+"""
+This example shows how to create a text generation model supporting OpenAI chat
+"""
+
+from typing import Any, Dict
+
+
+def get_supported_llm_models(model: Any):
+    """
+    Return a list of supported LLM models; response to /v1/models and OpenAI models.list().
+    If custom.py does not define this function, DRUM will return a list of either:
+    * the model defined in the LLM_ID runtime parameter, if that exists, or:
+    * an empty list
+
+    Parameters
+    ----------
+    model: a model ID to compare against; optional
+
+    Returns: list of openai.types.model.Model
+    -------
+
+    """
+    return [
+        Model(
+            id="datarobot_llm_id",
+            created=1744854432,
+            object="model",
+            owned_by="tester@datarobot.com",
+        )
+    ]
+
+
+def load_model(code_dir: str) -> Any:
+    """
+    Can be used to load supported models if your model has multiple artifacts, or for loading
+    models that **drum** does not natively support
+
+    Parameters
+    ----------
+    code_dir : is the directory where model artifact and additional code are provided, passed in
+
+    Returns
+    -------
+    If used, this hook must return a non-None value
+    """
+    return "dummy"
+
+
+def chat(
+    completion_create_params: CompletionCreateParams, model: Any
+) -> ChatCompletion | Iterator[ChatCompletionChunk]:
+    """
+    This hook supports chat completions; see https://platform.openai.com/docs/api-reference/chat/create.
+    In this non-streaming example, the "LLM" echoes back the user's prompt,
+    acting as the model specified in the chat completion request.
+
+    Parameters
+    ----------
+    completion_create_params: the chat completion request.
+    model: the deserialized model loaded by DRUM or by `load_model`, if supplied
+
+    Returns: a chat completion.
+    -------
+
+    """
+    model = completion_create_params["model"]
+    message_content = "Echo: " + completion_create_params["messages"][0]["content"]
+
+    return ChatCompletion(
+        id="association_id",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(role="assistant", content=message_content),
+            )
+        ],
+        created=calendar.timegm(time.gmtime()),
+        model=model,
+        object="chat.completion",
+    )
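
For reference, a hedged sketch of the kind of request this echo hook expects and what it returns; importing the hook module directly as custom, and the payload values, are assumptions made for this example.

# Illustrative sketch only -- exercising the echo chat hook directly.
import custom  # the custom.py shown above, assumed importable from the model's code dir

request = {
    "model": "datarobot_llm_id",                        # placeholder model id
    "messages": [{"role": "user", "content": "ping"}],  # single user message
}
completion = custom.chat(request, model=custom.load_model("."))
print(completion.choices[0].message.content)  # -> "Echo: ping"
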
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+guards:
+- description: Track the number of tokens associated with the input to the LLM, and/or retrieved
+    text from the vector database.
+  name: prompt_tokens
+  ootb_type: token_count
+  stage: prompt
+  type: ootb
+- description: Track the number of tokens associated with the input to the LLM, and/or retrieved
+    text from the vector database.
+  name: response_tokens
+  ootb_type: token_count
+  stage: response
+  type: ootb
+timeout_action: score
+timeout_sec: 60
