Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions google/cloud/aiplatform/metadata/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import pickle
import tempfile
import warnings
from typing import Any, Dict, Optional, Sequence, Union

from google.auth import credentials as auth_credentials
Expand Down Expand Up @@ -147,6 +148,13 @@ def _load_sklearn_model(
f"You are using sklearn {sklearn.__version__}."
"Attempting to load model..."
)

warnings.warn(
"Loading a scikit-learn model via pickle is insecure. "
"Ensure the model artifact is from a trusted source.",
RuntimeWarning
)

with open(model_file, "rb") as f:
sk_model = pickle.load(f)

Expand Down
5 changes: 4 additions & 1 deletion google/cloud/aiplatform/prediction/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,15 @@ def __init__(self):
return

@abstractmethod
def load(self, artifacts_uri: str) -> None:
def load(self, artifacts_uri: str, **kwargs) -> None:
"""Loads the model artifact.

Args:
artifacts_uri (str):
Required. The value of the environment variable AIP_STORAGE_URI.
**kwargs:
Optional. Additional keyword arguments for security or
configuration (e.g., allowed_extensions).
"""
pass

Expand Down
31 changes: 28 additions & 3 deletions google/cloud/aiplatform/prediction/sklearn/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import numpy as np
import os
import pickle
import warnings

from google.cloud.aiplatform.constants import prediction
from google.cloud.aiplatform.utils import prediction_utils
Expand All @@ -31,29 +32,53 @@ class SklearnPredictor(Predictor):
def __init__(self):
return

def load(self, artifacts_uri: str, **kwargs) -> None:
    """Loads the model artifact.

    Candidate files are tried in order (joblib first, then pickle). A
    candidate is only loaded when it exists on disk AND its extension passes
    the ``allowed_extensions`` allow-list; both formats deserialize via
    pickle and can execute arbitrary code, so both are gated and both emit
    a RuntimeWarning before loading.

    Args:
        artifacts_uri (str):
            Required. The value of the environment variable AIP_STORAGE_URI.
        **kwargs:
            Optional. Additional keyword arguments for security or
            configuration. Supported arguments:
                allowed_extensions (list[str]):
                    The allowed file extensions for model artifacts.
                    If not provided, a UserWarning is issued and every
                    supported model file is considered allowed.

    Raises:
        ValueError: If none of the required model files is both present in
            the artifacts uri and allowed by ``allowed_extensions``.
    """
    allowed_extensions = kwargs.get("allowed_extensions")

    if allowed_extensions is None:
        warnings.warn(
            "No 'allowed_extensions' provided. Loading model artifacts from "
            "untrusted sources may lead to remote code execution.",
            UserWarning,
        )

    prediction_utils.download_model_artifacts(artifacts_uri)

    # joblib is pickle-based, so it must honour the allow-list just like the
    # raw pickle branch below; otherwise the allow-list could be bypassed by
    # simply shipping a model.joblib next to the allowed file.
    if os.path.exists(
        prediction.MODEL_FILENAME_JOBLIB
    ) and prediction_utils.is_allowed(
        filename=prediction.MODEL_FILENAME_JOBLIB,
        allowed_extensions=allowed_extensions,
    ):
        warnings.warn(
            f"Loading {prediction.MODEL_FILENAME_JOBLIB} using joblib (pickle), which is unsafe. "
            "Only load files from trusted sources.",
            RuntimeWarning,
        )
        self._model = joblib.load(prediction.MODEL_FILENAME_JOBLIB)
    elif os.path.exists(
        prediction.MODEL_FILENAME_PKL
    ) and prediction_utils.is_allowed(
        filename=prediction.MODEL_FILENAME_PKL,
        allowed_extensions=allowed_extensions,
    ):
        warnings.warn(
            f"Loading {prediction.MODEL_FILENAME_PKL} using pickle, which is unsafe. "
            "Only load files from trusted sources.",
            RuntimeWarning,
        )
        # NOTE(security): pickle executes arbitrary code on load; the file
        # must come from a trusted source. Use a context manager so the
        # handle is closed even if unpickling fails.
        with open(prediction.MODEL_FILENAME_PKL, "rb") as model_file:
            self._model = pickle.load(model_file)
    else:
        valid_filenames = [
            prediction.MODEL_FILENAME_JOBLIB,
            prediction.MODEL_FILENAME_PKL,
        ]
        raise ValueError(
            f"One of the following model files must be provided and allowed: {valid_filenames}."
        )

def preprocess(self, prediction_input: dict) -> np.ndarray:
Expand Down
46 changes: 41 additions & 5 deletions google/cloud/aiplatform/prediction/xgboost/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import logging
import os
import pickle
import warnings

import numpy as np
import xgboost as xgb
Expand All @@ -34,21 +35,48 @@ class XgboostPredictor(Predictor):
def __init__(self):
return

def load(self, artifacts_uri: str) -> None:
def load(self, artifacts_uri: str, **kwargs) -> None:
"""Loads the model artifact.

Args:
artifacts_uri (str):
Required. The value of the environment variable AIP_STORAGE_URI.
**kwargs:
Optional. Additional keyword arguments for security or
configuration. Supported arguments:
allowed_extensions (list[str]):
The allowed file extensions for model artifacts.
If not provided, a UserWarning is issued.

Raises:
ValueError: If there's no required model files provided in the artifacts
uri.
"""
allowed_extensions = kwargs.get("allowed_extensions", None)

if allowed_extensions is None:
warnings.warn(
"No 'allowed_extensions' provided. Loading model artifacts from "
"untrusted sources may lead to remote code execution.",
UserWarning,
)

prediction_utils.download_model_artifacts(artifacts_uri)
if os.path.exists(prediction.MODEL_FILENAME_BST):

if os.path.exists(prediction.MODEL_FILENAME_BST) and prediction_utils.is_allowed(
filename=prediction.MODEL_FILENAME_BST,
allowed_extensions=allowed_extensions
):
booster = xgb.Booster(model_file=prediction.MODEL_FILENAME_BST)
elif os.path.exists(prediction.MODEL_FILENAME_JOBLIB):
elif os.path.exists(prediction.MODEL_FILENAME_JOBLIB) and prediction_utils.is_allowed(
filename=prediction.MODEL_FILENAME_JOBLIB,
allowed_extensions=allowed_extensions
):
warnings.warn(
f"Loading {prediction.MODEL_FILENAME_JOBLIB} using joblib (pickle), which is unsafe. "
"Only load files from trusted sources.",
RuntimeWarning,
)
try:
booster = joblib.load(prediction.MODEL_FILENAME_JOBLIB)
except KeyError:
Expand All @@ -58,7 +86,15 @@ def load(self, artifacts_uri: str) -> None:
)
booster = xgb.Booster()
booster.load_model(prediction.MODEL_FILENAME_JOBLIB)
elif os.path.exists(prediction.MODEL_FILENAME_PKL):
elif os.path.exists(prediction.MODEL_FILENAME_PKL) and prediction_utils.is_allowed(
filename=prediction.MODEL_FILENAME_PKL,
allowed_extensions=allowed_extensions
):
warnings.warn(
f"Loading {prediction.MODEL_FILENAME_PKL} using pickle, which is unsafe. "
"Only load files from trusted sources.",
RuntimeWarning,
)
booster = pickle.load(open(prediction.MODEL_FILENAME_PKL, "rb"))
else:
valid_filenames = [
Expand All @@ -67,7 +103,7 @@ def load(self, artifacts_uri: str) -> None:
prediction.MODEL_FILENAME_PKL,
]
raise ValueError(
f"One of the following model files must be provided: {valid_filenames}."
f"One of the following model files must be provided and allowed: {valid_filenames}."
)
self._booster = booster

Expand Down
8 changes: 8 additions & 0 deletions google/cloud/aiplatform/utils/prediction_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,11 @@ def add_flex_start_to_dedicated_resources(
dedicated_resources.flex_start = gca_machine_resources_compat.FlexStart(
max_runtime_duration=duration_pb2.Duration(seconds=max_runtime_duration)
)


def is_allowed(
    filename: str, allowed_extensions: Optional[list[str]]
) -> bool:
    """Checks whether a filename passes an extension allow-list.

    Args:
        filename (str):
            Required. The name (or path) of the file to check.
        allowed_extensions (list[str]):
            Optional. The permitted filename suffixes, e.g. ``[".bst"]``.
            ``None`` means no allow-list is configured and every file is
            allowed. A single string is treated as a one-element list.
            Empty strings are ignored, because an empty suffix would match
            every filename and silently disable the allow-list.

    Returns:
        bool: True if no allow-list is configured or if ``filename`` ends
        with at least one non-empty allowed extension; False otherwise
        (including when the allow-list is empty).
    """
    if allowed_extensions is None:
        return True
    if isinstance(allowed_extensions, str):
        # Iterating a bare string would compare against single characters
        # (".joblib" would allow "model.pkl" because it ends with "l").
        allowed_extensions = [allowed_extensions]
    suffixes = tuple(ext for ext in allowed_extensions if ext)
    # str.endswith accepts a tuple; an empty tuple never matches.
    return filename.endswith(suffixes)
78 changes: 78 additions & 0 deletions tests/unit/aiplatform/test_prediction_security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-

# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest
from unittest import mock
from google.cloud.aiplatform.prediction.xgboost.predictor import XgboostPredictor
from google.cloud.aiplatform.prediction.sklearn.predictor import SklearnPredictor


class TestPredictorSecurity:
    """Security-related behaviour of the prebuilt predictors' ``load``.

    All filesystem, download and deserialization calls are mocked, so no
    model artifact is ever actually read.
    """

    # Patch targets must name real import paths; a wrong dotted path makes
    # mock.patch fail at import time before the test body runs.
    _DOWNLOAD_TARGET = (
        "google.cloud.aiplatform.utils.prediction_utils.download_model_artifacts"
    )
    _XGB_BOOSTER_TARGET = (
        "google.cloud.aiplatform.prediction.xgboost.predictor.xgb.Booster"
    )

    @pytest.mark.parametrize("predictor_class", [XgboostPredictor, SklearnPredictor])
    def test_load_warns_no_allowed_extensions(self, predictor_class):
        """Verifies UserWarning is issued when allowed_extensions is missing."""
        predictor = predictor_class()
        with mock.patch(self._DOWNLOAD_TARGET), mock.patch(
            "os.path.exists", return_value=True
        ), mock.patch("joblib.load"), mock.patch("pickle.load"), mock.patch(
            self._XGB_BOOSTER_TARGET
        ), mock.patch(
            "builtins.open", mock.mock_open()
        ):
            with pytest.warns(UserWarning, match="No 'allowed_extensions' provided"):
                predictor.load("gs://test-bucket")

    def test_xgboost_load_warns_on_joblib(self):
        """Verifies RuntimeWarning is issued when loading a .joblib file."""
        predictor = XgboostPredictor()
        with mock.patch(self._DOWNLOAD_TARGET), mock.patch(
            "os.path.exists", side_effect=lambda p: p.endswith(".joblib")
        ), mock.patch("joblib.load"):
            # Raw string: "\(" is a regex escape, not a Python string escape.
            with pytest.warns(
                RuntimeWarning, match=r"using joblib \(pickle\), which is unsafe"
            ):
                predictor.load("gs://test-bucket", allowed_extensions=[".joblib"])

    def test_xgboost_load_raises_not_allowed(self):
        """Verifies ValueError is raised if the file exists but is not allowed."""
        predictor = XgboostPredictor()
        with mock.patch(self._DOWNLOAD_TARGET), mock.patch(
            self._XGB_BOOSTER_TARGET
        ), mock.patch("os.path.exists", side_effect=lambda p: p.endswith(".pkl")):
            with pytest.raises(ValueError, match="must be provided and allowed"):
                predictor.load("gs://test-bucket", allowed_extensions=[".bst"])

    def test_sklearn_load_warns_on_pickle(self):
        """Verifies RuntimeWarning is issued when loading a .pkl file."""
        predictor = SklearnPredictor()
        with mock.patch(self._DOWNLOAD_TARGET), mock.patch(
            "os.path.exists", side_effect=lambda p: p.endswith(".pkl")
        ), mock.patch("builtins.open", mock.mock_open()), mock.patch("pickle.load"):
            with pytest.warns(RuntimeWarning, match="using pickle, which is unsafe"):
                predictor.load("gs://test-bucket", allowed_extensions=[".pkl"])
Loading