From 4bba07cb5a6fe5aad58e4dd03330a58d3f970849 Mon Sep 17 00:00:00 2001 From: Maksym Shokaliuk Date: Tue, 13 May 2025 20:47:29 +0300 Subject: [PATCH 1/2] TEMPLATE-268 Adding Base ENV with Python311 and Nodejs 2215 to run with notebooks --- .../python311_node22/Dockerfile | 169 +++++++++ .../python311_node22/README.md | 25 ++ .../python311_node22/agent/README.md | 2 + .../python311_node22/agent/agent.py | 62 ++++ .../python311_node22/agent/cgroup_watchers.py | 172 +++++++++ .../agent/requirements-agent.txt | 2 + .../python311_node22/common-user-limits.sh | 9 + .../python311_node22/dr_requirements.txt | 9 + .../python311_node22/env_info.json | 9 + .../python311_node22/extensions/README.md | 2 + .../extensions/dataframe_formatter.py | 331 ++++++++++++++++++ .../python311_node22/ipython_config.py | 10 + .../jupyter_kernel_gateway_config.py | 26 ++ .../python311_node22/kernel.json | 15 + .../python311_node22/requirements.txt | 3 + .../python311_node22/setup-prompt.sh | 6 + .../python311_node22/setup-ssh.sh | 19 + .../python311_node22/setup-venv.sh | 66 ++++ .../python311_node22/sshd_config | 130 +++++++ .../python311_node22/start_server.sh | 47 +++ .../python311_node22/start_server_drum.sh | 18 + 21 files changed, 1132 insertions(+) create mode 100644 public_dropin_apps_environments/python311_node22/Dockerfile create mode 100644 public_dropin_apps_environments/python311_node22/README.md create mode 100644 public_dropin_apps_environments/python311_node22/agent/README.md create mode 100644 public_dropin_apps_environments/python311_node22/agent/agent.py create mode 100644 public_dropin_apps_environments/python311_node22/agent/cgroup_watchers.py create mode 100644 public_dropin_apps_environments/python311_node22/agent/requirements-agent.txt create mode 100644 public_dropin_apps_environments/python311_node22/common-user-limits.sh create mode 100644 public_dropin_apps_environments/python311_node22/dr_requirements.txt create mode 100644 public_dropin_apps_environments/python311_node22/env_info.json create mode 100644 public_dropin_apps_environments/python311_node22/extensions/README.md create mode 100644 public_dropin_apps_environments/python311_node22/extensions/dataframe_formatter.py create mode 100644 public_dropin_apps_environments/python311_node22/ipython_config.py create mode 100644 public_dropin_apps_environments/python311_node22/jupyter_kernel_gateway_config.py create mode 100644 public_dropin_apps_environments/python311_node22/kernel.json create mode 100644 public_dropin_apps_environments/python311_node22/requirements.txt create mode 100644 public_dropin_apps_environments/python311_node22/setup-prompt.sh create mode 100644 public_dropin_apps_environments/python311_node22/setup-ssh.sh create mode 100644 public_dropin_apps_environments/python311_node22/setup-venv.sh create mode 100644 public_dropin_apps_environments/python311_node22/sshd_config create mode 100644 public_dropin_apps_environments/python311_node22/start_server.sh create mode 100644 public_dropin_apps_environments/python311_node22/start_server_drum.sh diff --git a/public_dropin_apps_environments/python311_node22/Dockerfile b/public_dropin_apps_environments/python311_node22/Dockerfile new file mode 100644 index 000000000..dc73c9f8e --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/Dockerfile @@ -0,0 +1,169 @@ +# Copyright 2024 DataRobot, Inc. and its affiliates. +# All rights reserved. +# DataRobot, Inc. Confidential. +# This is unpublished proprietary source code of DataRobot, Inc. +# and its affiliates. 
+# The copyright notice above does not evidence any actual or intended +# publication of such source code. + + +################### !NOTA BENE! ####################### +# All the files, parameters and packages are necessary # +# for the proper functioning of Notebooks. # +# If needed, you can include any system package # +# that will be installed through microdnf or # +# add a required package to the requirements.txt file. # +# Please note that removing predefined packages # +# may result in issues with Notebooks functionality. # +########################################################### + +ARG WORKDIR=/etc/system/kernel +ARG AGENTDIR=/etc/system/kernel/agent +ARG VENV_PATH=${WORKDIR}/.venv + +ARG UNAME=notebooks +ARG UID=10101 +ARG GID=10101 + +# You can specify a different python version here +# be sure that package available in microdnf repo +# to check use this bash commands: +# ```bash``` +# docker run --rm -it registry.access.redhat.com/ubi9/ubi-minimal:9.5 bash +# microdnf repoquery python3* +# ``` +ARG PYTHON_VERSION=3.11 +ARG PYTHON_EXACT_VERSION=3.11.9 + +FROM registry.access.redhat.com/ubi9/ubi-minimal:9.5 AS base +# some globally required dependencies + +ARG UNAME +ARG UID +ARG GID +ARG WORKDIR +ARG AGENTDIR +ARG VENV_PATH +ARG PYTHON_VERSION +ARG PYTHON_EXACT_VERSION +ARG GIT_COMMIT + +# Set the SHELL option -o pipefail before RUN with a pipe in it. +# Rationale: https://github.com/hadolint/hadolint/wiki/DL4006 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Add any package that will be installed on system level here: +RUN echo "[Python$PYTHON_VERSION] GIT_COMMIT: $GIT_COMMIT" \ + && microdnf update -y \ + && microdnf install -y python$PYTHON_VERSION-$PYTHON_EXACT_VERSION python3-devel-3.9.21 \ + gcc-11.5.0 gcc-c++-11.5.0 glib2-devel-2.68.4 libffi-devel-3.4.2 graphviz-2.44.0 python$PYTHON_VERSION-pip \ + openblas-0.3.26 python$PYTHON_VERSION-scipy shadow-utils passwd git openssh-server tar gzip unzip zip wget \ + java-11-openjdk-headless vim-minimal nano \ + && microdnf clean all + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + VENV_PATH=${VENV_PATH} \ + PIP_NO_CACHE_DIR=1 \ + NOTEBOOKS_KERNEL="python" + +ENV PATH="$VENV_PATH/bin:$PATH" \ + PYTHONPATH="/home/notebooks/.ipython/extensions:/home/notebooks/storage" + +RUN python3 -m venv ${VENV_PATH} && pip3 install -U pip setuptools +WORKDIR ${WORKDIR} + +# Install git helper binary used for private git authentication in Notebooks/Codepaces +RUN curl -L -o drgithelper https://github.com/datarobot-oss/drgithelper/releases/download/v0.0.9/drgithelper && chmod +x drgithelper + +COPY ./agent/agent.py ./agent/cgroup_watchers.py ${AGENTDIR}/ +COPY ./jupyter_kernel_gateway_config.py ./start_server.sh ${WORKDIR}/ +COPY ./ipython_config.py /etc/ipython/ +COPY ./extensions /etc/ipython/extensions + +# Adding SSHD requirements +COPY ./sshd_config /etc/ssh/ +RUN cp -a /etc/ssh /etc/ssh.cache && rm -rf /var/cache/apk/* +RUN mkdir /etc/authorized_keys + +# Custom user to run the image from +RUN groupadd -g $GID -o $UNAME && \ + useradd -l -m -u $UID -g $GID -o -s /bin/bash $UNAME + +# Prompt customizations +COPY ./setup-prompt.sh /etc/profile.d/setup-prompt.sh + +RUN curl -fsSL https://rpm.nodesource.com/setup_22.x | bash - && \ + microdnf install -y nodejs && \ + microdnf clean all + +# remove microdnf +RUN microdnf remove microdnf -y + +# additional setup scripts +COPY ./setup-ssh.sh ./common-user-limits.sh ./setup-venv.sh ${WORKDIR}/ + +# Adding SSHD requirements +RUN chown -R $UNAME:$UNAME ${WORKDIR} ${VENV_PATH} 
/home/notebooks /etc/ssh /etc/authorized_keys \ + # sshd prep + && touch /etc/profile.d/notebooks-load-env.sh \ + && chown -R $UNAME:$UNAME /etc/profile.d/notebooks-load-env.sh \ + # Limit max processes + && touch /etc/profile.d/bash-profile-load.sh \ + && chown -R $UNAME:$UNAME /etc/profile.d/bash-profile-load.sh + +USER $UNAME + +# Jupyter Gateway port +EXPOSE 8888 +# sshd port +EXPOSE 22 + +FROM base AS minimal +# this stage has only bare minimal of dependencies installed to optimize build time for the local development + +ARG WORKDIR +ARG VENV_PATH + +COPY ./dr_requirements.txt ./agent/requirements-agent.txt ${WORKDIR}/ +RUN python3 -m pip install --no-cache-dir -r ${WORKDIR}/dr_requirements.txt \ + && python3 -m pip install --no-cache-dir -r ${WORKDIR}/requirements-agent.txt \ + && rm ${WORKDIR}/dr_requirements.txt \ + && rm ${WORKDIR}/requirements-agent.txt \ + && rm ${VENV_PATH}/share/jupyter/kernels/python3/kernel.json \ + && chmod a+x ${WORKDIR}/start_server.sh + +# Monitoring agent port +EXPOSE 8889 + +FROM minimal AS builder +# this stage includes all data science dependencies we want to have in the kernel runtime out of the box + +ARG WORKDIR +ARG VENV_PATH +ARG PYTHON_VERSION + +COPY ./kernel.json ${VENV_PATH}/share/jupyter/kernels/python3/ +COPY ./requirements.txt ${WORKDIR}/ +RUN pip3 install --no-cache-dir -r ${WORKDIR}/requirements.txt \ + && rm ${WORKDIR}/requirements.txt + +FROM base AS kernel +# this stage is what actually going to be run as kernel image and it's clean from all build junks + +ARG UNAME + +ARG WORKDIR + +ARG GIT_COMMIT + +LABEL com.datarobot.repo-name="notebooks" +LABEL com.datarobot.repo-sha=$GIT_COMMIT + +RUN chown -R $UNAME:$UNAME ${WORKDIR} /home/notebooks + +COPY --from=builder --chown=$UNAME $WORKDIR $WORKDIR + +# This is required for custom models to work with this image +COPY ./start_server_drum.sh /opt/code/start_server.sh +ENV HOME=/opt CODE_DIR=/opt/code ADDRESS=0.0.0.0:8080 diff --git a/public_dropin_apps_environments/python311_node22/README.md b/public_dropin_apps_environments/python311_node22/README.md new file mode 100644 index 000000000..4603da396 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/README.md @@ -0,0 +1,25 @@ +# Python 3.11 with NodeJS 22.15 Base Notebook Drop-In Template Environment + +This template environment can be used to create custom notebook environments based on Python 3.11 and Node.js 22.15. + +## Supported Libraries + +This environment is built for Python 3.11 and includes only the minimal required dependencies. It also supports managing React-based applications with NodeJS for use within DataRobot Notebooks. + +## Instructions + +1. Update [requirements](requirements.txt) to add your custom libraries supported by Python 3.11. +2. From the terminal, run: + + ``` + tar -czvf py311_notebook_dropin.tar.gz -C /path/to/public_dropin_notebook_environments/python311_notebook_base/ . + ``` + +3. Using either the API or from the UI create a new Custom Environment with the tarball created in step 2. + +### Using this environment in notebooks + +Upon successful build, the custom environment can be used in notebooks, by selecting it +from `Session environment` > `Environment` in the notebook sidebar. + +Please see [DataRobot documentation](https://docs.datarobot.com/en/docs/workbench/wb-notebook/wb-code-nb/wb-env-nb.html#custom-environment-images) for more information. 
diff --git a/public_dropin_apps_environments/python311_node22/agent/README.md b/public_dropin_apps_environments/python311_node22/agent/README.md new file mode 100644 index 000000000..731a699df --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/agent/README.md @@ -0,0 +1,2 @@ +This folder contains dependencies required to use this custom environment for DataRobot Notebooks. +Please do not modify or delete this folder from your Docker context. diff --git a/public_dropin_apps_environments/python311_node22/agent/agent.py b/public_dropin_apps_environments/python311_node22/agent/agent.py new file mode 100644 index 000000000..4250dab62 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/agent/agent.py @@ -0,0 +1,62 @@ +# Copyright 2024 DataRobot, Inc. and its affiliates. +# All rights reserved. +# DataRobot, Inc. Confidential. +# This is unpublished proprietary source code of DataRobot, Inc. +# and its affiliates. +# The copyright notice above does not evidence any actual or intended +# publication of such source code. + +import asyncio + +from websockets.exceptions import ConnectionClosedOK, ConnectionClosedError + +from cgroup_watchers import ( + CGroupFileReader, + CGroupWatcher, + DummyWatcher, + SystemWatcher, + CGroupVersionUnsupported, +) +from fastapi import FastAPI, WebSocket +import logging +import ecs_logging + +logger = logging.getLogger("kernel_agent") + +logger.setLevel(logging.DEBUG) +handler = logging.StreamHandler() +handler.setFormatter(ecs_logging.StdlibFormatter()) +logger.addHandler(handler) + +app = FastAPI() + +try: + watcher = CGroupWatcher(CGroupFileReader(), SystemWatcher()) +except CGroupVersionUnsupported: + logger.warning("CGroup Version Unsupported. Dummy utilization will be broadcasted") + watcher = DummyWatcher() + + +@app.websocket_route("/ws") +async def websocket_endpoint(websocket: WebSocket): + await websocket.accept() + + try: + while True: + await websocket.send_json( + { + "cpu_percent": watcher.cpu_usage_percentage(), + "mem_percent": watcher.memory_usage_percentage(), + } + ) + + await asyncio.sleep(3) + except ConnectionClosedError: + logger.warning( + "utilization consumer unconnected", + extra={"connection": websocket.client}, + exc_info=True, + ) + except ConnectionClosedOK: + # https://github.com/encode/starlette/issues/759 + logger.info("utilization consumer unconnected", extra={"connection": websocket.client}) diff --git a/public_dropin_apps_environments/python311_node22/agent/cgroup_watchers.py b/public_dropin_apps_environments/python311_node22/agent/cgroup_watchers.py new file mode 100644 index 000000000..38112c4c7 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/agent/cgroup_watchers.py @@ -0,0 +1,172 @@ +# Copyright 2024 DataRobot, Inc. and its affiliates. +# All rights reserved. +# DataRobot, Inc. Confidential. +# This is unpublished proprietary source code of DataRobot, Inc. +# and its affiliates. +# The copyright notice above does not evidence any actual or intended +# publication of such source code. +import re +import time +from pathlib import Path + +import psutil # type: ignore + +# Parts of this code have been reused from repo: +# https://github.com/neptune-ai/neptune-client/blob/master/LICENSE + +NANO_SECS = 10**9 + + +class CGroupVersionUnsupported(Exception): + """There are two versions of CGroups, the agent is compatible with V1 only. 
+ This error occurs when the agent was tried to be ran in V2""" + + +class SystemWatcher: + @staticmethod + def cpu_count() -> int: + return psutil.cpu_count() + + @staticmethod + def cpu_percent() -> float: + return psutil.cpu_percent() + + @staticmethod + def virtual_memory(): + return psutil.virtual_memory() + + +class CGroupFileReader: + def __init__(self) -> None: + cgroup_memory_dir = self._cgroup_mount_dir(subsystem="memory") + cgroup_cpu_dir = self._cgroup_mount_dir(subsystem="cpu") + cgroup_cpuacct_dir = self._cgroup_mount_dir(subsystem="cpuacct") + + self._memory_usage_file = cgroup_memory_dir / "memory.stat" + self._memory_limit_file = cgroup_memory_dir / "memory.limit_in_bytes" + + self._cpu_period_file = cgroup_cpu_dir / "cpu.cfs_period_us" + self._cpu_quota_file = cgroup_cpu_dir / "cpu.cfs_quota_us" + + self._cpuacct_usage_file = cgroup_cpuacct_dir / "cpuacct.usage" + + def memory_usage_in_bytes(self) -> int: + memory_stat_str = self._memory_usage_file.read_text() + total_rss_str = next( + iter([stat for stat in memory_stat_str.split("\n") if stat.startswith("total_rss")]), + "0", + ) + total_rss = int(total_rss_str.split(" ")[-1]) + return total_rss + + def memory_limit_in_bytes(self) -> int: + return self._read_metric(self._memory_limit_file) + + def cpu_quota_micros(self) -> int: + return self._read_metric(self._cpu_quota_file) + + def cpu_period_micros(self) -> int: + return self._read_metric(self._cpu_period_file) + + def cpuacct_usage_nanos(self) -> int: + return self._read_metric(self._cpuacct_usage_file) + + def _read_metric(self, filename: Path) -> int: + with open(filename) as f: + return int(f.read()) + + def _cgroup_mount_dir(self, subsystem: str) -> Path: + """ + :param subsystem: cgroup subsystem like memory, cpu etc. + :return: directory where subsystem is mounted + """ + try: + with open("/proc/mounts", "r") as f: + for line in f.readlines(): + split_line = re.split(r"\s+", line) + mount_dir = split_line[1] + + if "cgroup" in mount_dir: + dirname = mount_dir.split("/")[-1] + subsystems = dirname.split(",") + + if subsystem in subsystems: + return Path(mount_dir) + except FileNotFoundError: + ... 
+ + raise CGroupVersionUnsupported + + +class BaseWatcher: + def cpu_usage_percentage(self) -> float: + raise NotImplementedError + + def memory_usage_percentage(self) -> float: + raise NotImplementedError + + +class CGroupWatcher(BaseWatcher): + def __init__(self, cgroup_file_reader: CGroupFileReader, system_watcher: SystemWatcher) -> None: + self._cgroup_file_reader = cgroup_file_reader + self._system_watcher = system_watcher + + self._last_cpu_usage_ts_nanos = 0.0 + self._last_cpu_cum_usage_nanos = 0.0 + + def memory_usage_in_bytes(self) -> float: + return self._cgroup_file_reader.memory_usage_in_bytes() + + def memory_limit_in_bytes(self) -> float: + cgroup_mem_limit = self._cgroup_file_reader.memory_limit_in_bytes() + total_virtual_memory = self._system_watcher.virtual_memory().total + return min(cgroup_mem_limit, total_virtual_memory) + + def memory_usage_percentage(self) -> float: + return round(self.memory_usage_in_bytes() / self.memory_limit_in_bytes() * 100, 2) + + def cpu_usage_limit_in_cores(self) -> float: + cpu_quota_micros = self._cgroup_file_reader.cpu_quota_micros() + + if cpu_quota_micros == -1: + return float(self._system_watcher.cpu_count()) + else: + cpu_period_micros = self._cgroup_file_reader.cpu_period_micros() + return float(cpu_quota_micros) / float(cpu_period_micros) + + def cpu_usage_percentage(self) -> float: + current_timestamp_nanos = time.time() * NANO_SECS + cpu_cum_usage_nanos = self._cgroup_file_reader.cpuacct_usage_nanos() + + if self._is_first_measurement(): + current_usage = 0.0 + else: + usage_diff = cpu_cum_usage_nanos - self._last_cpu_cum_usage_nanos + time_diff = current_timestamp_nanos - self._last_cpu_usage_ts_nanos + current_usage = ( + float(usage_diff) / float(time_diff) / self.cpu_usage_limit_in_cores() * 100.0 + ) + + self._last_cpu_usage_ts_nanos = current_timestamp_nanos + self._last_cpu_cum_usage_nanos = cpu_cum_usage_nanos + + # In case the cpu usage exceeds the limit, we need to limit it + return round(self._limit(current_usage, lower_limit=0.0, upper_limit=100.0), 2) + + def _is_first_measurement(self) -> bool: + return self._last_cpu_usage_ts_nanos is None or self._last_cpu_cum_usage_nanos is None + + @staticmethod + def _limit(value: float, lower_limit: float, upper_limit: float) -> float: + return max(lower_limit, min(value, upper_limit)) + + +class DummyWatcher(BaseWatcher): + def __init__(self): + self._system_watcher = SystemWatcher() + + def cpu_usage_percentage(self) -> float: + return self._system_watcher.cpu_percent() + + def memory_usage_percentage(self) -> float: + return self._system_watcher.virtual_memory().percent diff --git a/public_dropin_apps_environments/python311_node22/agent/requirements-agent.txt b/public_dropin_apps_environments/python311_node22/agent/requirements-agent.txt new file mode 100644 index 000000000..f483b8109 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/agent/requirements-agent.txt @@ -0,0 +1,2 @@ +fastapi[All]==0.111.0 +psutil==5.9.8 diff --git a/public_dropin_apps_environments/python311_node22/common-user-limits.sh b/public_dropin_apps_environments/python311_node22/common-user-limits.sh new file mode 100644 index 000000000..eba90c7f5 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/common-user-limits.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "Generating common bash profile..." +{ + echo "#!/bin/bash" + echo "# Setting user process limits." 
+ echo "ulimit -Su 2048" + echo "ulimit -Hu 2048" +} > /etc/profile.d/bash-profile-load.sh diff --git a/public_dropin_apps_environments/python311_node22/dr_requirements.txt b/public_dropin_apps_environments/python311_node22/dr_requirements.txt new file mode 100644 index 000000000..771cb9620 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/dr_requirements.txt @@ -0,0 +1,9 @@ +setuptools==70.0.0 +ecs-logging==2.0.0 +jupyter-client==7.4.9 +jupyter_kernel_gateway==2.5.2 +jupyter_core==5.2.0 +ipykernel==6.28.0 +pandas==1.5.1 +numpy<2.0.0 +mistune==2.0.4 diff --git a/public_dropin_apps_environments/python311_node22/env_info.json b/public_dropin_apps_environments/python311_node22/env_info.json new file mode 100644 index 000000000..18c387d7e --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/env_info.json @@ -0,0 +1,9 @@ +{ + "id": "6807b77d70d573235153b370", + "name": "[Experimental] Python 3.12 + NodeJS 22.15 Base ENV", + "description": "This is experimental base environment that includes: \n- Python 3.12.9 \n - NodeJs v22.15", + "programmingLanguage": "python", + "environmentVersionId": "6807c1653f295b1c8f79edb6", + "isPublic": false, + "useCases": ["notebook", "customApplication"] +} diff --git a/public_dropin_apps_environments/python311_node22/extensions/README.md b/public_dropin_apps_environments/python311_node22/extensions/README.md new file mode 100644 index 000000000..731a699df --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/extensions/README.md @@ -0,0 +1,2 @@ +This folder contains dependencies required to use this custom environment for DataRobot Notebooks. +Please do not modify or delete this folder from your Docker context. diff --git a/public_dropin_apps_environments/python311_node22/extensions/dataframe_formatter.py b/public_dropin_apps_environments/python311_node22/extensions/dataframe_formatter.py new file mode 100644 index 000000000..d889a5535 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/extensions/dataframe_formatter.py @@ -0,0 +1,331 @@ +# Copyright 2022 DataRobot, Inc. and its affiliates. +# All rights reserved. +# DataRobot, Inc. Confidential. +# This is unpublished proprietary source code of DataRobot, Inc. +# and its affiliates. +# The copyright notice above does not evidence any actual or intended +# publication of such source code. 
+ +# -*- coding: utf-8 -*- +import json +import sys +import traceback +from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Union, cast + +from IPython.core.formatters import BaseFormatter +from IPython.core.magic import Magics +from pydantic import BaseModel +from traitlets import ObjectName, Unicode + +is_pandas_loaded = True + +try: + from pandas import DataFrame, io +except ImportError: + is_pandas_loaded = False + + +class Entity(BaseModel): + """ + Base class for data transfer objects + """ + + class Config: + allow_population_by_field_name = True + + +class DataframePaginationAttributes(Entity): + limit: int + offset: int + + +class DataframeAggregationParams(Entity): + group_by: str + aggregate_by: str + aggregation_func: str + + +class DataframeFilterParams(Entity): + filter_by: Optional[str] + filter: str + + +class DataframesProcessSteps(str, Enum): + CHART_CELL_DATAFRAME = "chart_cell_dataframe" + AGGREGATION = "aggregation" + PAGINATION = "pagination" + SORTING = "sorting" + GET_COLUMNS = "get_columns" + DEFAULT = "get_columns" + + +Columns = List[Dict[str, Any]] + +DEFAULT_INDEX_KEY = "index" + + +def _register_exception( + e: Exception, + step: str, +) -> Dict[str, Any]: + exc_info = sys.exc_info() + traceback_msg = traceback.format_exception(*exc_info) + + return { + "step": step, + "message": str(e), + "traceback": traceback_msg, + } + + +def _validate_columns(data: DataFrame) -> None: + """To prevent failing some DataFrame process steps like columns extraction + and converting to json we need ensure that columns dtypes can be converted + + Args: + data (DataFrame): in-memory DataFrame + + Returns: + None + """ + convertable_types = [ + "int64", + "float64", + "float32", + "bool", + "category", + "geometry", + "object", + "datetime64[ns]", + "timedelta[ns]", + ] + for column in data.columns: + dtype = data[column].dtype + if dtype not in convertable_types: + # Try to keep datetime dtype, remove the timezone information + # but converting to UTC, so yielding naive UTC time + if hasattr(data[column], "dt") and hasattr(data[column].dt, "tz_convert"): + data[column] = data[column].dt.tz_convert(None) + else: + # Otherwise, keep going working with dataframe but set pandas column type to str + data[column] = data[column].astype(str) + + +def _get_dataframe_columns(df: DataFrame) -> Columns: + schema = io.json.build_table_schema(df) + columns = cast(Columns, schema["fields"]) + return columns + + +# DataFrame pagination if pagination attrs exist +def _paginate_dataframe(df: DataFrame, pagination: DataframePaginationAttributes) -> DataFrame: + start_row = pagination.offset + end_row = start_row + pagination.limit + return df[start_row:end_row] + + +def _sort_dataframe(df: DataFrame, sort_by: str) -> DataFrame: + sorting_list = sort_by.split(",") + sort_by_list = [] + ascending_list = [] + # sorting by default index (None or "index") raise KeyValue error + # pandas allow to sort by columns and explicit indices + allowed_fields = set(list(df.columns) + list(df.index.names)) + for sort_key in sorting_list: + if sort_key not in allowed_fields: + continue + sort_by_list.append(sort_key.lstrip("-")) + ascending_list.append(not sort_key.startswith("-")) + + return df.sort_values(by=sort_by_list, ascending=ascending_list, ignore_index=False) + + +def _aggregate_dataframe( + df: DataFrame, aggregation_params: DataframeAggregationParams +) -> DataFrame: + aggregated = df.groupby(aggregation_params.group_by).aggregate( + {f"{aggregation_params.aggregate_by}": 
aggregation_params.aggregation_func} + ) + return aggregated.reset_index() + + +def _transform_to_json(data: DataFrame) -> Any: + if isinstance(data, list): + return data + + if data.__class__.__name__ == "GeoDataFrame": + return json.loads(data.to_json())["features"] + return json.loads(data.to_json(orient="table", index=True, default_handler=str))["data"] + + +def _prepare_df_for_chart_cell(val: DataFrame, columns: List[str]) -> Union[DataFrame, List[str]]: + if len(columns) == 0: + data = [] + elif len(columns) == 1: + # Return counts if only one column was selected or selected count of records + data = val.groupby(columns)[columns[0]].count().reset_index(name="count").set_index("count") + else: + # Return only selected columns + data = val[columns] + + return data + + +# This formatter can operate with data that we have received as a DataFrame +def formatter( # noqa: C901,PLR0912 + val: "DataFrame", + formatter: Optional[Callable[..., List[str]]] = None, + **formatter_kwargs: Any, +) -> Dict[str, Any]: + error = [] + dataframe_limit = 5000 + dataframe_id = id(val) + pagination = DataframePaginationAttributes(limit=10, offset=0) + data = val + sort_by = "" + selected_columns = [] + _validate_columns(data) + try: + columns = _get_dataframe_columns(data) + except Exception as e: + error.append(_register_exception(e, DataframesProcessSteps.GET_COLUMNS.value)) + + index_key = data.index.name if data.index.name is not None else DEFAULT_INDEX_KEY + + # check if it's a dataframe for ChartCell then return full dataframe + if hasattr(val, "attrs") and "returnAll" in val.attrs and val.attrs["returnAll"]: + # Validate what to return to UI + if hasattr(val, "attrs") and "selected_columns" in val.attrs: + selected_columns = list( + filter(lambda item: item is not index_key, val.attrs["selected_columns"]) + ) + try: + data = _prepare_df_for_chart_cell(val=data, columns=selected_columns) + except Exception as e: + error.append( + _register_exception(e, DataframesProcessSteps.CHART_CELL_DATAFRAME.value) + ) + if len(selected_columns) < 2: + # Reset `returnAll` attribute to prevent returning a whole DF on next formatter call + val.attrs.update({"returnAll": False}) + data = [] if len(error) > 0 else data + + return { + "columns": columns, + "data": _transform_to_json(data), + "referenceId": dataframe_id, + "error": error, + "indexKey": index_key, + } + + aggregation_func = val.attrs.get("aggregation", {}).get("aggregation_func") + if aggregation_func and aggregation_func != "no-aggregation": + aggregation = DataframeAggregationParams( + group_by=val.attrs["aggregation"]["group_by"], + aggregate_by=val.attrs["aggregation"]["aggregate_by"], + aggregation_func=val.attrs["aggregation"]["aggregation_func"], + ) + try: + data = _aggregate_dataframe(data, aggregation) + except Exception as e: + error.append(_register_exception(e, DataframesProcessSteps.AGGREGATION.value)) + + if len(data.index) >= dataframe_limit: + pagination = DataframePaginationAttributes(limit=dataframe_limit, offset=0) + try: + data = _paginate_dataframe(data, pagination) + except Exception as e: + error.append(_register_exception(e, DataframesProcessSteps.PAGINATION.value)) + + # Reset `returnAll` attribute to prevent returning a whole DF on next formatter call + val.attrs.update({"returnAll": False}) + + return { + "columns": columns, + "data": _transform_to_json(data), + "referenceId": dataframe_id, + "error": error, + "indexKey": index_key, + } + + # Sorting step, gets attrs that have been set up in DataframeProcessor + if 
hasattr(val, "attrs") and "sort_by" in val.attrs: + try: + data = _sort_dataframe(df=data, sort_by=val.attrs["sort_by"]) + sort_by = val.attrs["sort_by"] + except Exception as e: + error.append(_register_exception(e, DataframesProcessSteps.SORTING.value)) + + # Pagination step, gets attrs that have been set up in DataframeProcessor + if hasattr(val, "attrs") and "pagination" in val.attrs: + pagination = DataframePaginationAttributes( + limit=val.attrs["pagination"]["limit"], offset=val.attrs["pagination"]["offset"] + ) + + # If dataframe length is less than pagination limit no need to paginate it + if len(data.index) > int(pagination.limit): + try: + data = _paginate_dataframe(data, pagination) + except Exception as e: + error.append(_register_exception(e, DataframesProcessSteps.PAGINATION.value)) + + return { + "data": _transform_to_json(data), + "columns": columns, + "count": len(data.index), + "totalCount": len(val.index), + "offset": int(pagination.offset), + "limit": int(pagination.limit), + "referenceId": dataframe_id, + "sortedBy": sort_by, + "indexKey": index_key, + "error": error, + } + + +# To Add a new data formatter we need create a new class instance based on a +# BaseFormatter from the iPython kernel +# +# Ignoring mypy error: Class cannot subclass "BaseFormatter" (has type "Any") +class DataFrameFormatter(BaseFormatter): # type: ignore[misc] + """A DataFrame formatter. This is basically a copy of the JSONFormatter, + so it will return as a new mime type: application/vnd.dataframe+json in output. + """ + + format_type = Unicode("application/vnd.dataframe+json") + _return_type = (list, dict) + + print_method = ObjectName("_repr_json_") + + def _check_return(self, r: Any, obj: Any) -> Any: + """Check that a return value is appropriate + Return the value if so, None otherwise, warning if invalid. + """ + if r is None: + return + md = None + if isinstance(r, tuple): + # unpack data, metadata tuple for type checking on first element + r, md = r + + assert not isinstance(r, str), "JSON-as-string has been deprecated since IPython < 3" + + if md is not None: + # put the tuple back together + r = (r, md) + return super(DataFrameFormatter, self)._check_return(r, obj) + + +# Load our extension into ipython kernel +def load_ipython_extension(ipython: Magics) -> None: + if is_pandas_loaded: + dataframe_json_formatter = DataFrameFormatter() + ipython.display_formatter.formatters[ + "application/vnd.dataframe+json" + ] = dataframe_json_formatter + dataframe_json_formatter.for_type(DataFrame, formatter) + + print("Pandas DataFrame MimeType Extension loaded") + else: + print("Please execute `pip install pandas` to use DataFrame extension") diff --git a/public_dropin_apps_environments/python311_node22/ipython_config.py b/public_dropin_apps_environments/python311_node22/ipython_config.py new file mode 100644 index 000000000..225b94840 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/ipython_config.py @@ -0,0 +1,10 @@ +# Copyright 2024 DataRobot, Inc. and its affiliates. +# All rights reserved. +# DataRobot, Inc. Confidential. +# This is unpublished proprietary source code of DataRobot, Inc. +# and its affiliates. +# The copyright notice above does not evidence any actual or intended +# publication of such source code. 
+ +# This need to load extensions automaticaly when kernel starting +c.InteractiveShellApp.extensions = ["dataframe_formatter"] diff --git a/public_dropin_apps_environments/python311_node22/jupyter_kernel_gateway_config.py b/public_dropin_apps_environments/python311_node22/jupyter_kernel_gateway_config.py new file mode 100644 index 000000000..1d1126789 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/jupyter_kernel_gateway_config.py @@ -0,0 +1,26 @@ +# Copyright 2022 DataRobot, Inc. and its affiliates. +# All rights reserved. +# DataRobot, Inc. Confidential. +# This is unpublished proprietary source code of DataRobot, Inc. +# and its affiliates. +# The copyright notice above does not evidence any actual or intended +# publication of such source code. +c.KernelGatewayApp.ip = "0.0.0.0" # nosec +c.KernelGatewayApp.prespawn_count = 1 +c.KernelGatewayApp.max_kernels = 100 +c.KernelGatewayApp.default_kernel_name = "python3" +c.JupyterWebsocketPersonality.list_kernels = True +c.KernelRestarter.restart_limit = ( + 3 # if restart happens 3 consecutive times (before kernel is ready) +) + +c.KernelGatewayApp.logging_config = { + "formatters": { + "console": {"class": "ecs_logging.StdlibFormatter"}, + }, + "loggers": { + "KernelGatewayApp": { + "handlers": ["console"], + } + }, +} diff --git a/public_dropin_apps_environments/python311_node22/kernel.json b/public_dropin_apps_environments/python311_node22/kernel.json new file mode 100644 index 000000000..9cc7c031e --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/kernel.json @@ -0,0 +1,15 @@ +{ + "argv": [ + "python", + "-m", + "ipykernel_launcher", + "-f", + "{connection_file}" + ], + "display_name": "Python 3 (ipykernel)", + "language": "python", + "metadata": { + "debugger": true + }, + "interrupt_mode": "message" +} diff --git a/public_dropin_apps_environments/python311_node22/requirements.txt b/public_dropin_apps_environments/python311_node22/requirements.txt new file mode 100644 index 000000000..21076e6c9 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/requirements.txt @@ -0,0 +1,3 @@ +datarobot-drum==1.16.3 +numpy==1.26.2 +uwsgi diff --git a/public_dropin_apps_environments/python311_node22/setup-prompt.sh b/public_dropin_apps_environments/python311_node22/setup-prompt.sh new file mode 100644 index 000000000..704cdb1ce --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/setup-prompt.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +PS1='[\[\033[38;5;172m\]\u\[$(tput sgr0)\]@kernel \[$(tput sgr0)\]\[\033[38;5;39m\]\w\[$(tput sgr0)\]]\$ \[$(tput sgr0)\]' + +# shellcheck disable=SC1091 +source /etc/system/kernel/setup-venv.sh diff --git a/public_dropin_apps_environments/python311_node22/setup-ssh.sh b/public_dropin_apps_environments/python311_node22/setup-ssh.sh new file mode 100644 index 000000000..da4607cbe --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/setup-ssh.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +echo "Persisting container environment variables for sshd..." 
+{ + echo "#!/bin/bash" + echo "# This file is auto-populated with kernel env vars on container creation" + echo "# to ensure that they are exposed in ssh sessions" + echo "# Ref: https://github.com/jenkinsci/docker-ssh-agent/issues/33#issuecomment-597367846" + echo "set -a" + env | grep -E -v "^(PWD=|HOME=|TERM=|SHLVL=|LD_PRELOAD=|PS1=|_=|KUBERNETES_)" | while read -r line; do + NAME=$(echo "$line" | cut -d'=' -f1) + VALUE=$(echo "$line" | cut -d'=' -f2-) + # Use eval to handle complex cases like export commands with spaces + echo "$NAME='$VALUE'" + done + echo "set +a" + # setup the working directory for terminal sessions + echo "cd $WORKING_DIR" +} > /etc/profile.d/notebooks-load-env.sh diff --git a/public_dropin_apps_environments/python311_node22/setup-venv.sh b/public_dropin_apps_environments/python311_node22/setup-venv.sh new file mode 100644 index 000000000..8cec461a0 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/setup-venv.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# we don't want it output anything in the terminal session setup +VERBOSE_MODE=${1:-false} + +IS_CODESPACE=$([[ "${WORKING_DIR}" == *"/storage"* ]] && echo true || echo false) +IS_PYTHON_KERNEL=$([[ "${NOTEBOOKS_KERNEL}" == "python" ]] && echo true || echo false) + +if [[ $IS_CODESPACE == true && $IS_PYTHON_KERNEL == true && -z "${NOTEBOOKS_NO_PERSISTENT_DEPENDENCIES}" ]]; then + export POETRY_VIRTUALENVS_CREATE=false + export XDG_CACHE_HOME="${WORKING_DIR%/}/.cache" + # Persistent HF artifact installation + export HF_HOME="${WORKING_DIR%/}/.cache" + export HF_HUB_CACHE="${WORKING_DIR%/}/.cache" + export HF_DATASETS_CACHE="${WORKING_DIR%/}/.datasets" + export TRANSFORMERS_CACHE="${WORKING_DIR%/}/.models" + export SENTENCE_TRANSFORMERS_HOME="${WORKING_DIR%/}/.models" + + USR_VENV="${WORKING_DIR%/}/.venv" + [[ $VERBOSE_MODE == true ]] && echo "Setting up a user venv ($USR_VENV)..." + + # we need to make sure both kernel & user venv's site-packages are in PYTHONPATH because: + # - when the user venv is activated (e.g. terminal sessions), it ignores the kernel venv + # - when Jupyter kernel is running (e.g. notebook cells) it uses the kernel venv ignoring the user venv + + # shellcheck disable=SC1091 + source "$VENV_PATH/bin/activate" + KERNEL_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") + deactivate + + # If a user has previously created a session with a different python version we need to figure that out + # If so we'll delete the existing venv to avoid errors and issues - for example when pip installing new packages + if [ -d "$USR_VENV" ]; then + [[ $VERBOSE_MODE == true ]] && echo "$USR_VENV does exist - will check python symlinks to see if they are broken..." + # Here we are getting all the symlinks for the venv and checking if any of them are broken + readarray -d '' VENV_SYMLINKS < <(find "$USR_VENV" -type l -print0) + python_symlinks_broken=false + for i in "${VENV_SYMLINKS[@]}"; do + if [[ "$i" == *"python"* ]]; then + [[ $VERBOSE_MODE == true ]] && echo "Checking symlink (${i})."; + if [ ! -e "$i" ] ; then + [[ $VERBOSE_MODE == true ]] && echo "Symlink (${i}) broken..."; + python_symlinks_broken=true + break + fi + fi + done + + # If any python symlinks are broken delete the venv that we know exists from checks above + if [[ $python_symlinks_broken == true ]]; then + [[ $VERBOSE_MODE == true ]] && echo "Python symlinks are broken - deleting existing virtual env..." 
+ rm -rf "${USR_VENV}" + fi + fi + + python3 -m venv "${USR_VENV}" + # shellcheck disable=SC1091 + source "${USR_VENV}/bin/activate" + USER_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") + + export PYTHONPATH="$USER_PACKAGES:$KERNEL_PACKAGES:$PYTHONPATH" +else + [[ $VERBOSE_MODE == true ]] && echo "Skipping user venv setup..." + # shellcheck disable=SC1091 + source "$VENV_PATH/bin/activate" +fi diff --git a/public_dropin_apps_environments/python311_node22/sshd_config b/public_dropin_apps_environments/python311_node22/sshd_config new file mode 100644 index 000000000..baaa53141 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/sshd_config @@ -0,0 +1,130 @@ +# $OpenBSD: sshd_config,v 1.103 2018/04/09 20:41:22 tj Exp $ + +# This is the sshd server system-wide configuration file. See +# sshd_config(5) for more information. + +# This sshd was compiled with PATH=/bin:/usr/bin:/sbin:/usr/sbin + +# The strategy used for options in the default sshd_config shipped with +# OpenSSH is to specify options with their default value where +# possible, but leave them commented. Uncommented options override the +# default value. + +#Port 22 +#AddressFamily any +#ListenAddress 0.0.0.0 +#ListenAddress :: + +#HostKey /etc/ssh/ssh_host_rsa_key +#HostKey /etc/ssh/ssh_host_ecdsa_key +#HostKey /etc/ssh/ssh_host_ed25519_key + +# Ciphers and keying +#RekeyLimit default none + +# Logging +#SyslogFacility AUTH +#LogLevel INFO + +# Authentication: + +#LoginGraceTime 2m +#PermitRootLogin prohibit-password +#StrictModes yes +#MaxAuthTries 6 +#MaxSessions 10 + +#PubkeyAuthentication yes + +# The default is to check both .ssh/authorized_keys and .ssh/authorized_keys2 +# but this is overridden so installations will only check .ssh/authorized_keys +AuthorizedKeysFile .ssh/authorized_keys /etc/authorized_keys/%u + +#AuthorizedPrincipalsFile none + +#AuthorizedKeysCommand none +#AuthorizedKeysCommandUser nobody + +# For this to work you will also need host keys in /etc/ssh/ssh_known_hosts +#HostbasedAuthentication no +# Change to yes if you don't trust ~/.ssh/known_hosts for +# HostbasedAuthentication +#IgnoreUserKnownHosts no +# Don't read the user's ~/.rhosts and ~/.shosts files +#IgnoreRhosts yes + +# To disable tunneled clear text passwords, change to no here! +#PasswordAuthentication yes +#PermitEmptyPasswords no + +# Change to no to disable s/key passwords +#ChallengeResponseAuthentication yes + +# Kerberos options +#KerberosAuthentication no +#KerberosOrLocalPasswd yes +#KerberosTicketCleanup yes +#KerberosGetAFSToken no + +# GSSAPI options +#GSSAPIAuthentication no +#GSSAPICleanupCredentials yes + +# Set this to 'yes' to enable PAM authentication, account processing, +# and session processing. If this is enabled, PAM authentication will +# be allowed through the ChallengeResponseAuthentication and +# PasswordAuthentication. Depending on your PAM configuration, +# PAM authentication via ChallengeResponseAuthentication may bypass +# the setting of "PermitRootLogin without-password". +# If you just want the PAM account and session checks to run without +# PAM authentication, then enable this but set PasswordAuthentication +# and ChallengeResponseAuthentication to 'no'. +#UsePAM no + +#AllowAgentForwarding yes +# Feel free to re-enable these if your use case requires them. 
+AllowTcpForwarding no +GatewayPorts no +X11Forwarding no +#X11DisplayOffset 10 +#X11UseLocalhost yes +#PermitTTY yes +#PrintMotd yes +#PrintLastLog yes +#TCPKeepAlive yes +#PermitUserEnvironment no +#Compression delayed +#ClientAliveInterval 0 +#ClientAliveCountMax 3 +#UseDNS no +#PidFile /run/sshd.pid +#MaxStartups 10:30:100 +#PermitTunnel no +#ChrootDirectory none +#VersionAddendum none + +# no default banner path +#Banner none + +# override default of no subsystems +Subsystem sftp /usr/lib/ssh/sftp-server + +# Example of overriding settings on a per-user basis +#Match User anoncvs +# X11Forwarding no +# AllowTcpForwarding no +# PermitTTY no +# ForceCommand cvs server +Port 8022 + +HostKey /etc/ssh/keys/ssh_host_key + +# FIPS compliant and supported by https://asyncssh.readthedocs.io/en/stable/api.html#key-exchange-algorithms +Ciphers aes256-ctr,aes192-ctr,aes128-ctr +HostKeyAlgorithms ecdsa-sha2-nistp256,ssh-ed25519 +KexAlgorithms ecdh-sha2-nistp256,ecdh-sha2-nistp521,ecdh-sha2-nistp384,diffie-hellman-group-exchange-sha256,diffie-hellman-group-exchange-sha1,diffie-hellman-group14-sha1 +MACs hmac-sha2-256,hmac-sha2-512,hmac-sha1 + +PasswordAuthentication no +ChallengeResponseAuthentication no +PermitEmptyPasswords no diff --git a/public_dropin_apps_environments/python311_node22/start_server.sh b/public_dropin_apps_environments/python311_node22/start_server.sh new file mode 100644 index 000000000..738f1ba41 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/start_server.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +export HOME="/home/notebooks" + +# setup the working directory for the kernel +if [ -z "$1" ]; then + # Set default working directory if no argument is provided + WORKING_DIR="/home/notebooks" +else + # Use the provided working directory + WORKING_DIR="$1" +fi + +export WORKING_DIR + +VERBOSE_MODE=true +# shellcheck disable=SC1091 +source /etc/system/kernel/setup-venv.sh $VERBOSE_MODE + +cd /etc/system/kernel/agent || exit +nohup uvicorn agent:app --host 0.0.0.0 --port 8889 & + +# shellcheck disable=SC1091 +source /etc/system/kernel/common-user-limits.sh + +# shellcheck disable=SC1091 +source /etc/system/kernel/setup-ssh.sh +cp -L /var/run/notebooks/ssh/authorized_keys/notebooks /etc/authorized_keys/ && chmod 600 /etc/authorized_keys/notebooks +mkdir /etc/ssh/keys && cp -L /var/run/notebooks/ssh/keys/ssh_host_* /etc/ssh/keys/ && chmod 600 /etc/ssh/keys/ssh_host_* +nohup /usr/sbin/sshd -D & + +# Initialize the git helper. Turn on/off features dependent on `GITHELPER_*` env vars +/etc/system/kernel/drgithelper configs set + +# no trailing slash in the working dir path +git config --global --add safe.directory "${WORKING_DIR%/}" + +# setup the working directory for the kernel +cd "$WORKING_DIR" || exit + +# setup ipython extensions +cp -r /etc/ipython/ /home/notebooks/.ipython/ + +# clear out kubernetes_specific env vars before starting kernel gateway as it will inherit them +prefix="KUBERNETES_"; for var in $(printenv | cut -d= -f1); do [[ "$var" == "$prefix"* ]] && unset "$var"; done + +exec jupyter kernelgateway --config=/etc/system/kernel/jupyter_kernel_gateway_config.py --debug diff --git a/public_dropin_apps_environments/python311_node22/start_server_drum.sh b/public_dropin_apps_environments/python311_node22/start_server_drum.sh new file mode 100644 index 000000000..f55622688 --- /dev/null +++ b/public_dropin_apps_environments/python311_node22/start_server_drum.sh @@ -0,0 +1,18 @@ +#!/bin/sh +# Copyright 2024 DataRobot, Inc. and its affiliates. 
+# +# All rights reserved. +# This is proprietary source code of DataRobot, Inc. and its affiliates. +# +# Released under the terms of DataRobot Tool and Utility Agreement. +echo "Starting Custom Model environment with DRUM prediction server" + +if [ "${ENABLE_CUSTOM_MODEL_RUNTIME_ENV_DUMP}" = 1 ]; then + echo "Environment variables:" + env +fi + +echo +echo "Executing command: drum server $*" +echo +exec drum server "$@" From e7ee1e2ca4475c9057511ef0f463d4998c16bb73 Mon Sep 17 00:00:00 2001 From: Maksym Shokaliuk Date: Wed, 14 May 2025 22:32:03 +0300 Subject: [PATCH 2/2] TEMPLATE-268 Updating the env --- .../python311_node22/Dockerfile | 13 ++++++++++++- .../python311_node22/README.md | 2 +- .../python311_node22/env_info.json | 4 ++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/public_dropin_apps_environments/python311_node22/Dockerfile b/public_dropin_apps_environments/python311_node22/Dockerfile index dc73c9f8e..9bf5c2479 100644 --- a/public_dropin_apps_environments/python311_node22/Dockerfile +++ b/public_dropin_apps_environments/python311_node22/Dockerfile @@ -33,7 +33,15 @@ ARG GID=10101 # microdnf repoquery python3* # ``` ARG PYTHON_VERSION=3.11 -ARG PYTHON_EXACT_VERSION=3.11.9 +ARG PYTHON_EXACT_VERSION=3.11.11 + +FROM registry.access.redhat.com/ubi8/ubi-minimal:8.9 AS pulumi_builder +RUN microdnf update -y \ + && microdnf install -y tar gzip \ + && microdnf clean all + +RUN curl -o install_pulumi.sh https://get.pulumi.com && chmod +x install_pulumi.sh +RUN ./install_pulumi.sh --version latest --install-root ./pulumi --no-edit-path FROM registry.access.redhat.com/ubi9/ubi-minimal:9.5 AS base # some globally required dependencies @@ -97,6 +105,8 @@ RUN curl -fsSL https://rpm.nodesource.com/setup_22.x | bash - && \ microdnf install -y nodejs && \ microdnf clean all +RUN sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin + # remove microdnf RUN microdnf remove microdnf -y @@ -163,6 +173,7 @@ LABEL com.datarobot.repo-sha=$GIT_COMMIT RUN chown -R $UNAME:$UNAME ${WORKDIR} /home/notebooks COPY --from=builder --chown=$UNAME $WORKDIR $WORKDIR +COPY --from=pulumi_builder --chown=$UNAME /pulumi/bin $VENV_PATH/bin # This is required for custom models to work with this image COPY ./start_server_drum.sh /opt/code/start_server.sh diff --git a/public_dropin_apps_environments/python311_node22/README.md b/public_dropin_apps_environments/python311_node22/README.md index 4603da396..179b3ff2a 100644 --- a/public_dropin_apps_environments/python311_node22/README.md +++ b/public_dropin_apps_environments/python311_node22/README.md @@ -12,7 +12,7 @@ This environment is built for Python 3.11 and includes only the minimal required 2. From the terminal, run: ``` - tar -czvf py311_notebook_dropin.tar.gz -C /path/to/public_dropin_notebook_environments/python311_notebook_base/ . + tar -czvf python311_node22_dropin.tar.gz -C /path/to/public_dropin_notebook_environments/python311_node22/ . ``` 3. Using either the API or from the UI create a new Custom Environment with the tarball created in step 2. 
diff --git a/public_dropin_apps_environments/python311_node22/env_info.json b/public_dropin_apps_environments/python311_node22/env_info.json
index 18c387d7e..49541072d 100644
--- a/public_dropin_apps_environments/python311_node22/env_info.json
+++ b/public_dropin_apps_environments/python311_node22/env_info.json
@@ -1,7 +1,7 @@
 {
   "id": "6807b77d70d573235153b370",
-  "name": "[Experimental] Python 3.12 + NodeJS 22.15 Base ENV",
-  "description": "This is experimental base environment that includes: \n- Python 3.12.9 \n - NodeJs v22.15",
+  "name": "[Experimental] Python 3.11 + NodeJS 22.15 Base ENV",
+  "description": "This is an experimental base environment that includes: \n- Python 3.11 \n- NodeJS v22.15",
   "programmingLanguage": "python",
   "environmentVersionId": "6807c1653f295b1c8f79edb6",
   "isPublic": false,
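Beyond the build itself, the monitoring agent added under `agent/agent.py` broadcasts container CPU and memory utilization as JSON over a websocket on port 8889 (path `/ws`, exposed in the Dockerfile). The snippet below is a minimal consumer sketch for local testing; the `localhost:8889` address and the `websockets` client dependency are assumptions and not part of the patch.

```
# Hypothetical consumer for the agent's /ws endpoint; host/port and the
# "websockets" client package are assumptions for local testing only.
import asyncio
import json

import websockets


async def watch_utilization() -> None:
    async with websockets.connect("ws://localhost:8889/ws") as ws:
        # agent.py pushes {"cpu_percent": ..., "mem_percent": ...} roughly every 3 seconds
        for _ in range(3):
            payload = json.loads(await ws.recv())
            print(f"cpu={payload['cpu_percent']}%  mem={payload['mem_percent']}%")


asyncio.run(watch_utilization())
```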