update to match qa

johnnygreco · johnnygreco · commit d5146b5c32cb · 2025-07-29T20:54:07.000-04:00
diff --git a/nemo/NeMo-Data-Designer/README.md b/nemo/NeMo-Data-Designer/README.md
@@ -0,0 +1,25 @@
+# 🎨 NeMo Data Designer Tutorial Notebooks
+
+This directory contains the tutorial notebooks for getting started with NeMo Data Designer.
+
+## 🐳 Deploy the NeMo Data Designer microservice locally
+
+To run these notebooks, you must have the NeMo Data Designer microservice deployed locally via Docker Compose. See the [deployment guide](https://aire.gitlab-master-pages.nvidia.com/microservices/nmp/latest/nemo-microservices/latest/set-up/deploy-as-microservices/data-designer/docker-compose.html) for more details.
+
+## 📦 Set up the environment
+
+We will use the `uv` package manager to set up our environment and install the necessary dependencies. If you don't have `uv` installed, you can follow the installation instructions from the [uv documentation](https://docs.astral.sh/uv/getting-started/installation/).
+
+Once you have `uv` installed, make sure you are in the `NeMo-Data-Designer` directory and run the following command:
+
+```bash
+uv sync
+```
+
+This will create a virtual environment and install the necessary dependencies. Activate the virtual environment by running the following command:
+
+```bash
+source .venv/bin/activate
+```
+
+Be sure to select this virtual environment as your kernel when running the notebooks.
diff --git a/nemo/NeMo-Data-Designer/intro-tutorials/1-the-basics.ipynb b/nemo/NeMo-Data-Designer/intro-tutorials/1-the-basics.ipynb
@@ -14,18 +14,9 @@
     "\n",
     "In this notebook, we will demonstrate the basics of Data Designer by generating a simple product review dataset.\n",
     "\n",
-    "#### 💾 Install `nemo-microservices` with the `[data-designer]` extra option.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%capture\n",
-    "%pip install \"nemo-microservices[data-designer]==1.1.0rc4\" --index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple\n",
-    "%pip install huggingface-hub"
+    "#### 💾 Install dependencies\n",
+    "\n",
+    "If you haven't already, follow the instructions in the [README](../README.md) to install the necessary dependencies. Note you may need to restart your kernel after setting up the environment.\n"
    ]
   },
   {
@@ -41,6 +32,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from getpass import getpass\n",
+    "\n",
     "from nemo_microservices import NeMoMicroservices\n",
     "from nemo_microservices.beta.data_designer import (\n",
     "    DataDesignerConfigBuilder,\n",
@@ -83,6 +76,27 @@
     "- This list contains the models you can choose from (via the `model_alias` argument) during the generation process.\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build.nvidia.com model endpoint\n",
+    "endpoint = \"https://integrate.api.nvidia.com/v1\"\n",
+    "model_id = \"mistralai/mistral-small-24b-instruct\"\n",
+    "\n",
+    "model_alias = \"mistral-small\"\n",
+    "\n",
+    "# You will need to enter your model provider API key to run this notebook.\n",
+    "api_key = getpass(\"Enter model provider API key: \")\n",
+    "\n",
+    "if len(api_key) > 0:\n",
+    "    print(\"✅ API key received.\")\n",
+    "else:\n",
+    "    print(\"❌ No API key provided. Please enter your model provider API key.\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -91,21 +105,17 @@
    "source": [
     "model_configs = [\n",
     "    P.ModelConfig(\n",
-    "        alias=\"llama\",\n",
+    "        alias=model_alias,\n",
     "        inference_parameters=P.InferenceParameters(\n",
     "            max_tokens=1024,\n",
     "            temperature=0.5,\n",
     "            top_p=1.0,\n",
     "        ),\n",
     "        model=P.Model(\n",
     "            api_endpoint=P.ApiEndpoint(\n",
-    "                # For this API key to be visible in the microservice, you must set\n",
-    "                # the MODEL_API_KEY_NIM environment variable in the data-designer\n",
-    "                # section of the docker-compose file. Note the custom environment\n",
-    "                # variable names must start with the prefix \"MODEL_API_KEY_\".\n",
-    "                api_key=\"@os.environ/MODEL_API_KEY_NIM\",\n",
-    "                model_id=\"meta/llama-3.3-70b-instruct\",\n",
-    "                url=\"https://nim.int.aire.nvidia.com/v1\",\n",
+    "                api_key=api_key,\n",
+    "                model_id=model_id,\n",
+    "                url=endpoint,\n",
     "            ),\n",
     "        ),\n",
     "    )\n",
@@ -292,7 +302,7 @@
     "            \"You are a helpful assistant that generates product names. You respond with only the product name, \"\n",
     "            \"no other text. You do NOT add quotes around the product name. \"\n",
     "        ),\n",
-    "        model_alias=\"llama\",\n",
+    "        model_alias=model_alias,\n",
     "    )\n",
     ")\n",
     "\n",
@@ -305,7 +315,7 @@
     "            \"Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. \"\n",
     "            \"The style of the review should be '{{ review_style }}'. \"\n",
     "        ),\n",
-    "        model_alias=\"llama\",\n",
+    "        model_alias=model_alias,\n",
     "    )\n",
     ")\n",
     "\n",
@@ -339,7 +349,7 @@
    "outputs": [],
    "source": [
     "# The preview dataset is available as a pandas DataFrame.\n",
-    "preview.dataset.head()"
+    "preview.dataset"
    ]
   },
   {
@@ -394,6 +404,18 @@
     "results = ndd.create(config_builder, num_records=20, wait_until_done=True)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the dataset into a pandas DataFrame\n",
+    "dataset = results.load_dataset()\n",
+    "\n",
+    "dataset.head()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/nemo/NeMo-Data-Designer/intro-tutorials/2-structured-outputs-and-jinja-expressions.ipynb b/nemo/NeMo-Data-Designer/intro-tutorials/2-structured-outputs-and-jinja-expressions.ipynb
@@ -14,20 +14,9 @@
     "\n",
     "In this notebook, we will continue our exploration of Data Designer, demonstrating more advanced data generation using structured outputs and Jinja expressions.\n",
     "\n",
-    "If this is your first time using Data Designer, we recommend starting with the [first notebook](./1-the-basics.ipynb) in this 101 series.\n",
+    "#### 💾 Install dependencies\n",
     "\n",
-    "#### 💾 Install `nemo-microservices` with the `[data-designer]` extra option.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%capture\n",
-    "%pip install \"nemo-microservices[data-designer]==1.1.0rc4\" --index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple\n",
-    "%pip install huggingface-hub"
+    "If you haven't already, follow the instructions in the [README](../README.md) to install the necessary dependencies. Note you may need to restart your kernel after setting up the environment.\n"
    ]
   },
   {
@@ -43,6 +32,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from getpass import getpass\n",
+    "\n",
     "from nemo_microservices import NeMoMicroservices\n",
     "from nemo_microservices.beta.data_designer import (\n",
     "    DataDesignerConfigBuilder,\n",
@@ -131,6 +122,27 @@
     "- This list contains the models you can choose from (via the `model_alias` argument) during the generation process.\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build.nvidia.com model endpoint\n",
+    "endpoint = \"https://integrate.api.nvidia.com/v1\"\n",
+    "model_id = \"mistralai/mistral-small-24b-instruct\"\n",
+    "\n",
+    "model_alias = \"mistral-small\"\n",
+    "\n",
+    "# You will need to enter your model provider API key to run this notebook.\n",
+    "api_key = getpass(\"Enter model provider API key: \")\n",
+    "\n",
+    "if len(api_key) > 0:\n",
+    "    print(\"✅ API key received.\")\n",
+    "else:\n",
+    "    print(\"❌ No API key provided. Please enter your model provider API key.\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -139,21 +151,17 @@
    "source": [
     "model_configs = [\n",
     "    P.ModelConfig(\n",
-    "        alias=\"llama\",\n",
+    "        alias=model_alias,\n",
     "        inference_parameters=P.InferenceParameters(\n",
     "            max_tokens=1024,\n",
     "            temperature=0.5,\n",
     "            top_p=1.0,\n",
     "        ),\n",
     "        model=P.Model(\n",
     "            api_endpoint=P.ApiEndpoint(\n",
-    "                # For this API key to be visible in the microservice, you must set\n",
-    "                # the MODEL_API_KEY_NIM environment variable in the data-designer\n",
-    "                # section of the docker-compose file. Note the custom environment\n",
-    "                # variable names must start with the prefix \"MODEL_API_KEY_\".\n",
-    "                api_key=\"@os.environ/MODEL_API_KEY_NIM\",\n",
-    "                model_id=\"meta/llama-3.3-70b-instruct\",\n",
-    "                url=\"https://nim.int.aire.nvidia.com/v1\",\n",
+    "                api_key=api_key,\n",
+    "                model_id=model_id,\n",
+    "                url=endpoint,\n",
     "            ),\n",
     "        ),\n",
     "    )\n",
@@ -288,7 +296,7 @@
     "            \"{{ target_age_range }} years old. The product should be priced between $10 and $1000.\"\n",
     "        ),\n",
     "        output_format=Product,\n",
-    "        model_alias=\"llama\",\n",
+    "        model_alias=model_alias,\n",
     "    )\n",
     ")\n",
     "\n",
@@ -304,7 +312,7 @@
     "            \"Write the review in a style that is '{{ review_style }}'.\"\n",
     "        ),\n",
     "        output_format=ProductReview,\n",
-    "        model_alias=\"llama\",\n",
+    "        model_alias=model_alias,\n",
     "    )\n",
     ")\n",
     "\n",
@@ -341,7 +349,7 @@
    "outputs": [],
    "source": [
     "# The preview dataset is available as a pandas DataFrame.\n",
-    "preview.dataset.head()"
+    "preview.dataset"
    ]
   },
   {
diff --git a/nemo/NeMo-Data-Designer/intro-tutorials/3-seeding-with-a-dataset.ipynb b/nemo/NeMo-Data-Designer/intro-tutorials/3-seeding-with-a-dataset.ipynb
@@ -16,19 +16,9 @@
     "\n",
     "If this is your first time using Data Designer, we recommend starting with the [first notebook](./1-the-basics.ipynb) in this 101 series.\n",
     "\n",
-    "#### 💾 Install `nemo-microservices` with the `[data-designer]` extra option.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%capture\n",
-    "%pip install \"nemo-microservices[data-designer]==1.1.0rc4\" --index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple\n",
-    "%pip install huggingface-hub\n",
-    "%pip install datasets"
+    "#### 💾 Install dependencies\n",
+    "\n",
+    "If you haven't already, follow the instructions in the [README](../README.md) to install the necessary dependencies. Note you may need to restart your kernel after setting up the environment.\n"
    ]
   },
   {
@@ -44,6 +34,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from getpass import getpass\n",
+    "\n",
     "from nemo_microservices import NeMoMicroservices\n",
     "from nemo_microservices.beta.data_designer import (\n",
     "    DataDesignerConfigBuilder,\n",
@@ -84,30 +76,47 @@
     "- This list contains the models you can choose from (via the `model_alias` argument) during the generation process.\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build.nvidia.com model endpoint\n",
+    "endpoint = \"https://integrate.api.nvidia.com/v1\"\n",
+    "model_id = \"mistralai/mistral-small-24b-instruct\"\n",
+    "\n",
+    "model_alias = \"mistral-small\"\n",
+    "\n",
+    "# You will need to enter your model provider API key to run this notebook.\n",
+    "api_key = getpass(\"Enter model provider API key: \")\n",
+    "\n",
+    "if len(api_key) > 0:\n",
+    "    print(\"✅ API key received.\")\n",
+    "else:\n",
+    "    print(\"❌ No API key provided. Please enter your model provider API key.\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# You can also load the model configs from a YAML string or file.\n",
-    "# For the API key to be visible in the microservice, you must set\n",
-    "# the MODEL_API_KEY_NIM environment variable in the data-designer\n",
-    "# section of the docker-compose file. Note the custom environment\n",
-    "# variable names must start with the prefix \"MODEL_API_KEY_\".\n",
     "\n",
-    "model_configs_yaml = \"\"\"\\\n",
+    "model_configs_yaml = f\"\"\"\\\n",
     "model_configs:\n",
-    "  - alias: llama\n",
+    "  - alias: \"{model_alias}\"\n",
     "    inference_parameters:\n",
     "      max_tokens: 1024\n",
     "      temperature: 0.5\n",
     "      top_p: 1.0\n",
     "    model:\n",
     "      api_endpoint:\n",
-    "        api_key: \"@os.environ/MODEL_API_KEY_NIM\"\n",
-    "        model_id: \"meta/llama-3.3-70b-instruct\"\n",
-    "        url: \"https://nim.int.aire.nvidia.com/v1\"\n",
+    "        api_key: \"{api_key}\"\n",
+    "        model_id: \"{model_id}\"\n",
+    "        url: \"{endpoint}\"\n",
     "\"\"\"\n",
     "\n",
     "config_builder = DataDesignerConfigBuilder(model_configs=model_configs_yaml)"
@@ -271,7 +280,7 @@
     "\n",
     "Format the notes as a busy doctor might.\n",
     "\"\"\",\n",
-    "    model_alias=\"llama\",\n",
+    "    model_alias=model_alias,\n",
     ")\n",
     "\n",
     "config_builder.validate()"
@@ -304,7 +313,7 @@
    "outputs": [],
    "source": [
     "# The preview dataset is available as a pandas DataFrame.\n",
-    "preview.dataset.head()"
+    "preview.dataset"
    ]
   },
   {
diff --git a/nemo/NeMo-Data-Designer/pyproject.toml b/nemo/NeMo-Data-Designer/pyproject.toml
@@ -0,0 +1,21 @@
+[project]
+name = "nemo-data-designer"
+version = "0.0.1"
+description = "NeMo Data Designer tutorial notebooks"
+readme = "README.md"
+requires-python = ">=3.11"
+
+dependencies = [
+    "datasets",
+    "huggingface-hub",
+    "jupyter",
+    "nemo-microservices[data-designer]==1.1.0rc7",
+]
+
+[[tool.uv.index]]
+name = "nv-shared-pypi"
+url = "https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple"
+default = true
+
+[tool.uv.sources]
+nemo-microservices = { index = "nv-shared-pypi" }