Skip to content

Commit d5146b5

Browse files
committed
update to match qa
1 parent f43421c commit d5146b5

File tree

5 files changed

+156
-71
lines changed

5 files changed

+156
-71
lines changed

nemo/NeMo-Data-Designer/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# 🎨 NeMo Data Designer Tutorial Notebooks
2+
3+
This directory contains the tutorial notebooks for getting started with NeMo Data Designer.
4+
5+
## 🐳 Deploy the NeMo Data Designer microservice locally
6+
7+
To run these notebooks, you must have the NeMo Data Designer microservice deployed locally via Docker Compose. See the [deployment guide](https://aire.gitlab-master-pages.nvidia.com/microservices/nmp/latest/nemo-microservices/latest/set-up/deploy-as-microservices/data-designer/docker-compose.html) for more details.
8+
9+
## 📦 Set up the environment
10+
11+
We will use the `uv` package manager to set up our environment and install the necessary dependencies. If you don't have `uv` installed, you can follow the installation instructions from the [uv documentation](https://docs.astral.sh/uv/getting-started/installation/).
12+
13+
Once you have `uv` installed, be sure you are in the `NeMo-Data-Designer` directory and run the following command:
14+
15+
```bash
16+
uv sync
17+
```
18+
19+
This will create a virtual environment and install the necessary dependencies. Activate the virtual environment by running the following command:
20+
21+
```bash
22+
source .venv/bin/activate
23+
```
24+
25+
Be sure to select this virtual environment as your kernel when running the notebooks.

nemo/NeMo-Data-Designer/intro-tutorials/1-the-basics.ipynb

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,9 @@
1414
"\n",
1515
"In this notebook, we will demonstrate the basics of Data Designer by generating a simple product review dataset.\n",
1616
"\n",
17-
"#### 💾 Install `nemo-microservices` with the `[data-designer]` extra option.\n"
18-
]
19-
},
20-
{
21-
"cell_type": "code",
22-
"execution_count": null,
23-
"metadata": {},
24-
"outputs": [],
25-
"source": [
26-
"%%capture\n",
27-
"%pip install \"nemo-microservices[data-designer]==1.1.0rc4\" --index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple\n",
28-
"%pip install huggingface-hub"
17+
"#### 💾 Install dependencies\n",
18+
"\n",
19+
"If you haven't already, follow the instructions in the [README](../README.md) to install the necessary dependencies. Note you may need to restart your kernel after setting up the environment.\n"
2920
]
3021
},
3122
{
@@ -41,6 +32,8 @@
4132
"metadata": {},
4233
"outputs": [],
4334
"source": [
35+
"from getpass import getpass\n",
36+
"\n",
4437
"from nemo_microservices import NeMoMicroservices\n",
4538
"from nemo_microservices.beta.data_designer import (\n",
4639
" DataDesignerConfigBuilder,\n",
@@ -83,6 +76,27 @@
8376
"- This list contains the models you can choose from (via the `model_alias` argument) during the generation process.\n"
8477
]
8578
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"# build.nvidia.com model endpoint\n",
86+
"endpoint = \"https://integrate.api.nvidia.com/v1\"\n",
87+
"model_id = \"mistralai/mistral-small-24b-instruct\"\n",
88+
"\n",
89+
"model_alias = \"mistral-small\"\n",
90+
"\n",
91+
"# You will need to enter your model provider API key to run this notebook.\n",
92+
"api_key = getpass(\"Enter model provider API key: \")\n",
93+
"\n",
94+
"if len(api_key) > 0:\n",
95+
" print(\"✅ API key received.\")\n",
96+
"else:\n",
97+
" print(\"❌ No API key provided. Please enter your model provider API key.\")"
98+
]
99+
},
86100
{
87101
"cell_type": "code",
88102
"execution_count": null,
@@ -91,21 +105,17 @@
91105
"source": [
92106
"model_configs = [\n",
93107
" P.ModelConfig(\n",
94-
" alias=\"llama\",\n",
108+
" alias=model_alias,\n",
95109
" inference_parameters=P.InferenceParameters(\n",
96110
" max_tokens=1024,\n",
97111
" temperature=0.5,\n",
98112
" top_p=1.0,\n",
99113
" ),\n",
100114
" model=P.Model(\n",
101115
" api_endpoint=P.ApiEndpoint(\n",
102-
" # For this API key to be visible in the microservice, you must set\n",
103-
" # the MODEL_API_KEY_NIM environment variable in the data-designer\n",
104-
" # section of the docker-compose file. Note the custom environment\n",
105-
" # variable names must start with the prefix \"MODEL_API_KEY_\".\n",
106-
" api_key=\"@os.environ/MODEL_API_KEY_NIM\",\n",
107-
" model_id=\"meta/llama-3.3-70b-instruct\",\n",
108-
" url=\"https://nim.int.aire.nvidia.com/v1\",\n",
116+
" api_key=api_key,\n",
117+
" model_id=model_id,\n",
118+
" url=endpoint,\n",
109119
" ),\n",
110120
" ),\n",
111121
" )\n",
@@ -292,7 +302,7 @@
292302
" \"You are a helpful assistant that generates product names. You respond with only the product name, \"\n",
293303
" \"no other text. You do NOT add quotes around the product name. \"\n",
294304
" ),\n",
295-
" model_alias=\"llama\",\n",
305+
" model_alias=model_alias,\n",
296306
" )\n",
297307
")\n",
298308
"\n",
@@ -305,7 +315,7 @@
305315
" \"Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. \"\n",
306316
" \"The style of the review should be '{{ review_style }}'. \"\n",
307317
" ),\n",
308-
" model_alias=\"llama\",\n",
318+
" model_alias=model_alias,\n",
309319
" )\n",
310320
")\n",
311321
"\n",
@@ -339,7 +349,7 @@
339349
"outputs": [],
340350
"source": [
341351
"# The preview dataset is available as a pandas DataFrame.\n",
342-
"preview.dataset.head()"
352+
"preview.dataset"
343353
]
344354
},
345355
{
@@ -394,6 +404,18 @@
394404
"results = ndd.create(config_builder, num_records=20, wait_until_done=True)"
395405
]
396406
},
407+
{
408+
"cell_type": "code",
409+
"execution_count": null,
410+
"metadata": {},
411+
"outputs": [],
412+
"source": [
413+
"# load the dataset into a pandas DataFrame\n",
414+
"dataset = results.load_dataset()\n",
415+
"\n",
416+
"dataset.head()"
417+
]
418+
},
397419
{
398420
"cell_type": "markdown",
399421
"metadata": {},

nemo/NeMo-Data-Designer/intro-tutorials/2-structured-outputs-and-jinja-expressions.ipynb

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,9 @@
1414
"\n",
1515
"In this notebook, we will continue our exploration of Data Designer, demonstrating more advanced data generation using structured outputs and Jinja expressions.\n",
1616
"\n",
17-
"If this is your first time using Data Designer, we recommend starting with the [first notebook](./1-the-basics.ipynb) in this 101 series.\n",
17+
"#### 💾 Install dependencies\n",
1818
"\n",
19-
"#### 💾 Install `nemo-microservices` with the `[data-designer]` extra option.\n"
20-
]
21-
},
22-
{
23-
"cell_type": "code",
24-
"execution_count": null,
25-
"metadata": {},
26-
"outputs": [],
27-
"source": [
28-
"%%capture\n",
29-
"%pip install \"nemo-microservices[data-designer]==1.1.0rc4\" --index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple\n",
30-
"%pip install huggingface-hub"
19+
"If you haven't already, follow the instructions in the [README](../README.md) to install the necessary dependencies. Note you may need to restart your kernel after setting up the environment.\n"
3120
]
3221
},
3322
{
@@ -43,6 +32,8 @@
4332
"metadata": {},
4433
"outputs": [],
4534
"source": [
35+
"from getpass import getpass\n",
36+
"\n",
4637
"from nemo_microservices import NeMoMicroservices\n",
4738
"from nemo_microservices.beta.data_designer import (\n",
4839
" DataDesignerConfigBuilder,\n",
@@ -131,6 +122,27 @@
131122
"- This list contains the models you can choose from (via the `model_alias` argument) during the generation process.\n"
132123
]
133124
},
125+
{
126+
"cell_type": "code",
127+
"execution_count": null,
128+
"metadata": {},
129+
"outputs": [],
130+
"source": [
131+
"# build.nvidia.com model endpoint\n",
132+
"endpoint = \"https://integrate.api.nvidia.com/v1\"\n",
133+
"model_id = \"mistralai/mistral-small-24b-instruct\"\n",
134+
"\n",
135+
"model_alias = \"mistral-small\"\n",
136+
"\n",
137+
"# You will need to enter your model provider API key to run this notebook.\n",
138+
"api_key = getpass(\"Enter model provider API key: \")\n",
139+
"\n",
140+
"if len(api_key) > 0:\n",
141+
" print(\"✅ API key received.\")\n",
142+
"else:\n",
143+
" print(\"❌ No API key provided. Please enter your model provider API key.\")"
144+
]
145+
},
134146
{
135147
"cell_type": "code",
136148
"execution_count": null,
@@ -139,21 +151,17 @@
139151
"source": [
140152
"model_configs = [\n",
141153
" P.ModelConfig(\n",
142-
" alias=\"llama\",\n",
154+
" alias=model_alias,\n",
143155
" inference_parameters=P.InferenceParameters(\n",
144156
" max_tokens=1024,\n",
145157
" temperature=0.5,\n",
146158
" top_p=1.0,\n",
147159
" ),\n",
148160
" model=P.Model(\n",
149161
" api_endpoint=P.ApiEndpoint(\n",
150-
" # For this API key to be visible in the microservice, you must set\n",
151-
" # the MODEL_API_KEY_NIM environment variable in the data-designer\n",
152-
" # section of the docker-compose file. Note the custom environment\n",
153-
" # variable names must start with the prefix \"MODEL_API_KEY_\".\n",
154-
" api_key=\"@os.environ/MODEL_API_KEY_NIM\",\n",
155-
" model_id=\"meta/llama-3.3-70b-instruct\",\n",
156-
" url=\"https://nim.int.aire.nvidia.com/v1\",\n",
162+
" api_key=api_key,\n",
163+
" model_id=model_id,\n",
164+
" url=endpoint,\n",
157165
" ),\n",
158166
" ),\n",
159167
" )\n",
@@ -288,7 +296,7 @@
288296
" \"{{ target_age_range }} years old. The product should be priced between $10 and $1000.\"\n",
289297
" ),\n",
290298
" output_format=Product,\n",
291-
" model_alias=\"llama\",\n",
299+
" model_alias=model_alias,\n",
292300
" )\n",
293301
")\n",
294302
"\n",
@@ -304,7 +312,7 @@
304312
" \"Write the review in a style that is '{{ review_style }}'.\"\n",
305313
" ),\n",
306314
" output_format=ProductReview,\n",
307-
" model_alias=\"llama\",\n",
315+
" model_alias=model_alias,\n",
308316
" )\n",
309317
")\n",
310318
"\n",
@@ -341,7 +349,7 @@
341349
"outputs": [],
342350
"source": [
343351
"# The preview dataset is available as a pandas DataFrame.\n",
344-
"preview.dataset.head()"
352+
"preview.dataset"
345353
]
346354
},
347355
{

nemo/NeMo-Data-Designer/intro-tutorials/3-seeding-with-a-dataset.ipynb

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,9 @@
1616
"\n",
1717
"If this is your first time using Data Designer, we recommend starting with the [first notebook](./1-the-basics.ipynb) in this 101 series.\n",
1818
"\n",
19-
"#### 💾 Install `nemo-microservices` with the `[data-designer]` extra option.\n"
20-
]
21-
},
22-
{
23-
"cell_type": "code",
24-
"execution_count": null,
25-
"metadata": {},
26-
"outputs": [],
27-
"source": [
28-
"%%capture\n",
29-
"%pip install \"nemo-microservices[data-designer]==1.1.0rc4\" --index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple\n",
30-
"%pip install huggingface-hub\n",
31-
"%pip install datasets"
19+
"#### 💾 Install dependencies\n",
20+
"\n",
21+
"If you haven't already, follow the instructions in the [README](../README.md) to install the necessary dependencies. Note you may need to restart your kernel after setting up the environment.\n"
3222
]
3323
},
3424
{
@@ -44,6 +34,8 @@
4434
"metadata": {},
4535
"outputs": [],
4636
"source": [
37+
"from getpass import getpass\n",
38+
"\n",
4739
"from nemo_microservices import NeMoMicroservices\n",
4840
"from nemo_microservices.beta.data_designer import (\n",
4941
" DataDesignerConfigBuilder,\n",
@@ -84,30 +76,47 @@
8476
"- This list contains the models you can choose from (via the `model_alias` argument) during the generation process.\n"
8577
]
8678
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"# build.nvidia.com model endpoint\n",
86+
"endpoint = \"https://integrate.api.nvidia.com/v1\"\n",
87+
"model_id = \"mistralai/mistral-small-24b-instruct\"\n",
88+
"\n",
89+
"model_alias = \"mistral-small\"\n",
90+
"\n",
91+
"# You will need to enter your model provider API key to run this notebook.\n",
92+
"api_key = getpass(\"Enter model provider API key: \")\n",
93+
"\n",
94+
"if len(api_key) > 0:\n",
95+
" print(\"✅ API key received.\")\n",
96+
"else:\n",
97+
" print(\"❌ No API key provided. Please enter your model provider API key.\")"
98+
]
99+
},
87100
{
88101
"cell_type": "code",
89102
"execution_count": null,
90103
"metadata": {},
91104
"outputs": [],
92105
"source": [
93106
"# You can also load the model configs from a YAML string or file.\n",
94-
"# For the API key to be visible in the microservice, you must set\n",
95-
"# the MODEL_API_KEY_NIM environment variable in the data-designer\n",
96-
"# section of the docker-compose file. Note the custom environment\n",
97-
"# variable names must start with the prefix \"MODEL_API_KEY_\".\n",
98107
"\n",
99-
"model_configs_yaml = \"\"\"\\\n",
108+
"model_configs_yaml = f\"\"\"\\\n",
100109
"model_configs:\n",
101-
" - alias: llama\n",
110+
" - alias: \"{model_alias}\"\n",
102111
" inference_parameters:\n",
103112
" max_tokens: 1024\n",
104113
" temperature: 0.5\n",
105114
" top_p: 1.0\n",
106115
" model:\n",
107116
" api_endpoint:\n",
108-
" api_key: \"@os.environ/MODEL_API_KEY_NIM\"\n",
109-
" model_id: \"meta/llama-3.3-70b-instruct\"\n",
110-
" url: \"https://nim.int.aire.nvidia.com/v1\"\n",
117+
" api_key: \"{api_key}\"\n",
118+
" model_id: \"{model_id}\"\n",
119+
" url: \"{endpoint}\"\n",
111120
"\"\"\"\n",
112121
"\n",
113122
"config_builder = DataDesignerConfigBuilder(model_configs=model_configs_yaml)"
@@ -271,7 +280,7 @@
271280
"\n",
272281
"Format the notes as a busy doctor might.\n",
273282
"\"\"\",\n",
274-
" model_alias=\"llama\",\n",
283+
" model_alias=model_alias,\n",
275284
")\n",
276285
"\n",
277286
"config_builder.validate()"
@@ -304,7 +313,7 @@
304313
"outputs": [],
305314
"source": [
306315
"# The preview dataset is available as a pandas DataFrame.\n",
307-
"preview.dataset.head()"
316+
"preview.dataset"
308317
]
309318
},
310319
{
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[project]
2+
name = "nemo-data-designer"
3+
version = "0.0.1"
4+
description = "NeMo Data Designer tutorial notebooks"
5+
readme = "README.md"
6+
requires-python = ">=3.11"
7+
8+
dependencies = [
9+
"datasets",
10+
"huggingface-hub",
11+
"jupyter",
12+
"nemo-microservices[data-designer]==1.1.0rc7",
13+
]
14+
15+
[[tool.uv.index]]
16+
name = "nv-shared-pypi"
17+
url = "https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple"
18+
default = true
19+
20+
[tool.uv.sources]
21+
nemo-microservices = { index = "nv-shared-pypi" }

0 commit comments

Comments
 (0)