Skip to content

Commit 265e9d7

Browse files
authored
Usability improvements for 25.08 release (#324)
- Remedy review feedback - Enhancements to fully automate such that notebooks can run without interaction end to end Signed-off-by: Shashank Verma <shashankv@nvidia.com>
1 parent cd25625 commit 265e9d7

File tree

4 files changed

+54
-6
lines changed

4 files changed

+54
-6
lines changed

nemo/data-flywheel/tool-calling/2_finetuning_and_inference.ipynb

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@
594594
"\n",
595595
"customization = client_with_wandb.customization.jobs.create(\n",
596596
" name=\"llama-3.2-1b-xlam-ft\",\n",
597-
" output_model=f\"{NMS_NAMESPACE}/llama-3.2-1b-xlam-run1\",\n",
597+
" output_model=CUSTOM_MODEL,\n",
598598
" config=f\"{BASE_MODEL}@{BASE_MODEL_VERSION}\",\n",
599599
" dataset={\"name\": DATASET_NAME, \"namespace\": NMS_NAMESPACE},\n",
600600
" hyperparameters={\n",
@@ -952,12 +952,57 @@
952952
"print(\"Job Status:\", json.dumps(job_status.model_dump(), indent=2, default=str))"
953953
]
954954
},
955+
{
956+
"cell_type": "code",
957+
"execution_count": null,
958+
"id": "e84514e1",
959+
"metadata": {},
960+
"outputs": [],
961+
"source": [
962+
"# Add wait job function to wait for the customization job to complete\n",
963+
"\n",
964+
"from time import sleep, time\n",
965+
"\n",
966+
"def wait_job(nemo_client, job_id: str, polling_interval: int = 10, timeout: int = 6000):\n",
967+
" \"\"\"Helper for waiting on a customization job using the SDK.\"\"\"\n",
968+
" start_time = time()\n",
969+
" job = nemo_client.customization.jobs.retrieve(job_id=job_id)\n",
970+
" status = job.status\n",
971+
"\n",
972+
" while (status in [\"pending\", \"created\", \"running\"]):\n",
973+
" # Check for timeout\n",
974+
" if time() - start_time > timeout:\n",
975+
" raise RuntimeError(f\"Took more than {timeout} seconds.\")\n",
976+
"\n",
977+
" # Sleep before polling again\n",
978+
" sleep(polling_interval)\n",
979+
"\n",
980+
" # Fetch updated status and progress\n",
981+
" job = nemo_client.customization.jobs.retrieve(job_id=job_id)\n",
982+
" status = job.status\n",
983+
" progress = 0.0\n",
984+
" if status == \"running\" and job.status_details:\n",
985+
" progress = job.status_details.percentage_done or 0.0\n",
986+
" elif status == \"completed\":\n",
987+
" progress = 100\n",
988+
"\n",
989+
" print(f\"Job status: {status} after {time() - start_time:.2f} seconds. Progress: {progress}%\")\n",
990+
"\n",
991+
"\n",
992+
" return job\n",
993+
"\n",
994+
"job = wait_job(nemo_client, JOB_ID, polling_interval=5, timeout=2400)\n",
995+
"\n",
996+
"# Wait for 2 minutes, because sometimes, the job is finished, but the finetuned model is not ready in NIM yet.\n",
997+
"sleep(120)"
998+
]
999+
},
9551000
{
9561001
"cell_type": "markdown",
9571002
"id": "42b721be-8ca0-4e8f-99a7-5eb12ea1b47f",
9581003
"metadata": {},
9591004
"source": [
960-
"**IMPORTANT:** Monitor the job status. Ensure training is completed before proceeding by observing the `percentage_done` key in the response frame."
1005+
"**IMPORTANT:** At this point, the customization job should be completed. If waiting for the job to finish failed or the status is not `\"completed\"`, please check the logs (`job.status_details.status_logs`)."
9611006
]
9621007
},
9631008
{

nemo/data-flywheel/tool-calling/3_model_evaluation.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@
124124
"metadata": {},
125125
"outputs": [],
126126
"source": [
127-
"CUSTOMIZED_MODEL = \"\" # paste from the previous notebook"
127+
"CUSTOMIZED_MODEL = CUSTOM_MODEL # defined in config.py; no manual paste from the previous notebook is needed"
128128
]
129129
},
130130
{

nemo/data-flywheel/tool-calling/4_adding_safety_guardrails.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@
101101
"metadata": {},
102102
"outputs": [],
103103
"source": [
104-
"CUSTOMIZED_MODEL = \"\" # paste from the previous notebook"
104+
"CUSTOMIZED_MODEL = CUSTOM_MODEL # defined in config.py; no manual paste from the previous notebook is needed"
105105
]
106106
},
107107
{
@@ -262,7 +262,7 @@
262262
"# Create guardrails configuration\n",
263263
"guardrail_config = nemo_client.guardrail.configs.create(\n",
264264
" name=\"toolcalling\",\n",
265-
" namespace=\"default\",\n",
265+
" namespace=NMS_NAMESPACE,\n",
266266
" data={\n",
267267
" \"models\": [\n",
268268
" { \n",
@@ -454,7 +454,7 @@
454454
" }\n",
455455
" ],\n",
456456
" guardrails={\n",
457-
" \"config_id\": \"toolcalling\"\n",
457+
" \"config_id\": f\"{NMS_NAMESPACE}/toolcalling\"\n",
458458
" },\n",
459459
" temperature=0.2,\n",
460460
" top_p=1\n",

nemo/data-flywheel/tool-calling/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,6 @@
1919
# (Optional) Configure the base model. Must be one supported by the NeMo Customizer deployment!
2020
BASE_MODEL = "meta/llama-3.2-1b-instruct"
2121
BASE_MODEL_VERSION = "v1.0.0+A100"
22+
23+
# (Optional) Configure the custom model name. It is defined here so that the same name can be passed to the other notebooks.
24+
CUSTOM_MODEL = f"{NMS_NAMESPACE}/llama-3.2-1b-xlam-run1@v1"

0 commit comments

Comments
 (0)