Skip to content

Commit 12f5aa5

Browse files
BenjaminKazemi and copybara-github
authored and committed
feat: GenAI SDK client(multimodal) - Support Assess Tuning Validity for multimodal dataset.
PiperOrigin-RevId: 868760597
1 parent 75991fd commit 12f5aa5

File tree

2 files changed

+172
-1
lines changed

2 files changed

+172
-1
lines changed

tests/unit/vertexai/genai/replays/test_assess_multimodal_dataset.py

Lines changed: 48 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,29 @@ def test_assess_tuning_resources(client):
6666
assert isinstance(response, types.TuningResourceUsageAssessmentResult)
6767

6868

69+
def test_assess_tuning_validity(client):
    """Check that the sync tuning-validity assessment returns a typed result.

    Assesses DATASET for the "SFT_VALIDATION" usage against
    "gemini-2.5-flash-001", using a two-turn (user/model) Gemini template,
    and verifies the response type only.
    """
    # The two template turns; the {name}/{capital} placeholders are passed
    # through verbatim as part of the template text.
    user_turn = {
        "role": "user",
        "parts": [{"text": "What is the capital of {name}?"}],
    }
    model_turn = {
        "role": "model",
        "parts": [{"text": "{capital}"}],
    }
    template = types.GeminiTemplateConfig(
        gemini_example=types.GeminiExample(
            contents=[user_turn, model_turn],
        ),
    )

    result = client.datasets.assess_tuning_validity(
        dataset_name=DATASET,
        dataset_usage="SFT_VALIDATION",
        model_name="gemini-2.5-flash-001",
        template_config=template,
    )

    assert isinstance(result, types.TuningValidationAssessmentResult)
90+
91+
6992
pytestmark = pytest_helper.setup(
7093
file=__file__,
7194
globals_for_file=globals(),
@@ -88,7 +111,7 @@ async def test_assess_dataset_async(client):
88111
{
89112
"role": "user",
90113
"parts": [{"text": "What is the capital of {name}?"}],
91-
}
114+
},
92115
],
93116
),
94117
),
@@ -114,3 +137,27 @@ async def test_assess_tuning_resources_async(client):
114137
),
115138
)
116139
assert isinstance(response, types.TuningResourceUsageAssessmentResult)
140+
141+
142+
@pytest.mark.asyncio
async def test_assess_tuning_validity_async(client):
    """Check that the async tuning-validity assessment returns a typed result.

    Same request as the sync variant, issued through `client.aio.datasets`:
    assesses DATASET for the "SFT_VALIDATION" usage against
    "gemini-2.5-flash-001" with a two-turn (user/model) Gemini template.
    """
    # The two template turns; the {name}/{capital} placeholders are passed
    # through verbatim as part of the template text.
    user_turn = {
        "role": "user",
        "parts": [{"text": "What is the capital of {name}?"}],
    }
    model_turn = {
        "role": "model",
        "parts": [{"text": "{capital}"}],
    }
    template = types.GeminiTemplateConfig(
        gemini_example=types.GeminiExample(
            contents=[user_turn, model_turn],
        ),
    )

    result = await client.aio.datasets.assess_tuning_validity(
        dataset_name=DATASET,
        dataset_usage="SFT_VALIDATION",
        model_name="gemini-2.5-flash-001",
        template_config=template,
    )

    assert isinstance(result, types.TuningValidationAssessmentResult)

vertexai/_genai/datasets.py

Lines changed: 124 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1054,6 +1054,68 @@ def assess_tuning_resources(
10541054
response["tuningResourceUsageAssessmentResult"],
10551055
)
10561056

1057+
def assess_tuning_validity(
    self,
    *,
    dataset_name: str,
    model_name: str,
    dataset_usage: str,
    template_config: Optional[types.GeminiTemplateConfigOrDict] = None,
    config: Optional[types.AssessDatasetConfigOrDict] = None,
) -> types.TuningValidationAssessmentResult:
    """Assess whether the assembled dataset is valid for tuning a given model.

    Args:
      dataset_name:
        Required. The name of the dataset to assess the tuning validity
        for.
      model_name:
        Required. The name of the model to assess the tuning validity
        for.
      dataset_usage:
        Required. The dataset usage to assess the tuning validity for.
        Must be one of the following: SFT_TRAINING, SFT_VALIDATION.
      template_config:
        Optional. The template config used to assemble the dataset
        before assessing the tuning validity. If not provided, the
        template config attached to the dataset will be used. Required
        if no template config is attached to the dataset.
      config:
        Optional. A configuration for assessing the tuning validity,
        given either as an `AssessDatasetConfig` or as a dict of its
        fields. If not provided, the default configuration will be used.

    Returns:
      A `types.TuningValidationAssessmentResult` built from the
      `tuningValidationAssessmentResult` entry of the completed
      operation's response. It reports:
        - errors: A list of errors that occurred during the tuning
          validity assessment.
    """
    # Normalize `config` to an AssessDatasetConfig instance so that
    # `config.timeout` can be read below regardless of how it was passed.
    if isinstance(config, dict):
        config = types.AssessDatasetConfig(**config)
    elif not config:
        config = types.AssessDatasetConfig()

    # Start the assessment, then wait on the returned operation.
    operation = self._assess_multimodal_dataset(
        name=dataset_name,
        tuning_validation_assessment_config=types.TuningValidationAssessmentConfig(
            model_name=model_name,
            dataset_usage=dataset_usage,
        ),
        gemini_request_read_config=types.GeminiRequestReadConfig(
            template_config=template_config,
        ),
        config=config,
    )
    response = self._wait_for_operation(
        operation=operation,
        timeout_seconds=config.timeout,
    )
    return _datasets_utils.create_from_response(
        types.TuningValidationAssessmentResult,
        response["tuningValidationAssessmentResult"],
    )
1118+
10571119

10581120
class AsyncDatasets(_api_module.BaseModule):
10591121

@@ -1875,3 +1937,65 @@ async def assess_tuning_resources(
18751937
types.TuningResourceUsageAssessmentResult,
18761938
response["tuningResourceUsageAssessmentResult"],
18771939
)
1940+
1941+
async def assess_tuning_validity(
    self,
    *,
    dataset_name: str,
    model_name: str,
    dataset_usage: str,
    template_config: Optional[types.GeminiTemplateConfigOrDict] = None,
    config: Optional[types.AssessDatasetConfigOrDict] = None,
) -> types.TuningValidationAssessmentResult:
    """Assess whether the assembled dataset is valid for tuning a given model.

    Args:
      dataset_name:
        Required. The name of the dataset to assess the tuning validity
        for.
      model_name:
        Required. The name of the model to assess the tuning validity
        for.
      dataset_usage:
        Required. The dataset usage to assess the tuning validity for.
        Must be one of the following: SFT_TRAINING, SFT_VALIDATION.
      template_config:
        Optional. The template config used to assemble the dataset
        before assessing the tuning validity. If not provided, the
        template config attached to the dataset will be used. Required
        if no template config is attached to the dataset.
      config:
        Optional. A configuration for assessing the tuning validity,
        given either as an `AssessDatasetConfig` or as a dict of its
        fields. If not provided, the default configuration will be used.

    Returns:
      A `types.TuningValidationAssessmentResult` built from the
      `tuningValidationAssessmentResult` entry of the completed
      operation's response. It reports:
        - errors: A list of errors that occurred during the tuning
          validity assessment.
    """
    # Normalize `config` to an AssessDatasetConfig instance so that
    # `config.timeout` can be read below regardless of how it was passed.
    if isinstance(config, dict):
        config = types.AssessDatasetConfig(**config)
    elif not config:
        config = types.AssessDatasetConfig()

    # Start the assessment, then await the returned operation.
    operation = await self._assess_multimodal_dataset(
        name=dataset_name,
        tuning_validation_assessment_config=types.TuningValidationAssessmentConfig(
            model_name=model_name,
            dataset_usage=dataset_usage,
        ),
        gemini_request_read_config=types.GeminiRequestReadConfig(
            template_config=template_config,
        ),
        config=config,
    )
    response = await self._wait_for_operation(
        operation=operation,
        timeout_seconds=config.timeout,
    )
    return _datasets_utils.create_from_response(
        types.TuningValidationAssessmentResult,
        response["tuningValidationAssessmentResult"],
    )

0 commit comments

Comments
 (0)