Skip to content

Commit 2170dc5

Browse files
committed
docs(genai): Add Bounding Box Sample for Gen AI SDK
1 parent 01a285c commit 2170dc5

File tree

5 files changed

+188
-0
lines changed

5 files changed

+188
-0
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def generate_content() -> str:
    """Ask Gemini for bounding boxes on a sample image and plot them.

    Sends a sample image URI together with a text prompt, requests the result
    as a JSON list of labelled bounding boxes, prints the raw response text,
    and displays the image with the returned boxes drawn on it.

    Returns:
        The raw text of the model response.
    """
    # [START googlegenaisdk_boundingbox_with_txt_img]
    import io

    import requests

    from google import genai
    from google.genai.types import (
        GenerateContentConfig,
        HttpOptions,
        Part,
        SafetySetting,
    )

    from PIL import Image, ImageColor, ImageDraw

    from pydantic import BaseModel

    # Schema for one detected object. Kept as comments rather than a class
    # docstring so the schema sent to the model is unchanged.
    class BoundingBox(BaseModel):
        # Box corners as [y1, x1, y2, x2], normalized to a 0-1000 scale.
        box_2d: list[int]
        # Name the model assigned to the object.
        label: str

    def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> None:
        """
        Draws labelled bounding boxes on an image using PIL, one color per box.

        Args:
            image_uri: The HTTP(S) URI of the image to download and annotate.
            bounding_boxes: A list of bounding boxes containing the name of the object
            and their positions in normalized [y1 x1 y2 x2] format.

        Raises:
            requests.HTTPError: If the image cannot be downloaded successfully.
        """

        # Download the image first so an HTTP error surfaces here instead of as
        # an opaque PIL decoding failure, and so the connection is released.
        with requests.get(image_uri, timeout=10) as http_response:
            http_response.raise_for_status()
            image_bytes = io.BytesIO(http_response.content)

        with Image.open(image_bytes) as im:
            width, height = im.size
            draw = ImageDraw.Draw(im)

            colors = list(ImageColor.colormap.keys())

            for i, bbox in enumerate(bounding_boxes):
                # Scale the 0-1000 normalized coordinates to pixel positions.
                y1, x1, y2, x2 = bbox.box_2d
                abs_y1 = int(y1 / 1000 * height)
                abs_x1 = int(x1 / 1000 * width)
                abs_y2 = int(y2 / 1000 * height)
                abs_x2 = int(x2 / 1000 * width)

                # Cycle through the named colors so neighbouring boxes differ.
                color = colors[i % len(colors)]

                draw.rectangle(
                    ((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4
                )
                if bbox.label:
                    draw.text((abs_x1 + 8, abs_y1 + 6), bbox.label, fill=color)

            im.show()

    client = genai.Client(http_options=HttpOptions(api_version="v1"))

    config = GenerateContentConfig(
        system_instruction="""Return bounding boxes as an array with labels. Never return masks. Limit to 25 objects.
        If an object is present multiple times, give each object a unique label according to its distinct characteristics (colors, size, position, etc..).""",
        temperature=0.5,
        safety_settings=[
            SafetySetting(
                category="HARM_CATEGORY_DANGEROUS_CONTENT",
                threshold="BLOCK_ONLY_HIGH",
            ),
        ],
        response_mime_type="application/json",
        response_schema=list[BoundingBox],
    )

    image_uri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"

    response = client.models.generate_content(
        model="gemini-2.0-flash-001",
        contents=[
            Part.from_uri(
                file_uri=image_uri,
                mime_type="image/jpeg",
            ),
            "Output the positions of the socks with a face. Label according to position in the image.",
        ],
        config=config,
    )
    print(response.text)
    # NOTE(review): `parsed` may be None if the response text could not be
    # parsed into the schema; fall back to an empty list so plotting
    # still shows the image instead of raising TypeError.
    plot_bounding_boxes(image_uri, response.parsed or [])

    # Example response:
    # [
    #     {"box_2d": [36, 246, 380, 492], "label": "top left sock with face"},
    #     {"box_2d": [260, 663, 640, 917], "label": "top right sock with face"},
    # ]
    # [END googlegenaisdk_boundingbox_with_txt_img]
    return response.text


if __name__ == "__main__":
    generate_content()
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Default TEST_CONFIG_OVERRIDE for python repos.
16+
17+
# You can copy this file into your directory, then it will be imported from
18+
# the noxfile.py.
19+
20+
# The source of truth:
21+
# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py
22+
23+
TEST_CONFIG_OVERRIDE = {
    # Python versions for which the tests are skipped.
    "ignored_versions": ["2.7", "3.7", "3.8", "3.10", "3.11", "3.13"],
    # Type hints are enforced for this sample; only old samples opt out.
    "enforce_type_hints": True,
    # Name of the environment variable that holds the project id to use.
    # Set it to "BUILD_SPECIFIC_GCLOUD_PROJECT" to opt in to a build-specific
    # Cloud project, or to your own string to use your own Cloud project.
    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
    # To use a specific pip version, give its string representation here,
    # for example "20.2.4".
    "pip_version_override": None,
    # Extra values injected into the test environment; they override the
    # predefined ones. Do not put secrets here.
    "envs": {},
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
google-api-core==2.24.0
2+
pytest==8.2.0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
google-genai==1.2.0
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
#
16+
# Using Google Cloud Vertex AI to test the code samples.
17+
#
18+
19+
import os
20+
21+
import boundingbox_with_txt_img
22+
23+
# Run the samples against Vertex AI in us-central1.
# GOOGLE_CLOUD_PROJECT is not set here: the CICD pipeline provides it.
# For a local run, export it yourself (e.g. "add-your-project-name").
os.environ.update(
    {
        "GOOGLE_GENAI_USE_VERTEXAI": "True",
        "GOOGLE_CLOUD_LOCATION": "us-central1",
    }
)
27+
28+
29+
def test_boundingbox_with_txt_img() -> None:
    """Run the bounding-box sample and check that it returns a truthy response."""
    response_text = boundingbox_with_txt_img.generate_content()
    assert response_text

0 commit comments

Comments
 (0)