# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def generate_content() -> str:
    """Ask Gemini for labeled bounding boxes on a sample image and plot them.

    Sends a publicly hosted image and a text prompt to the model, requests a
    JSON array of bounding boxes, prints the raw JSON text, and draws the
    boxes on the image when the response was parsed into ``BoundingBox``
    objects.

    Returns:
        The raw text of the model response.
    """
    # [START googlegenaisdk_boundingbox_with_txt_img]
    import io

    import requests

    from google import genai
    from google.genai.types import (
        GenerateContentConfig,
        HttpOptions,
        Part,
        SafetySetting,
    )

    from PIL import Image, ImageColor, ImageDraw

    from pydantic import BaseModel

    # Schema for one detected object; also passed to the model as the
    # response schema below.
    class BoundingBox(BaseModel):
        # Coordinates in [y1, x1, y2, x2] order (see plot_bounding_boxes).
        box_2d: list[int]
        # Text drawn next to the box.
        label: str

    def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> None:
        """Draw labeled bounding boxes on an image and display it with PIL.

        Each box gets its own outline color, cycling through PIL's named
        colors when there are more boxes than colors.

        Args:
            image_uri: URL of the image to download and annotate.
            bounding_boxes: Boxes to draw. Each ``box_2d`` is read as
                [y1, x1, y2, x2] and divided by 1000 before being scaled to
                the image's pixel dimensions.

        Raises:
            requests.HTTPError: If the image download returns an error status.
        """
        image_response = requests.get(image_uri, timeout=10)
        # Fail with a clear HTTP error instead of handing an error page to PIL.
        image_response.raise_for_status()

        # ``content`` is the fully downloaded, decoded body; wrapping it in
        # BytesIO gives PIL the seekable file object it expects.
        with Image.open(io.BytesIO(image_response.content)) as im:
            width, height = im.size
            draw = ImageDraw.Draw(im)

            colors = list(ImageColor.colormap.keys())

            for i, bbox in enumerate(bounding_boxes):
                y1, x1, y2, x2 = bbox.box_2d
                # Convert the 0-1000 scaled coordinates to pixel positions.
                abs_y1 = int(y1 / 1000 * height)
                abs_x1 = int(x1 / 1000 * width)
                abs_y2 = int(y2 / 1000 * height)
                abs_x2 = int(x2 / 1000 * width)

                # Cycle through the palette so every box has a color.
                color = colors[i % len(colors)]

                draw.rectangle(
                    ((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4
                )
                if bbox.label:
                    # Offset the label slightly so it sits inside the outline.
                    draw.text((abs_x1 + 8, abs_y1 + 6), bbox.label, fill=color)

            im.show()

    client = genai.Client(http_options=HttpOptions(api_version="v1"))

    config = GenerateContentConfig(
        system_instruction="""Return bounding boxes as an array with labels. Never return masks. Limit to 25 objects.
        If an object is present multiple times, give each object a unique label according to its distinct characteristics (colors, size, position, etc..).""",
        temperature=0.5,
        safety_settings=[
            SafetySetting(
                category="HARM_CATEGORY_DANGEROUS_CONTENT",
                threshold="BLOCK_ONLY_HIGH",
            ),
        ],
        response_mime_type="application/json",
        response_schema=list[BoundingBox],
    )

    image_uri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"

    response = client.models.generate_content(
        model="gemini-2.0-flash-001",
        contents=[
            Part.from_uri(
                file_uri=image_uri,
                mime_type="image/jpeg",
            ),
            "Output the positions of the socks with a face. Label according to position in the image.",
        ],
        config=config,
    )
    print(response.text)
    # NOTE(review): ``parsed`` is assumed to be None when the SDK could not
    # turn the response into the schema; skip plotting in that case rather
    # than failing while iterating None.
    if response.parsed is not None:
        plot_bounding_boxes(image_uri, response.parsed)

    # Example response:
    # [
    #     {"box_2d": [36, 246, 380, 492], "label": "top left sock with face"},
    #     {"box_2d": [260, 663, 640, 917], "label": "top right sock with face"},
    # ]
    # [END googlegenaisdk_boundingbox_with_txt_img]
    return response.text
| 109 | + |
| 110 | + |
# Run the sample only when this file is executed directly as a script.
if __name__ == "__main__":
    generate_content()
0 commit comments