1616def generate_content () -> str :
1717 # [START googlegenaisdk_boundingbox_with_txt_img]
1818 import requests
19-
2019 from google import genai
21- from google .genai .types import GenerateContentConfig , HttpOptions , Part , SafetySetting
22-
20+ from google .genai .types import (
21+ GenerateContentConfig ,
22+ HarmBlockThreshold ,
23+ HarmCategory ,
24+ HttpOptions ,
25+ Part ,
26+ SafetySetting ,
27+ )
2328 from PIL import Image , ImageColor , ImageDraw
24-
2529 from pydantic import BaseModel
2630
2731 # Helper class to represent a bounding box
@@ -31,7 +35,7 @@ class BoundingBox(BaseModel):
3135
3236 Attributes:
3337 box_2d (list[int]): A list of integers representing the 2D coordinates of the bounding box,
34- typically in the format [x_min, y_min, x_max , y_max].
38+ typically in the format [y_min, x_min, y_max, x_max], normalized to a 0-1000 range.
3539 label (str): A string representing the label or class associated with the object within the bounding box.
3640 """
3741
@@ -41,12 +45,12 @@ class BoundingBox(BaseModel):
4145 # Helper function to plot bounding boxes on an image
4246 def plot_bounding_boxes (image_uri : str , bounding_boxes : list [BoundingBox ]) -> None :
4347 """
44- Plots bounding boxes on an image with markers for each a name , using PIL, normalized coordinates, and different colors .
48+ Plots bounding boxes on an image with labels , using PIL and normalized coordinates .
4549
4650 Args:
47- img_path : The path to the image file.
48- bounding_boxes: A list of bounding boxes containing the name of the object
49- and their positions in normalized [y1 x1 y2 x2 ] format.
51+ image_uri : The URI of the image file.
52+ bounding_boxes: A list of BoundingBox objects. Each box's coordinates are in
53+ normalized [y_min, x_min, y_max, x_max ] format.
5054 """
5155 with Image .open (requests .get (image_uri , stream = True , timeout = 10 ).raw ) as im :
5256 width , height = im .size
@@ -55,19 +59,23 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
5559 colors = list (ImageColor .colormap .keys ())
5660
5761 for i , bbox in enumerate (bounding_boxes ):
58- y1 , x1 , y2 , x2 = bbox . box_2d
59- abs_y1 = int (y1 / 1000 * height )
60- abs_x1 = int (x1 / 1000 * width )
61- abs_y2 = int (y2 / 1000 * height )
62- abs_x2 = int (x2 / 1000 * width )
62+ # Scale normalized (0-1000) coordinates to pixel positions in the image
63+ abs_y_min = int (bbox . box_2d [ 0 ] / 1000 * height )
64+ abs_x_min = int (bbox . box_2d [ 1 ] / 1000 * width )
65+ abs_y_max = int (bbox . box_2d [ 2 ] / 1000 * height )
66+ abs_x_max = int (bbox . box_2d [ 3 ] / 1000 * width )
6367
6468 color = colors [i % len (colors )]
6569
70+ # PIL takes (x, y) corner pairs, so reorder from box_2d's (y, x) layout
6671 draw .rectangle (
67- ((abs_x1 , abs_y1 ), (abs_x2 , abs_y2 )), outline = color , width = 4
72+ ((abs_x_min , abs_y_min ), (abs_x_max , abs_y_max )),
73+ outline = color ,
74+ width = 4 ,
6875 )
6976 if bbox .label :
70- draw .text ((abs_x1 + 8 , abs_y1 + 6 ), bbox .label , fill = color )
77+ # Place the label slightly inside the top-left corner of the box
78+ draw .text ((abs_x_min + 8 , abs_y_min + 6 ), bbox .label , fill = color )
7179
7280 im .show ()
7381
@@ -83,12 +91,12 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
8391 temperature = 0.5 ,
8492 safety_settings = [
8593 SafetySetting (
86- category = " HARM_CATEGORY_DANGEROUS_CONTENT" ,
87- threshold = " BLOCK_ONLY_HIGH" ,
94+ category = HarmCategory . HARM_CATEGORY_DANGEROUS_CONTENT ,
95+ threshold = HarmBlockThreshold . BLOCK_ONLY_HIGH ,
8896 ),
8997 ],
9098 response_mime_type = "application/json" ,
91- response_schema = list [BoundingBox ], # Add BoundingBox class to the response schema
99+ response_schema = list [BoundingBox ],
92100 )
93101
94102 image_uri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"
@@ -109,8 +117,8 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
109117
110118 # Example response:
111119 # [
112- # {"box_2d": [36 , 246, 380, 492 ], "label": "top left sock with face"},
113- # {"box_2d": [260, 663, 640, 917 ], "label": "top right sock with face"},
120+ # {"box_2d": [6, 246, 386, 526], "label": "top-left light blue sock with cat face"},
121+ # {"box_2d": [234, 649, 650, 863], "label": "top-right light blue sock with cat face"},
114122 # ]
115123 # [END googlegenaisdk_boundingbox_with_txt_img]
116124 return response .text
0 commit comments