Change audio_transcribe to use bbq.ai.generate instead of llm.GeminiTextGenerator

shuoweil · shuoweil · commit e982229e8ed5 · 2025-10-09T06:43:22.000Z
diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
@@ -804,35 +804,27 @@ def audio_transcribe(
             raise ValueError("Must specify the engine, supported value is 'bigquery'.")
 
         import bigframes.bigquery as bbq
-        import bigframes.ml.llm as llm
         import bigframes.pandas as bpd
 
         # col name doesn't matter here. Rename to avoid column name conflicts
         audio_series = bigframes.series.Series(self._block)
 
         prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio."
 
-        llm_model = llm.GeminiTextGenerator(
-            model_name=model_name,
-            session=self._block.session,
-            connection_name=connection,
+        # Use bbq.ai.generate() to transcribe audio
+        transcribed_results = bbq.ai.generate(
+            prompt=(prompt_text, audio_series),
+            connection_id=connection,
+            endpoint=model_name,
+            request_type="unspecified",
         )
 
-        # transcribe audio using ML.GENERATE_TEXT
-        transcribed_results = llm_model.predict(
-            X=audio_series,
-            prompt=[prompt_text, audio_series],
-            temperature=0.0,
+        transcribed_content_series = transcribed_results.struct.field("result").rename(
+            "transcribed_content"
         )
 
-        transcribed_content_series = cast(
-            bpd.Series, transcribed_results["ml_generate_text_llm_result"]
-        ).rename("transcribed_content")
-
         if verbose:
-            transcribed_status_series = cast(
-                bpd.Series, transcribed_results["ml_generate_text_status"]
-            )
+            transcribed_status_series = transcribed_results.struct.field("status")
             results_df = bpd.DataFrame(
                 {
                     "status": transcribed_status_series,
diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
@@ -424,6 +424,7 @@ def test_blob_transcribe(
         )
         .to_pandas()
     )
+    print(actual)
 
     # check relative length
     expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress"

Original file line number	Diff line number	Diff line change
`@@ -424,6 +424,7 @@ def test_blob_transcribe(`
`424`	`424`	`)`
`425`	`425`	`.to_pandas()`
`426`	`426`	`)`
	`427`	`+ print(actual)`
`427`	`428`
`428`	`429`	`# check relative length`
`429`	`430`	`expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress"`