Merge branch 'main' into sycai_ai_gen_bool

sycai · web-flow · commit 6ebb05b6ef73 · 2025-09-12T10:12:01.000-07:00
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
@@ -641,6 +641,9 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
         return BIGFRAMES_STRING_TO_BIGFRAMES[
             typing.cast(DtypeString, str(dtype_string))
         ]
+    if isinstance(dtype_string, str) and dtype_string.lower() == "json":
+        return JSON_DTYPE
+
     raise TypeError(
         textwrap.dedent(
             f"""
@@ -652,9 +655,9 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
                         The following pandas.ExtensionDtype are supported:
                         pandas.BooleanDtype(), pandas.Float64Dtype(),
                         pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"),
-                        pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")),
-                        pd.ArrowDtype(pa.timestamp("us")),
-                        pd.ArrowDtype(pa.timestamp("us", tz="UTC")).
+                        pandas.ArrowDtype(pa.date32()), pandas.ArrowDtype(pa.time64("us")),
+                        pandas.ArrowDtype(pa.timestamp("us")),
+                        pandas.ArrowDtype(pa.timestamp("us", tz="UTC")).
                 {constants.FEEDBACK_LINK}
                 """
         )
diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
@@ -849,10 +849,14 @@ class Claude3TextGenerator(base.RetriableRemotePredictor):
 
         The models only available in specific regions. Check https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#regions for details.
 
+    .. note::
+
+        The "claude-3-sonnet" model is deprecated. Use one of the other supported models instead.
+
     Args:
         model_name (str, Default to "claude-3-sonnet"):
             The model for natural language tasks. Possible values are "claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet" and "claude-3-opus".
-            "claude-3-sonnet" is Anthropic's dependable combination of skills and speed. It is engineered to be dependable for scaled AI deployments across a variety of use cases.
+            "claude-3-sonnet" (deprecated) is Anthropic's dependable combination of skills and speed. It is engineered to be dependable for scaled AI deployments across a variety of use cases.
             "claude-3-haiku" is Anthropic's fastest, most compact vision and text model for near-instant responses to simple queries, meant for seamless AI experiences mimicking human interactions.
             "claude-3-5-sonnet" is Anthropic's most powerful AI model and maintains the speed and cost of Claude 3 Sonnet, which is a mid-tier model.
             "claude-3-opus" is Anthropic's second-most powerful AI model, with strong performance on highly complex tasks.
diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py
@@ -100,7 +100,7 @@ def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df):
 # (b/366290533): Claude models are of extremely low capacity. The tests should reside in small tests. Moving these here just to protect BQML's shared capacity(as load test only runs once per day.) and make sure we still have minimum coverage.
 @pytest.mark.parametrize(
     "model_name",
-    ("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
+    ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
 )
 @pytest.mark.flaky(retries=3, delay=120)
 def test_claude3_text_generator_create_load(
@@ -125,7 +125,7 @@ def test_claude3_text_generator_create_load(
 
 @pytest.mark.parametrize(
     "model_name",
-    ("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
+    ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
 )
 @pytest.mark.flaky(retries=3, delay=120)
 def test_claude3_text_generator_predict_default_params_success(
@@ -144,7 +144,7 @@ def test_claude3_text_generator_predict_default_params_success(
 
 @pytest.mark.parametrize(
     "model_name",
-    ("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
+    ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
 )
 @pytest.mark.flaky(retries=3, delay=120)
 def test_claude3_text_generator_predict_with_params_success(
@@ -165,7 +165,7 @@ def test_claude3_text_generator_predict_with_params_success(
 
 @pytest.mark.parametrize(
     "model_name",
-    ("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
+    ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
 )
 @pytest.mark.flaky(retries=3, delay=120)
 def test_claude3_text_generator_predict_multi_col_success(
diff --git a/tests/system/small/engines/test_generic_ops.py b/tests/system/small/engines/test_generic_ops.py
@@ -275,6 +275,29 @@ def test_engines_astype_from_json(scalars_array_value: array_value.ArrayValue, e
     assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
 
 
+@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
+def test_engines_astype_to_json(scalars_array_value: array_value.ArrayValue, engine):
+    exprs = [  # each entry casts one input expression to JSON_DTYPE
+        ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr(
+            expression.deref("int64_col")
+        ),
+        ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr(
+            # Use a constant because float-to-JSON conversion has precision issues.
+            expression.const(5.2, bigframes.dtypes.FLOAT_DTYPE)
+        ),
+        ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr(
+            expression.deref("bool_col")
+        ),
+        ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr(
+            # Use a constant because "str_col" contains special characters.
+            expression.const('"hello world"', bigframes.dtypes.STRING_DTYPE)
+        ),
+    ]
+    arr, _ = scalars_array_value.compute_values(exprs)
+
+    assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
+
+
 @pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
 def test_engines_astype_timedelta(scalars_array_value: array_value.ArrayValue, engine):
     arr = apply_op(
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -3903,6 +3903,18 @@ def test_float_astype_json(errors):
     pd.testing.assert_series_equal(bf_result.to_pandas(), expected_result)
 
 
+def test_float_astype_json_str():
+    data = ["1.25", "2500000000", None, "-12323.24"]  # reused below to build the expected JSON series
+    bf_series = series.Series(data, dtype=dtypes.FLOAT_DTYPE)
+
+    bf_result = bf_series.astype("json")  # dtype passed as a string, not as dtypes.JSON_DTYPE
+    assert bf_result.dtype == dtypes.JSON_DTYPE
+
+    expected_result = pd.Series(data, dtype=dtypes.JSON_DTYPE)
+    expected_result.index = expected_result.index.astype("Int64")  # index cast to "Int64" before the equality check
+    pd.testing.assert_series_equal(bf_result.to_pandas(), expected_result)
+
+
 @pytest.mark.parametrize("errors", ["raise", "null"])
 def test_string_astype_json(errors):
     data = [