# BIGNUMERIC is stored as an Arrow 256-bit decimal (precision 76, scale 38).
BIGNUMERIC_DTYPE = pd.ArrowDtype(pa.decimal256(76, 38))
# No arrow equivalent, so geography uses the geopandas extension dtype.
GEO_DTYPE = gpd.array.GeometryDtype()
# JSON values are carried as Arrow large_string.
JSON_DTYPE = pd.ArrowDtype(pa.large_string())

# Used when storing Null expressions
DEFAULT_DTYPE = FLOAT_DTYPE
@@ -132,6 +134,13 @@ class SimpleDtypeInfo:
132134 orderable = True ,
133135 clusterable = True ,
134136 ),
137+ SimpleDtypeInfo (
138+ dtype = JSON_DTYPE ,
139+ arrow_dtype = pa .large_string (),
140+ type_kind = ("JSON" ,),
141+ orderable = False ,
142+ clusterable = False ,
143+ ),
135144 SimpleDtypeInfo (
136145 dtype = DATE_DTYPE ,
137146 arrow_dtype = pa .date32 (),
@@ -281,7 +290,7 @@ def is_struct_like(type_: ExpressionType) -> bool:
281290
def is_json_like(type_: ExpressionType) -> bool:
    """Return whether ``type_`` can be treated as JSON data.

    Both the dedicated ``JSON_DTYPE`` and ``STRING_DTYPE`` qualify; the
    string dtype is included so that JSON held in a string is accepted.
    """
    if type_ == JSON_DTYPE:
        return True
    # Including JSON string
    return type_ == STRING_DTYPE
285294
286295
287296def is_json_encoding_type (type_ : ExpressionType ) -> bool :
@@ -455,8 +464,6 @@ def infer_literal_arrow_type(literal) -> typing.Optional[pa.DataType]:
455464 return bigframes_dtype_to_arrow_dtype (infer_literal_type (literal ))
456465
457466
458- # Don't have dtype for json, so just end up interpreting as STRING
459- _REMAPPED_TYPEKINDS = {"JSON" : "STRING" }
460467_TK_TO_BIGFRAMES = {
461468 type_kind : mapping .dtype
462469 for mapping in SIMPLE_TYPES
@@ -480,16 +487,13 @@ def convert_schema_field(
480487 pa_struct = pa .struct (fields )
481488 pa_type = pa .list_ (pa_struct ) if is_repeated else pa_struct
482489 return field .name , pd .ArrowDtype (pa_type )
483- elif (
484- field .field_type in _TK_TO_BIGFRAMES or field .field_type in _REMAPPED_TYPEKINDS
485- ):
486- singular_type = _TK_TO_BIGFRAMES [
487- _REMAPPED_TYPEKINDS .get (field .field_type , field .field_type )
488- ]
490+ elif field .field_type in _TK_TO_BIGFRAMES :
489491 if is_repeated :
490- pa_type = pa .list_ (bigframes_dtype_to_arrow_dtype (singular_type ))
492+ pa_type = pa .list_ (
493+ bigframes_dtype_to_arrow_dtype (_TK_TO_BIGFRAMES [field .field_type ])
494+ )
491495 return field .name , pd .ArrowDtype (pa_type )
492- return field .name , singular_type
496+ return field .name , _TK_TO_BIGFRAMES [ field . field_type ]
493497 else :
494498 raise ValueError (f"Cannot handle type: { field .field_type } " )
495499
@@ -639,7 +643,7 @@ def can_coerce(source_type: ExpressionType, target_type: ExpressionType) -> bool
639643 return True # None can be coerced to any supported type
640644 else :
641645 return (source_type == STRING_DTYPE ) and (
642- target_type in TEMPORAL_BIGFRAMES_TYPES
646+ target_type in TEMPORAL_BIGFRAMES_TYPES + [ JSON_DTYPE ]
643647 )
644648
645649
0 commit comments