@@ -622,32 +622,11 @@ def compile_readlocal(self, node: nodes.ReadLocalNode):
             for scan_item in node.scan_list.items
         }
 
-        # Workaround for PyArrow bug https://github.com/apache/arrow/issues/45262
-        # Convert JSON columns to strings before Polars processing
-        arrow_data = node.local_data_source.data
-        schema = arrow_data.schema
-
-        # Check if any columns are JSON type
-        json_field_indices = [
-            i
-            for i, field in enumerate(schema)
-            if pa.types.is_extension_type(field.type)
-            and field.type.extension_name == "google:sqlType:json"
-        ]
-
-        if json_field_indices:
-            # Convert JSON columns to string columns
-            new_arrays = []
-            new_fields = []
-            for i, field in enumerate(schema):
-                if i in json_field_indices:
-                    # Cast JSON to string
-                    new_arrays.append(arrow_data.column(i).cast(pa.string()))
-                    new_fields.append(pa.field(field.name, pa.string()))
-                else:
-                    new_arrays.append(arrow_data.column(i))
-                    new_fields.append(field)
-            arrow_data = pa.table(new_arrays, schema=pa.schema(new_fields))
+        if hasattr(node.local_data_source, "to_arrow"):
+            schema, batches = node.local_data_source.to_arrow(json_type="string")
+            arrow_data = pa.Table.from_batches(batches, schema)
+        else:
+            arrow_data = node.local_data_source.data
 
         lazy_frame = cast(pl.DataFrame, pl.from_arrow(arrow_data)).lazy()
         lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)
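
For context, here is a minimal sketch of what a `to_arrow(json_type="string")` helper could look like. This is an illustration, not the actual method on the local data source (whose class isn't shown in this diff): it is written as a free function over a `pa.Table`, while the extension name `google:sqlType:json`, the PyArrow bug link, and the cast-to-string logic all come from the inline workaround removed above.

```python
import pyarrow as pa


def to_arrow(table: pa.Table, json_type: str = "string"):
    """Return (schema, batches), casting JSON extension columns to strings.

    Hypothetical free-function stand-in for the data source's to_arrow method.
    """
    arrays, fields = [], []
    for i, field in enumerate(table.schema):
        is_json = (
            pa.types.is_extension_type(field.type)
            and field.type.extension_name == "google:sqlType:json"
        )
        if is_json and json_type == "string":
            # Polars cannot consume the JSON extension type directly
            # (PyArrow bug https://github.com/apache/arrow/issues/45262),
            # so cast these columns down to plain strings first.
            arrays.append(table.column(i).cast(pa.string()))
            fields.append(pa.field(field.name, pa.string()))
        else:
            arrays.append(table.column(i))
            fields.append(field)
    schema = pa.schema(fields)
    return schema, pa.table(arrays, schema=schema).to_batches()
```

Returning `(schema, batches)` rather than a finished table lets the call site reassemble it with `pa.Table.from_batches(batches, schema)`, matching the new code in the diff.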