@@ -267,17 +267,20 @@ def testAllWriteAndReadApi(self):
267267 table_write .close ()
268268 table_commit .close ()
269269
270+ all_data = pd .DataFrame ({
271+ 'f0' : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ],
272+ 'f1' : ['a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' ],
273+ })
274+ all_data ['f0' ] = all_data ['f0' ].astype ('int32' )
275+
270276 read_builder = table .new_read_builder ()
271277 table_scan = read_builder .new_scan ()
272278 table_read = read_builder .new_read ()
273279 splits = table_scan .plan ().splits ()
274280
275281 # to_arrow
276282 actual = table_read .to_arrow (splits )
277- expected = pa .Table .from_pydict ({
278- 'f0' : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ],
279- 'f1' : ['a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' ],
280- }, schema = self .simple_pa_schema )
283+ expected = pa .Table .from_pandas (all_data , schema = self .simple_pa_schema )
281284 self .assertEqual (actual , expected )
282285
283286 # to_arrow_batch_reader
@@ -286,18 +289,42 @@ def testAllWriteAndReadApi(self):
286289 for batch in table_read .to_arrow_batch_reader (splits )
287290 ]
288291 actual = pd .concat (data_frames )
289- expected = pd .DataFrame ({
290- 'f0' : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ],
291- 'f1' : ['a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' ],
292- })
293- expected ['f0' ] = expected ['f0' ].astype ('int32' )
294292 pd .testing .assert_frame_equal (
295- actual .reset_index (drop = True ), expected .reset_index (drop = True ))
293+ actual .reset_index (drop = True ), all_data .reset_index (drop = True ))
296294
297295 # to_pandas
298296 actual = table_read .to_pandas (splits )
299297 pd .testing .assert_frame_equal (
300- actual .reset_index (drop = True ), expected .reset_index (drop = True ))
298+ actual .reset_index (drop = True ), all_data .reset_index (drop = True ))
299+
300+ # to_duckdb
301+ duckdb_con = table_read .to_duckdb (splits , 'duckdb_table' )
302+ # select *
303+ result1 = duckdb_con .query ("SELECT * FROM duckdb_table" ).fetchdf ()
304+ pd .testing .assert_frame_equal (
305+ result1 .reset_index (drop = True ), all_data .reset_index (drop = True ))
306+ # select * where
307+ result2 = duckdb_con .query ("SELECT * FROM duckdb_table WHERE f0 < 4" ).fetchdf ()
308+ expected2 = pd .DataFrame ({
309+ 'f0' : [1 , 2 , 3 ],
310+ 'f1' : ['a' , 'b' , 'c' ]
311+ })
312+ expected2 ['f0' ] = expected2 ['f0' ].astype ('int32' )
313+ pd .testing .assert_frame_equal (
314+ result2 .reset_index (drop = True ), expected2 .reset_index (drop = True ))
315+ # select f0 where
316+ result3 = duckdb_con .query ("SELECT f0 FROM duckdb_table WHERE f0 < 4" ).fetchdf ()
317+ expected3 = pd .DataFrame ({
318+ 'f0' : [1 , 2 , 3 ]
319+ })
320+ expected3 ['f0' ] = expected3 ['f0' ].astype ('int32' )
321+ pd .testing .assert_frame_equal (
322+ result3 .reset_index (drop = True ), expected3 .reset_index (drop = True ))
323+
324+ # to_ray
325+ ray_dataset = table_read .to_ray (splits )
326+ pd .testing .assert_frame_equal (
327+ ray_dataset .to_pandas ().reset_index (drop = True ), all_data .reset_index (drop = True ))
301328
302329 def test_overwrite (self ):
303330 schema = Schema (self .simple_pa_schema , partition_keys = ['f0' ],
0 commit comments