@@ -297,3 +297,77 @@ def testAllWriteAndReadApi(self):
297297 actual = table_read .to_pandas (splits )
298298 pd .testing .assert_frame_equal (
299299 actual .reset_index (drop = True ), expected .reset_index (drop = True ))
300+
def test_overwrite(self):
    """Check partition-scoped and unscoped overwrite via the batch write builder.

    The table is partitioned on ``f0`` and created with the option
    ``dynamic-partition-overwrite`` set to ``'false'``. Three steps run
    against it, each followed by a full read-back:

    1. A plain batch write of four rows (partitions ``f0=1`` and ``f0=2``).
    2. ``overwrite({'f0': '1'})`` with one new row; the test expects the
       ``f0=1`` rows to be replaced while the ``f0=2`` rows remain.
    3. ``overwrite()`` with no argument and one new row; the test expects
       that row to be the only content left in the table.

    Comparisons are made after sorting both frames on every column, so the
    assertions do not depend on the order in which rows are read back.
    """
    sort_columns = ['f0', 'f1']

    def write_and_commit(write_builder, frame):
        # One write/commit cycle. The handles are closed in ``finally`` so a
        # failing write or commit does not leave them open.
        table_write = write_builder.new_write()
        table_commit = write_builder.new_commit()
        try:
            table_write.write_pandas(frame)
            table_commit.commit(table_write.prepare_commit())
        finally:
            table_write.close()
            table_commit.close()

    def read_all_sorted():
        # A fresh scan/read pair is created per call so each read plans its
        # own splits after the preceding commit.
        table_scan = read_builder.new_scan()
        table_read = read_builder.new_read()
        frame = table_read.to_pandas(table_scan.plan().splits())
        return frame.sort_values(by=sort_columns).reset_index(drop=True)

    def expected_frame(columns):
        # pandas infers int64 for the literals; the test expects ``f0`` to be
        # read back as int32, so the expectation is cast to match.
        frame = pd.DataFrame(columns)
        frame['f0'] = frame['f0'].astype('int32')
        return frame.sort_values(by=sort_columns).reset_index(drop=True)

    schema = Schema(self.simple_pa_schema, partition_keys=['f0'],
                    options={'dynamic-partition-overwrite': 'false'})
    self.catalog.create_table('default.test_overwrite', schema, False)
    table = self.catalog.get_table('default.test_overwrite')
    read_builder = table.new_read_builder()

    # Step 1: initial load into two partitions.
    initial_rows = {
        'f0': [1, 1, 2, 2],
        'f1': ['apple', 'banana', 'dog', 'cat'],
    }
    write_and_commit(table.new_batch_write_builder(), pd.DataFrame(initial_rows))
    pd.testing.assert_frame_equal(read_all_sorted(), expected_frame(initial_rows))

    # Step 2: overwrite restricted to partition f0=1.
    write_and_commit(
        table.new_batch_write_builder().overwrite({'f0': '1'}),
        pd.DataFrame({
            'f0': [1],
            'f1': ['watermelon'],
        }))
    pd.testing.assert_frame_equal(
        read_all_sorted(),
        expected_frame({
            'f0': [2, 2, 1],
            'f1': ['dog', 'cat', 'watermelon'],
        }))

    # Step 3: overwrite with no partition argument.
    replacement_rows = {
        'f0': [3],
        'f1': ['Neo'],
    }
    write_and_commit(
        table.new_batch_write_builder().overwrite(),
        pd.DataFrame(replacement_rows))
    pd.testing.assert_frame_equal(read_all_sorted(), expected_frame(replacement_rows))
0 commit comments