@@ -342,7 +342,7 @@ def append(con: duckdb.DuckDBPyConnection, tabledef, msgs):
342342 for msg in msgs :
343343 try :
344344 values = msg .data
345- # key_value format
345+ # key_value
346346 if fmt == "key_value" :
347347 obj = getobj (topic , values )
348348 set_default (msg , tabledef , obj , add_nullable = True )
@@ -356,7 +356,7 @@ def append(con: duckdb.DuckDBPyConnection, tabledef, msgs):
356356
357357 fields = [obj [f ] for f in tblfields ]
358358
359- # dataframe format
359+ # dataframe
360360 elif fmt == "dataframe" :
361361 df = tag2df (values [0 ])
362362 if list (df .columns ) != tblfields :
@@ -424,90 +424,66 @@ def schema_to_polars(tabledef: Table):
424424 return schema
425425
426426
427- def upsert (con : duckdb .DuckDBPyConnection , tabledef , messages ):
428- tname = tabledef .table
429- fmt = tabledef .format
430- col_names = columns (tabledef ) + list (tabledef .extras .values ())
431- indexes = list (tabledef .keys )
432- records = []
433- dataframes = []
def add_extras(table, msg, obj):
    """Copy optional per-message extras into ``obj`` (mutated in place).

    ``table.extras`` maps an extra's key to the column name it is stored
    under.  For each supported key that is present, the matching message
    attribute is written to ``obj`` under that column name:

    * ``"recv_ts"`` -> ``msg.recvts``
    * ``"slot"``    -> ``msg.slot``

    Keys missing from ``table.extras`` are skipped, and the corresponding
    message attribute is not read at all.
    """
    extras_map = table.extras
    # (key in table.extras, attribute name on msg)
    for extra_key, msg_attr in (("recv_ts", "recvts"), ("slot", "slot")):
        if extra_key in extras_map:
            obj[extras_map[extra_key]] = getattr(msg, msg_attr)
434432
435- # Build final DataFrame (tdf)
436- for msg in messages :
437- try :
438- values = msg .data
439433
440- # key_value
441- if fmt == "key_value" :
442- obj = getobj ( tname , values )
443- set_default ( msg , tabledef , obj , add_nullable = True )
434+ def handle_key_value ( msg , table , col_names , records , tname ):
435+ obj = getobj ( tname , msg . data )
436+ set_default ( msg , table , obj , add_nullable = True )
437+ add_extras ( table , msg , obj )
444438
445- if "recv_ts" in tabledef .extras :
446- obj [tabledef .extras ["recv_ts" ]] = msg .recvts
447- if "slot" in tabledef .extras :
448- obj [tabledef .extras ["slot" ]] = msg .slot
439+ if all (k in obj for k in col_names ):
440+ records .append (obj )
441+ else :
442+ logger .warning (
443+ "[%s] unsaved %s missing required fields %s" , tname , obj , col_names
444+ )
449445
450- # Ensure all fields exist
451- if all (k in obj for k in col_names ):
452- records .append (obj )
453- else :
454- logger .warning (
455- "[%s] unsaved %s missing required fields %s" ,
456- tname ,
457- obj ,
458- col_names ,
459- )
460- continue
461446
462- # dataframe
463- elif fmt == "dataframe" :
464- df = tag2df (values [0 ])
465- df = df_extras (tabledef , df , msg )
466- if list (df .columns ) == col_names :
467- dataframes .append (df )
468- else :
469- logger .warning ("[%s] unsaved df (mismatched fields)" , tname )
470- continue
447+ def handle_dataframe (msg , table , col_names , dataframes , tname ):
448+ df = tag2df (msg .data [0 ])
449+ df = df_extras (table , df , msg )
471450
472- # default format
473- else :
474- vals = values
475- if len (vals ) == len (tabledef .columns ):
476- extra_vals = extras (tabledef , msg )
477- all_vals = vals + extra_vals
478- records .append (all_vals )
479- else :
480- logger .warning (
481- "[%s] unsaved %s with mismatched fields" , tname , vals
482- )
483- continue
484- except Exception as e :
485- logger .error ("[upsert] %s: %s" , tname , e )
451+ if list (df .columns ) == col_names :
452+ dataframes .append (df )
453+ else :
454+ logger .warning ("[%s] unsaved df (mismatched fields)" , tname )
455+
456+
def handle_default(msg, table, col_names, records, tname):
    """Queue one default-format message as a row in ``records``.

    ``msg.data`` is accepted only when it has exactly as many entries as
    ``table.columns``; the values returned by ``extras(table, msg)`` are
    then appended to it and the combined row is added to ``records``.
    On a length mismatch a warning is logged and nothing is queued.

    ``col_names`` is not used here; presumably it is kept so that all
    format handlers share one signature (confirm against the caller).
    """
    row = msg.data
    if len(row) == len(table.columns):
        records.append(row + extras(table, msg))
    else:
        logger.warning("[%s] unsaved %s with mismatched fields", tname, row)
465+
466+
467+ def execute_upsert (con , table , col_names , indexes , records , dataframes ):
468+ tname = table .table
486469
487- tdf = pl .DataFrame (records , schema = schema_to_polars (tabledef ), orient = "row" )
470+ tdf = pl .DataFrame (records , schema = schema_to_polars (table ), orient = "row" )
488471 tdf = pl .concat ([tdf , * dataframes ], how = "vertical" )
472+
489473 if tdf .is_empty ():
490474 return
491475
492476 if indexes :
493- tdf = (
494- tdf .sort (indexes ) # Sort so last row is last
495- .group_by (indexes )
496- .agg ([pl .all ().last ()]) # Take last row of each group
497- )
498-
499- # logger.debug("[%s] upserting dataframe:\n%s", tname, tdf)
477+ tdf = tdf .sort (indexes ).group_by (indexes ).agg ([pl .all ().last ()])
500478
501479 con .register ("df_view" , tdf )
502480
503- conds = [f"df_view.{ k } = { tname } .{ k } " for k in indexes ]
504- cond_str = " AND " .join (conds )
505-
481+ cond_str = " AND " .join (f"df_view.{ k } = { tname } .{ k } " for k in indexes )
506482 col_list = ", " .join (col_names )
507483 val_list = ", " .join (f"df_view.{ c } " for c in col_names )
508484
509- update_columns = [c for c in col_names if c not in indexes ]
510- update_list = ", " .join (f"{ c } = df_view.{ c } " for c in update_columns )
485+ update_cols = [c for c in col_names if c not in indexes ]
486+ update_list = ", " .join (f"{ c } = df_view.{ c } " for c in update_cols )
511487
512488 sql = f"""
513489 MERGE INTO { tname }
@@ -521,6 +497,33 @@ def upsert(con: duckdb.DuckDBPyConnection, tabledef, messages):
521497 con .unregister ("df_view" )
522498
523499
def upsert(con: duckdb.DuckDBPyConnection, table, messages):
    """Collect rows from ``messages`` and upsert them into ``table``.

    Each message is converted by the handler matching ``table.format``:
    ``"key_value"`` and any other (default) format add rows to a shared
    record list, while ``"dataframe"`` adds frames to a separate list.
    An exception raised while handling one message is logged and does not
    stop the remaining messages from being processed.  Everything that was
    collected is then passed to ``execute_upsert`` in a single call.
    """
    tname = table.table
    col_names = columns(table) + list(table.extras.values())
    indexes = list(table.keys)

    records = []
    dataframes = []

    # The format is fixed for the whole batch, so choose the handler and
    # the list it fills once instead of re-testing it for every message.
    fmt = table.format
    if fmt == "key_value":
        handler, sink = handle_key_value, records
    elif fmt == "dataframe":
        handler, sink = handle_dataframe, dataframes
    else:
        handler, sink = handle_default, records

    for msg in messages:
        try:
            handler(msg, table, col_names, sink, tname)
        except Exception as e:  # pylint: disable=broad-except
            logger.error("[upsert] %s: %s", tname, e)

    execute_upsert(con, table, col_names, indexes, records, dataframes)
525+
526+
524527def expand (msg : PubSubMsg ):
525528 """
526529 Given a message `msg` containing a topic string
0 commit comments