@@ -333,21 +333,6 @@ async def export_by_token_ranges(
333333
334334 stats .end_time = time .time ()
335335
336- async def export_to_iceberg (
337- self ,
338- source_keyspace : str ,
339- source_table : str ,
340- iceberg_warehouse_path : str ,
341- iceberg_table : str ,
342- partition_by : list [str ] | None = None ,
343- split_count : int | None = None ,
344- batch_size : int = 10000 ,
345- progress_callback : Callable [[BulkOperationStats ], None ] | None = None ,
346- ) -> BulkOperationStats :
347- """Export Cassandra table to Iceberg format (stub: always raises NotImplementedError)."""
348- # Placeholder until Iceberg integration is added; none of the arguments are used yet.
349- raise NotImplementedError ("Iceberg export will be implemented in next phase" )
350-
351336 async def import_from_iceberg (
352337 self ,
353338 iceberg_warehouse_path : str ,
@@ -519,3 +504,69 @@ async def export_to_parquet(
519504 parallelism = parallelism ,
520505 progress_callback = progress_callback ,
521506 )
507+
508+ async def export_to_iceberg (
509+ self ,
510+ keyspace : str ,
511+ table : str ,
512+ namespace : str | None = None ,
513+ table_name : str | None = None ,
514+ catalog : Any | None = None ,
515+ catalog_config : dict [str , Any ] | None = None ,
516+ warehouse_path : str | Path | None = None ,
517+ partition_spec : Any | None = None ,
518+ table_properties : dict [str , str ] | None = None ,
519+ compression : str = "snappy" ,
520+ row_group_size : int = 100000 ,
521+ columns : list [str ] | None = None ,
522+ split_count : int | None = None ,
523+ parallelism : int | None = None ,
524+ progress_callback : Any | None = None ,
525+ ) -> Any :
526+ """Export table data to Apache Iceberg format.
527+
528+ Iceberg adds lakehouse features (ACID transactions, time travel, schema
529+ evolution). Delegates to IcebergExporter, imported lazily from .iceberg.
530+
531+ Args:
532+ keyspace: Cassandra keyspace to export from
533+ table: Cassandra table to export
534+ namespace: Iceberg namespace (default: keyspace name)
535+ table_name: Iceberg table name (default: Cassandra table name)
536+ catalog: Pre-configured Iceberg catalog (optional; given to the exporter)
537+ catalog_config: Custom catalog configuration (optional; given to the exporter)
538+ warehouse_path: Path to Iceberg warehouse (for filesystem catalog)
539+ partition_spec: Iceberg partition specification
540+ table_properties: Additional Iceberg table properties
541+ compression: Parquet compression (default: snappy)
542+ row_group_size: Rows per Parquet file (default: 100000)
543+ columns: Columns to export (default: all)
544+ split_count: Number of token range splits
545+ parallelism: Max concurrent operations
546+ progress_callback: Progress callback function, forwarded to export()
547+
548+ Returns:
549+ Result of IcebergExporter.export (ExportProgress with Iceberg metadata)
550+ """
551+ from .iceberg import IcebergExporter
552+
553+ exporter = IcebergExporter (
554+ self ,
555+ catalog = catalog ,
556+ catalog_config = catalog_config ,
557+ warehouse_path = warehouse_path ,
558+ compression = compression ,
559+ row_group_size = row_group_size ,
560+ )
561+ return await exporter .export (
562+ keyspace = keyspace ,
563+ table = table ,
564+ namespace = namespace ,
565+ table_name = table_name ,
566+ partition_spec = partition_spec ,
567+ table_properties = table_properties ,
568+ columns = columns ,
569+ split_count = split_count ,
570+ parallelism = parallelism ,
571+ progress_callback = progress_callback ,
572+ )
0 commit comments