Commit 1d72b74

Commit message: tests
1 parent: fe90109

38 files changed: +5452 -4594 lines

libs/async-cassandra-bulk/src/async_cassandra_bulk/operators/bulk_operator.py

Lines changed: 18 additions & 3 deletions
@@ -110,13 +110,16 @@ def _parse_timestamp_to_micros(self, timestamp: Union[str, int, float, datetime]
             if timestamp < 0:
                 raise ValueError("Timestamp cannot be negative")
 
-            # Detect if it's seconds or milliseconds
+            # Detect if it's seconds, milliseconds, or microseconds
             # If timestamp is less than year 3000 in seconds, assume seconds
             if timestamp < 32503680000:  # Jan 1, 3000 in seconds
                 return int(timestamp * 1_000_000)
-            else:
-                # Assume milliseconds
+            # If timestamp is less than year 3000 in milliseconds, assume milliseconds
+            elif timestamp < 32503680000000:  # Jan 1, 3000 in milliseconds
                 return int(timestamp * 1_000)
+            else:
+                # Assume microseconds (already in the correct unit)
+                return int(timestamp)
 
         else:
             raise TypeError(f"Unsupported timestamp type: {type(timestamp)}")
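
A minimal, standalone sketch of the unit-detection heuristic this hunk introduces (the to_micros name and module-level constants are illustrative; in the library this logic lives inside _parse_timestamp_to_micros):

    YEAR_3000_SECONDS = 32503680000          # Jan 1, 3000 in seconds
    YEAR_3000_MILLISECONDS = 32503680000000  # Jan 1, 3000 in milliseconds

    def to_micros(ts: float) -> int:
        # Negative timestamps are rejected, matching the diff above.
        if ts < 0:
            raise ValueError("Timestamp cannot be negative")
        if ts < YEAR_3000_SECONDS:           # small enough to be seconds
            return int(ts * 1_000_000)
        elif ts < YEAR_3000_MILLISECONDS:    # otherwise plausible as milliseconds
            return int(ts * 1_000)
        else:                                # already microseconds
            return int(ts)

    assert to_micros(1_700_000_000) == 1_700_000_000_000_000          # seconds
    assert to_micros(1_700_000_000_000) == 1_700_000_000_000_000      # milliseconds
    assert to_micros(1_700_000_000_000_000) == 1_700_000_000_000_000  # microseconds
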
@@ -212,6 +215,9 @@ async def export(
                 - writetime_before: Export rows where ANY column was written before this time
                 - writetime_filter_mode: "any" (default) or "all" - whether ANY or ALL
                   writetime columns must match the filter criteria
+                - include_ttl: Include TTL (time to live) for columns (default: False)
+                - ttl_columns: List of columns to get TTL for
+                  (default: None, use ["*"] for all non-key columns)
             csv_options: CSV-specific options
             json_options: JSON-specific options
             parquet_options: Parquet-specific options
@@ -264,6 +270,14 @@ async def export(
         if export_options.get("include_writetime") and not writetime_columns:
             # Default to all columns if include_writetime is True
             writetime_columns = ["*"]
+            # Update the options dict so validation sees it
+            export_options["writetime_columns"] = writetime_columns
+
+        # Extract TTL options
+        ttl_columns = export_options.get("ttl_columns")
+        if export_options.get("include_ttl") and not ttl_columns:
+            # Default to all columns if include_ttl is True
+            ttl_columns = ["*"]
 
         # Validate writetime options
         self._validate_writetime_options(export_options)
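
Combined with the docstring hunk above, a hypothetical sketch of the export_options keys this commit reads (how the dict is ultimately passed to export() is not shown in this diff and is assumed):

    # Hypothetical options sketch; only keys visible in the diff are used.
    export_options = {
        "include_writetime": True,   # emit <col>_writetime columns
        "writetime_columns": ["*"],  # "*" = all non-key, non-counter columns
        "include_ttl": True,         # new in this commit: emit <col>_ttl columns
        "ttl_columns": None,         # None + include_ttl=True defaults to ["*"]
    }
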
@@ -287,6 +301,7 @@ async def export(
             resume_from=resume_from,
             columns=columns,
             writetime_columns=writetime_columns,
+            ttl_columns=ttl_columns,
             writetime_after_micros=writetime_after_micros,
             writetime_before_micros=writetime_before_micros,
             writetime_filter_mode=writetime_filter_mode,

libs/async-cassandra-bulk/src/async_cassandra_bulk/parallel_export.py

Lines changed: 64 additions & 20 deletions
@@ -45,6 +45,7 @@ def __init__(
         resume_from: Optional[Dict[str, Any]] = None,
         columns: Optional[List[str]] = None,
         writetime_columns: Optional[List[str]] = None,
+        ttl_columns: Optional[List[str]] = None,
         writetime_after_micros: Optional[int] = None,
         writetime_before_micros: Optional[int] = None,
         writetime_filter_mode: str = "any",

@@ -64,6 +65,7 @@ def __init__(
             resume_from: Previous checkpoint to resume from
             columns: Optional list of columns to export (default: all)
             writetime_columns: Optional list of columns to get writetime for
+            ttl_columns: Optional list of columns to get TTL for
             writetime_after_micros: Only export rows with writetime after this (microseconds)
             writetime_before_micros: Only export rows with writetime before this (microseconds)
             writetime_filter_mode: "any" or "all" - how to combine writetime filters

@@ -79,6 +81,7 @@ def __init__(
         self.resume_from = resume_from
         self.columns = columns
         self.writetime_columns = writetime_columns
+        self.ttl_columns = ttl_columns
         self.writetime_after_micros = writetime_after_micros
         self.writetime_before_micros = writetime_before_micros
         self.writetime_filter_mode = writetime_filter_mode

@@ -129,6 +132,11 @@ def _load_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
                 f"Writetime columns changed from {config['writetime_columns']} to {self.writetime_columns}"
             )
 
+        if config.get("ttl_columns") != self.ttl_columns:
+            logger.warning(
+                f"TTL columns changed from {config['ttl_columns']} to {self.ttl_columns}"
+            )
+
         # Check writetime filter changes
         if config.get("writetime_after_micros") != self.writetime_after_micros:
             logger.warning(
@@ -220,11 +228,19 @@ def _should_filter_row(self, row_dict: Dict[str, Any]) -> bool:
             else:
                 writetime_values.append(value)
 
+        # DEBUG
+        if row_dict.get("id") == 4:
+            logger.info(f"DEBUG: Row 4 writetime values: {writetime_values}")
+            logger.info(f"DEBUG: Filtering with after={self.writetime_after_micros}")
+            logger.info(f"DEBUG: Row 4 full dict keys: {list(row_dict.keys())}")
+            wt_entries = {k: v for k, v in row_dict.items() if "_writetime" in k}
+            logger.info(f"DEBUG: Row 4 writetime entries: {wt_entries}")
+
         if not writetime_values:
-            # No writetime values found - this shouldn't happen if writetime filtering is enabled
-            # but if it does, we'll include the row to be safe
-            logger.warning("No writetime values found in row for filtering")
-            return False
+            # No writetime values found - all columns are NULL or primary keys
+            # When filtering by writetime, rows with no writetime values should be excluded
+            # as they cannot match any writetime criteria
+            return True  # Filter out the row
 
         # Apply filtering based on mode
         if self.writetime_filter_mode == "any":
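
The behavioural change here: rows with no writetime values are now excluded rather than kept. A simplified sketch of the decision (the helper name and the exact "any"/"all" bound checks are paraphrased assumptions; the real comparisons are outside this hunk):

    def should_filter(writetime_values, after=None, before=None, mode="any"):
        if not writetime_values:
            # New behaviour: no writetime values (all columns NULL or primary keys)
            # means the row cannot satisfy any writetime criterion, so exclude it.
            return True

        def in_window(wt):
            return (after is None or wt > after) and (before is None or wt < before)

        if mode == "any":
            keep = any(in_window(wt) for wt in writetime_values)
        else:  # "all"
            keep = all(in_window(wt) for wt in writetime_values)
        return not keep
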
@@ -290,6 +306,7 @@ async def _export_range(self, token_range: TokenRange, stats: BulkOperationStats
                 ),
                 self._resolved_columns or self.columns,
                 self.writetime_columns,
+                self.ttl_columns,
                 clustering_keys,
                 counter_columns,
             )

@@ -302,7 +319,10 @@ async def _export_range(self, token_range: TokenRange, stats: BulkOperationStats
                     row_dict[field] = getattr(row, field)
 
                 # Apply writetime filtering if enabled
-                if not self._should_filter_row(row_dict):
+                should_filter = self._should_filter_row(row_dict)
+                if row_dict.get("id") == 4:
+                    logger.info(f"DEBUG: Row 4 should_filter={should_filter}")
+                if not should_filter:
                     await self.exporter.write_row(row_dict)
                     row_count += 1
                     stats.rows_processed += 1

@@ -315,6 +335,7 @@ async def _export_range(self, token_range: TokenRange, stats: BulkOperationStats
                 TokenRange(start=MIN_TOKEN, end=token_range.end, replicas=token_range.replicas),
                 self._resolved_columns or self.columns,
                 self.writetime_columns,
+                self.ttl_columns,
                 clustering_keys,
                 counter_columns,
             )

@@ -327,7 +348,10 @@ async def _export_range(self, token_range: TokenRange, stats: BulkOperationStats
                     row_dict[field] = getattr(row, field)
 
                 # Apply writetime filtering if enabled
-                if not self._should_filter_row(row_dict):
+                should_filter = self._should_filter_row(row_dict)
+                if row_dict.get("id") == 4:
+                    logger.info(f"DEBUG: Row 4 should_filter={should_filter}")
+                if not should_filter:
                     await self.exporter.write_row(row_dict)
                     row_count += 1
                     stats.rows_processed += 1

@@ -340,6 +364,7 @@ async def _export_range(self, token_range: TokenRange, stats: BulkOperationStats
                 token_range,
                 self._resolved_columns or self.columns,
                 self.writetime_columns,
+                self.ttl_columns,
                 clustering_keys,
                 counter_columns,
             )

@@ -352,7 +377,10 @@ async def _export_range(self, token_range: TokenRange, stats: BulkOperationStats
                     row_dict[field] = getattr(row, field)
 
                 # Apply writetime filtering if enabled
-                if not self._should_filter_row(row_dict):
+                should_filter = self._should_filter_row(row_dict)
+                if row_dict.get("id") == 4:
+                    logger.info(f"DEBUG: Row 4 should_filter={should_filter}")
+                if not should_filter:
                     await self.exporter.write_row(row_dict)
                     row_count += 1
                     stats.rows_processed += 1
@@ -424,6 +452,7 @@ async def _save_checkpoint(self, stats: BulkOperationStats) -> None:
             "table": self.table,
             "columns": self.columns,
             "writetime_columns": self.writetime_columns,
+            "ttl_columns": self.ttl_columns,
             "batch_size": self.batch_size,
             "concurrency": self.concurrency,
             "writetime_after_micros": self.writetime_after_micros,
@@ -527,21 +556,22 @@ async def export(self) -> BulkOperationStats:
 
         # Write header including writetime columns
         header_columns = columns.copy()
-        if self.writetime_columns:
-            # Get key columns and counter columns to exclude
-            cluster = self.session._session.cluster
-            metadata = cluster.metadata
-            table_meta = metadata.keyspaces[self.keyspace].tables[self.table_name]
-            partition_keys = {col.name for col in table_meta.partition_key}
-            clustering_keys = {col.name for col in table_meta.clustering_key}
-            key_columns = partition_keys | clustering_keys
 
-            # Get counter columns (they don't support writetime)
-            counter_columns = set()
-            for col_name, col_meta in table_meta.columns.items():
-                if col_meta.cql_type == "counter":
-                    counter_columns.add(col_name)
+        # Get key columns and counter columns to exclude (needed for both writetime and TTL)
+        cluster = self.session._session.cluster
+        metadata = cluster.metadata
+        table_meta = metadata.keyspaces[self.keyspace].tables[self.table_name]
+        partition_keys = {col.name for col in table_meta.partition_key}
+        clustering_keys = {col.name for col in table_meta.clustering_key}
+        key_columns = partition_keys | clustering_keys
+
+        # Get counter columns (they don't support writetime or TTL)
+        counter_columns = set()
+        for col_name, col_meta in table_meta.columns.items():
+            if col_meta.cql_type == "counter":
+                counter_columns.add(col_name)
 
+        if self.writetime_columns:
             # Add writetime columns to header
             if self.writetime_columns == ["*"]:
                 # Add writetime for all non-key, non-counter columns
@@ -554,6 +584,20 @@ async def export(self) -> BulkOperationStats:
                     if col not in key_columns and col not in counter_columns:
                         header_columns.append(f"{col}_writetime")
 
+        # Add TTL columns to header
+        if self.ttl_columns:
+            # TTL uses same exclusions as writetime
+            if self.ttl_columns == ["*"]:
+                # Add TTL for all non-key, non-counter columns
+                for col in columns:
+                    if col not in key_columns and col not in counter_columns:
+                        header_columns.append(f"{col}_ttl")
+            else:
+                # Add TTL for specific columns (excluding keys and counters)
+                for col in self.ttl_columns:
+                    if col not in key_columns and col not in counter_columns:
+                        header_columns.append(f"{col}_ttl")
+
         # Write header only if not resuming
         if not self._header_written:
             await self.exporter.write_header(header_columns)
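
Under assumed table metadata, the writetime and TTL blocks above produce a header like this (table and column names are made up for illustration):

    # Hypothetical table: id is the partition key, views is a counter column.
    columns = ["id", "name", "payload", "views"]
    key_columns = {"id"}
    counter_columns = {"views"}

    header_columns = list(columns)
    for suffix in ("_writetime", "_ttl"):    # both selections set to ["*"]
        for col in columns:
            if col not in key_columns and col not in counter_columns:
                header_columns.append(f"{col}{suffix}")

    # header_columns == ["id", "name", "payload", "views",
    #                    "name_writetime", "payload_writetime",
    #                    "name_ttl", "payload_ttl"]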

libs/async-cassandra-bulk/src/async_cassandra_bulk/serializers/writetime.py

Lines changed: 18 additions & 12 deletions
@@ -45,18 +45,24 @@ def serialize(self, value: Any, context: SerializationContext) -> Any:
         else:
             return None
 
-        # Convert microseconds to datetime
-        # Cassandra writetime is microseconds since epoch
-        timestamp = datetime.fromtimestamp(value / 1_000_000, tz=timezone.utc)
-
-        if context.format == "csv":
-            # For CSV, use configurable format or ISO
-            fmt = context.options.get("writetime_format")
-            if fmt is None:
-                fmt = "%Y-%m-%d %H:%M:%S.%f"
-            return timestamp.strftime(fmt)
-        elif context.format == "json":
-            # For JSON, use ISO format with timezone
+        # Check if raw writetime values are requested
+        if context.options.get("writetime_raw", False):
+            # Return raw microsecond value for exact precision
+            return value
+
+        # For maximum precision, we need to handle large microsecond values carefully
+        # Python's datetime has limitations with very large timestamps
+
+        if context.format in ("csv", "json"):
+            # Convert to seconds and microseconds separately to avoid float precision loss
+            seconds = value // 1_000_000
+            microseconds = value % 1_000_000
+
+            # Create datetime from seconds, then adjust microseconds
+            timestamp = datetime.fromtimestamp(seconds, tz=timezone.utc)
+            timestamp = timestamp.replace(microsecond=microseconds)
+
+            # Return ISO format for both CSV and JSON
            return timestamp.isoformat()
         else:
             # For other formats, return as-is
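
The integer split matters because dividing a large microsecond count by 1e6 as a float can shave off the trailing microsecond on large timestamps. A small sketch of the difference (the writetime value is illustrative):

    from datetime import datetime, timezone

    writetime = 1_700_000_000_123_457  # microseconds since epoch

    # Float path (old code): may round the final microsecond on large values.
    approx = datetime.fromtimestamp(writetime / 1_000_000, tz=timezone.utc)

    # Integer path (new code): split into whole seconds and leftover microseconds.
    seconds, micros = divmod(writetime, 1_000_000)
    exact = datetime.fromtimestamp(seconds, tz=timezone.utc).replace(microsecond=micros)

    assert exact.microsecond == 123457  # exact regardless of magnitude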
