diff --git a/.github/workflows/ci-monorepo.yml b/.github/workflows/ci-monorepo.yml index a37ecd2..9c30edb 100644 --- a/.github/workflows/ci-monorepo.yml +++ b/.github/workflows/ci-monorepo.yml @@ -209,7 +209,7 @@ jobs: - name: "BDD Tests" command: "pytest tests/bdd -v" - name: "Example App" - command: "cd ../../examples/fastapi_app && pytest tests/ -v" + command: "cd examples/fastapi_app && pytest tests/ -v" services: cassandra: diff --git a/libs/async-cassandra-bulk/examples/Makefile b/libs/async-cassandra-bulk/examples/Makefile deleted file mode 100644 index 2f2a0e7..0000000 --- a/libs/async-cassandra-bulk/examples/Makefile +++ /dev/null @@ -1,121 +0,0 @@ -.PHONY: help install dev-install test test-unit test-integration lint format type-check clean docker-up docker-down run-example - -# Default target -.DEFAULT_GOAL := help - -help: ## Show this help message - @echo "Available commands:" - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' - -install: ## Install production dependencies - pip install -e . - -dev-install: ## Install development dependencies - pip install -e ".[dev]" - -test: ## Run all tests - pytest -v - -test-unit: ## Run unit tests only - pytest -v -m unit - -test-integration: ## Run integration tests (requires Cassandra cluster) - ./run_integration_tests.sh - -test-integration-only: ## Run integration tests without managing cluster - pytest -v -m integration - -test-slow: ## Run slow tests - pytest -v -m slow - -lint: ## Run linting checks - ruff check . - black --check . - -format: ## Format code - black . - ruff check --fix . - -type-check: ## Run type checking - mypy bulk_operations tests - -clean: ## Clean up generated files - rm -rf build/ dist/ *.egg-info/ - rm -rf .pytest_cache/ .coverage htmlcov/ - rm -rf iceberg_warehouse/ - find . -type d -name __pycache__ -exec rm -rf {} + - find . -type f -name "*.pyc" -delete - -# Container runtime detection -CONTAINER_RUNTIME ?= $(shell which docker >/dev/null 2>&1 && echo docker || which podman >/dev/null 2>&1 && echo podman) -ifeq ($(CONTAINER_RUNTIME),podman) - COMPOSE_CMD = podman-compose -else - COMPOSE_CMD = docker-compose -endif - -docker-up: ## Start 3-node Cassandra cluster - $(COMPOSE_CMD) up -d - @echo "Waiting for Cassandra cluster to be ready..." - @sleep 30 - @$(CONTAINER_RUNTIME) exec cassandra-1 cqlsh -e "DESCRIBE CLUSTER" || (echo "Cluster not ready, waiting more..." && sleep 30) - @echo "Cassandra cluster is ready!" - -docker-down: ## Stop and remove Cassandra cluster - $(COMPOSE_CMD) down -v - -docker-logs: ## Show Cassandra logs - $(COMPOSE_CMD) logs -f - -# Cassandra cluster management -cassandra-up: ## Start 3-node Cassandra cluster - $(COMPOSE_CMD) up -d - -cassandra-down: ## Stop and remove Cassandra cluster - $(COMPOSE_CMD) down -v - -cassandra-wait: ## Wait for Cassandra to be ready - @echo "Waiting for Cassandra cluster to be ready..." - @for i in {1..30}; do \ - if $(CONTAINER_RUNTIME) exec bulk-cassandra-1 cqlsh -e "SELECT now() FROM system.local" >/dev/null 2>&1; then \ - echo "Cassandra is ready!"; \ - break; \ - fi; \ - echo "Waiting for Cassandra... ($$i/30)"; \ - sleep 5; \ - done - -cassandra-logs: ## Show Cassandra logs - $(COMPOSE_CMD) logs -f - -# Example commands -example-count: ## Run bulk count example - @echo "Running bulk count example..." 
- python example_count.py - -example-export: ## Run export to Iceberg example (not yet implemented) - @echo "Export example not yet implemented" - # python example_export.py - -example-import: ## Run import from Iceberg example (not yet implemented) - @echo "Import example not yet implemented" - # python example_import.py - -# Quick demo -demo: cassandra-up cassandra-wait example-count ## Run quick demo with count example - -# Development workflow -dev-setup: dev-install docker-up ## Complete development setup - -ci: lint type-check test-unit ## Run CI checks (no integration tests) - -# Vnode validation -validate-vnodes: cassandra-up cassandra-wait ## Validate vnode token distribution - @echo "Checking vnode configuration..." - @$(CONTAINER_RUNTIME) exec bulk-cassandra-1 nodetool info | grep "Token" - @echo "" - @echo "Token ownership by node:" - @$(CONTAINER_RUNTIME) exec bulk-cassandra-1 nodetool ring | grep "^[0-9]" | awk '{print $$8}' | sort | uniq -c - @echo "" - @echo "Sample token ranges (first 10):" - @$(CONTAINER_RUNTIME) exec bulk-cassandra-1 nodetool describering test 2>/dev/null | grep "TokenRange" | head -10 || echo "Create test keyspace first" diff --git a/libs/async-cassandra-bulk/examples/README.md b/libs/async-cassandra-bulk/examples/README.md deleted file mode 100644 index 8399851..0000000 --- a/libs/async-cassandra-bulk/examples/README.md +++ /dev/null @@ -1,225 +0,0 @@ -# Token-Aware Bulk Operations Example - -This example demonstrates how to perform efficient bulk operations on Apache Cassandra using token-aware parallel processing, similar to DataStax Bulk Loader (DSBulk). - -## ๐Ÿš€ Features - -- **Token-aware operations**: Leverages Cassandra's token ring for parallel processing -- **Streaming exports**: Memory-efficient data export using async generators -- **Progress tracking**: Real-time progress updates during operations -- **Multi-node support**: Automatically distributes work across cluster nodes -- **Multiple export formats**: CSV, JSON, and Parquet with compression support โœ… -- **Apache Iceberg integration**: Export Cassandra data to the modern lakehouse format (coming in Phase 3) - -## ๐Ÿ“‹ Prerequisites - -- Python 3.12+ -- Docker or Podman (for running Cassandra) -- 30GB+ free disk space (for 3-node cluster) -- 32GB+ RAM recommended - -## ๐Ÿ› ๏ธ Installation - -1. **Install the example with dependencies:** - ```bash - pip install -e . - ``` - -2. **Install development dependencies (optional):** - ```bash - make dev-install - ``` - -## ๐ŸŽฏ Quick Start - -1. **Start a 3-node Cassandra cluster:** - ```bash - make cassandra-up - make cassandra-wait - ``` - -2. **Run the bulk count demo:** - ```bash - make demo - ``` - -3. 
**Stop the cluster when done:** - ```bash - make cassandra-down - ``` - -## ๐Ÿ“– Examples - -### Basic Bulk Count - -Count all rows in a table using token-aware parallel processing: - -```python -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - -async with AsyncCluster(['localhost']) as cluster: - async with cluster.connect() as session: - operator = TokenAwareBulkOperator(session) - - # Count with automatic parallelism - count = await operator.count_by_token_ranges( - keyspace="my_keyspace", - table="my_table" - ) - print(f"Total rows: {count:,}") -``` - -### Count with Progress Tracking - -```python -def progress_callback(stats): - print(f"Progress: {stats.progress_percentage:.1f}% " - f"({stats.rows_processed:,} rows, " - f"{stats.rows_per_second:,.0f} rows/sec)") - -count, stats = await operator.count_by_token_ranges_with_stats( - keyspace="my_keyspace", - table="my_table", - split_count=32, # Use 32 parallel ranges - progress_callback=progress_callback -) -``` - -### Streaming Export - -Export large tables without loading everything into memory: - -```python -async for row in operator.export_by_token_ranges( - keyspace="my_keyspace", - table="my_table", - split_count=16 -): - # Process each row as it arrives - process_row(row) -``` - -## ๐Ÿ—๏ธ Architecture - -### Token Range Discovery -The operator discovers natural token ranges from the cluster topology and can further split them for increased parallelism. - -### Parallel Execution -Multiple token ranges are queried concurrently, with configurable parallelism limits to prevent overwhelming the cluster. - -### Streaming Results -Data is streamed using async generators, ensuring constant memory usage regardless of dataset size. - -## ๐Ÿงช Testing - -Run the test suite: - -```bash -# Unit tests only -make test-unit - -# All tests (requires running Cassandra) -make test - -# With coverage report -pytest --cov=bulk_operations --cov-report=html -``` - -## ๐Ÿ”ง Configuration - -### Split Count -Controls the number of token ranges to process in parallel: -- **Default**: 4 ร— number of nodes -- **Higher values**: More parallelism, higher resource usage -- **Lower values**: Less parallelism, more stable - -### Parallelism -Controls concurrent query execution: -- **Default**: 2 ร— number of nodes -- **Adjust based on**: Cluster capacity, network bandwidth - -## ๐Ÿ“Š Performance - -Example performance on a 3-node cluster: - -| Operation | Rows | Split Count | Time | Rate | -|-----------|------|-------------|------|------| -| Count | 1M | 1 | 45s | 22K/s | -| Count | 1M | 8 | 12s | 83K/s | -| Count | 1M | 32 | 6s | 167K/s | -| Export | 10M | 16 | 120s | 83K/s | - -## ๐ŸŽ“ How It Works - -1. **Token Range Discovery** - - Query cluster metadata for natural token ranges - - Each range has start/end tokens and replica nodes - - With vnodes (256 per node), expect ~768 ranges in a 3-node cluster - -2. **Range Splitting** - - Split ranges proportionally based on size - - Larger ranges get more splits for balance - - Small vnode ranges may not split further - -3. **Parallel Execution** - - Execute queries for each range concurrently - - Use semaphore to limit parallelism - - Queries use `token()` function: `WHERE token(pk) > X AND token(pk) <= Y` - -4. **Result Aggregation** - - Stream results as they arrive - - Track progress and statistics - - No duplicates due to exclusive range boundaries - -## ๐Ÿ” Understanding Vnodes - -Our test cluster uses 256 virtual nodes (vnodes) per physical node. 
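As a rough sanity check, the "~768 ranges" figure quoted above falls straight out of the cluster shape. The sketch below is purely illustrative; the node and vnode counts are the ones assumed by this example setup, not values read from a live cluster:

```python
# Back-of-the-envelope estimate of natural token ranges in the demo cluster.
nodes = 3                # docker-compose cluster size used by these examples
vnodes_per_node = 256    # num_tokens assumed in cassandra.yaml
natural_ranges = nodes * vnodes_per_node
print(natural_ranges)    # 768 natural ranges before split_count adds further splits
```

The 256-vnode-per-node setting is what drives that count.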
This means: - -- Each physical node owns 256 non-contiguous token ranges -- Token ownership is distributed evenly across the ring -- Smaller ranges mean better load distribution but more metadata - -To visualize token distribution: -```bash -python visualize_tokens.py -``` - -To validate vnodes configuration: -```bash -make validate-vnodes -``` - -## ๐Ÿงช Integration Testing - -The integration tests validate our token handling against a real Cassandra cluster: - -```bash -# Run all integration tests with cluster management -make test-integration - -# Run integration tests only (cluster must be running) -make test-integration-only -``` - -Key integration tests: -- **Token range discovery**: Validates all vnodes are discovered -- **Nodetool comparison**: Compares with `nodetool describering` output -- **Data coverage**: Ensures no rows are missed or duplicated -- **Performance scaling**: Verifies parallel execution benefits - -## ๐Ÿ“š References - -- [DataStax Bulk Loader (DSBulk)](https://docs.datastax.com/en/dsbulk/docs/) -- [Cassandra Token Ranges](https://cassandra.apache.org/doc/latest/cassandra/architecture/dynamo.html#consistent-hashing-using-a-token-ring) -- [Apache Iceberg](https://iceberg.apache.org/) - -## โš ๏ธ Important Notes - -1. **Memory Usage**: While streaming reduces memory usage, the thread pool and connection pool still consume resources - -2. **Network Bandwidth**: Bulk operations can saturate network links. Monitor and adjust parallelism accordingly. - -3. **Cluster Impact**: High parallelism can impact cluster performance. Test in non-production first. - -4. **Token Ranges**: The implementation assumes Murmur3Partitioner (Cassandra default). diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/__init__.py b/libs/async-cassandra-bulk/examples/bulk_operations/__init__.py deleted file mode 100644 index 467d6d5..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Token-aware bulk operations for Apache Cassandra using async-cassandra. - -This package provides efficient, parallel bulk operations by leveraging -Cassandra's token ranges for data distribution. -""" - -__version__ = "0.1.0" - -from .bulk_operator import BulkOperationStats, TokenAwareBulkOperator -from .token_utils import TokenRange, TokenRangeSplitter - -__all__ = [ - "TokenAwareBulkOperator", - "BulkOperationStats", - "TokenRange", - "TokenRangeSplitter", -] diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/bulk_operator.py b/libs/async-cassandra-bulk/examples/bulk_operations/bulk_operator.py deleted file mode 100644 index 2d502cb..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/bulk_operator.py +++ /dev/null @@ -1,566 +0,0 @@ -""" -Token-aware bulk operator for parallel Cassandra operations. 
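A minimal sketch of the two helpers this module is built on, `discover_token_ranges` and `TokenRangeSplitter.split_proportionally`; the import path and the `session` argument (a connected `AsyncCassandraSession`) are assumptions based on how this package is laid out:

```python
# Hedged sketch of the range-discovery step that every bulk operation starts with.
from bulk_operations.token_utils import TokenRangeSplitter, discover_token_ranges


async def preview_splits(session, keyspace: str, split_count: int = 32) -> None:
    ranges = await discover_token_ranges(session, keyspace)          # natural vnode ranges
    splits = TokenRangeSplitter().split_proportionally(ranges, split_count)
    print(f"{len(ranges)} natural ranges -> {len(splits)} query splits")
```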
-""" - -import asyncio -import time -from collections.abc import AsyncIterator, Callable -from pathlib import Path -from typing import Any - -from cassandra import ConsistencyLevel - -from async_cassandra import AsyncCassandraSession - -from .parallel_export import export_by_token_ranges_parallel -from .stats import BulkOperationStats -from .token_utils import TokenRange, TokenRangeSplitter, discover_token_ranges - - -class BulkOperationError(Exception): - """Error during bulk operation.""" - - def __init__( - self, message: str, partial_result: Any = None, errors: list[Exception] | None = None - ): - super().__init__(message) - self.partial_result = partial_result - self.errors = errors or [] - - -class TokenAwareBulkOperator: - """Performs bulk operations using token ranges for parallelism. - - This class uses prepared statements for all token range queries to: - - Improve performance through query plan caching - - Provide protection against injection attacks - - Ensure type safety and validation - - Follow Cassandra best practices - - Token range boundaries are passed as parameters to prepared statements, - not embedded in the query string. - """ - - def __init__(self, session: AsyncCassandraSession): - self.session = session - self.splitter = TokenRangeSplitter() - self._prepared_statements: dict[str, dict[str, Any]] = {} - - async def _get_prepared_statements( - self, keyspace: str, table: str, partition_keys: list[str] - ) -> dict[str, Any]: - """Get or prepare statements for token range queries.""" - pk_list = ", ".join(partition_keys) - key = f"{keyspace}.{table}" - - if key not in self._prepared_statements: - # Prepare all the statements we need for this table - self._prepared_statements[key] = { - "count_range": await self.session.prepare( - f""" - SELECT COUNT(*) FROM {keyspace}.{table} - WHERE token({pk_list}) > ? - AND token({pk_list}) <= ? - """ - ), - "count_wraparound_gt": await self.session.prepare( - f""" - SELECT COUNT(*) FROM {keyspace}.{table} - WHERE token({pk_list}) > ? - """ - ), - "count_wraparound_lte": await self.session.prepare( - f""" - SELECT COUNT(*) FROM {keyspace}.{table} - WHERE token({pk_list}) <= ? - """ - ), - "select_range": await self.session.prepare( - f""" - SELECT * FROM {keyspace}.{table} - WHERE token({pk_list}) > ? - AND token({pk_list}) <= ? - """ - ), - "select_wraparound_gt": await self.session.prepare( - f""" - SELECT * FROM {keyspace}.{table} - WHERE token({pk_list}) > ? - """ - ), - "select_wraparound_lte": await self.session.prepare( - f""" - SELECT * FROM {keyspace}.{table} - WHERE token({pk_list}) <= ? - """ - ), - } - - return self._prepared_statements[key] - - async def count_by_token_ranges( - self, - keyspace: str, - table: str, - split_count: int | None = None, - parallelism: int | None = None, - progress_callback: Callable[[BulkOperationStats], None] | None = None, - consistency_level: ConsistencyLevel | None = None, - ) -> int: - """Count all rows in a table using parallel token range queries. - - Args: - keyspace: The keyspace name. - table: The table name. - split_count: Number of token range splits (default: 4 * number of nodes). - parallelism: Max concurrent operations (default: 2 * number of nodes). - progress_callback: Optional callback for progress updates. - consistency_level: Consistency level for queries (default: None, uses driver default). - - Returns: - Total row count. 
- """ - count, _ = await self.count_by_token_ranges_with_stats( - keyspace=keyspace, - table=table, - split_count=split_count, - parallelism=parallelism, - progress_callback=progress_callback, - consistency_level=consistency_level, - ) - return count - - async def count_by_token_ranges_with_stats( - self, - keyspace: str, - table: str, - split_count: int | None = None, - parallelism: int | None = None, - progress_callback: Callable[[BulkOperationStats], None] | None = None, - consistency_level: ConsistencyLevel | None = None, - ) -> tuple[int, BulkOperationStats]: - """Count all rows and return statistics.""" - # Get table metadata - table_meta = await self._get_table_metadata(keyspace, table) - partition_keys = [col.name for col in table_meta.partition_key] - - # Discover and split token ranges - ranges = await discover_token_ranges(self.session, keyspace) - - if split_count is None: - # Default: 4 splits per node - split_count = len(self.session._session.cluster.contact_points) * 4 - - splits = self.splitter.split_proportionally(ranges, split_count) - - # Initialize stats - stats = BulkOperationStats(total_ranges=len(splits)) - - # Determine parallelism - if parallelism is None: - parallelism = min(len(splits), len(self.session._session.cluster.contact_points) * 2) - - # Get prepared statements for this table - prepared_stmts = await self._get_prepared_statements(keyspace, table, partition_keys) - - # Create count tasks - semaphore = asyncio.Semaphore(parallelism) - tasks = [] - - for split in splits: - task = self._count_range( - keyspace, - table, - partition_keys, - split, - semaphore, - stats, - progress_callback, - prepared_stmts, - consistency_level, - ) - tasks.append(task) - - # Execute all tasks - results = await asyncio.gather(*tasks, return_exceptions=True) - - # Process results - total_count = 0 - for result in results: - if isinstance(result, Exception): - stats.errors.append(result) - else: - total_count += int(result) - - stats.end_time = time.time() - - if stats.errors: - raise BulkOperationError( - f"Failed to count all ranges: {len(stats.errors)} errors", - partial_result=total_count, - errors=stats.errors, - ) - - return total_count, stats - - async def _count_range( - self, - keyspace: str, - table: str, - partition_keys: list[str], - token_range: TokenRange, - semaphore: asyncio.Semaphore, - stats: BulkOperationStats, - progress_callback: Callable[[BulkOperationStats], None] | None, - prepared_stmts: dict[str, Any], - consistency_level: ConsistencyLevel | None, - ) -> int: - """Count rows in a single token range.""" - async with semaphore: - # Check if this is a wraparound range - if token_range.end < token_range.start: - # Wraparound range needs to be split into two queries - # First part: from start to MAX_TOKEN - stmt = prepared_stmts["count_wraparound_gt"] - if consistency_level is not None: - stmt.consistency_level = consistency_level - result1 = await self.session.execute(stmt, (token_range.start,)) - row1 = result1.one() - count1 = row1.count if row1 else 0 - - # Second part: from MIN_TOKEN to end - stmt = prepared_stmts["count_wraparound_lte"] - if consistency_level is not None: - stmt.consistency_level = consistency_level - result2 = await self.session.execute(stmt, (token_range.end,)) - row2 = result2.one() - count2 = row2.count if row2 else 0 - - count = count1 + count2 - else: - # Normal range - use prepared statement - stmt = prepared_stmts["count_range"] - if consistency_level is not None: - stmt.consistency_level = consistency_level - result = await 
self.session.execute(stmt, (token_range.start, token_range.end)) - row = result.one() - count = row.count if row else 0 - - # Update stats - stats.rows_processed += count - stats.ranges_completed += 1 - - # Call progress callback if provided - if progress_callback: - progress_callback(stats) - - return int(count) - - async def export_by_token_ranges( - self, - keyspace: str, - table: str, - split_count: int | None = None, - parallelism: int | None = None, - progress_callback: Callable[[BulkOperationStats], None] | None = None, - consistency_level: ConsistencyLevel | None = None, - ) -> AsyncIterator[Any]: - """Export all rows from a table by streaming token ranges in parallel. - - This method uses parallel queries to stream data from multiple token ranges - concurrently, providing high performance for large table exports. - - Args: - keyspace: The keyspace name. - table: The table name. - split_count: Number of token range splits (default: 4 * number of nodes). - parallelism: Max concurrent queries (default: 2 * number of nodes). - progress_callback: Optional callback for progress updates. - consistency_level: Consistency level for queries (default: None, uses driver default). - - Yields: - Row data from the table, streamed as results arrive from parallel queries. - """ - # Get table metadata - table_meta = await self._get_table_metadata(keyspace, table) - partition_keys = [col.name for col in table_meta.partition_key] - - # Discover and split token ranges - ranges = await discover_token_ranges(self.session, keyspace) - - if split_count is None: - split_count = len(self.session._session.cluster.contact_points) * 4 - - splits = self.splitter.split_proportionally(ranges, split_count) - - # Determine parallelism - if parallelism is None: - parallelism = min(len(splits), len(self.session._session.cluster.contact_points) * 2) - - # Initialize stats - stats = BulkOperationStats(total_ranges=len(splits)) - - # Get prepared statements for this table - prepared_stmts = await self._get_prepared_statements(keyspace, table, partition_keys) - - # Use parallel export - async for row in export_by_token_ranges_parallel( - operator=self, - keyspace=keyspace, - table=table, - splits=splits, - prepared_stmts=prepared_stmts, - parallelism=parallelism, - consistency_level=consistency_level, - stats=stats, - progress_callback=progress_callback, - ): - yield row - - stats.end_time = time.time() - - async def import_from_iceberg( - self, - iceberg_warehouse_path: str, - iceberg_table: str, - target_keyspace: str, - target_table: str, - parallelism: int | None = None, - batch_size: int = 1000, - progress_callback: Callable[[BulkOperationStats], None] | None = None, - ) -> BulkOperationStats: - """Import data from Iceberg to Cassandra.""" - # This will be implemented when we add Iceberg integration - raise NotImplementedError("Iceberg import will be implemented in next phase") - - async def _get_table_metadata(self, keyspace: str, table: str) -> Any: - """Get table metadata from cluster.""" - metadata = self.session._session.cluster.metadata - - if keyspace not in metadata.keyspaces: - raise ValueError(f"Keyspace '{keyspace}' not found") - - keyspace_meta = metadata.keyspaces[keyspace] - - if table not in keyspace_meta.tables: - raise ValueError(f"Table '{table}' not found in keyspace '{keyspace}'") - - return keyspace_meta.tables[table] - - async def export_to_csv( - self, - keyspace: str, - table: str, - output_path: str | Path, - columns: list[str] | None = None, - delimiter: str = ",", - null_string: str = "", 
- compression: str | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress_callback: Callable[[Any], Any] | None = None, - consistency_level: ConsistencyLevel | None = None, - ) -> Any: - """Export table to CSV format. - - Args: - keyspace: Keyspace name - table: Table name - output_path: Output file path - columns: Columns to export (None for all) - delimiter: CSV delimiter - null_string: String to represent NULL values - compression: Compression type (gzip, bz2, lz4) - split_count: Number of token range splits - parallelism: Max concurrent operations - progress_callback: Progress callback function - consistency_level: Consistency level for queries - - Returns: - ExportProgress object - """ - from .exporters import CSVExporter - - exporter = CSVExporter( - self, - delimiter=delimiter, - null_string=null_string, - compression=compression, - ) - - return await exporter.export( - keyspace=keyspace, - table=table, - output_path=Path(output_path), - columns=columns, - split_count=split_count, - parallelism=parallelism, - progress_callback=progress_callback, - consistency_level=consistency_level, - ) - - async def export_to_json( - self, - keyspace: str, - table: str, - output_path: str | Path, - columns: list[str] | None = None, - format_mode: str = "jsonl", - indent: int | None = None, - compression: str | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress_callback: Callable[[Any], Any] | None = None, - consistency_level: ConsistencyLevel | None = None, - ) -> Any: - """Export table to JSON format. - - Args: - keyspace: Keyspace name - table: Table name - output_path: Output file path - columns: Columns to export (None for all) - format_mode: 'jsonl' (line-delimited) or 'array' - indent: JSON indentation - compression: Compression type (gzip, bz2, lz4) - split_count: Number of token range splits - parallelism: Max concurrent operations - progress_callback: Progress callback function - consistency_level: Consistency level for queries - - Returns: - ExportProgress object - """ - from .exporters import JSONExporter - - exporter = JSONExporter( - self, - format_mode=format_mode, - indent=indent, - compression=compression, - ) - - return await exporter.export( - keyspace=keyspace, - table=table, - output_path=Path(output_path), - columns=columns, - split_count=split_count, - parallelism=parallelism, - progress_callback=progress_callback, - consistency_level=consistency_level, - ) - - async def export_to_parquet( - self, - keyspace: str, - table: str, - output_path: str | Path, - columns: list[str] | None = None, - compression: str = "snappy", - row_group_size: int = 50000, - split_count: int | None = None, - parallelism: int | None = None, - progress_callback: Callable[[Any], Any] | None = None, - consistency_level: ConsistencyLevel | None = None, - ) -> Any: - """Export table to Parquet format. 
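A minimal usage sketch covering the three file-format entry points defined here; the keyspace, table, and output paths are placeholders, and `operator` is assumed to be a `TokenAwareBulkOperator` bound to a connected session:

```python
# Hedged sketch: one table exported to CSV, JSON Lines, and Parquet.
async def export_all(operator) -> None:
    await operator.export_to_csv(
        keyspace="my_keyspace", table="my_table",
        output_path="exports/my_table.csv", compression="gzip",
    )
    await operator.export_to_json(
        keyspace="my_keyspace", table="my_table",
        output_path="exports/my_table.jsonl", format_mode="jsonl",
    )
    progress = await operator.export_to_parquet(
        keyspace="my_keyspace", table="my_table",
        output_path="exports/my_table.parquet",
        compression="snappy", row_group_size=50000,
    )
    print(f"{progress.rows_exported:,} rows / {progress.bytes_written:,} bytes written")
```

The parameters accepted by the Parquet variant are listed below.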
- - Args: - keyspace: Keyspace name - table: Table name - output_path: Output file path - columns: Columns to export (None for all) - compression: Parquet compression (snappy, gzip, brotli, lz4, zstd) - row_group_size: Rows per row group - split_count: Number of token range splits - parallelism: Max concurrent operations - progress_callback: Progress callback function - - Returns: - ExportProgress object - """ - from .exporters import ParquetExporter - - exporter = ParquetExporter( - self, - compression=compression, - row_group_size=row_group_size, - ) - - return await exporter.export( - keyspace=keyspace, - table=table, - output_path=Path(output_path), - columns=columns, - split_count=split_count, - parallelism=parallelism, - progress_callback=progress_callback, - consistency_level=consistency_level, - ) - - async def export_to_iceberg( - self, - keyspace: str, - table: str, - namespace: str | None = None, - table_name: str | None = None, - catalog: Any | None = None, - catalog_config: dict[str, Any] | None = None, - warehouse_path: str | Path | None = None, - partition_spec: Any | None = None, - table_properties: dict[str, str] | None = None, - compression: str = "snappy", - row_group_size: int = 100000, - columns: list[str] | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress_callback: Any | None = None, - ) -> Any: - """Export table data to Apache Iceberg format. - - This enables modern data lakehouse features like ACID transactions, - time travel, and schema evolution. - - Args: - keyspace: Cassandra keyspace to export from - table: Cassandra table to export - namespace: Iceberg namespace (default: keyspace name) - table_name: Iceberg table name (default: Cassandra table name) - catalog: Pre-configured Iceberg catalog (optional) - catalog_config: Custom catalog configuration (optional) - warehouse_path: Path to Iceberg warehouse (for filesystem catalog) - partition_spec: Iceberg partition specification - table_properties: Additional Iceberg table properties - compression: Parquet compression (default: snappy) - row_group_size: Rows per Parquet file (default: 100000) - columns: Columns to export (default: all) - split_count: Number of token range splits - parallelism: Max concurrent operations - progress_callback: Progress callback function - - Returns: - ExportProgress with Iceberg metadata - """ - from .iceberg import IcebergExporter - - exporter = IcebergExporter( - self, - catalog=catalog, - catalog_config=catalog_config, - warehouse_path=warehouse_path, - compression=compression, - row_group_size=row_group_size, - ) - return await exporter.export( - keyspace=keyspace, - table=table, - namespace=namespace, - table_name=table_name, - partition_spec=partition_spec, - table_properties=table_properties, - columns=columns, - split_count=split_count, - parallelism=parallelism, - progress_callback=progress_callback, - ) diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/__init__.py b/libs/async-cassandra-bulk/examples/bulk_operations/exporters/__init__.py deleted file mode 100644 index 6053593..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Export format implementations for bulk operations.""" - -from .base import Exporter, ExportFormat, ExportProgress -from .csv_exporter import CSVExporter -from .json_exporter import JSONExporter -from .parquet_exporter import ParquetExporter - -__all__ = [ - "ExportFormat", - "Exporter", - "ExportProgress", - 
"CSVExporter", - "JSONExporter", - "ParquetExporter", -] diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/base.py b/libs/async-cassandra-bulk/examples/bulk_operations/exporters/base.py deleted file mode 100644 index 015d629..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/base.py +++ /dev/null @@ -1,229 +0,0 @@ -"""Base classes for export format implementations.""" - -import asyncio -import json -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from datetime import datetime -from enum import Enum -from pathlib import Path -from typing import Any - -from cassandra.util import OrderedMap, OrderedMapSerializedKey - -from bulk_operations.bulk_operator import TokenAwareBulkOperator - - -class ExportFormat(Enum): - """Supported export formats.""" - - CSV = "csv" - JSON = "json" - PARQUET = "parquet" - ICEBERG = "iceberg" - - -@dataclass -class ExportProgress: - """Tracks export progress for resume capability.""" - - export_id: str - keyspace: str - table: str - format: ExportFormat - output_path: str - started_at: datetime - completed_at: datetime | None = None - total_ranges: int = 0 - completed_ranges: list[tuple[int, int]] = field(default_factory=list) - rows_exported: int = 0 - bytes_written: int = 0 - errors: list[dict[str, Any]] = field(default_factory=list) - metadata: dict[str, Any] = field(default_factory=dict) - - def to_json(self) -> str: - """Serialize progress to JSON.""" - data = { - "export_id": self.export_id, - "keyspace": self.keyspace, - "table": self.table, - "format": self.format.value, - "output_path": self.output_path, - "started_at": self.started_at.isoformat(), - "completed_at": self.completed_at.isoformat() if self.completed_at else None, - "total_ranges": self.total_ranges, - "completed_ranges": self.completed_ranges, - "rows_exported": self.rows_exported, - "bytes_written": self.bytes_written, - "errors": self.errors, - "metadata": self.metadata, - } - return json.dumps(data, indent=2) - - @classmethod - def from_json(cls, json_str: str) -> "ExportProgress": - """Deserialize progress from JSON.""" - data = json.loads(json_str) - return cls( - export_id=data["export_id"], - keyspace=data["keyspace"], - table=data["table"], - format=ExportFormat(data["format"]), - output_path=data["output_path"], - started_at=datetime.fromisoformat(data["started_at"]), - completed_at=( - datetime.fromisoformat(data["completed_at"]) if data["completed_at"] else None - ), - total_ranges=data["total_ranges"], - completed_ranges=[(r[0], r[1]) for r in data["completed_ranges"]], - rows_exported=data["rows_exported"], - bytes_written=data["bytes_written"], - errors=data["errors"], - metadata=data["metadata"], - ) - - def save(self, progress_file: Path | None = None) -> Path: - """Save progress to file.""" - if progress_file is None: - progress_file = Path(f"{self.output_path}.progress") - progress_file.write_text(self.to_json()) - return progress_file - - @classmethod - def load(cls, progress_file: Path) -> "ExportProgress": - """Load progress from file.""" - return cls.from_json(progress_file.read_text()) - - def is_range_completed(self, start: int, end: int) -> bool: - """Check if a token range has been completed.""" - return (start, end) in self.completed_ranges - - def mark_range_completed(self, start: int, end: int, rows: int) -> None: - """Mark a token range as completed.""" - if not self.is_range_completed(start, end): - self.completed_ranges.append((start, end)) - self.rows_exported += rows - - @property 
- def is_complete(self) -> bool: - """Check if export is complete.""" - return len(self.completed_ranges) == self.total_ranges - - @property - def progress_percentage(self) -> float: - """Calculate progress percentage.""" - if self.total_ranges == 0: - return 0.0 - return (len(self.completed_ranges) / self.total_ranges) * 100 - - -class Exporter(ABC): - """Base class for export format implementations.""" - - def __init__( - self, - operator: TokenAwareBulkOperator, - compression: str | None = None, - buffer_size: int = 8192, - ): - """Initialize exporter. - - Args: - operator: Token-aware bulk operator instance - compression: Compression type (gzip, bz2, lz4, etc.) - buffer_size: Buffer size for file operations - """ - self.operator = operator - self.compression = compression - self.buffer_size = buffer_size - self._write_lock = asyncio.Lock() - - @abstractmethod - async def export( - self, - keyspace: str, - table: str, - output_path: Path, - columns: list[str] | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress: ExportProgress | None = None, - progress_callback: Any | None = None, - consistency_level: Any | None = None, - ) -> ExportProgress: - """Export table data to the specified format. - - Args: - keyspace: Keyspace name - table: Table name - output_path: Output file path - columns: Columns to export (None for all) - split_count: Number of token range splits - parallelism: Max concurrent operations - progress: Resume from previous progress - progress_callback: Callback for progress updates - - Returns: - ExportProgress with final statistics - """ - pass - - @abstractmethod - async def write_header(self, file_handle: Any, columns: list[str]) -> None: - """Write file header if applicable.""" - pass - - @abstractmethod - async def write_row(self, file_handle: Any, row: Any) -> int: - """Write a single row and return bytes written.""" - pass - - @abstractmethod - async def write_footer(self, file_handle: Any) -> None: - """Write file footer if applicable.""" - pass - - def _serialize_value(self, value: Any) -> Any: - """Serialize Cassandra types to exportable format.""" - if value is None: - return None - elif isinstance(value, list | set): - return [self._serialize_value(v) for v in value] - elif isinstance(value, dict | OrderedMap | OrderedMapSerializedKey): - # Handle Cassandra map types - return {str(k): self._serialize_value(v) for k, v in value.items()} - elif isinstance(value, bytes): - # Convert bytes to base64 for JSON compatibility - import base64 - - return base64.b64encode(value).decode("ascii") - elif isinstance(value, datetime): - return value.isoformat() - else: - return value - - async def _open_output_file(self, output_path: Path, mode: str = "w") -> Any: - """Open output file with optional compression.""" - if self.compression == "gzip": - import gzip - - return gzip.open(output_path, mode + "t", encoding="utf-8") - elif self.compression == "bz2": - import bz2 - - return bz2.open(output_path, mode + "t", encoding="utf-8") - elif self.compression == "lz4": - try: - import lz4.frame - - return lz4.frame.open(output_path, mode + "t", encoding="utf-8") - except ImportError: - raise ImportError("lz4 compression requires 'pip install lz4'") from None - else: - return open(output_path, mode, encoding="utf-8", buffering=self.buffer_size) - - def _get_output_path_with_compression(self, output_path: Path) -> Path: - """Add compression extension to output path if needed.""" - if self.compression: - return 
output_path.with_suffix(output_path.suffix + f".{self.compression}") - return output_path diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/csv_exporter.py b/libs/async-cassandra-bulk/examples/bulk_operations/exporters/csv_exporter.py deleted file mode 100644 index 56e6f80..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/csv_exporter.py +++ /dev/null @@ -1,221 +0,0 @@ -"""CSV export implementation.""" - -import asyncio -import csv -import io -import uuid -from datetime import UTC, datetime -from pathlib import Path -from typing import Any - -from bulk_operations.exporters.base import Exporter, ExportFormat, ExportProgress - - -class CSVExporter(Exporter): - """Export Cassandra data to CSV format with streaming support.""" - - def __init__( - self, - operator, - delimiter: str = ",", - quoting: int = csv.QUOTE_MINIMAL, - null_string: str = "", - compression: str | None = None, - buffer_size: int = 8192, - ): - """Initialize CSV exporter. - - Args: - operator: Token-aware bulk operator instance - delimiter: Field delimiter (default: comma) - quoting: CSV quoting style (default: QUOTE_MINIMAL) - null_string: String to represent NULL values (default: empty string) - compression: Compression type (gzip, bz2, lz4) - buffer_size: Buffer size for file operations - """ - super().__init__(operator, compression, buffer_size) - self.delimiter = delimiter - self.quoting = quoting - self.null_string = null_string - - async def export( # noqa: C901 - self, - keyspace: str, - table: str, - output_path: Path, - columns: list[str] | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress: ExportProgress | None = None, - progress_callback: Any | None = None, - consistency_level: Any | None = None, - ) -> ExportProgress: - """Export table data to CSV format. - - What this does: - -------------- - 1. Discovers table schema if columns not specified - 2. Creates/resumes progress tracking - 3. Streams data by token ranges - 4. Writes CSV with proper escaping - 5. 
Supports compression and resume - - Why this matters: - ---------------- - - Memory efficient for large tables - - Maintains data fidelity - - Resume capability for long exports - - Compatible with standard tools - """ - # Get table metadata if columns not specified - if columns is None: - metadata = self.operator.session._session.cluster.metadata - keyspace_metadata = metadata.keyspaces.get(keyspace) - if not keyspace_metadata: - raise ValueError(f"Keyspace '{keyspace}' not found") - table_metadata = keyspace_metadata.tables.get(table) - if not table_metadata: - raise ValueError(f"Table '{keyspace}.{table}' not found") - columns = list(table_metadata.columns.keys()) - - # Initialize or resume progress - if progress is None: - progress = ExportProgress( - export_id=str(uuid.uuid4()), - keyspace=keyspace, - table=table, - format=ExportFormat.CSV, - output_path=str(output_path), - started_at=datetime.now(UTC), - ) - - # Get actual output path with compression extension - actual_output_path = self._get_output_path_with_compression(output_path) - - # Open output file (append mode if resuming) - mode = "a" if progress.completed_ranges else "w" - file_handle = await self._open_output_file(actual_output_path, mode) - - try: - # Write header for new exports - if mode == "w": - await self.write_header(file_handle, columns) - - # Store columns for row filtering - self._export_columns = columns - - # Track bytes written - file_handle.tell() if hasattr(file_handle, "tell") else 0 - - # Export by token ranges - async for row in self.operator.export_by_token_ranges( - keyspace=keyspace, - table=table, - split_count=split_count, - parallelism=parallelism, - consistency_level=consistency_level, - ): - # Check if we need to track a new range - # (This is simplified - in real implementation we'd track actual ranges) - bytes_written = await self.write_row(file_handle, row) - progress.rows_exported += 1 - progress.bytes_written += bytes_written - - # Periodic progress callback - if progress_callback and progress.rows_exported % 1000 == 0: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - # Mark completion - progress.completed_at = datetime.now(UTC) - - # Final callback - if progress_callback: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - finally: - if hasattr(file_handle, "close"): - file_handle.close() - - # Save final progress - progress.save() - return progress - - async def write_header(self, file_handle: Any, columns: list[str]) -> None: - """Write CSV header row.""" - writer = csv.writer(file_handle, delimiter=self.delimiter, quoting=self.quoting) - writer.writerow(columns) - - async def write_row(self, file_handle: Any, row: Any) -> int: - """Write a single row to CSV.""" - # Convert row to list of values in column order - # Row objects from Cassandra driver have _fields attribute - values = [] - if hasattr(row, "_fields"): - # If we have specific columns, only export those - if hasattr(self, "_export_columns") and self._export_columns: - for col in self._export_columns: - if hasattr(row, col): - value = getattr(row, col) - values.append(self._serialize_csv_value(value)) - else: - values.append(self._serialize_csv_value(None)) - else: - # Export all fields - for field in row._fields: - value = getattr(row, field) - values.append(self._serialize_csv_value(value)) - else: - # Fallback for other row types - for i in range(len(row)): - 
values.append(self._serialize_csv_value(row[i])) - - # Write to string buffer first to calculate bytes - buffer = io.StringIO() - writer = csv.writer(buffer, delimiter=self.delimiter, quoting=self.quoting) - writer.writerow(values) - row_data = buffer.getvalue() - - # Write to actual file - async with self._write_lock: - file_handle.write(row_data) - if hasattr(file_handle, "flush"): - file_handle.flush() - - return len(row_data.encode("utf-8")) - - async def write_footer(self, file_handle: Any) -> None: - """CSV files don't have footers.""" - pass - - def _serialize_csv_value(self, value: Any) -> str: - """Serialize value for CSV output.""" - if value is None: - return self.null_string - elif isinstance(value, bool): - return "true" if value else "false" - elif isinstance(value, list | set): - # Format collections as [item1, item2, ...] - items = [self._serialize_csv_value(v) for v in value] - return f"[{', '.join(items)}]" - elif isinstance(value, dict): - # Format maps as {key1: value1, key2: value2} - items = [ - f"{self._serialize_csv_value(k)}: {self._serialize_csv_value(v)}" - for k, v in value.items() - ] - return f"{{{', '.join(items)}}}" - elif isinstance(value, bytes): - # Hex encode bytes - return value.hex() - elif isinstance(value, datetime): - return value.isoformat() - elif isinstance(value, uuid.UUID): - return str(value) - else: - return str(value) diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/json_exporter.py b/libs/async-cassandra-bulk/examples/bulk_operations/exporters/json_exporter.py deleted file mode 100644 index 6067a6c..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/json_exporter.py +++ /dev/null @@ -1,221 +0,0 @@ -"""JSON export implementation.""" - -import asyncio -import json -import uuid -from datetime import UTC, datetime -from decimal import Decimal -from pathlib import Path -from typing import Any - -from bulk_operations.exporters.base import Exporter, ExportFormat, ExportProgress - - -class JSONExporter(Exporter): - """Export Cassandra data to JSON format (line-delimited by default).""" - - def __init__( - self, - operator, - format_mode: str = "jsonl", # jsonl (line-delimited) or array - indent: int | None = None, - compression: str | None = None, - buffer_size: int = 8192, - ): - """Initialize JSON exporter. - - Args: - operator: Token-aware bulk operator instance - format_mode: Output format - 'jsonl' (line-delimited) or 'array' - indent: JSON indentation (None for compact) - compression: Compression type (gzip, bz2, lz4) - buffer_size: Buffer size for file operations - """ - super().__init__(operator, compression, buffer_size) - self.format_mode = format_mode - self.indent = indent - self._first_row = True - - async def export( # noqa: C901 - self, - keyspace: str, - table: str, - output_path: Path, - columns: list[str] | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress: ExportProgress | None = None, - progress_callback: Any | None = None, - consistency_level: Any | None = None, - ) -> ExportProgress: - """Export table data to JSON format. - - What this does: - -------------- - 1. Exports as line-delimited JSON (default) or JSON array - 2. Handles all Cassandra data types with proper serialization - 3. Supports compression for smaller files - 4. 
Maintains streaming for memory efficiency - - Why this matters: - ---------------- - - JSONL works well with streaming tools - - JSON arrays are compatible with many APIs - - Preserves type information better than CSV - - Standard format for data pipelines - """ - # Get table metadata if columns not specified - if columns is None: - metadata = self.operator.session._session.cluster.metadata - keyspace_metadata = metadata.keyspaces.get(keyspace) - if not keyspace_metadata: - raise ValueError(f"Keyspace '{keyspace}' not found") - table_metadata = keyspace_metadata.tables.get(table) - if not table_metadata: - raise ValueError(f"Table '{keyspace}.{table}' not found") - columns = list(table_metadata.columns.keys()) - - # Initialize or resume progress - if progress is None: - progress = ExportProgress( - export_id=str(uuid.uuid4()), - keyspace=keyspace, - table=table, - format=ExportFormat.JSON, - output_path=str(output_path), - started_at=datetime.now(UTC), - metadata={"format_mode": self.format_mode}, - ) - - # Get actual output path with compression extension - actual_output_path = self._get_output_path_with_compression(output_path) - - # Open output file - mode = "a" if progress.completed_ranges else "w" - file_handle = await self._open_output_file(actual_output_path, mode) - - try: - # Write header for array mode - if mode == "w" and self.format_mode == "array": - await self.write_header(file_handle, columns) - - # Store columns for row filtering - self._export_columns = columns - - # Export by token ranges - async for row in self.operator.export_by_token_ranges( - keyspace=keyspace, - table=table, - split_count=split_count, - parallelism=parallelism, - consistency_level=consistency_level, - ): - bytes_written = await self.write_row(file_handle, row) - progress.rows_exported += 1 - progress.bytes_written += bytes_written - - # Progress callback - if progress_callback and progress.rows_exported % 1000 == 0: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - # Write footer for array mode - if self.format_mode == "array": - await self.write_footer(file_handle) - - # Mark completion - progress.completed_at = datetime.now(UTC) - - # Final callback - if progress_callback: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - finally: - if hasattr(file_handle, "close"): - file_handle.close() - - # Save progress - progress.save() - return progress - - async def write_header(self, file_handle: Any, columns: list[str]) -> None: - """Write JSON array opening bracket.""" - if self.format_mode == "array": - file_handle.write("[\n") - self._first_row = True - - async def write_row(self, file_handle: Any, row: Any) -> int: # noqa: C901 - """Write a single row as JSON.""" - # Convert row to dictionary - row_dict = {} - if hasattr(row, "_fields"): - # If we have specific columns, only export those - if hasattr(self, "_export_columns") and self._export_columns: - for col in self._export_columns: - if hasattr(row, col): - value = getattr(row, col) - row_dict[col] = self._serialize_value(value) - else: - row_dict[col] = None - else: - # Export all fields - for field in row._fields: - value = getattr(row, field) - row_dict[field] = self._serialize_value(value) - else: - # Handle other row types - for i, value in enumerate(row): - row_dict[f"column_{i}"] = self._serialize_value(value) - - # Format as JSON - if self.format_mode == "jsonl": - # Line-delimited 
JSON - json_str = json.dumps(row_dict, separators=(",", ":")) - json_str += "\n" - else: - # Array mode - if not self._first_row: - json_str = ",\n" - else: - json_str = "" - self._first_row = False - - if self.indent: - json_str += json.dumps(row_dict, indent=self.indent) - else: - json_str += json.dumps(row_dict, separators=(",", ":")) - - # Write to file - async with self._write_lock: - file_handle.write(json_str) - if hasattr(file_handle, "flush"): - file_handle.flush() - - return len(json_str.encode("utf-8")) - - async def write_footer(self, file_handle: Any) -> None: - """Write JSON array closing bracket.""" - if self.format_mode == "array": - file_handle.write("\n]") - - def _serialize_value(self, value: Any) -> Any: - """Override to handle UUID and other types.""" - if isinstance(value, uuid.UUID): - return str(value) - elif isinstance(value, set | frozenset): - # JSON doesn't have sets, convert to list - return [self._serialize_value(v) for v in sorted(value)] - elif hasattr(value, "__class__") and "SortedSet" in value.__class__.__name__: - # Handle SortedSet specifically - return [self._serialize_value(v) for v in value] - elif isinstance(value, Decimal): - # Convert Decimal to float for JSON - return float(value) - else: - # Use parent class serialization - return super()._serialize_value(value) diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/parquet_exporter.py b/libs/async-cassandra-bulk/examples/bulk_operations/exporters/parquet_exporter.py deleted file mode 100644 index f9835bc..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/exporters/parquet_exporter.py +++ /dev/null @@ -1,311 +0,0 @@ -"""Parquet export implementation using PyArrow.""" - -import asyncio -import uuid -from datetime import UTC, datetime -from decimal import Decimal -from pathlib import Path -from typing import Any - -try: - import pyarrow as pa - import pyarrow.parquet as pq -except ImportError: - raise ImportError( - "PyArrow is required for Parquet export. Install with: pip install pyarrow" - ) from None - -from cassandra.util import OrderedMap, OrderedMapSerializedKey - -from bulk_operations.exporters.base import Exporter, ExportFormat, ExportProgress - - -class ParquetExporter(Exporter): - """Export Cassandra data to Parquet format - the foundation for Iceberg.""" - - def __init__( - self, - operator, - compression: str = "snappy", - row_group_size: int = 50000, - use_dictionary: bool = True, - buffer_size: int = 8192, - ): - """Initialize Parquet exporter. - - Args: - operator: Token-aware bulk operator instance - compression: Compression codec (snappy, gzip, brotli, lz4, zstd) - row_group_size: Number of rows per row group - use_dictionary: Enable dictionary encoding for strings - buffer_size: Buffer size for file operations - """ - super().__init__(operator, compression, buffer_size) - self.row_group_size = row_group_size - self.use_dictionary = use_dictionary - self._batch_rows = [] - self._schema = None - self._writer = None - - async def export( # noqa: C901 - self, - keyspace: str, - table: str, - output_path: Path, - columns: list[str] | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress: ExportProgress | None = None, - progress_callback: Any | None = None, - consistency_level: Any | None = None, - ) -> ExportProgress: - """Export table data to Parquet format. - - What this does: - -------------- - 1. Converts Cassandra schema to Arrow schema - 2. Batches rows into row groups for efficiency - 3. 
Applies columnar compression - 4. Creates Parquet files ready for Iceberg - - Why this matters: - ---------------- - - Parquet is the storage format for Iceberg - - Columnar format enables analytics - - Excellent compression ratios - - Schema evolution support - """ - # Get table metadata - metadata = self.operator.session._session.cluster.metadata - keyspace_metadata = metadata.keyspaces.get(keyspace) - if not keyspace_metadata: - raise ValueError(f"Keyspace '{keyspace}' not found") - table_metadata = keyspace_metadata.tables.get(table) - if not table_metadata: - raise ValueError(f"Table '{keyspace}.{table}' not found") - - # Get columns - if columns is None: - columns = list(table_metadata.columns.keys()) - - # Build Arrow schema from Cassandra schema - self._schema = self._build_arrow_schema(table_metadata, columns) - - # Initialize progress - if progress is None: - progress = ExportProgress( - export_id=str(uuid.uuid4()), - keyspace=keyspace, - table=table, - format=ExportFormat.PARQUET, - output_path=str(output_path), - started_at=datetime.now(UTC), - metadata={ - "compression": self.compression, - "row_group_size": self.row_group_size, - }, - ) - - # Note: Parquet doesn't use compression extension in filename - # Compression is internal to the format - - try: - # Open Parquet writer - self._writer = pq.ParquetWriter( - output_path, - self._schema, - compression=self.compression, - use_dictionary=self.use_dictionary, - ) - - # Export by token ranges - async for row in self.operator.export_by_token_ranges( - keyspace=keyspace, - table=table, - split_count=split_count, - parallelism=parallelism, - consistency_level=consistency_level, - ): - # Add row to batch - row_data = self._convert_row_to_dict(row, columns) - self._batch_rows.append(row_data) - - # Write batch when full - if len(self._batch_rows) >= self.row_group_size: - await self._write_batch() - progress.bytes_written = output_path.stat().st_size - - progress.rows_exported += 1 - - # Progress callback - if progress_callback and progress.rows_exported % 1000 == 0: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - # Write final batch - if self._batch_rows: - await self._write_batch() - - # Close writer - self._writer.close() - - # Final stats - progress.bytes_written = output_path.stat().st_size - progress.completed_at = datetime.now(UTC) - - # Final callback - if progress_callback: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - except Exception: - # Ensure writer is closed on error - if self._writer: - self._writer.close() - raise - - # Save progress - progress.save() - return progress - - def _build_arrow_schema(self, table_metadata, columns): - """Build PyArrow schema from Cassandra table metadata.""" - fields = [] - - for col_name in columns: - col_meta = table_metadata.columns.get(col_name) - if not col_meta: - continue - - # Map Cassandra types to Arrow types - arrow_type = self._cassandra_to_arrow_type(col_meta.cql_type) - fields.append(pa.field(col_name, arrow_type, nullable=True)) - - return pa.schema(fields) - - def _cassandra_to_arrow_type(self, cql_type: str) -> pa.DataType: - """Map Cassandra types to PyArrow types.""" - # Handle parameterized types - base_type = cql_type.split("<")[0].lower() - - type_mapping = { - "ascii": pa.string(), - "bigint": pa.int64(), - "blob": pa.binary(), - "boolean": pa.bool_(), - "counter": pa.int64(), - "date": pa.date32(), 
- "decimal": pa.decimal128(38, 10), # Max precision - "double": pa.float64(), - "float": pa.float32(), - "inet": pa.string(), - "int": pa.int32(), - "smallint": pa.int16(), - "text": pa.string(), - "time": pa.int64(), # Nanoseconds since midnight - "timestamp": pa.timestamp("us"), # Microsecond precision - "timeuuid": pa.string(), - "tinyint": pa.int8(), - "uuid": pa.string(), - "varchar": pa.string(), - "varint": pa.string(), # Store as string for arbitrary precision - } - - # Handle collections - if base_type == "list" or base_type == "set": - element_type = self._extract_collection_type(cql_type) - return pa.list_(self._cassandra_to_arrow_type(element_type)) - elif base_type == "map": - key_type, value_type = self._extract_map_types(cql_type) - return pa.map_( - self._cassandra_to_arrow_type(key_type), - self._cassandra_to_arrow_type(value_type), - ) - - return type_mapping.get(base_type, pa.string()) # Default to string - - def _extract_collection_type(self, cql_type: str) -> str: - """Extract element type from list or set.""" - start = cql_type.index("<") + 1 - end = cql_type.rindex(">") - return cql_type[start:end].strip() - - def _extract_map_types(self, cql_type: str) -> tuple[str, str]: - """Extract key and value types from map.""" - start = cql_type.index("<") + 1 - end = cql_type.rindex(">") - types = cql_type[start:end].split(",", 1) - return types[0].strip(), types[1].strip() - - def _convert_row_to_dict(self, row: Any, columns: list[str]) -> dict[str, Any]: - """Convert Cassandra row to dictionary with proper type conversion.""" - row_dict = {} - - if hasattr(row, "_fields"): - for field in row._fields: - value = getattr(row, field) - row_dict[field] = self._convert_value_for_arrow(value) - else: - for i, col in enumerate(columns): - if i < len(row): - row_dict[col] = self._convert_value_for_arrow(row[i]) - - return row_dict - - def _convert_value_for_arrow(self, value: Any) -> Any: - """Convert Cassandra value to Arrow-compatible format.""" - if value is None: - return None - elif isinstance(value, uuid.UUID): - return str(value) - elif isinstance(value, Decimal): - # Keep as Decimal for Arrow's decimal128 type - return value - elif isinstance(value, set): - # Convert sets to lists - return list(value) - elif isinstance(value, OrderedMap | OrderedMapSerializedKey): - # Convert Cassandra map types to dict - return dict(value) - elif isinstance(value, bytes): - # Keep as bytes for binary columns - return value - elif isinstance(value, datetime): - # Ensure timezone aware - if value.tzinfo is None: - return value.replace(tzinfo=UTC) - return value - else: - return value - - async def _write_batch(self): - """Write accumulated batch to Parquet file.""" - if not self._batch_rows: - return - - # Convert to Arrow Table - table = pa.Table.from_pylist(self._batch_rows, schema=self._schema) - - # Write to file - async with self._write_lock: - self._writer.write_table(table) - - # Clear batch - self._batch_rows = [] - - async def write_header(self, file_handle: Any, columns: list[str]) -> None: - """Parquet handles headers internally.""" - pass - - async def write_row(self, file_handle: Any, row: Any) -> int: - """Parquet uses batch writing, not row-by-row.""" - # This is handled in export() method - return 0 - - async def write_footer(self, file_handle: Any) -> None: - """Parquet handles footers internally.""" - pass diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/__init__.py b/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/__init__.py deleted file 
mode 100644 index 83d5ba1..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Apache Iceberg integration for Cassandra bulk operations. - -This module provides functionality to export Cassandra data to Apache Iceberg tables, -enabling modern data lakehouse capabilities including: -- ACID transactions -- Schema evolution -- Time travel -- Hidden partitioning -- Efficient analytics -""" - -from bulk_operations.iceberg.exporter import IcebergExporter -from bulk_operations.iceberg.schema_mapper import CassandraToIcebergSchemaMapper - -__all__ = ["IcebergExporter", "CassandraToIcebergSchemaMapper"] diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/catalog.py b/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/catalog.py deleted file mode 100644 index 2275142..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/catalog.py +++ /dev/null @@ -1,81 +0,0 @@ -"""Iceberg catalog configuration for filesystem-based tables.""" - -from pathlib import Path -from typing import Any - -from pyiceberg.catalog import Catalog, load_catalog -from pyiceberg.catalog.sql import SqlCatalog - - -def create_filesystem_catalog( - name: str = "cassandra_export", - warehouse_path: str | Path | None = None, -) -> Catalog: - """Create a filesystem-based Iceberg catalog. - - What this does: - -------------- - 1. Creates a local filesystem catalog using SQLite - 2. Stores table metadata in SQLite database - 3. Stores actual data files in warehouse directory - 4. No external dependencies (S3, Hive, etc.) - - Why this matters: - ---------------- - - Simple setup for development and testing - - No cloud dependencies - - Easy to inspect and debug - - Can be migrated to production catalogs later - - Args: - name: Catalog name - warehouse_path: Path to warehouse directory (default: ./iceberg_warehouse) - - Returns: - Iceberg catalog instance - """ - if warehouse_path is None: - warehouse_path = Path.cwd() / "iceberg_warehouse" - else: - warehouse_path = Path(warehouse_path) - - # Create warehouse directory if it doesn't exist - warehouse_path.mkdir(parents=True, exist_ok=True) - - # SQLite catalog configuration - catalog_config = { - "type": "sql", - "uri": f"sqlite:///{warehouse_path / 'catalog.db'}", - "warehouse": str(warehouse_path), - } - - # Create catalog - catalog = SqlCatalog(name, **catalog_config) - - return catalog - - -def get_or_create_catalog( - catalog_name: str = "cassandra_export", - warehouse_path: str | Path | None = None, - config: dict[str, Any] | None = None, -) -> Catalog: - """Get existing catalog or create a new one. - - This allows for custom catalog configurations while providing - sensible defaults for filesystem-based catalogs. 
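The catalog wiring above is simply a SQLite-backed `SqlCatalog` pointed at a local warehouse directory. A minimal sketch of the same setup, assuming pyiceberg is installed; the namespace name is illustrative:

```python
from pathlib import Path

from pyiceberg.catalog.sql import SqlCatalog

warehouse = Path("iceberg_warehouse")
warehouse.mkdir(parents=True, exist_ok=True)

catalog = SqlCatalog(
    "cassandra_export",
    uri=f"sqlite:///{warehouse / 'catalog.db'}",  # table metadata lives in SQLite
    warehouse=str(warehouse),                     # data and manifests on local disk
)

catalog.create_namespace("bulk_demo")  # raises if it already exists; guard omitted for brevity
print(catalog.list_namespaces())
```

Because the metadata is a single SQLite file next to the data, the warehouse can be inspected or deleted wholesale during development.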
- - Args: - catalog_name: Name of the catalog - warehouse_path: Path to warehouse (for filesystem catalogs) - config: Custom catalog configuration (overrides defaults) - - Returns: - Iceberg catalog instance - """ - if config is not None: - # Use custom configuration - return load_catalog(catalog_name, **config) - else: - # Use filesystem catalog - return create_filesystem_catalog(catalog_name, warehouse_path) diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/exporter.py b/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/exporter.py deleted file mode 100644 index cd6cb7a..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/exporter.py +++ /dev/null @@ -1,376 +0,0 @@ -"""Export Cassandra data to Apache Iceberg tables.""" - -import asyncio -import contextlib -import uuid -from datetime import UTC, datetime -from pathlib import Path -from typing import Any - -import pyarrow as pa -import pyarrow.parquet as pq -from pyiceberg.catalog import Catalog -from pyiceberg.exceptions import NoSuchTableError -from pyiceberg.partitioning import PartitionSpec -from pyiceberg.schema import Schema -from pyiceberg.table import Table - -from bulk_operations.exporters.base import ExportFormat, ExportProgress -from bulk_operations.exporters.parquet_exporter import ParquetExporter -from bulk_operations.iceberg.catalog import get_or_create_catalog -from bulk_operations.iceberg.schema_mapper import CassandraToIcebergSchemaMapper - - -class IcebergExporter(ParquetExporter): - """Export Cassandra data to Apache Iceberg tables. - - This exporter extends the Parquet exporter to write data in Iceberg format, - enabling advanced data lakehouse features like ACID transactions, time travel, - and schema evolution. - - What this does: - -------------- - 1. Creates Iceberg tables from Cassandra schemas - 2. Writes data as Parquet files in Iceberg format - 3. Updates Iceberg metadata and manifests - 4. Supports partitioning strategies - 5. Enables time travel and version history - - Why this matters: - ---------------- - - ACID transactions on exported data - - Schema evolution without rewriting data - - Time travel queries ("SELECT * FROM table AS OF timestamp") - - Hidden partitioning for better performance - - Integration with modern data tools (Spark, Trino, etc.) - """ - - def __init__( - self, - operator, - catalog: Catalog | None = None, - catalog_config: dict[str, Any] | None = None, - warehouse_path: str | Path | None = None, - compression: str = "snappy", - row_group_size: int = 100000, - buffer_size: int = 8192, - ): - """Initialize Iceberg exporter. 
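Driving the exporter end to end looks roughly like the sketch below. This is a hedged illustration only: the cluster/operator setup mirrors the other example scripts in this directory, and the keyspace and table names are placeholders.

```python
# Sketch: wire an IcebergExporter to a bulk operator and run one export.
import asyncio

from async_cassandra import AsyncCluster
from bulk_operations.bulk_operator import TokenAwareBulkOperator
from bulk_operations.iceberg import IcebergExporter

async def main() -> None:
    async with AsyncCluster(contact_points=["localhost"]) as cluster:
        session = await cluster.connect()
        operator = TokenAwareBulkOperator(session)

        exporter = IcebergExporter(
            operator,
            warehouse_path="iceberg_warehouse",  # filesystem catalog by default
            compression="snappy",
        )
        progress = await exporter.export(keyspace="bulk_demo", table="products")
        print(progress.rows_exported, progress.metadata.get("iceberg_table"))

asyncio.run(main())
```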
- - Args: - operator: Token-aware bulk operator instance - catalog: Pre-configured Iceberg catalog (optional) - catalog_config: Custom catalog configuration (optional) - warehouse_path: Path to Iceberg warehouse (for filesystem catalog) - compression: Parquet compression codec - row_group_size: Rows per Parquet row group - buffer_size: Buffer size for file operations - """ - super().__init__( - operator=operator, - compression=compression, - row_group_size=row_group_size, - use_dictionary=True, - buffer_size=buffer_size, - ) - - # Set up catalog - if catalog is not None: - self.catalog = catalog - else: - self.catalog = get_or_create_catalog( - catalog_name="cassandra_export", - warehouse_path=warehouse_path, - config=catalog_config, - ) - - self.schema_mapper = CassandraToIcebergSchemaMapper() - self._current_table: Table | None = None - self._data_files: list[str] = [] - - async def export( - self, - keyspace: str, - table: str, - output_path: Path | None = None, # Not used, Iceberg manages paths - namespace: str | None = None, - table_name: str | None = None, - partition_spec: PartitionSpec | None = None, - table_properties: dict[str, str] | None = None, - columns: list[str] | None = None, - split_count: int | None = None, - parallelism: int | None = None, - progress: ExportProgress | None = None, - progress_callback: Any | None = None, - ) -> ExportProgress: - """Export Cassandra table to Iceberg format. - - Args: - keyspace: Cassandra keyspace - table: Cassandra table name - output_path: Not used - Iceberg manages file paths - namespace: Iceberg namespace (default: cassandra keyspace) - table_name: Iceberg table name (default: cassandra table name) - partition_spec: Iceberg partition specification - table_properties: Additional Iceberg table properties - columns: Columns to export (default: all) - split_count: Number of token range splits - parallelism: Max concurrent operations - progress: Resume progress (optional) - progress_callback: Progress callback function - - Returns: - Export progress with Iceberg-specific metadata - """ - # Use Cassandra names as defaults - if namespace is None: - namespace = keyspace - if table_name is None: - table_name = table - - # Get Cassandra table metadata - metadata = self.operator.session._session.cluster.metadata - keyspace_metadata = metadata.keyspaces.get(keyspace) - if not keyspace_metadata: - raise ValueError(f"Keyspace '{keyspace}' not found") - table_metadata = keyspace_metadata.tables.get(table) - if not table_metadata: - raise ValueError(f"Table '{keyspace}.{table}' not found") - - # Create or get Iceberg table - iceberg_schema = self.schema_mapper.map_table_schema(table_metadata) - self._current_table = await self._get_or_create_iceberg_table( - namespace=namespace, - table_name=table_name, - schema=iceberg_schema, - partition_spec=partition_spec, - table_properties=table_properties, - ) - - # Initialize progress - if progress is None: - progress = ExportProgress( - export_id=str(uuid.uuid4()), - keyspace=keyspace, - table=table, - format=ExportFormat.PARQUET, # Iceberg uses Parquet format - output_path=f"iceberg://{namespace}.{table_name}", - started_at=datetime.now(UTC), - metadata={ - "iceberg_namespace": namespace, - "iceberg_table": table_name, - "catalog": self.catalog.name, - "compression": self.compression, - "row_group_size": self.row_group_size, - }, - ) - - # Reset data files list - self._data_files = [] - - try: - # Export data using token ranges - await self._export_by_ranges( - keyspace=keyspace, - table=table, - 
columns=columns, - split_count=split_count, - parallelism=parallelism, - progress=progress, - progress_callback=progress_callback, - ) - - # Commit data files to Iceberg table - if self._data_files: - await self._commit_data_files() - - # Update progress - progress.completed_at = datetime.now(UTC) - progress.metadata["data_files"] = len(self._data_files) - progress.metadata["iceberg_snapshot"] = ( - self._current_table.current_snapshot().snapshot_id - if self._current_table.current_snapshot() - else None - ) - - # Final callback - if progress_callback: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - except Exception as e: - progress.errors.append(str(e)) - raise - - # Save progress - progress.save() - return progress - - async def _get_or_create_iceberg_table( - self, - namespace: str, - table_name: str, - schema: Schema, - partition_spec: PartitionSpec | None = None, - table_properties: dict[str, str] | None = None, - ) -> Table: - """Get existing Iceberg table or create a new one. - - Args: - namespace: Iceberg namespace - table_name: Table name - schema: Iceberg schema - partition_spec: Partition specification (optional) - table_properties: Table properties (optional) - - Returns: - Iceberg Table instance - """ - table_identifier = f"{namespace}.{table_name}" - - try: - # Try to load existing table - table = self.catalog.load_table(table_identifier) - - # TODO: Implement schema evolution check - # For now, we'll append to existing tables - - return table - - except NoSuchTableError: - # Create new table - if table_properties is None: - table_properties = {} - - # Add default properties - table_properties.setdefault("write.format.default", "parquet") - table_properties.setdefault("write.parquet.compression-codec", self.compression) - - # Create namespace if it doesn't exist - with contextlib.suppress(Exception): - self.catalog.create_namespace(namespace) - - # Create table - table = self.catalog.create_table( - identifier=table_identifier, - schema=schema, - partition_spec=partition_spec, - properties=table_properties, - ) - - return table - - async def _export_by_ranges( - self, - keyspace: str, - table: str, - columns: list[str] | None, - split_count: int | None, - parallelism: int | None, - progress: ExportProgress, - progress_callback: Any | None, - ) -> None: - """Export data by token ranges to multiple Parquet files.""" - # Build Arrow schema for the data - table_meta = await self._get_table_metadata(keyspace, table) - - if columns is None: - columns = list(table_meta.columns.keys()) - - self._schema = self._build_arrow_schema(table_meta, columns) - - # Export each token range to a separate file - file_index = 0 - - async for row in self.operator.export_by_token_ranges( - keyspace=keyspace, - table=table, - split_count=split_count, - parallelism=parallelism, - ): - # Add row to batch - row_data = self._convert_row_to_dict(row, columns) - self._batch_rows.append(row_data) - - # Write batch when full - if len(self._batch_rows) >= self.row_group_size: - file_path = await self._write_data_file(file_index) - self._data_files.append(str(file_path)) - file_index += 1 - - progress.rows_exported += 1 - - # Progress callback - if progress_callback and progress.rows_exported % 1000 == 0: - if asyncio.iscoroutinefunction(progress_callback): - await progress_callback(progress) - else: - progress_callback(progress) - - # Write final batch - if self._batch_rows: - file_path = await self._write_data_file(file_index) 
- self._data_files.append(str(file_path)) - - async def _write_data_file(self, file_index: int) -> Path: - """Write a batch of rows to a Parquet data file. - - Args: - file_index: Index for file naming - - Returns: - Path to the written file - """ - if not self._batch_rows: - raise ValueError("No data to write") - - # Generate file path in Iceberg data directory - # Format: data/part-{index}-{uuid}.parquet - file_name = f"part-{file_index:05d}-{uuid.uuid4()}.parquet" - file_path = Path(self._current_table.location()) / "data" / file_name - - # Ensure directory exists - file_path.parent.mkdir(parents=True, exist_ok=True) - - # Convert to Arrow table - table = pa.Table.from_pylist(self._batch_rows, schema=self._schema) - - # Write Parquet file - pq.write_table( - table, - file_path, - compression=self.compression, - use_dictionary=self.use_dictionary, - ) - - # Clear batch - self._batch_rows = [] - - return file_path - - async def _commit_data_files(self) -> None: - """Commit data files to Iceberg table as a new snapshot.""" - # This is a simplified version - in production, you'd use - # proper Iceberg APIs to add data files with statistics - - # For now, we'll just note that files were written - # The full implementation would: - # 1. Collect file statistics (row count, column bounds, etc.) - # 2. Create DataFile objects - # 3. Append files to table using transaction API - - # TODO: Implement proper Iceberg commit - pass - - async def _get_table_metadata(self, keyspace: str, table: str): - """Get Cassandra table metadata.""" - metadata = self.operator.session._session.cluster.metadata - keyspace_metadata = metadata.keyspaces.get(keyspace) - if not keyspace_metadata: - raise ValueError(f"Keyspace '{keyspace}' not found") - table_metadata = keyspace_metadata.tables.get(table) - if not table_metadata: - raise ValueError(f"Table '{keyspace}.{table}' not found") - return table_metadata diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/schema_mapper.py b/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/schema_mapper.py deleted file mode 100644 index b9c42e3..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/iceberg/schema_mapper.py +++ /dev/null @@ -1,196 +0,0 @@ -"""Maps Cassandra table schemas to Iceberg schemas.""" - -from cassandra.metadata import ColumnMetadata, TableMetadata -from pyiceberg.schema import Schema -from pyiceberg.types import ( - BinaryType, - BooleanType, - DateType, - DecimalType, - DoubleType, - FloatType, - IcebergType, - IntegerType, - ListType, - LongType, - MapType, - NestedField, - StringType, - TimestamptzType, -) - - -class CassandraToIcebergSchemaMapper: - """Maps Cassandra table schemas to Apache Iceberg schemas. - - What this does: - -------------- - 1. Converts CQL types to Iceberg types - 2. Preserves column nullability - 3. Handles complex types (lists, sets, maps) - 4. Assigns unique field IDs for schema evolution - - Why this matters: - ---------------- - - Enables seamless data migration from Cassandra to Iceberg - - Preserves type information for analytics - - Supports schema evolution in Iceberg - - Maintains data integrity during export - """ - - def __init__(self): - """Initialize the schema mapper.""" - self._field_id_counter = 1 - - def map_table_schema(self, table_metadata: TableMetadata) -> Schema: - """Map a Cassandra table schema to an Iceberg schema. 
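In practice the mapper is fed driver metadata and returns a pyiceberg `Schema`. A small sketch, assuming a reachable cluster as in the other examples; the keyspace/table names are illustrative and the private `_session` access follows the same pattern the exporter uses above:

```python
import asyncio

from async_cassandra import AsyncCluster
from bulk_operations.iceberg.schema_mapper import CassandraToIcebergSchemaMapper

async def main() -> None:
    async with AsyncCluster(contact_points=["localhost"]) as cluster:
        session = await cluster.connect()

        # Driver metadata access mirrors the exporter code above.
        meta = session._session.cluster.metadata
        table_meta = meta.keyspaces["bulk_demo"].tables["products"]

        schema = CassandraToIcebergSchemaMapper().map_table_schema(table_meta)
        for field in schema.fields:
            print(field.field_id, field.name, field.field_type, field.required)

asyncio.run(main())
```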
- - Args: - table_metadata: Cassandra table metadata - - Returns: - Iceberg Schema object - """ - fields = [] - - # Map each column - for column_name, column_meta in table_metadata.columns.items(): - field = self._map_column(column_name, column_meta) - fields.append(field) - - return Schema(*fields) - - def _map_column(self, name: str, column_meta: ColumnMetadata) -> NestedField: - """Map a single Cassandra column to an Iceberg field. - - Args: - name: Column name - column_meta: Cassandra column metadata - - Returns: - Iceberg NestedField - """ - # Get the Iceberg type - iceberg_type = self._map_cql_type(column_meta.cql_type) - - # Create field with unique ID - field_id = self._get_next_field_id() - - # In Cassandra, primary key columns are required (not null) - # All other columns are nullable - is_required = column_meta.is_primary_key - - return NestedField( - field_id=field_id, - name=name, - field_type=iceberg_type, - required=is_required, - ) - - def _map_cql_type(self, cql_type: str) -> IcebergType: - """Map a CQL type string to an Iceberg type. - - Args: - cql_type: CQL type string (e.g., "text", "int", "list") - - Returns: - Iceberg Type - """ - # Handle parameterized types - base_type = cql_type.split("<")[0].lower() - - # Simple type mappings - type_mapping = { - # String types - "ascii": StringType(), - "text": StringType(), - "varchar": StringType(), - # Numeric types - "tinyint": IntegerType(), # 8-bit in Cassandra, 32-bit in Iceberg - "smallint": IntegerType(), # 16-bit in Cassandra, 32-bit in Iceberg - "int": IntegerType(), - "bigint": LongType(), - "counter": LongType(), - "varint": DecimalType(38, 0), # Arbitrary precision integer - "decimal": DecimalType(38, 10), # Default precision/scale - "float": FloatType(), - "double": DoubleType(), - # Boolean - "boolean": BooleanType(), - # Date/Time types - "date": DateType(), - "timestamp": TimestamptzType(), # Cassandra timestamps have timezone - "time": LongType(), # Time as nanoseconds since midnight - # Binary - "blob": BinaryType(), - # UUID types - "uuid": StringType(), # Store as string for compatibility - "timeuuid": StringType(), - # Network - "inet": StringType(), # IP address as string - } - - # Handle simple types - if base_type in type_mapping: - return type_mapping[base_type] - - # Handle collection types - if base_type == "list": - element_type = self._extract_collection_type(cql_type) - return ListType( - element_id=self._get_next_field_id(), - element_type=self._map_cql_type(element_type), - element_required=False, # Cassandra allows null elements - ) - elif base_type == "set": - # Sets become lists in Iceberg (no native set type) - element_type = self._extract_collection_type(cql_type) - return ListType( - element_id=self._get_next_field_id(), - element_type=self._map_cql_type(element_type), - element_required=False, - ) - elif base_type == "map": - key_type, value_type = self._extract_map_types(cql_type) - return MapType( - key_id=self._get_next_field_id(), - key_type=self._map_cql_type(key_type), - value_id=self._get_next_field_id(), - value_type=self._map_cql_type(value_type), - value_required=False, # Cassandra allows null values - ) - elif base_type == "tuple": - # Tuples become structs in Iceberg - # For now, we'll use a string representation - # TODO: Implement proper tuple parsing - return StringType() - elif base_type == "frozen": - # Frozen collections - strip "frozen" and process inner type - inner_type = cql_type[7:-1] # Remove "frozen<" and ">" - return self._map_cql_type(inner_type) - else: - # 
Default to string for unknown types - return StringType() - - def _extract_collection_type(self, cql_type: str) -> str: - """Extract element type from list or set.""" - start = cql_type.index("<") + 1 - end = cql_type.rindex(">") - return cql_type[start:end].strip() - - def _extract_map_types(self, cql_type: str) -> tuple[str, str]: - """Extract key and value types from map.""" - start = cql_type.index("<") + 1 - end = cql_type.rindex(">") - types = cql_type[start:end].split(",", 1) - return types[0].strip(), types[1].strip() - - def _get_next_field_id(self) -> int: - """Get the next available field ID.""" - field_id = self._field_id_counter - self._field_id_counter += 1 - return field_id - - def reset_field_ids(self) -> None: - """Reset field ID counter (useful for testing).""" - self._field_id_counter = 1 diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/parallel_export.py b/libs/async-cassandra-bulk/examples/bulk_operations/parallel_export.py deleted file mode 100644 index 22f0e1c..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/parallel_export.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -Parallel export implementation for production-grade bulk operations. - -This module provides a truly parallel export capability that streams data -from multiple token ranges concurrently, similar to DSBulk. -""" - -import asyncio -from collections.abc import AsyncIterator, Callable -from typing import Any - -from cassandra import ConsistencyLevel - -from .stats import BulkOperationStats -from .token_utils import TokenRange - - -class ParallelExportIterator: - """ - Parallel export iterator that manages concurrent token range queries. - - This implementation uses asyncio queues to coordinate between multiple - worker tasks that query different token ranges in parallel. 
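The coordination described here is a standard asyncio fan-in: worker tasks push rows onto a bounded queue and signal completion with end markers, while a single consumer drains the queue. A stripped-down, generic sketch of that pattern (not the actual iterator internals, which also handle semaphores, stats, and wraparound ranges):

```python
import asyncio

async def worker(worker_id: int, queue: asyncio.Queue) -> None:
    for i in range(3):                  # stand-in for rows streamed from one token range
        await queue.put((f"range-{worker_id}", i))
    await queue.put(None)               # end marker for this worker

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue(maxsize=100)  # bounded to apply backpressure
    n_workers = 4
    tasks = [asyncio.create_task(worker(i, queue)) for i in range(n_workers)]

    finished = 0
    while finished < n_workers:
        item = await queue.get()
        if item is None:
            finished += 1               # one worker has drained its range
            continue
        print("row:", item)             # consumer sees rows as they arrive, interleaved

    await asyncio.gather(*tasks)

asyncio.run(main())
```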
- """ - - def __init__( - self, - operator: Any, - keyspace: str, - table: str, - splits: list[TokenRange], - prepared_stmts: dict[str, Any], - parallelism: int, - consistency_level: ConsistencyLevel | None, - stats: BulkOperationStats, - progress_callback: Callable[[BulkOperationStats], None] | None, - ): - self.operator = operator - self.keyspace = keyspace - self.table = table - self.splits = splits - self.prepared_stmts = prepared_stmts - self.parallelism = parallelism - self.consistency_level = consistency_level - self.stats = stats - self.progress_callback = progress_callback - - # Queue for results from parallel workers - self.result_queue: asyncio.Queue[tuple[Any, bool]] = asyncio.Queue(maxsize=parallelism * 10) - self.workers_done = False - self.worker_tasks: list[asyncio.Task] = [] - - async def __aiter__(self) -> AsyncIterator[Any]: - """Start parallel workers and yield results as they come in.""" - # Start worker tasks - await self._start_workers() - - # Yield results from the queue - while True: - try: - # Wait for results with a timeout to check if workers are done - row, is_end_marker = await asyncio.wait_for(self.result_queue.get(), timeout=0.1) - - if is_end_marker: - # This was an end marker from a worker - continue - - yield row - - except TimeoutError: - # Check if all workers are done - if self.workers_done and self.result_queue.empty(): - break - continue - except Exception: - # Cancel all workers on error - await self._cancel_workers() - raise - - async def _start_workers(self) -> None: - """Start parallel worker tasks to process token ranges.""" - # Create a semaphore to limit concurrent queries - semaphore = asyncio.Semaphore(self.parallelism) - - # Create worker tasks for each split - for split in self.splits: - task = asyncio.create_task(self._process_split(split, semaphore)) - self.worker_tasks.append(task) - - # Create a task to monitor when all workers are done - asyncio.create_task(self._monitor_workers()) - - async def _monitor_workers(self) -> None: - """Monitor worker tasks and signal when all are complete.""" - try: - # Wait for all workers to complete - await asyncio.gather(*self.worker_tasks, return_exceptions=True) - finally: - self.workers_done = True - # Put a final marker to unblock the iterator if needed - await self.result_queue.put((None, True)) - - async def _cancel_workers(self) -> None: - """Cancel all worker tasks.""" - for task in self.worker_tasks: - if not task.done(): - task.cancel() - - # Wait for cancellation to complete - await asyncio.gather(*self.worker_tasks, return_exceptions=True) - - async def _process_split(self, split: TokenRange, semaphore: asyncio.Semaphore) -> None: - """Process a single token range split.""" - async with semaphore: - try: - if split.end < split.start: - # Wraparound range - process in two parts - await self._query_and_queue( - self.prepared_stmts["select_wraparound_gt"], (split.start,) - ) - await self._query_and_queue( - self.prepared_stmts["select_wraparound_lte"], (split.end,) - ) - else: - # Normal range - await self._query_and_queue( - self.prepared_stmts["select_range"], (split.start, split.end) - ) - - # Update stats - self.stats.ranges_completed += 1 - if self.progress_callback: - self.progress_callback(self.stats) - - except Exception as e: - # Add error to stats but don't fail the whole export - self.stats.errors.append(e) - # Put an end marker to signal this worker is done - await self.result_queue.put((None, True)) - raise - - # Signal this worker is done - await self.result_queue.put((None, 
True)) - - async def _query_and_queue(self, stmt: Any, params: tuple) -> None: - """Execute a query and queue all results.""" - # Set consistency level if provided - if self.consistency_level is not None: - stmt.consistency_level = self.consistency_level - - # Execute streaming query - async with await self.operator.session.execute_stream(stmt, params) as result: - async for row in result: - self.stats.rows_processed += 1 - # Queue the row for the main iterator - await self.result_queue.put((row, False)) - - -async def export_by_token_ranges_parallel( - operator: Any, - keyspace: str, - table: str, - splits: list[TokenRange], - prepared_stmts: dict[str, Any], - parallelism: int, - consistency_level: ConsistencyLevel | None, - stats: BulkOperationStats, - progress_callback: Callable[[BulkOperationStats], None] | None, -) -> AsyncIterator[Any]: - """ - Export rows from token ranges in parallel. - - This function creates a parallel export iterator that manages multiple - concurrent queries to different token ranges, similar to how DSBulk works. - - Args: - operator: The bulk operator instance - keyspace: Keyspace name - table: Table name - splits: List of token ranges to query - prepared_stmts: Prepared statements for queries - parallelism: Maximum concurrent queries - consistency_level: Consistency level for queries - stats: Statistics object to update - progress_callback: Optional progress callback - - Yields: - Rows from the table, streamed as they arrive from parallel queries - """ - iterator = ParallelExportIterator( - operator=operator, - keyspace=keyspace, - table=table, - splits=splits, - prepared_stmts=prepared_stmts, - parallelism=parallelism, - consistency_level=consistency_level, - stats=stats, - progress_callback=progress_callback, - ) - - async for row in iterator: - yield row diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/stats.py b/libs/async-cassandra-bulk/examples/bulk_operations/stats.py deleted file mode 100644 index 6f576d0..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/stats.py +++ /dev/null @@ -1,43 +0,0 @@ -"""Statistics tracking for bulk operations.""" - -import time -from dataclasses import dataclass, field - - -@dataclass -class BulkOperationStats: - """Statistics for bulk operations.""" - - rows_processed: int = 0 - ranges_completed: int = 0 - total_ranges: int = 0 - start_time: float = field(default_factory=time.time) - end_time: float | None = None - errors: list[Exception] = field(default_factory=list) - - @property - def duration_seconds(self) -> float: - """Calculate operation duration.""" - if self.end_time: - return self.end_time - self.start_time - return time.time() - self.start_time - - @property - def rows_per_second(self) -> float: - """Calculate processing rate.""" - duration = self.duration_seconds - if duration > 0: - return self.rows_processed / duration - return 0 - - @property - def progress_percentage(self) -> float: - """Calculate progress as percentage.""" - if self.total_ranges > 0: - return (self.ranges_completed / self.total_ranges) * 100 - return 0 - - @property - def is_complete(self) -> bool: - """Check if operation is complete.""" - return self.ranges_completed == self.total_ranges diff --git a/libs/async-cassandra-bulk/examples/bulk_operations/token_utils.py b/libs/async-cassandra-bulk/examples/bulk_operations/token_utils.py deleted file mode 100644 index 29c0c1a..0000000 --- a/libs/async-cassandra-bulk/examples/bulk_operations/token_utils.py +++ /dev/null @@ -1,185 +0,0 @@ -""" -Token range utilities 
for bulk operations. - -Handles token range discovery, splitting, and query generation. -""" - -from dataclasses import dataclass - -from async_cassandra import AsyncCassandraSession - -# Murmur3 token range boundaries -MIN_TOKEN = -(2**63) # -9223372036854775808 -MAX_TOKEN = 2**63 - 1 # 9223372036854775807 -TOTAL_TOKEN_RANGE = 2**64 - 1 # Total range size - - -@dataclass -class TokenRange: - """Represents a token range with replica information.""" - - start: int - end: int - replicas: list[str] - - @property - def size(self) -> int: - """Calculate the size of this token range.""" - if self.end >= self.start: - return self.end - self.start - else: - # Handle wraparound (e.g., 9223372036854775800 to -9223372036854775800) - return (MAX_TOKEN - self.start) + (self.end - MIN_TOKEN) + 1 - - @property - def fraction(self) -> float: - """Calculate what fraction of the total ring this range represents.""" - return self.size / TOTAL_TOKEN_RANGE - - -class TokenRangeSplitter: - """Splits token ranges for parallel processing.""" - - def split_single_range(self, token_range: TokenRange, split_count: int) -> list[TokenRange]: - """Split a single token range into approximately equal parts.""" - if split_count <= 1: - return [token_range] - - # Calculate split size - split_size = token_range.size // split_count - if split_size < 1: - # Range too small to split further - return [token_range] - - splits = [] - current_start = token_range.start - - for i in range(split_count): - if i == split_count - 1: - # Last split gets any remainder - current_end = token_range.end - else: - current_end = current_start + split_size - # Handle potential overflow - if current_end > MAX_TOKEN: - current_end = current_end - TOTAL_TOKEN_RANGE - - splits.append( - TokenRange(start=current_start, end=current_end, replicas=token_range.replicas) - ) - - current_start = current_end - - return splits - - def split_proportionally( - self, ranges: list[TokenRange], target_splits: int - ) -> list[TokenRange]: - """Split ranges proportionally based on their size.""" - if not ranges: - return [] - - # Calculate total size - total_size = sum(r.size for r in ranges) - - all_splits = [] - for token_range in ranges: - # Calculate number of splits for this range - range_fraction = token_range.size / total_size - range_splits = max(1, round(range_fraction * target_splits)) - - # Split the range - splits = self.split_single_range(token_range, range_splits) - all_splits.extend(splits) - - return all_splits - - def cluster_by_replicas( - self, ranges: list[TokenRange] - ) -> dict[tuple[str, ...], list[TokenRange]]: - """Group ranges by their replica sets.""" - clusters: dict[tuple[str, ...], list[TokenRange]] = {} - - for token_range in ranges: - # Use sorted tuple as key for consistency - replica_key = tuple(sorted(token_range.replicas)) - if replica_key not in clusters: - clusters[replica_key] = [] - clusters[replica_key].append(token_range) - - return clusters - - -async def discover_token_ranges(session: AsyncCassandraSession, keyspace: str) -> list[TokenRange]: - """Discover token ranges from cluster metadata.""" - # Access cluster through the underlying sync session - cluster = session._session.cluster - metadata = cluster.metadata - token_map = metadata.token_map - - if not token_map: - raise RuntimeError("Token map not available") - - # Get all tokens from the ring - all_tokens = sorted(token_map.ring) - if not all_tokens: - raise RuntimeError("No tokens found in ring") - - ranges = [] - - # Create ranges from consecutive tokens - for i 
in range(len(all_tokens)): - start_token = all_tokens[i] - # Wrap around to first token for the last range - end_token = all_tokens[(i + 1) % len(all_tokens)] - - # Handle wraparound - last range goes from last token to first token - if i == len(all_tokens) - 1: - # This is the wraparound range - start = start_token.value - end = all_tokens[0].value - else: - start = start_token.value - end = end_token.value - - # Get replicas for this token - replicas = token_map.get_replicas(keyspace, start_token) - replica_addresses = [str(r.address) for r in replicas] - - ranges.append(TokenRange(start=start, end=end, replicas=replica_addresses)) - - return ranges - - -def generate_token_range_query( - keyspace: str, - table: str, - partition_keys: list[str], - token_range: TokenRange, - columns: list[str] | None = None, -) -> str: - """Generate a CQL query for a specific token range. - - Note: This function assumes non-wraparound ranges. Wraparound ranges - (where end < start) should be handled by the caller by splitting them - into two separate queries. - """ - # Column selection - column_list = ", ".join(columns) if columns else "*" - - # Partition key list for token function - pk_list = ", ".join(partition_keys) - - # Generate token condition - if token_range.start == MIN_TOKEN: - # First range uses >= to include minimum token - token_condition = ( - f"token({pk_list}) >= {token_range.start} AND token({pk_list}) <= {token_range.end}" - ) - else: - # All other ranges use > to avoid duplicates - token_condition = ( - f"token({pk_list}) > {token_range.start} AND token({pk_list}) <= {token_range.end}" - ) - - return f"SELECT {column_list} FROM {keyspace}.{table} WHERE {token_condition}" diff --git a/libs/async-cassandra-bulk/examples/debug_coverage.py b/libs/async-cassandra-bulk/examples/debug_coverage.py deleted file mode 100644 index ca8c781..0000000 --- a/libs/async-cassandra-bulk/examples/debug_coverage.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python3 -"""Debug token range coverage issue.""" - -import asyncio - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator -from bulk_operations.token_utils import MIN_TOKEN, discover_token_ranges, generate_token_range_query - - -async def debug_coverage(): - """Debug why we're missing rows.""" - print("Debugging token range coverage...") - - async with AsyncCluster(contact_points=["localhost"]) as cluster: - session = await cluster.connect() - - # First, let's see what tokens our test data actually has - print("\nChecking token distribution of test data...") - - # Get a sample of tokens - result = await session.execute( - """ - SELECT id, token(id) as token_value - FROM bulk_test.test_data - LIMIT 20 - """ - ) - - print("Sample tokens:") - for row in result: - print(f" ID {row.id}: token = {row.token_value}") - - # Get min and max tokens in our data - result = await session.execute( - """ - SELECT MIN(token(id)) as min_token, MAX(token(id)) as max_token - FROM bulk_test.test_data - """ - ) - row = result.one() - print(f"\nActual token range in data: {row.min_token} to {row.max_token}") - print(f"MIN_TOKEN constant: {MIN_TOKEN}") - - # Now let's see our token ranges - ranges = await discover_token_ranges(session, "bulk_test") - sorted_ranges = sorted(ranges, key=lambda r: r.start) - - print("\nFirst 5 token ranges:") - for i, r in enumerate(sorted_ranges[:5]): - print(f" Range {i}: {r.start} to {r.end}") - - # Check if any of our data falls outside the discovered ranges - print("\nChecking for 
data outside discovered ranges...") - - # Find the range that should contain MIN_TOKEN - min_token_range = None - for r in sorted_ranges: - if r.start <= row.min_token <= r.end: - min_token_range = r - break - - if min_token_range: - print( - f"Range containing minimum data token: {min_token_range.start} to {min_token_range.end}" - ) - else: - print("WARNING: No range found containing minimum data token!") - - # Let's also check if we have the wraparound issue - print(f"\nLast range: {sorted_ranges[-1].start} to {sorted_ranges[-1].end}") - print(f"First range: {sorted_ranges[0].start} to {sorted_ranges[0].end}") - - # The issue might be with how we handle the wraparound - # In Cassandra's token ring, the last range wraps to the first - # Let's verify this - if sorted_ranges[-1].end != sorted_ranges[0].start: - print( - f"WARNING: Ring not properly closed! Last end: {sorted_ranges[-1].end}, First start: {sorted_ranges[0].start}" - ) - - # Test the actual queries - print("\nTesting actual token range queries...") - operator = TokenAwareBulkOperator(session) - - # Get table metadata - table_meta = await operator._get_table_metadata("bulk_test", "test_data") - partition_keys = [col.name for col in table_meta.partition_key] - - # Test first range query - first_query = generate_token_range_query( - "bulk_test", "test_data", partition_keys, sorted_ranges[0] - ) - print(f"\nFirst range query: {first_query}") - count_query = first_query.replace("SELECT *", "SELECT COUNT(*)") - result = await session.execute(count_query) - print(f"Rows in first range: {result.one()[0]}") - - # Test last range query - last_query = generate_token_range_query( - "bulk_test", "test_data", partition_keys, sorted_ranges[-1] - ) - print(f"\nLast range query: {last_query}") - count_query = last_query.replace("SELECT *", "SELECT COUNT(*)") - result = await session.execute(count_query) - print(f"Rows in last range: {result.one()[0]}") - - -if __name__ == "__main__": - try: - asyncio.run(debug_coverage()) - except Exception as e: - print(f"Error: {e}") - import traceback - - traceback.print_exc() diff --git a/libs/async-cassandra-bulk/examples/docker-compose-single.yml b/libs/async-cassandra-bulk/examples/docker-compose-single.yml deleted file mode 100644 index 073b12d..0000000 --- a/libs/async-cassandra-bulk/examples/docker-compose-single.yml +++ /dev/null @@ -1,46 +0,0 @@ -version: '3.8' - -# Single node Cassandra for testing with limited resources - -services: - cassandra-1: - image: cassandra:5.0 - container_name: bulk-cassandra-1 - hostname: cassandra-1 - environment: - - CASSANDRA_CLUSTER_NAME=BulkOpsCluster - - CASSANDRA_DC=datacenter1 - - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch - - CASSANDRA_NUM_TOKENS=256 - - MAX_HEAP_SIZE=1G - - JVM_OPTS=-XX:+UseG1GC -XX:G1RSetUpdatingPauseTimePercent=5 -XX:MaxGCPauseMillis=300 - - ports: - - "9042:9042" - volumes: - - cassandra1-data:/var/lib/cassandra - - deploy: - resources: - limits: - memory: 2G - reservations: - memory: 1G - - healthcheck: - test: ["CMD-SHELL", "nodetool info | grep -q 'Native Transport active: true' && cqlsh -e 'SELECT now() FROM system.local'"] - interval: 30s - timeout: 10s - retries: 15 - start_period: 90s - - networks: - - cassandra-net - -networks: - cassandra-net: - driver: bridge - -volumes: - cassandra1-data: - driver: local diff --git a/libs/async-cassandra-bulk/examples/docker-compose.yml b/libs/async-cassandra-bulk/examples/docker-compose.yml deleted file mode 100644 index 82e571c..0000000 --- 
a/libs/async-cassandra-bulk/examples/docker-compose.yml +++ /dev/null @@ -1,160 +0,0 @@ -version: '3.8' - -# Bulk Operations Example - 3-node Cassandra cluster -# Optimized for token-aware bulk operations testing - -services: - # First Cassandra node (seed) - cassandra-1: - image: cassandra:5.0 - container_name: bulk-cassandra-1 - hostname: cassandra-1 - environment: - # Cluster configuration - - CASSANDRA_CLUSTER_NAME=BulkOpsCluster - - CASSANDRA_SEEDS=cassandra-1 - - CASSANDRA_DC=datacenter1 - - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch - - CASSANDRA_NUM_TOKENS=256 - - # Memory settings (reduced for development) - - MAX_HEAP_SIZE=2G - - JVM_OPTS=-XX:+UseG1GC -XX:G1RSetUpdatingPauseTimePercent=5 -XX:MaxGCPauseMillis=300 - - ports: - - "9042:9042" - volumes: - - cassandra1-data:/var/lib/cassandra - - # Resource limits for stability - deploy: - resources: - limits: - memory: 3G - reservations: - memory: 2G - - healthcheck: - test: ["CMD-SHELL", "nodetool info | grep -q 'Native Transport active: true' && cqlsh -e 'SELECT now() FROM system.local'"] - interval: 30s - timeout: 10s - retries: 15 - start_period: 120s - - networks: - - cassandra-net - - # Second Cassandra node - cassandra-2: - image: cassandra:5.0 - container_name: bulk-cassandra-2 - hostname: cassandra-2 - environment: - - CASSANDRA_CLUSTER_NAME=BulkOpsCluster - - CASSANDRA_SEEDS=cassandra-1 - - CASSANDRA_DC=datacenter1 - - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch - - CASSANDRA_NUM_TOKENS=256 - - MAX_HEAP_SIZE=2G - - JVM_OPTS=-XX:+UseG1GC -XX:G1RSetUpdatingPauseTimePercent=5 -XX:MaxGCPauseMillis=300 - - ports: - - "9043:9042" - volumes: - - cassandra2-data:/var/lib/cassandra - depends_on: - cassandra-1: - condition: service_healthy - - deploy: - resources: - limits: - memory: 3G - reservations: - memory: 2G - - healthcheck: - test: ["CMD-SHELL", "nodetool info | grep -q 'Native Transport active: true' && nodetool status | grep -c UN | grep -q 2"] - interval: 30s - timeout: 10s - retries: 15 - start_period: 120s - - networks: - - cassandra-net - - # Third Cassandra node - starts after cassandra-2 to avoid overwhelming the system - cassandra-3: - image: cassandra:5.0 - container_name: bulk-cassandra-3 - hostname: cassandra-3 - environment: - - CASSANDRA_CLUSTER_NAME=BulkOpsCluster - - CASSANDRA_SEEDS=cassandra-1 - - CASSANDRA_DC=datacenter1 - - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch - - CASSANDRA_NUM_TOKENS=256 - - MAX_HEAP_SIZE=2G - - JVM_OPTS=-XX:+UseG1GC -XX:G1RSetUpdatingPauseTimePercent=5 -XX:MaxGCPauseMillis=300 - - ports: - - "9044:9042" - volumes: - - cassandra3-data:/var/lib/cassandra - depends_on: - cassandra-2: - condition: service_healthy - - deploy: - resources: - limits: - memory: 3G - reservations: - memory: 2G - - healthcheck: - test: ["CMD-SHELL", "nodetool info | grep -q 'Native Transport active: true' && nodetool status | grep -c UN | grep -q 3"] - interval: 30s - timeout: 10s - retries: 15 - start_period: 120s - - networks: - - cassandra-net - - # Initialization container - creates keyspace and tables - init-cassandra: - image: cassandra:5.0 - container_name: bulk-init - depends_on: - cassandra-3: - condition: service_healthy - volumes: - - ./scripts/init.cql:/init.cql:ro - command: > - bash -c " - echo 'Waiting for cluster to stabilize...'; - sleep 15; - echo 'Checking cluster status...'; - until cqlsh cassandra-1 -e 'SELECT now() FROM system.local'; do - echo 'Waiting for Cassandra to be ready...'; - sleep 5; - done; - echo 'Creating keyspace and tables...'; - cqlsh 
cassandra-1 -f /init.cql || echo 'Init script may have already run'; - echo 'Initialization complete!'; - " - networks: - - cassandra-net - -networks: - cassandra-net: - driver: bridge - -volumes: - cassandra1-data: - driver: local - cassandra2-data: - driver: local - cassandra3-data: - driver: local diff --git a/libs/async-cassandra-bulk/examples/example_count.py b/libs/async-cassandra-bulk/examples/example_count.py deleted file mode 100644 index f8b7b77..0000000 --- a/libs/async-cassandra-bulk/examples/example_count.py +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env python3 -""" -Example: Token-aware bulk count operation. - -This example demonstrates how to count all rows in a table -using token-aware parallel processing for maximum performance. -""" - -import asyncio -import logging -import time - -from rich.console import Console -from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn -from rich.table import Table - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - -# Configure logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# Rich console for pretty output -console = Console() - - -async def count_table_example(): - """Demonstrate token-aware counting of a large table.""" - - # Connect to cluster - console.print("[cyan]Connecting to Cassandra cluster...[/cyan]") - - async with AsyncCluster(contact_points=["localhost", "127.0.0.1"], port=9042) as cluster: - session = await cluster.connect() - # Create test data if needed - console.print("[yellow]Setting up test keyspace and table...[/yellow]") - - # Create keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_demo - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 3 - } - """ - ) - - # Create table - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_demo.large_table ( - partition_key INT, - clustering_key INT, - data TEXT, - value DOUBLE, - PRIMARY KEY (partition_key, clustering_key) - ) - """ - ) - - # Check if we need to insert test data - result = await session.execute("SELECT COUNT(*) FROM bulk_demo.large_table LIMIT 1") - current_count = result.one().count - - if current_count < 10000: - console.print( - f"[yellow]Table has {current_count} rows. " f"Inserting test data...[/yellow]" - ) - - # Insert some test data using prepared statement - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_demo.large_table - (partition_key, clustering_key, data, value) - VALUES (?, ?, ?, ?) 
- """ - ) - - with Progress( - SpinnerColumn(), - *Progress.get_default_columns(), - TimeElapsedColumn(), - console=console, - ) as progress: - task = progress.add_task("[green]Inserting test data...", total=10000) - - for pk in range(100): - for ck in range(100): - await session.execute( - insert_stmt, (pk, ck, f"data-{pk}-{ck}", pk * ck * 0.1) - ) - progress.update(task, advance=1) - - # Now demonstrate bulk counting - console.print("\n[bold cyan]Token-Aware Bulk Count Demo[/bold cyan]\n") - - operator = TokenAwareBulkOperator(session) - - # Progress tracking - stats_list = [] - - def progress_callback(stats): - """Track progress during operation.""" - stats_list.append( - { - "rows": stats.rows_processed, - "ranges": stats.ranges_completed, - "total_ranges": stats.total_ranges, - "progress": stats.progress_percentage, - "rate": stats.rows_per_second, - } - ) - - # Perform count with different split counts - table = Table(title="Bulk Count Performance Comparison") - table.add_column("Split Count", style="cyan") - table.add_column("Total Rows", style="green") - table.add_column("Duration (s)", style="yellow") - table.add_column("Rows/Second", style="magenta") - table.add_column("Ranges Processed", style="blue") - - for split_count in [1, 4, 8, 16, 32]: - console.print(f"\n[cyan]Counting with {split_count} splits...[/cyan]") - - start_time = time.time() - - try: - with Progress( - SpinnerColumn(), - *Progress.get_default_columns(), - TimeElapsedColumn(), - console=console, - ) as progress: - current_task = progress.add_task( - f"[green]Counting with {split_count} splits...", total=100 - ) - - # Track progress - last_progress = 0 - - def update_progress(stats, task=current_task): - nonlocal last_progress - progress.update(task, completed=int(stats.progress_percentage)) - last_progress = stats.progress_percentage - progress_callback(stats) - - count, final_stats = await operator.count_by_token_ranges_with_stats( - keyspace="bulk_demo", - table="large_table", - split_count=split_count, - progress_callback=update_progress, - ) - - duration = time.time() - start_time - - table.add_row( - str(split_count), - f"{count:,}", - f"{duration:.2f}", - f"{final_stats.rows_per_second:,.0f}", - str(final_stats.ranges_completed), - ) - - except Exception as e: - console.print(f"[red]Error: {e}[/red]") - continue - - # Display results - console.print("\n") - console.print(table) - - # Show token range distribution - console.print("\n[bold]Token Range Analysis:[/bold]") - - from bulk_operations.token_utils import discover_token_ranges - - ranges = await discover_token_ranges(session, "bulk_demo") - - range_table = Table(title="Natural Token Ranges") - range_table.add_column("Range #", style="cyan") - range_table.add_column("Start Token", style="green") - range_table.add_column("End Token", style="yellow") - range_table.add_column("Size", style="magenta") - range_table.add_column("Replicas", style="blue") - - for i, r in enumerate(ranges[:5]): # Show first 5 - range_table.add_row( - str(i + 1), str(r.start), str(r.end), f"{r.size:,}", ", ".join(r.replicas) - ) - - if len(ranges) > 5: - range_table.add_row("...", "...", "...", "...", "...") - - console.print(range_table) - console.print(f"\nTotal natural ranges: {len(ranges)}") - - -if __name__ == "__main__": - try: - asyncio.run(count_table_example()) - except KeyboardInterrupt: - console.print("\n[yellow]Operation cancelled by user[/yellow]") - except Exception as e: - console.print(f"\n[red]Error: {e}[/red]") - logger.exception("Unexpected error") diff 
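The script above wraps the core call in Rich progress UI; stripped to essentials, the count API amounts to the following sketch (split count and keyspace/table names are illustrative):

```python
# Minimal sketch of the core call used in example_count.py, without the Rich UI.
import asyncio

from async_cassandra import AsyncCluster
from bulk_operations.bulk_operator import TokenAwareBulkOperator

async def main() -> None:
    async with AsyncCluster(contact_points=["localhost"]) as cluster:
        session = await cluster.connect()
        operator = TokenAwareBulkOperator(session)

        count, stats = await operator.count_by_token_ranges_with_stats(
            keyspace="bulk_demo",
            table="large_table",
            split_count=8,
            progress_callback=lambda s: print(f"{s.progress_percentage:.0f}%"),
        )
        print(f"rows={count:,} rate={stats.rows_per_second:,.0f}/s")

asyncio.run(main())
```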
--git a/libs/async-cassandra-bulk/examples/example_csv_export.py b/libs/async-cassandra-bulk/examples/example_csv_export.py deleted file mode 100755 index 1d3ceda..0000000 --- a/libs/async-cassandra-bulk/examples/example_csv_export.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python3 -""" -Example: Export Cassandra table to CSV format. - -This demonstrates: -- Basic CSV export -- Compressed CSV export -- Custom delimiters and NULL handling -- Progress tracking -- Resume capability -""" - -import asyncio -import logging -from pathlib import Path - -from rich.console import Console -from rich.logging import RichHandler -from rich.progress import Progress, SpinnerColumn, TextColumn -from rich.table import Table - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[RichHandler(console=Console(stderr=True))], -) -logger = logging.getLogger(__name__) - - -async def export_examples(): - """Run various CSV export examples.""" - console = Console() - - # Connect to Cassandra - console.print("\n[bold blue]Connecting to Cassandra...[/bold blue]") - cluster = AsyncCluster(["localhost"]) - session = await cluster.connect() - - try: - # Ensure test data exists - await setup_test_data(session) - - # Create bulk operator - operator = TokenAwareBulkOperator(session) - - # Example 1: Basic CSV export - console.print("\n[bold green]Example 1: Basic CSV Export[/bold green]") - output_path = Path("exports/products.csv") - output_path.parent.mkdir(exist_ok=True) - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - console=console, - ) as progress: - task = progress.add_task("Exporting to CSV...", total=None) - - def progress_callback(export_progress): - progress.update( - task, - description=f"Exported {export_progress.rows_exported:,} rows " - f"({export_progress.progress_percentage:.1f}%)", - ) - - result = await operator.export_to_csv( - keyspace="bulk_demo", - table="products", - output_path=output_path, - progress_callback=progress_callback, - ) - - console.print(f"โœ“ Exported {result.rows_exported:,} rows to {output_path}") - console.print(f" File size: {result.bytes_written:,} bytes") - - # Example 2: Compressed CSV with custom delimiter - console.print("\n[bold green]Example 2: Compressed Tab-Delimited Export[/bold green]") - output_path = Path("exports/products_tab.csv") - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - console=console, - ) as progress: - task = progress.add_task("Exporting compressed CSV...", total=None) - - def progress_callback(export_progress): - progress.update( - task, - description=f"Exported {export_progress.rows_exported:,} rows", - ) - - result = await operator.export_to_csv( - keyspace="bulk_demo", - table="products", - output_path=output_path, - delimiter="\t", - compression="gzip", - progress_callback=progress_callback, - ) - - console.print(f"โœ“ Exported to {output_path}.gzip") - console.print(f" Compressed size: {result.bytes_written:,} bytes") - - # Example 3: Export with specific columns and NULL handling - console.print("\n[bold green]Example 3: Selective Column Export[/bold green]") - output_path = Path("exports/products_summary.csv") - - result = await operator.export_to_csv( - keyspace="bulk_demo", - table="products", - output_path=output_path, - columns=["id", "name", "price", "category"], - 
null_string="NULL", - ) - - console.print(f"โœ“ Exported {result.rows_exported:,} rows (selected columns)") - - # Show export summary - console.print("\n[bold cyan]Export Summary:[/bold cyan]") - summary_table = Table(show_header=True, header_style="bold magenta") - summary_table.add_column("Export", style="cyan") - summary_table.add_column("Format", style="green") - summary_table.add_column("Rows", justify="right") - summary_table.add_column("Size", justify="right") - summary_table.add_column("Compression") - - summary_table.add_row( - "products.csv", - "CSV", - "10,000", - "~500 KB", - "None", - ) - summary_table.add_row( - "products_tab.csv.gzip", - "TSV", - "10,000", - "~150 KB", - "gzip", - ) - summary_table.add_row( - "products_summary.csv", - "CSV", - "10,000", - "~300 KB", - "None", - ) - - console.print(summary_table) - - # Example 4: Demonstrate resume capability - console.print("\n[bold green]Example 4: Resume Capability[/bold green]") - console.print("Progress files saved at:") - for csv_file in Path("exports").glob("*.csv"): - progress_file = csv_file.with_suffix(".csv.progress") - if progress_file.exists(): - console.print(f" โ€ข {progress_file}") - - finally: - await session.close() - await cluster.shutdown() - - -async def setup_test_data(session): - """Create test keyspace and data if not exists.""" - # Create keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_demo - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - # Create table - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_demo.products ( - id INT PRIMARY KEY, - name TEXT, - description TEXT, - price DECIMAL, - category TEXT, - in_stock BOOLEAN, - tags SET, - attributes MAP, - created_at TIMESTAMP - ) - """ - ) - - # Check if data exists - result = await session.execute("SELECT COUNT(*) FROM bulk_demo.products") - count = result.one().count - - if count < 10000: - logger.info("Inserting test data...") - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_demo.products - (id, name, description, price, category, in_stock, tags, attributes, created_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, toTimestamp(now())) - """ - ) - - # Insert in batches - for i in range(10000): - await session.execute( - insert_stmt, - ( - i, - f"Product {i}", - f"Description for product {i}" if i % 3 != 0 else None, - float(10 + (i % 1000) * 0.1), - ["Electronics", "Books", "Clothing", "Food"][i % 4], - i % 5 != 0, # 80% in stock - {"tag1", f"tag{i % 10}"} if i % 2 == 0 else None, - {"color": ["red", "blue", "green"][i % 3], "size": "M"} if i % 4 == 0 else {}, - ), - ) - - -if __name__ == "__main__": - asyncio.run(export_examples()) diff --git a/libs/async-cassandra-bulk/examples/example_export_formats.py b/libs/async-cassandra-bulk/examples/example_export_formats.py deleted file mode 100755 index f6ca15f..0000000 --- a/libs/async-cassandra-bulk/examples/example_export_formats.py +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env python3 -""" -Example: Export Cassandra data to multiple formats. - -This demonstrates exporting to: -- CSV (with compression) -- JSON (line-delimited and array) -- Parquet (foundation for Iceberg) - -Shows why Parquet is critical for the Iceberg integration. 
-""" - -import asyncio -import logging -from pathlib import Path - -from rich.console import Console -from rich.logging import RichHandler -from rich.panel import Panel -from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn -from rich.table import Table - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[RichHandler(console=Console(stderr=True))], -) -logger = logging.getLogger(__name__) - - -async def export_format_examples(): - """Demonstrate all export formats.""" - console = Console() - - # Header - console.print( - Panel.fit( - "[bold cyan]Cassandra Bulk Export Examples[/bold cyan]\n" - "Exporting to CSV, JSON, and Parquet formats", - border_style="cyan", - ) - ) - - # Connect to Cassandra - console.print("\n[bold blue]Connecting to Cassandra...[/bold blue]") - cluster = AsyncCluster(["localhost"]) - session = await cluster.connect() - - try: - # Setup test data - await setup_test_data(session) - - # Create bulk operator - operator = TokenAwareBulkOperator(session) - - # Create exports directory - exports_dir = Path("exports") - exports_dir.mkdir(exist_ok=True) - - # Export to different formats - results = {} - - # 1. CSV Export - console.print("\n[bold green]1. CSV Export (Universal Format)[/bold green]") - console.print(" โ€ข Human readable") - console.print(" โ€ข Compatible with Excel, databases, etc.") - console.print(" โ€ข Good for data exchange") - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TimeRemainingColumn(), - console=console, - ) as progress: - task = progress.add_task("Exporting to CSV...", total=100) - - def csv_progress(export_progress): - progress.update( - task, - completed=export_progress.progress_percentage, - description=f"CSV: {export_progress.rows_exported:,} rows", - ) - - results["csv"] = await operator.export_to_csv( - keyspace="export_demo", - table="events", - output_path=exports_dir / "events.csv", - compression="gzip", - progress_callback=csv_progress, - ) - - # 2. JSON Export (Line-delimited) - console.print("\n[bold green]2. JSON Export (Streaming Format)[/bold green]") - console.print(" โ€ข Preserves data types") - console.print(" โ€ข Works with streaming tools") - console.print(" โ€ข Good for data pipelines") - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TimeRemainingColumn(), - console=console, - ) as progress: - task = progress.add_task("Exporting to JSONL...", total=100) - - def json_progress(export_progress): - progress.update( - task, - completed=export_progress.progress_percentage, - description=f"JSON: {export_progress.rows_exported:,} rows", - ) - - results["json"] = await operator.export_to_json( - keyspace="export_demo", - table="events", - output_path=exports_dir / "events.jsonl", - format_mode="jsonl", - compression="gzip", - progress_callback=json_progress, - ) - - # 3. Parquet Export (Foundation for Iceberg) - console.print("\n[bold yellow]3. 
Parquet Export (CRITICAL for Iceberg)[/bold yellow]") - console.print(" โ€ข Columnar format for analytics") - console.print(" โ€ข Excellent compression") - console.print(" โ€ข Schema included in file") - console.print(" โ€ข [bold red]This is what Iceberg uses![/bold red]") - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TimeRemainingColumn(), - console=console, - ) as progress: - task = progress.add_task("Exporting to Parquet...", total=100) - - def parquet_progress(export_progress): - progress.update( - task, - completed=export_progress.progress_percentage, - description=f"Parquet: {export_progress.rows_exported:,} rows", - ) - - results["parquet"] = await operator.export_to_parquet( - keyspace="export_demo", - table="events", - output_path=exports_dir / "events.parquet", - compression="snappy", - row_group_size=10000, - progress_callback=parquet_progress, - ) - - # Show results comparison - console.print("\n[bold cyan]Export Results Comparison:[/bold cyan]") - comparison = Table(show_header=True, header_style="bold magenta") - comparison.add_column("Format", style="cyan") - comparison.add_column("File", style="green") - comparison.add_column("Size", justify="right") - comparison.add_column("Rows", justify="right") - comparison.add_column("Time", justify="right") - - for format_name, result in results.items(): - file_path = Path(result.output_path) - if format_name != "parquet" and result.metadata.get("compression"): - file_path = file_path.with_suffix( - file_path.suffix + f".{result.metadata['compression']}" - ) - - size_mb = result.bytes_written / (1024 * 1024) - duration = (result.completed_at - result.started_at).total_seconds() - - comparison.add_row( - format_name.upper(), - file_path.name, - f"{size_mb:.1f} MB", - f"{result.rows_exported:,}", - f"{duration:.1f}s", - ) - - console.print(comparison) - - # Explain Parquet importance - console.print( - Panel( - "[bold yellow]Why Parquet Matters for Iceberg:[/bold yellow]\n\n" - "โ€ข Iceberg tables store data in Parquet files\n" - "โ€ข Columnar format enables fast analytics queries\n" - "โ€ข Built-in schema makes evolution easier\n" - "โ€ข Compression reduces storage costs\n" - "โ€ข Row groups enable efficient filtering\n\n" - "[bold cyan]Next Phase:[/bold cyan] These Parquet files will become " - "Iceberg table data files!", - title="[bold red]The Path to Iceberg[/bold red]", - border_style="yellow", - ) - ) - - finally: - await session.close() - await cluster.shutdown() - - -async def setup_test_data(session): - """Create test keyspace and data.""" - # Create keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS export_demo - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - # Create events table with various data types - await session.execute( - """ - CREATE TABLE IF NOT EXISTS export_demo.events ( - event_id UUID PRIMARY KEY, - event_type TEXT, - user_id INT, - timestamp TIMESTAMP, - properties MAP, - tags SET, - metrics LIST, - is_processed BOOLEAN, - processing_time DECIMAL - ) - """ - ) - - # Check if data exists - result = await session.execute("SELECT COUNT(*) FROM export_demo.events") - count = result.one().count - - if count < 50000: - logger.info("Inserting test events...") - insert_stmt = await session.prepare( - """ - INSERT INTO export_demo.events - (event_id, event_type, user_id, timestamp, properties, - tags, metrics, is_processed, processing_time) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
- """ - ) - - # Insert test events - import uuid - from datetime import datetime, timedelta - from decimal import Decimal - - base_time = datetime.now() - timedelta(days=30) - event_types = ["login", "purchase", "view", "click", "logout"] - - for i in range(50000): - event_time = base_time + timedelta(seconds=i * 60) - - await session.execute( - insert_stmt, - ( - uuid.uuid4(), - event_types[i % len(event_types)], - i % 1000, # user_id - event_time, - {"source": "web", "version": "2.0"} if i % 3 == 0 else {}, - {f"tag{i % 5}", f"cat{i % 3}"} if i % 2 == 0 else None, - [float(i), float(i * 0.1), float(i * 0.01)] if i % 4 == 0 else None, - i % 10 != 0, # 90% processed - Decimal(str(0.001 * (i % 1000))), - ), - ) - - -if __name__ == "__main__": - asyncio.run(export_format_examples()) diff --git a/libs/async-cassandra-bulk/examples/example_iceberg_export.py b/libs/async-cassandra-bulk/examples/example_iceberg_export.py deleted file mode 100644 index 1a08f1b..0000000 --- a/libs/async-cassandra-bulk/examples/example_iceberg_export.py +++ /dev/null @@ -1,302 +0,0 @@ -#!/usr/bin/env python3 -"""Example: Export Cassandra data to Apache Iceberg tables. - -This demonstrates the power of Apache Iceberg: -- ACID transactions on data lakes -- Schema evolution -- Time travel queries -- Hidden partitioning -- Integration with modern analytics tools -""" - -import asyncio -import logging -from datetime import datetime, timedelta -from pathlib import Path - -from pyiceberg.partitioning import PartitionField, PartitionSpec -from pyiceberg.transforms import DayTransform -from rich.console import Console -from rich.logging import RichHandler -from rich.panel import Panel -from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn -from rich.table import Table as RichTable - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator -from bulk_operations.iceberg import IcebergExporter - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(message)s", - handlers=[RichHandler(console=Console(stderr=True))], -) -logger = logging.getLogger(__name__) - - -async def iceberg_export_demo(): - """Demonstrate Cassandra to Iceberg export with advanced features.""" - console = Console() - - # Header - console.print( - Panel.fit( - "[bold cyan]Apache Iceberg Export Demo[/bold cyan]\n" - "Exporting Cassandra data to modern data lakehouse format", - border_style="cyan", - ) - ) - - # Connect to Cassandra - console.print("\n[bold blue]1. Connecting to Cassandra...[/bold blue]") - cluster = AsyncCluster(["localhost"]) - session = await cluster.connect() - - try: - # Setup test data - await setup_demo_data(session, console) - - # Create bulk operator - operator = TokenAwareBulkOperator(session) - - # Configure Iceberg export - warehouse_path = Path("iceberg_warehouse") - console.print( - f"\n[bold blue]2. 
Setting up Iceberg warehouse at:[/bold blue] {warehouse_path}" - ) - - # Create Iceberg exporter - exporter = IcebergExporter( - operator=operator, - warehouse_path=warehouse_path, - compression="snappy", - row_group_size=10000, - ) - - # Example 1: Basic export - console.print("\n[bold green]Example 1: Basic Iceberg Export[/bold green]") - console.print(" โ€ข Creates Iceberg table from Cassandra schema") - console.print(" โ€ข Writes data in Parquet format") - console.print(" โ€ข Enables ACID transactions") - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TimeRemainingColumn(), - console=console, - ) as progress: - task = progress.add_task("Exporting to Iceberg...", total=100) - - def iceberg_progress(export_progress): - progress.update( - task, - completed=export_progress.progress_percentage, - description=f"Iceberg: {export_progress.rows_exported:,} rows", - ) - - result = await exporter.export( - keyspace="iceberg_demo", - table="user_events", - namespace="cassandra_export", - table_name="user_events", - progress_callback=iceberg_progress, - ) - - console.print(f"โœ“ Exported {result.rows_exported:,} rows to Iceberg") - console.print(" Table: iceberg://cassandra_export.user_events") - - # Example 2: Partitioned export - console.print("\n[bold green]Example 2: Partitioned Iceberg Table[/bold green]") - console.print(" โ€ข Partitions by day for efficient queries") - console.print(" โ€ข Hidden partitioning (no query changes needed)") - console.print(" โ€ข Automatic partition pruning") - - # Create partition spec (partition by day) - partition_spec = PartitionSpec( - PartitionField( - source_id=4, # event_time field ID - field_id=1000, - transform=DayTransform(), - name="event_day", - ) - ) - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TimeRemainingColumn(), - console=console, - ) as progress: - task = progress.add_task("Exporting with partitions...", total=100) - - def partition_progress(export_progress): - progress.update( - task, - completed=export_progress.progress_percentage, - description=f"Partitioned: {export_progress.rows_exported:,} rows", - ) - - result = await exporter.export( - keyspace="iceberg_demo", - table="user_events", - namespace="cassandra_export", - table_name="user_events_partitioned", - partition_spec=partition_spec, - progress_callback=partition_progress, - ) - - console.print("โœ“ Created partitioned Iceberg table") - console.print(" Partitioned by: event_day (daily partitions)") - - # Show Iceberg features - console.print("\n[bold cyan]Iceberg Features Enabled:[/bold cyan]") - features = RichTable(show_header=True, header_style="bold magenta") - features.add_column("Feature", style="cyan") - features.add_column("Description", style="green") - features.add_column("Example Query") - - features.add_row( - "Time Travel", - "Query data at any point in time", - "SELECT * FROM table AS OF '2025-01-01'", - ) - features.add_row( - "Schema Evolution", - "Add/drop/rename columns safely", - "ALTER TABLE table ADD COLUMN new_field STRING", - ) - features.add_row( - "Hidden Partitioning", - "Partition pruning without query changes", - "WHERE event_time > '2025-01-01' -- uses partitions", - ) - features.add_row( - "ACID Transactions", - "Atomic commits and rollbacks", - "Multiple concurrent writers supported", - ) - features.add_row( - "Incremental Processing", - "Process only new data", - "Read incrementally from snapshot N to M", - ) - - 
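- # NOTE: the "Example Query" column above shows SQL for engines that read
- # Iceberg tables (Spark, Trino, Flink, ...); these are not CQL statements
- # and will not run against Cassandra itself.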
console.print(features) - - # Explain the power of Iceberg - console.print( - Panel( - "[bold yellow]Why Apache Iceberg Matters:[/bold yellow]\n\n" - "โ€ข [cyan]Netflix Scale:[/cyan] Created by Netflix to handle petabytes\n" - "โ€ข [cyan]Open Format:[/cyan] Works with Spark, Trino, Flink, and more\n" - "โ€ข [cyan]Cloud Native:[/cyan] Designed for S3, GCS, Azure storage\n" - "โ€ข [cyan]Performance:[/cyan] Faster than traditional data lakes\n" - "โ€ข [cyan]Reliability:[/cyan] ACID guarantees prevent data corruption\n\n" - "[bold green]Your Cassandra data is now ready for:[/bold green]\n" - "โ€ข Analytics with Spark or Trino\n" - "โ€ข Machine learning pipelines\n" - "โ€ข Data warehousing with Snowflake/BigQuery\n" - "โ€ข Real-time processing with Flink", - title="[bold red]The Modern Data Lakehouse[/bold red]", - border_style="yellow", - ) - ) - - # Show next steps - console.print("\n[bold blue]Next Steps:[/bold blue]") - console.print( - "1. Query with Spark: spark.read.format('iceberg').load('cassandra_export.user_events')" - ) - console.print( - "2. Time travel: SELECT * FROM user_events FOR SYSTEM_TIME AS OF '2025-01-01'" - ) - console.print("3. Schema evolution: ALTER TABLE user_events ADD COLUMNS (score DOUBLE)") - console.print(f"4. Explore warehouse: {warehouse_path}/") - - finally: - await session.close() - await cluster.shutdown() - - -async def setup_demo_data(session, console): - """Create demo keyspace and data.""" - console.print("\n[bold blue]Setting up demo data...[/bold blue]") - - # Create keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS iceberg_demo - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - # Create table with various data types - await session.execute( - """ - CREATE TABLE IF NOT EXISTS iceberg_demo.user_events ( - user_id UUID, - event_id UUID, - event_type TEXT, - event_time TIMESTAMP, - properties MAP, - metrics MAP, - tags SET, - is_processed BOOLEAN, - score DECIMAL, - PRIMARY KEY (user_id, event_time, event_id) - ) WITH CLUSTERING ORDER BY (event_time DESC, event_id ASC) - """ - ) - - # Check if data exists - result = await session.execute("SELECT COUNT(*) FROM iceberg_demo.user_events") - count = result.one().count - - if count < 10000: - console.print(" Inserting sample events...") - insert_stmt = await session.prepare( - """ - INSERT INTO iceberg_demo.user_events - (user_id, event_id, event_type, event_time, properties, - metrics, tags, is_processed, score) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
- """ - ) - - # Insert events over the last 30 days - import uuid - from decimal import Decimal - - base_time = datetime.now() - timedelta(days=30) - event_types = ["login", "purchase", "view", "click", "share", "logout"] - - for i in range(10000): - user_id = uuid.UUID(f"00000000-0000-0000-0000-{i % 100:012d}") - event_time = base_time + timedelta(minutes=i * 5) - - await session.execute( - insert_stmt, - ( - user_id, - uuid.uuid4(), - event_types[i % len(event_types)], - event_time, - {"device": "mobile", "version": "2.0"} if i % 3 == 0 else {}, - {"duration": float(i % 300), "count": float(i % 10)}, - {f"tag{i % 5}", f"category{i % 3}"}, - i % 10 != 0, # 90% processed - Decimal(str(0.1 * (i % 100))), - ), - ) - - console.print(" โœ“ Created 10,000 events across 100 users") - - -if __name__ == "__main__": - asyncio.run(iceberg_export_demo()) diff --git a/libs/async-cassandra-bulk/examples/exports/.gitignore b/libs/async-cassandra-bulk/examples/exports/.gitignore deleted file mode 100644 index c4f1b4c..0000000 --- a/libs/async-cassandra-bulk/examples/exports/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore all exported files -* -# But keep this .gitignore file -!.gitignore diff --git a/libs/async-cassandra-bulk/examples/fix_export_consistency.py b/libs/async-cassandra-bulk/examples/fix_export_consistency.py deleted file mode 100644 index dbd3293..0000000 --- a/libs/async-cassandra-bulk/examples/fix_export_consistency.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -"""Fix the export_by_token_ranges method to handle consistency level properly.""" - -# Here's the corrected version of the export_by_token_ranges method - -corrected_code = """ - # Stream results from each range - for split in splits: - # Check if this is a wraparound range - if split.end < split.start: - # Wraparound range needs to be split into two queries - # First part: from start to MAX_TOKEN - if consistency_level is not None: - async with await self.session.execute_stream( - prepared_stmts["select_wraparound_gt"], - (split.start,), - consistency_level=consistency_level - ) as result: - async for row in result: - stats.rows_processed += 1 - yield row - else: - async with await self.session.execute_stream( - prepared_stmts["select_wraparound_gt"], - (split.start,) - ) as result: - async for row in result: - stats.rows_processed += 1 - yield row - - # Second part: from MIN_TOKEN to end - if consistency_level is not None: - async with await self.session.execute_stream( - prepared_stmts["select_wraparound_lte"], - (split.end,), - consistency_level=consistency_level - ) as result: - async for row in result: - stats.rows_processed += 1 - yield row - else: - async with await self.session.execute_stream( - prepared_stmts["select_wraparound_lte"], - (split.end,) - ) as result: - async for row in result: - stats.rows_processed += 1 - yield row - else: - # Normal range - use prepared statement - if consistency_level is not None: - async with await self.session.execute_stream( - prepared_stmts["select_range"], - (split.start, split.end), - consistency_level=consistency_level - ) as result: - async for row in result: - stats.rows_processed += 1 - yield row - else: - async with await self.session.execute_stream( - prepared_stmts["select_range"], - (split.start, split.end) - ) as result: - async for row in result: - stats.rows_processed += 1 - yield row - - stats.ranges_completed += 1 - - if progress_callback: - progress_callback(stats) - - stats.end_time = time.time() -""" - -print(corrected_code) diff --git 
a/libs/async-cassandra-bulk/examples/pyproject.toml b/libs/async-cassandra-bulk/examples/pyproject.toml deleted file mode 100644 index 39dc0a8..0000000 --- a/libs/async-cassandra-bulk/examples/pyproject.toml +++ /dev/null @@ -1,102 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "async-cassandra-bulk-operations" -version = "0.1.0" -description = "Token-aware bulk operations example for async-cassandra" -readme = "README.md" -requires-python = ">=3.12" -license = {text = "Apache-2.0"} -authors = [ - {name = "AxonOps", email = "info@axonops.com"}, -] -dependencies = [ - # For development, install async-cassandra from parent directory: - # pip install -e ../.. - # For production, use: "async-cassandra>=0.2.0", - "pyiceberg[pyarrow]>=0.8.0", - "pyarrow>=18.0.0", - "pandas>=2.0.0", - "rich>=13.0.0", # For nice progress bars - "click>=8.0.0", # For CLI -] - -[project.optional-dependencies] -dev = [ - "pytest>=8.0.0", - "pytest-asyncio>=0.24.0", - "pytest-cov>=5.0.0", - "black>=24.0.0", - "ruff>=0.8.0", - "mypy>=1.13.0", -] - -[project.scripts] -bulk-ops = "bulk_operations.cli:main" - -[tool.pytest.ini_options] -minversion = "8.0" -addopts = [ - "-ra", - "--strict-markers", - "--asyncio-mode=auto", - "--cov=bulk_operations", - "--cov-report=html", - "--cov-report=term-missing", -] -testpaths = ["tests"] -python_files = ["test_*.py"] -python_classes = ["Test*"] -python_functions = ["test_*"] -markers = [ - "unit: Unit tests that don't require Cassandra", - "integration: Integration tests that require a running Cassandra cluster", - "slow: Tests that take a long time to run", -] - -[tool.black] -line-length = 100 -target-version = ["py312"] -include = '\.pyi?$' - -[tool.isort] -profile = "black" -line_length = 100 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -use_parentheses = true -ensure_newline_before_comments = true -known_first_party = ["async_cassandra"] - -[tool.ruff] -line-length = 100 -target-version = "py312" - -[tool.ruff.lint] -select = [ - "E", # pycodestyle errors - "W", # pycodestyle warnings - "F", # pyflakes - # "I", # isort - disabled since we use isort separately - "B", # flake8-bugbear - "C90", # mccabe complexity - "UP", # pyupgrade - "SIM", # flake8-simplify -] -ignore = ["E501"] # Line too long - handled by black - -[tool.mypy] -python_version = "3.12" -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = true -disallow_incomplete_defs = true -check_untyped_defs = true -no_implicit_optional = true -warn_redundant_casts = true -warn_unused_ignores = true -warn_no_return = true -strict_equality = true diff --git a/libs/async-cassandra-bulk/examples/run_integration_tests.sh b/libs/async-cassandra-bulk/examples/run_integration_tests.sh deleted file mode 100755 index a25133f..0000000 --- a/libs/async-cassandra-bulk/examples/run_integration_tests.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash -# Integration test runner for bulk operations - -echo "๐Ÿš€ Bulk Operations Integration Test Runner" -echo "=========================================" - -# Check if docker or podman is available -if command -v podman &> /dev/null; then - CONTAINER_TOOL="podman" -elif command -v docker &> /dev/null; then - CONTAINER_TOOL="docker" -else - echo "โŒ Error: Neither docker nor podman found. Please install one." 
- exit 1 -fi - -echo "Using container tool: $CONTAINER_TOOL" - -# Function to wait for cluster to be ready -wait_for_cluster() { - echo "โณ Waiting for Cassandra cluster to be ready..." - local max_attempts=60 - local attempt=0 - - while [ $attempt -lt $max_attempts ]; do - if $CONTAINER_TOOL exec bulk-cassandra-1 nodetool status 2>/dev/null | grep -q "UN"; then - echo "โœ… Cassandra cluster is ready!" - return 0 - fi - attempt=$((attempt + 1)) - echo -n "." - sleep 5 - done - - echo "โŒ Timeout waiting for cluster to be ready" - return 1 -} - -# Function to show cluster status -show_cluster_status() { - echo "" - echo "๐Ÿ“Š Cluster Status:" - echo "==================" - $CONTAINER_TOOL exec bulk-cassandra-1 nodetool status || true - echo "" -} - -# Main execution -echo "" -echo "1๏ธโƒฃ Starting Cassandra cluster..." -$CONTAINER_TOOL-compose up -d - -if wait_for_cluster; then - show_cluster_status - - echo "2๏ธโƒฃ Running integration tests..." - echo "" - - # Run pytest with integration markers - pytest tests/test_integration.py -v -s -m integration - TEST_RESULT=$? - - echo "" - echo "3๏ธโƒฃ Cluster token information:" - echo "==============================" - echo "Sample output from nodetool describering:" - $CONTAINER_TOOL exec bulk-cassandra-1 nodetool describering bulk_test 2>/dev/null | head -20 || true - - echo "" - echo "4๏ธโƒฃ Test Summary:" - echo "================" - if [ $TEST_RESULT -eq 0 ]; then - echo "โœ… All integration tests passed!" - else - echo "โŒ Some tests failed. Please check the output above." - fi - - echo "" - read -p "Press Enter to stop the cluster, or Ctrl+C to keep it running..." - - echo "Stopping cluster..." - $CONTAINER_TOOL-compose down -else - echo "โŒ Failed to start cluster. Check container logs:" - $CONTAINER_TOOL-compose logs - $CONTAINER_TOOL-compose down - exit 1 -fi - -echo "" -echo "โœจ Done!" 
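The `wait_for_cluster` helper above polls `nodetool status` inside the container. A rough Python equivalent, useful where shelling into the container is not an option, is sketched below. It is a minimal sketch that reuses the `async_cassandra` API already used by these examples; the function name, attempt count, delay, and the `localhost:9042` contact point are assumptions, not part of the original scripts.

```python
import asyncio

from async_cassandra import AsyncCluster


async def wait_for_cluster(attempts: int = 60, delay: float = 5.0) -> bool:
    """Return True once Cassandra answers a trivial query, False on timeout."""
    for _ in range(attempts):
        try:
            # Contact point is assumed; adjust to your cluster.
            async with AsyncCluster(contact_points=["localhost"], port=9042) as cluster:
                session = await cluster.connect()
                # A trivial query proves the node is accepting CQL connections.
                await session.execute("SELECT now() FROM system.local")
                return True
        except Exception:
            # Not ready yet (still bootstrapping or the port is not open); retry.
            await asyncio.sleep(delay)
    return False


if __name__ == "__main__":
    raise SystemExit(0 if asyncio.run(wait_for_cluster()) else 1)
```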
diff --git a/libs/async-cassandra-bulk/examples/scripts/init.cql b/libs/async-cassandra-bulk/examples/scripts/init.cql deleted file mode 100644 index 70902c6..0000000 --- a/libs/async-cassandra-bulk/examples/scripts/init.cql +++ /dev/null @@ -1,72 +0,0 @@ --- Initialize keyspace and tables for bulk operations example --- This script creates test data for demonstrating token-aware bulk operations - --- Create keyspace with NetworkTopologyStrategy for production-like setup -CREATE KEYSPACE IF NOT EXISTS bulk_ops -WITH replication = { - 'class': 'NetworkTopologyStrategy', - 'datacenter1': 3 -} -AND durable_writes = true; - --- Use the keyspace -USE bulk_ops; - --- Create a large table for bulk operations testing -CREATE TABLE IF NOT EXISTS large_dataset ( - id UUID, - partition_key INT, - clustering_key INT, - data TEXT, - value DOUBLE, - created_at TIMESTAMP, - metadata MAP, - PRIMARY KEY (partition_key, clustering_key, id) -) WITH CLUSTERING ORDER BY (clustering_key ASC, id ASC) - AND compression = {'class': 'LZ4Compressor'} - AND compaction = {'class': 'SizeTieredCompactionStrategy'}; - --- Create an index for testing -CREATE INDEX IF NOT EXISTS idx_created_at ON large_dataset (created_at); - --- Create a table for export/import testing -CREATE TABLE IF NOT EXISTS orders ( - order_id UUID, - customer_id UUID, - order_date DATE, - order_time TIMESTAMP, - total_amount DECIMAL, - status TEXT, - items LIST>>, - shipping_address MAP, - PRIMARY KEY ((customer_id), order_date, order_id) -) WITH CLUSTERING ORDER BY (order_date DESC, order_id ASC) - AND compression = {'class': 'LZ4Compressor'}; - --- Create a simple counter table -CREATE TABLE IF NOT EXISTS page_views ( - page_id UUID, - date DATE, - views COUNTER, - PRIMARY KEY ((page_id), date) -) WITH CLUSTERING ORDER BY (date DESC); - --- Create a time series table -CREATE TABLE IF NOT EXISTS sensor_data ( - sensor_id UUID, - bucket TIMESTAMP, - reading_time TIMESTAMP, - temperature DOUBLE, - humidity DOUBLE, - pressure DOUBLE, - location FROZEN>, - PRIMARY KEY ((sensor_id, bucket), reading_time) -) WITH CLUSTERING ORDER BY (reading_time DESC) - AND compression = {'class': 'LZ4Compressor'} - AND default_time_to_live = 2592000; -- 30 days TTL - --- Grant permissions (if authentication is enabled) --- GRANT ALL ON KEYSPACE bulk_ops TO cassandra; - --- Display confirmation -SELECT keyspace_name, table_name FROM system_schema.tables WHERE keyspace_name = 'bulk_ops'; diff --git a/libs/async-cassandra-bulk/examples/test_simple_count.py b/libs/async-cassandra-bulk/examples/test_simple_count.py deleted file mode 100644 index 549f1ea..0000000 --- a/libs/async-cassandra-bulk/examples/test_simple_count.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 -"""Simple test to debug count issue.""" - -import asyncio - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - - -async def test_count(): - """Test count with error details.""" - async with AsyncCluster(contact_points=["localhost"]) as cluster: - session = await cluster.connect() - - operator = TokenAwareBulkOperator(session) - - try: - count = await operator.count_by_token_ranges( - keyspace="bulk_test", table="test_data", split_count=4, parallelism=2 - ) - print(f"Count successful: {count}") - except Exception as e: - print(f"Error: {e}") - if hasattr(e, "errors"): - print(f"Detailed errors: {e.errors}") - for err in e.errors: - print(f" - {err}") - - -if __name__ == "__main__": - asyncio.run(test_count()) diff --git 
a/libs/async-cassandra-bulk/examples/test_single_node.py b/libs/async-cassandra-bulk/examples/test_single_node.py deleted file mode 100644 index aa762de..0000000 --- a/libs/async-cassandra-bulk/examples/test_single_node.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python3 -"""Quick test to verify token range discovery with single node.""" - -import asyncio - -from async_cassandra import AsyncCluster -from bulk_operations.token_utils import ( - MAX_TOKEN, - MIN_TOKEN, - TOTAL_TOKEN_RANGE, - discover_token_ranges, -) - - -async def test_single_node(): - """Test token range discovery with single node.""" - print("Connecting to single-node cluster...") - - async with AsyncCluster(contact_points=["localhost"]) as cluster: - session = await cluster.connect() - - # Create test keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS test_single - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - print("Discovering token ranges...") - ranges = await discover_token_ranges(session, "test_single") - - print(f"\nToken ranges discovered: {len(ranges)}") - print("Expected with 1 node ร— 256 vnodes: 256 ranges") - - # Verify we have the expected number of ranges - assert len(ranges) == 256, f"Expected 256 ranges, got {len(ranges)}" - - # Verify ranges cover the entire ring - sorted_ranges = sorted(ranges, key=lambda r: r.start) - - # Debug first and last ranges - print(f"First range: {sorted_ranges[0].start} to {sorted_ranges[0].end}") - print(f"Last range: {sorted_ranges[-1].start} to {sorted_ranges[-1].end}") - print(f"MIN_TOKEN: {MIN_TOKEN}, MAX_TOKEN: {MAX_TOKEN}") - - # The token ring is circular, so we need to handle wraparound - # The smallest token in the sorted list might not be MIN_TOKEN - # because of how Cassandra distributes vnodes - - # Check for gaps or overlaps - gaps = [] - overlaps = [] - for i in range(len(sorted_ranges) - 1): - current = sorted_ranges[i] - next_range = sorted_ranges[i + 1] - if current.end < next_range.start: - gaps.append((current.end, next_range.start)) - elif current.end > next_range.start: - overlaps.append((current.end, next_range.start)) - - print(f"\nGaps found: {len(gaps)}") - if gaps: - for gap in gaps[:3]: - print(f" Gap: {gap[0]} to {gap[1]}") - - print(f"Overlaps found: {len(overlaps)}") - - # Check if ranges form a complete ring - # In a proper token ring, each range's end should equal the next range's start - # The last range should wrap around to the first - total_size = sum(r.size for r in ranges) - print(f"\nTotal token space covered: {total_size:,}") - print(f"Expected total space: {TOTAL_TOKEN_RANGE:,}") - - # Show sample ranges - print("\nSample token ranges (first 5):") - for i, r in enumerate(sorted_ranges[:5]): - print(f" Range {i+1}: {r.start} to {r.end} (size: {r.size:,})") - - print("\nโœ… All tests passed!") - - # Session is closed automatically by the context manager - return True - - -if __name__ == "__main__": - try: - asyncio.run(test_single_node()) - except Exception as e: - print(f"โŒ Error: {e}") - import traceback - - traceback.print_exc() - exit(1) diff --git a/libs/async-cassandra-bulk/examples/tests/__init__.py b/libs/async-cassandra-bulk/examples/tests/__init__.py deleted file mode 100644 index ce61b96..0000000 --- a/libs/async-cassandra-bulk/examples/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Test package for bulk operations.""" diff --git a/libs/async-cassandra-bulk/examples/tests/conftest.py b/libs/async-cassandra-bulk/examples/tests/conftest.py deleted 
file mode 100644 index 4445379..0000000 --- a/libs/async-cassandra-bulk/examples/tests/conftest.py +++ /dev/null @@ -1,95 +0,0 @@ -""" -Pytest configuration for bulk operations tests. - -Handles test markers and Docker/Podman support. -""" - -import os -import subprocess -from pathlib import Path - -import pytest - - -def get_container_runtime(): - """Detect whether to use docker or podman.""" - # Check environment variable first - runtime = os.environ.get("CONTAINER_RUNTIME", "").lower() - if runtime in ["docker", "podman"]: - return runtime - - # Auto-detect - for cmd in ["docker", "podman"]: - try: - subprocess.run([cmd, "--version"], capture_output=True, check=True) - return cmd - except (subprocess.CalledProcessError, FileNotFoundError): - continue - - raise RuntimeError("Neither docker nor podman found. Please install one.") - - -# Set container runtime globally -CONTAINER_RUNTIME = get_container_runtime() -os.environ["CONTAINER_RUNTIME"] = CONTAINER_RUNTIME - - -def pytest_configure(config): - """Configure pytest with custom markers.""" - config.addinivalue_line("markers", "unit: Unit tests that don't require external services") - config.addinivalue_line("markers", "integration: Integration tests requiring Cassandra cluster") - config.addinivalue_line("markers", "slow: Tests that take a long time to run") - - -def pytest_collection_modifyitems(config, items): - """Automatically skip integration tests if not explicitly requested.""" - if config.getoption("markexpr"): - # User specified markers, respect their choice - return - - # Check if Cassandra is available - cassandra_available = check_cassandra_available() - - skip_integration = pytest.mark.skip( - reason="Integration tests require running Cassandra cluster. Use -m integration to run." - ) - - for item in items: - if "integration" in item.keywords and not cassandra_available: - item.add_marker(skip_integration) - - -def check_cassandra_available(): - """Check if Cassandra cluster is available.""" - try: - # Try to connect to the first node - import socket - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.settimeout(1) - result = sock.connect_ex(("127.0.0.1", 9042)) - sock.close() - return result == 0 - except Exception: - return False - - -@pytest.fixture(scope="session") -def container_runtime(): - """Get the container runtime being used.""" - return CONTAINER_RUNTIME - - -@pytest.fixture(scope="session") -def docker_compose_file(): - """Path to docker-compose file.""" - return Path(__file__).parent.parent / "docker-compose.yml" - - -@pytest.fixture(scope="session") -def docker_compose_command(container_runtime): - """Get the appropriate docker-compose command.""" - if container_runtime == "podman": - return ["podman-compose"] - else: - return ["docker-compose"] diff --git a/libs/async-cassandra-bulk/examples/tests/integration/README.md b/libs/async-cassandra-bulk/examples/tests/integration/README.md deleted file mode 100644 index 25138a4..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/README.md +++ /dev/null @@ -1,100 +0,0 @@ -# Integration Tests for Bulk Operations - -This directory contains integration tests that validate bulk operations against a real Cassandra cluster. 
- -## Test Organization - -The integration tests are organized into logical modules: - -- **test_token_discovery.py** - Tests for token range discovery with vnodes - - Validates token range discovery matches cluster configuration - - Compares with nodetool describering output - - Ensures complete ring coverage without gaps - -- **test_bulk_count.py** - Tests for bulk count operations - - Validates full data coverage (no missing/duplicate rows) - - Tests wraparound range handling - - Performance testing with different parallelism levels - -- **test_bulk_export.py** - Tests for bulk export operations - - Validates streaming export completeness - - Tests memory efficiency for large exports - - Handles different CQL data types - -- **test_token_splitting.py** - Tests for token range splitting strategies - - Tests proportional splitting based on range sizes - - Handles small vnode ranges appropriately - - Validates replica-aware clustering - -## Running Integration Tests - -Integration tests require a running Cassandra cluster. They are skipped by default. - -### Run all integration tests: -```bash -pytest tests/integration --integration -``` - -### Run specific test module: -```bash -pytest tests/integration/test_bulk_count.py --integration -v -``` - -### Run specific test: -```bash -pytest tests/integration/test_bulk_count.py::TestBulkCount::test_full_table_coverage_with_token_ranges --integration -v -``` - -## Test Infrastructure - -### Automatic Cassandra Startup - -The tests will automatically start a single-node Cassandra container if one is not already running, using either: -- `docker-compose-single.yml` (via docker-compose or podman-compose) - -### Manual Cassandra Setup - -You can also manually start Cassandra: - -```bash -# Single node (recommended for basic tests) -podman-compose -f docker-compose-single.yml up -d - -# Multi-node cluster (for advanced tests) -podman-compose -f docker-compose.yml up -d -``` - -### Test Fixtures - -Common fixtures are defined in `conftest.py`: -- `ensure_cassandra` - Session-scoped fixture that ensures Cassandra is running -- `cluster` - Creates AsyncCluster connection -- `session` - Creates test session with keyspace - -## Test Requirements - -- Cassandra 4.0+ (or ScyllaDB) -- Docker or Podman with compose -- Python packages: pytest, pytest-asyncio, async-cassandra - -## Debugging Tips - -1. **View Cassandra logs:** - ```bash - podman logs bulk-cassandra-1 - ``` - -2. **Check token ranges manually:** - ```bash - podman exec bulk-cassandra-1 nodetool describering bulk_test - ``` - -3. **Run with verbose output:** - ```bash - pytest tests/integration --integration -v -s - ``` - -4. **Run with coverage:** - ```bash - pytest tests/integration --integration --cov=bulk_operations - ``` diff --git a/libs/async-cassandra-bulk/examples/tests/integration/__init__.py b/libs/async-cassandra-bulk/examples/tests/integration/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/libs/async-cassandra-bulk/examples/tests/integration/conftest.py b/libs/async-cassandra-bulk/examples/tests/integration/conftest.py deleted file mode 100644 index c4f43aa..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/conftest.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Shared configuration and fixtures for integration tests. 
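-Starts a single-node Cassandra via docker-compose/podman-compose when one is not -already running, and registers the --integration pytest flag used to opt in to -these tests.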
-""" - -import os -import subprocess -import time - -import pytest - - -def is_cassandra_running(): - """Check if Cassandra is accessible on localhost.""" - try: - from cassandra.cluster import Cluster - - cluster = Cluster(["localhost"]) - session = cluster.connect() - session.shutdown() - cluster.shutdown() - return True - except Exception: - return False - - -def start_cassandra_if_needed(): - """Start Cassandra using docker-compose if not already running.""" - if is_cassandra_running(): - return True - - # Try to start single-node Cassandra - compose_file = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "docker-compose-single.yml" - ) - - if not os.path.exists(compose_file): - return False - - print("\nStarting Cassandra container for integration tests...") - - # Try podman first, then docker - for cmd in ["podman-compose", "docker-compose"]: - try: - subprocess.run([cmd, "-f", compose_file, "up", "-d"], check=True, capture_output=True) - break - except (subprocess.CalledProcessError, FileNotFoundError): - continue - else: - print("Could not start Cassandra - neither podman-compose nor docker-compose found") - return False - - # Wait for Cassandra to be ready - print("Waiting for Cassandra to be ready...") - for _i in range(60): # Wait up to 60 seconds - if is_cassandra_running(): - print("Cassandra is ready!") - return True - time.sleep(1) - - print("Cassandra failed to start in time") - return False - - -@pytest.fixture(scope="session", autouse=True) -def ensure_cassandra(): - """Ensure Cassandra is running for integration tests.""" - if not start_cassandra_if_needed(): - pytest.skip("Cassandra is not available for integration tests") - - -# Skip integration tests if not explicitly requested -def pytest_collection_modifyitems(config, items): - """Skip integration tests unless --integration flag is passed.""" - if not config.getoption("--integration", default=False): - skip_integration = pytest.mark.skip( - reason="Integration tests not requested (use --integration flag)" - ) - for item in items: - if "integration" in item.keywords: - item.add_marker(skip_integration) - - -def pytest_addoption(parser): - """Add custom command line options.""" - parser.addoption( - "--integration", action="store_true", default=False, help="Run integration tests" - ) diff --git a/libs/async-cassandra-bulk/examples/tests/integration/test_bulk_count.py b/libs/async-cassandra-bulk/examples/tests/integration/test_bulk_count.py deleted file mode 100644 index 8c94b5d..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/test_bulk_count.py +++ /dev/null @@ -1,354 +0,0 @@ -""" -Integration tests for bulk count operations. - -What this tests: ---------------- -1. Full data coverage with token ranges (no missing/duplicate rows) -2. Wraparound range handling -3. Count accuracy across different data distributions -4. 
Performance with parallelism - -Why this matters: ----------------- -- Count is the simplest bulk operation - if it fails, everything fails -- Proves our token range queries are correct -- Gaps mean data loss in production -- Duplicates mean incorrect counting -- Critical for data integrity -""" - -import asyncio - -import pytest - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - - -@pytest.mark.integration -class TestBulkCount: - """Test bulk count operations against real Cassandra cluster.""" - - @pytest.fixture - async def cluster(self): - """Create connection to test cluster.""" - cluster = AsyncCluster( - contact_points=["localhost"], - port=9042, - ) - yield cluster - await cluster.shutdown() - - @pytest.fixture - async def session(self, cluster): - """Create test session with keyspace and table.""" - session = await cluster.connect() - - # Create test keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_test - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - # Create test table - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_test.test_data ( - id INT PRIMARY KEY, - data TEXT, - value DOUBLE - ) - """ - ) - - # Clear any existing data - await session.execute("TRUNCATE bulk_test.test_data") - - yield session - - @pytest.mark.asyncio - async def test_full_table_coverage_with_token_ranges(self, session): - """ - Test that token ranges cover all data without gaps or duplicates. - - What this tests: - --------------- - 1. Insert known dataset across token range - 2. Count using token ranges - 3. Verify exact match with direct count - 4. No missing or duplicate rows - - Why this matters: - ---------------- - - Proves our token range queries are correct - - Gaps mean data loss in production - - Duplicates mean incorrect counting - - Critical for data integrity - """ - # Insert test data with known count - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.test_data (id, data, value) - VALUES (?, ?, ?) - """ - ) - - expected_count = 10000 - print(f"\nInserting {expected_count} test rows...") - - # Insert in batches for efficiency - batch_size = 100 - for i in range(0, expected_count, batch_size): - tasks = [] - for j in range(batch_size): - if i + j < expected_count: - tasks.append(session.execute(insert_stmt, (i + j, f"data-{i+j}", float(i + j)))) - await asyncio.gather(*tasks) - - # Count using direct query - result = await session.execute("SELECT COUNT(*) FROM bulk_test.test_data") - direct_count = result.one().count - assert ( - direct_count == expected_count - ), f"Direct count mismatch: {direct_count} vs {expected_count}" - - # Count using token ranges - operator = TokenAwareBulkOperator(session) - token_count = await operator.count_by_token_ranges( - keyspace="bulk_test", - table="test_data", - split_count=16, # Moderate splitting - parallelism=8, - ) - - print("\nCount comparison:") - print(f" Direct count: {direct_count}") - print(f" Token range count: {token_count}") - - assert ( - token_count == direct_count - ), f"Token range count mismatch: {token_count} vs {direct_count}" - - @pytest.mark.asyncio - async def test_count_with_wraparound_ranges(self, session): - """ - Test counting specifically with wraparound ranges. - - What this tests: - --------------- - 1. Insert data that falls in wraparound range - 2. Verify wraparound range is properly split - 3. Count includes all data - 4. 
No double counting - - Why this matters: - ---------------- - - Wraparound ranges are tricky edge cases - - CQL doesn't support OR in token queries - - Must split into two queries properly - - Common source of bugs - """ - # Insert test data - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.test_data (id, data, value) - VALUES (?, ?, ?) - """ - ) - - # Insert data with IDs that we know will hash to extreme token values - test_ids = [] - for i in range(50000, 60000): # Test range that includes wraparound tokens - test_ids.append(i) - - print(f"\nInserting {len(test_ids)} test rows...") - batch_size = 100 - for i in range(0, len(test_ids), batch_size): - tasks = [] - for j in range(batch_size): - if i + j < len(test_ids): - id_val = test_ids[i + j] - tasks.append( - session.execute(insert_stmt, (id_val, f"data-{id_val}", float(id_val))) - ) - await asyncio.gather(*tasks) - - # Get direct count - result = await session.execute("SELECT COUNT(*) FROM bulk_test.test_data") - direct_count = result.one().count - - # Count using token ranges with different split counts - operator = TokenAwareBulkOperator(session) - - for split_count in [4, 8, 16, 32]: - token_count = await operator.count_by_token_ranges( - keyspace="bulk_test", - table="test_data", - split_count=split_count, - parallelism=4, - ) - - print(f"\nSplit count {split_count}: {token_count} rows") - assert ( - token_count == direct_count - ), f"Count mismatch with {split_count} splits: {token_count} vs {direct_count}" - - @pytest.mark.asyncio - async def test_parallel_count_performance(self, session): - """ - Test parallel execution improves count performance. - - What this tests: - --------------- - 1. Count performance with different parallelism levels - 2. Results are consistent across parallelism levels - 3. No deadlocks or timeouts - 4. Higher parallelism provides benefit - - Why this matters: - ---------------- - - Parallel execution is the main benefit - - Must handle concurrent queries properly - - Performance validation - - Resource efficiency - """ - # Insert more data for meaningful parallelism test - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.test_data (id, data, value) - VALUES (?, ?, ?) 
- """ - ) - - # Clear and insert fresh data - await session.execute("TRUNCATE bulk_test.test_data") - - row_count = 50000 - print(f"\nInserting {row_count} rows for parallel test...") - - batch_size = 500 - for i in range(0, row_count, batch_size): - tasks = [] - for j in range(batch_size): - if i + j < row_count: - tasks.append(session.execute(insert_stmt, (i + j, f"data-{i+j}", float(i + j)))) - await asyncio.gather(*tasks) - - operator = TokenAwareBulkOperator(session) - - # Test with different parallelism levels - import time - - results = [] - for parallelism in [1, 2, 4, 8]: - start_time = time.time() - - count = await operator.count_by_token_ranges( - keyspace="bulk_test", table="test_data", split_count=32, parallelism=parallelism - ) - - duration = time.time() - start_time - results.append( - { - "parallelism": parallelism, - "count": count, - "duration": duration, - "rows_per_sec": count / duration, - } - ) - - print(f"\nParallelism {parallelism}:") - print(f" Count: {count}") - print(f" Duration: {duration:.2f}s") - print(f" Rows/sec: {count/duration:,.0f}") - - # All counts should be identical - counts = [r["count"] for r in results] - assert len(set(counts)) == 1, f"Inconsistent counts: {counts}" - - # Higher parallelism should generally be faster - # (though not always due to overhead) - assert ( - results[-1]["duration"] < results[0]["duration"] * 1.5 - ), "Parallel execution not providing benefit" - - @pytest.mark.asyncio - async def test_count_with_progress_callback(self, session): - """ - Test progress callback during count operations. - - What this tests: - --------------- - 1. Progress callbacks are invoked correctly - 2. Stats are accurate and updated - 3. Progress percentage is calculated correctly - 4. Final stats match actual results - - Why this matters: - ---------------- - - Users need progress feedback for long operations - - Stats help with monitoring and debugging - - Progress tracking enables better UX - - Critical for production observability - """ - # Insert test data - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.test_data (id, data, value) - VALUES (?, ?, ?) 
- """ - ) - - expected_count = 5000 - for i in range(expected_count): - await session.execute(insert_stmt, (i, f"data-{i}", float(i))) - - operator = TokenAwareBulkOperator(session) - - # Track progress callbacks - progress_updates = [] - - def progress_callback(stats): - progress_updates.append( - { - "rows": stats.rows_processed, - "ranges_completed": stats.ranges_completed, - "total_ranges": stats.total_ranges, - "percentage": stats.progress_percentage, - } - ) - - # Count with progress tracking - count, stats = await operator.count_by_token_ranges_with_stats( - keyspace="bulk_test", - table="test_data", - split_count=8, - parallelism=4, - progress_callback=progress_callback, - ) - - print(f"\nProgress updates received: {len(progress_updates)}") - print(f"Final count: {count}") - print( - f"Final stats: rows={stats.rows_processed}, ranges={stats.ranges_completed}/{stats.total_ranges}" - ) - - # Verify results - assert count == expected_count, f"Count mismatch: {count} vs {expected_count}" - assert stats.rows_processed == expected_count - assert stats.ranges_completed == stats.total_ranges - assert stats.success is True - assert len(stats.errors) == 0 - assert len(progress_updates) > 0, "No progress callbacks received" - - # Verify progress increased monotonically - for i in range(1, len(progress_updates)): - assert ( - progress_updates[i]["ranges_completed"] - >= progress_updates[i - 1]["ranges_completed"] - ) diff --git a/libs/async-cassandra-bulk/examples/tests/integration/test_bulk_export.py b/libs/async-cassandra-bulk/examples/tests/integration/test_bulk_export.py deleted file mode 100644 index 35e5eef..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/test_bulk_export.py +++ /dev/null @@ -1,382 +0,0 @@ -""" -Integration tests for bulk export operations. - -What this tests: ---------------- -1. Export captures all rows exactly once -2. Streaming doesn't exhaust memory -3. Order within ranges is preserved -4. Async iteration works correctly -5. Export handles different data types - -Why this matters: ----------------- -- Export must be complete and accurate -- Memory efficiency critical for large tables -- Streaming enables TB-scale exports -- Foundation for Iceberg integration -""" - -import asyncio - -import pytest - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - - -@pytest.mark.integration -class TestBulkExport: - """Test bulk export operations against real Cassandra cluster.""" - - @pytest.fixture - async def cluster(self): - """Create connection to test cluster.""" - cluster = AsyncCluster( - contact_points=["localhost"], - port=9042, - ) - yield cluster - await cluster.shutdown() - - @pytest.fixture - async def session(self, cluster): - """Create test session with keyspace and table.""" - session = await cluster.connect() - - # Create test keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_test - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - # Create test table - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_test.test_data ( - id INT PRIMARY KEY, - data TEXT, - value DOUBLE - ) - """ - ) - - # Clear any existing data - await session.execute("TRUNCATE bulk_test.test_data") - - yield session - - @pytest.mark.asyncio - async def test_export_streaming_completeness(self, session): - """ - Test streaming export doesn't miss or duplicate data. - - What this tests: - --------------- - 1. 
Export captures all rows exactly once - 2. Streaming doesn't exhaust memory - 3. Order within ranges is preserved - 4. Async iteration works correctly - - Why this matters: - ---------------- - - Export must be complete and accurate - - Memory efficiency critical for large tables - - Streaming enables TB-scale exports - - Foundation for Iceberg integration - """ - # Use smaller dataset for export test - await session.execute("TRUNCATE bulk_test.test_data") - - # Insert test data - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.test_data (id, data, value) - VALUES (?, ?, ?) - """ - ) - - expected_ids = set(range(1000)) - for i in expected_ids: - await session.execute(insert_stmt, (i, f"data-{i}", float(i))) - - # Export using token ranges - operator = TokenAwareBulkOperator(session) - - exported_ids = set() - row_count = 0 - - async for row in operator.export_by_token_ranges( - keyspace="bulk_test", table="test_data", split_count=16 - ): - exported_ids.add(row.id) - row_count += 1 - - # Verify row data integrity - assert row.data == f"data-{row.id}" - assert row.value == float(row.id) - - print("\nExport results:") - print(f" Expected rows: {len(expected_ids)}") - print(f" Exported rows: {row_count}") - print(f" Unique IDs: {len(exported_ids)}") - - # Verify completeness - assert row_count == len( - expected_ids - ), f"Row count mismatch: {row_count} vs {len(expected_ids)}" - - assert exported_ids == expected_ids, ( - f"Missing IDs: {expected_ids - exported_ids}, " - f"Duplicate IDs: {exported_ids - expected_ids}" - ) - - @pytest.mark.asyncio - async def test_export_with_wraparound_ranges(self, session): - """ - Test export handles wraparound ranges correctly. - - What this tests: - --------------- - 1. Data in wraparound ranges is exported - 2. No duplicates from split queries - 3. All edge cases handled - 4. Consistent with count operation - - Why this matters: - ---------------- - - Wraparound ranges are common with vnodes - - Export must handle same edge cases as count - - Data integrity is critical - - Foundation for all bulk operations - """ - # Insert data that will span wraparound ranges - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.test_data (id, data, value) - VALUES (?, ?, ?) - """ - ) - - # Insert data with various IDs to ensure coverage - test_data = {} - for i in range(0, 10000, 100): # Sparse data to hit various ranges - test_data[i] = f"data-{i}" - await session.execute(insert_stmt, (i, test_data[i], float(i))) - - # Export and verify - operator = TokenAwareBulkOperator(session) - - exported_data = {} - async for row in operator.export_by_token_ranges( - keyspace="bulk_test", - table="test_data", - split_count=32, # More splits to ensure wraparound handling - ): - exported_data[row.id] = row.data - - print(f"\nExported {len(exported_data)} rows") - assert len(exported_data) == len( - test_data - ), f"Export count mismatch: {len(exported_data)} vs {len(test_data)}" - - # Verify all data was exported correctly - for id_val, expected_data in test_data.items(): - assert id_val in exported_data, f"Missing ID {id_val}" - assert ( - exported_data[id_val] == expected_data - ), f"Data mismatch for ID {id_val}: {exported_data[id_val]} vs {expected_data}" - - @pytest.mark.asyncio - async def test_export_memory_efficiency(self, session): - """ - Test export streaming is memory efficient. - - What this tests: - --------------- - 1. Large exports don't consume excessive memory - 2. Streaming works as expected - 3. 
Can handle tables larger than memory - 4. Progress tracking during export - - Why this matters: - ---------------- - - Production tables can be TB in size - - Must stream, not buffer all data - - Memory efficiency enables large exports - - Critical for operational feasibility - """ - # Insert larger dataset - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.test_data (id, data, value) - VALUES (?, ?, ?) - """ - ) - - row_count = 10000 - print(f"\nInserting {row_count} rows for memory test...") - - # Insert in batches - batch_size = 100 - for i in range(0, row_count, batch_size): - tasks = [] - for j in range(batch_size): - if i + j < row_count: - # Create larger data values to test memory - data = f"data-{i+j}" * 10 # Make data larger - tasks.append(session.execute(insert_stmt, (i + j, data, float(i + j)))) - await asyncio.gather(*tasks) - - operator = TokenAwareBulkOperator(session) - - # Track memory usage indirectly via row processing rate - rows_exported = 0 - batch_timings = [] - - import time - - start_time = time.time() - last_batch_time = start_time - - async for _row in operator.export_by_token_ranges( - keyspace="bulk_test", table="test_data", split_count=16 - ): - rows_exported += 1 - - # Track timing every 1000 rows - if rows_exported % 1000 == 0: - current_time = time.time() - batch_duration = current_time - last_batch_time - batch_timings.append(batch_duration) - last_batch_time = current_time - print(f" Exported {rows_exported} rows...") - - total_duration = time.time() - start_time - - print("\nExport completed:") - print(f" Total rows: {rows_exported}") - print(f" Total time: {total_duration:.2f}s") - print(f" Rows/sec: {rows_exported/total_duration:.0f}") - - # Verify all rows exported - assert rows_exported == row_count, f"Export count mismatch: {rows_exported} vs {row_count}" - - # Verify consistent performance (no major slowdowns from memory pressure) - if len(batch_timings) > 2: - avg_batch_time = sum(batch_timings) / len(batch_timings) - max_batch_time = max(batch_timings) - assert ( - max_batch_time < avg_batch_time * 3 - ), "Export performance degraded, possible memory issue" - - @pytest.mark.asyncio - async def test_export_with_different_data_types(self, session): - """ - Test export handles various CQL data types correctly. - - What this tests: - --------------- - 1. Different data types are exported correctly - 2. NULL values handled properly - 3. Collections exported accurately - 4. Special characters preserved - - Why this matters: - ---------------- - - Real tables have diverse data types - - Export must preserve data fidelity - - Type handling affects Iceberg mapping - - Data integrity across formats - """ - # Create table with various data types - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_test.complex_data ( - id INT PRIMARY KEY, - text_col TEXT, - int_col INT, - double_col DOUBLE, - bool_col BOOLEAN, - list_col LIST, - set_col SET, - map_col MAP - ) - """ - ) - - await session.execute("TRUNCATE bulk_test.complex_data") - - # Insert test data with various types - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.complex_data - (id, text_col, int_col, double_col, bool_col, list_col, set_col, map_col) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
- """ - ) - - test_data = [ - (1, "normal text", 100, 1.5, True, ["a", "b", "c"], {1, 2, 3}, {"x": 1, "y": 2}), - (2, "special chars: 'quotes' \"double\" \n newline", -50, -2.5, False, [], set(), {}), - (3, None, None, None, None, None, None, None), # NULL values - (4, "", 0, 0.0, True, [""], {0}, {"": 0}), # Empty/zero values - (5, "unicode: ไฝ ๅฅฝ ๐ŸŒŸ", 999999, 3.14159, False, ["ฮฑ", "ฮฒ", "ฮณ"], {-1, -2}, {"ฯ€": 314}), - ] - - for row in test_data: - await session.execute(insert_stmt, row) - - # Export and verify - operator = TokenAwareBulkOperator(session) - - exported_rows = [] - async for row in operator.export_by_token_ranges( - keyspace="bulk_test", table="complex_data", split_count=4 - ): - exported_rows.append(row) - - print(f"\nExported {len(exported_rows)} rows with complex data types") - assert len(exported_rows) == len( - test_data - ), f"Export count mismatch: {len(exported_rows)} vs {len(test_data)}" - - # Sort both by ID for comparison - exported_rows.sort(key=lambda r: r.id) - test_data.sort(key=lambda r: r[0]) - - # Verify each row's data - for exported, expected in zip(exported_rows, test_data, strict=False): - assert exported.id == expected[0] - assert exported.text_col == expected[1] - assert exported.int_col == expected[2] - assert exported.double_col == expected[3] - assert exported.bool_col == expected[4] - - # Collections need special handling - # Note: Cassandra treats empty collections as NULL - if expected[5] is not None and expected[5] != []: - assert exported.list_col is not None, f"list_col is None for row {exported.id}" - assert list(exported.list_col) == expected[5] - else: - # Empty list or None in Cassandra returns as None - assert exported.list_col is None - - if expected[6] is not None and expected[6] != set(): - assert exported.set_col is not None, f"set_col is None for row {exported.id}" - assert set(exported.set_col) == expected[6] - else: - # Empty set or None in Cassandra returns as None - assert exported.set_col is None - - if expected[7] is not None and expected[7] != {}: - assert exported.map_col is not None, f"map_col is None for row {exported.id}" - assert dict(exported.map_col) == expected[7] - else: - # Empty map or None in Cassandra returns as None - assert exported.map_col is None diff --git a/libs/async-cassandra-bulk/examples/tests/integration/test_data_integrity.py b/libs/async-cassandra-bulk/examples/tests/integration/test_data_integrity.py deleted file mode 100644 index 1e82a58..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/test_data_integrity.py +++ /dev/null @@ -1,466 +0,0 @@ -""" -Integration tests for data integrity - verifying inserted data is correctly returned. - -What this tests: ---------------- -1. Data inserted is exactly what gets exported -2. All data types are preserved correctly -3. No data corruption during token range queries -4. 
Prepared statements maintain data integrity - -Why this matters: ----------------- -- Proves end-to-end data correctness -- Validates our token range implementation -- Ensures no data loss or corruption -- Critical for production confidence -""" - -import asyncio -import uuid -from datetime import datetime -from decimal import Decimal - -import pytest - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - - -@pytest.mark.integration -class TestDataIntegrity: - """Test that data inserted equals data exported.""" - - @pytest.fixture - async def cluster(self): - """Create connection to test cluster.""" - cluster = AsyncCluster( - contact_points=["localhost"], - port=9042, - ) - yield cluster - await cluster.shutdown() - - @pytest.fixture - async def session(self, cluster): - """Create test session with keyspace and tables.""" - session = await cluster.connect() - - # Create test keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_test - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - yield session - - @pytest.mark.asyncio - async def test_simple_data_round_trip(self, session): - """ - Test that simple data inserted is exactly what we get back. - - What this tests: - --------------- - 1. Insert known dataset with various values - 2. Export using token ranges - 3. Verify every field matches exactly - 4. No missing or corrupted data - - Why this matters: - ---------------- - - Basic data integrity validation - - Ensures token range queries don't corrupt data - - Validates prepared statement parameter handling - - Foundation for trusting bulk operations - """ - # Create a simple test table - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_test.integrity_test ( - id INT PRIMARY KEY, - name TEXT, - value DOUBLE, - active BOOLEAN - ) - """ - ) - - await session.execute("TRUNCATE bulk_test.integrity_test") - - # Insert test data with prepared statement - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.integrity_test (id, name, value, active) - VALUES (?, ?, ?, ?) 
- """ - ) - - # Create test dataset with various values - test_data = [ - (1, "Alice", 100.5, True), - (2, "Bob", -50.25, False), - (3, "Charlie", 0.0, True), - (4, None, 999.999, None), # Test NULLs - (5, "", -0.001, False), # Empty string - (6, "Special chars: 'quotes' \"double\"", 3.14159, True), - (7, "Unicode: ไฝ ๅฅฝ ๐ŸŒŸ", 2.71828, False), - (8, "Very long name " * 100, 1.23456, True), # Long string - ] - - # Insert all test data - for row in test_data: - await session.execute(insert_stmt, row) - - # Export using bulk operator - operator = TokenAwareBulkOperator(session) - exported_data = [] - - async for row in operator.export_by_token_ranges( - keyspace="bulk_test", - table="integrity_test", - split_count=4, # Use multiple ranges to test splitting - ): - exported_data.append((row.id, row.name, row.value, row.active)) - - # Sort both datasets by ID for comparison - test_data_sorted = sorted(test_data, key=lambda x: x[0]) - exported_data_sorted = sorted(exported_data, key=lambda x: x[0]) - - # Verify we got all rows - assert len(exported_data_sorted) == len( - test_data_sorted - ), f"Row count mismatch: exported {len(exported_data_sorted)} vs inserted {len(test_data_sorted)}" - - # Verify each row matches exactly - for inserted, exported in zip(test_data_sorted, exported_data_sorted, strict=False): - assert ( - inserted == exported - ), f"Data mismatch for ID {inserted[0]}: inserted {inserted} vs exported {exported}" - - print(f"\nโœ“ All {len(test_data)} rows verified - data integrity maintained") - - @pytest.mark.asyncio - async def test_complex_data_types_round_trip(self, session): - """ - Test complex CQL data types maintain integrity. - - What this tests: - --------------- - 1. Collections (list, set, map) - 2. UUID types - 3. Timestamp/date types - 4. Decimal types - 5. Large text/blob data - - Why this matters: - ---------------- - - Real tables use complex types - - Collections need special handling - - Precision must be maintained - - Production data is complex - """ - # Create table with complex types - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_test.complex_integrity ( - id UUID PRIMARY KEY, - created TIMESTAMP, - amount DECIMAL, - tags SET, - metadata MAP, - events LIST, - data BLOB - ) - """ - ) - - await session.execute("TRUNCATE bulk_test.complex_integrity") - - # Insert test data - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.complex_integrity - (id, created, amount, tags, metadata, events, data) - VALUES (?, ?, ?, ?, ?, ?, ?) 
- """ - ) - - # Create test data - test_id = uuid.uuid4() - test_created = datetime.utcnow().replace(microsecond=0) # Cassandra timestamp precision - test_amount = Decimal("12345.6789") - test_tags = {"python", "cassandra", "async", "test"} - test_metadata = {"version": 1, "retries": 3, "timeout": 30} - test_events = [ - datetime(2024, 1, 1, 10, 0, 0), - datetime(2024, 1, 2, 11, 30, 0), - datetime(2024, 1, 3, 15, 45, 0), - ] - test_data = b"Binary data with \x00 null bytes and \xff high bytes" - - # Insert the data - await session.execute( - insert_stmt, - ( - test_id, - test_created, - test_amount, - test_tags, - test_metadata, - test_events, - test_data, - ), - ) - - # Export and verify - operator = TokenAwareBulkOperator(session) - exported_rows = [] - - async for row in operator.export_by_token_ranges( - keyspace="bulk_test", - table="complex_integrity", - split_count=2, - ): - exported_rows.append(row) - - # Should have exactly one row - assert len(exported_rows) == 1, f"Expected 1 row, got {len(exported_rows)}" - - row = exported_rows[0] - - # Verify each field - assert row.id == test_id, f"UUID mismatch: {row.id} vs {test_id}" - assert row.created == test_created, f"Timestamp mismatch: {row.created} vs {test_created}" - assert row.amount == test_amount, f"Decimal mismatch: {row.amount} vs {test_amount}" - assert set(row.tags) == test_tags, f"Set mismatch: {set(row.tags)} vs {test_tags}" - assert ( - dict(row.metadata) == test_metadata - ), f"Map mismatch: {dict(row.metadata)} vs {test_metadata}" - assert ( - list(row.events) == test_events - ), f"List mismatch: {list(row.events)} vs {test_events}" - assert bytes(row.data) == test_data, f"Blob mismatch: {bytes(row.data)} vs {test_data}" - - print("\nโœ“ Complex data types verified - all types preserved correctly") - - @pytest.mark.asyncio - async def test_large_dataset_integrity(self, session): # noqa: C901 - """ - Test integrity with larger dataset across many token ranges. - - What this tests: - --------------- - 1. 50K rows with computed values - 2. Verify no rows lost in token ranges - 3. Verify no duplicate rows - 4. Check computed values match - - Why this matters: - ---------------- - - Production tables are large - - Token range bugs appear at scale - - Wraparound ranges must work correctly - - Performance under load - """ - # Create table - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_test.large_integrity ( - id INT PRIMARY KEY, - computed_value DOUBLE, - hash_value TEXT - ) - """ - ) - - await session.execute("TRUNCATE bulk_test.large_integrity") - - # Insert data with computed values - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.large_integrity (id, computed_value, hash_value) - VALUES (?, ?, ?) 
- """ - ) - - # Function to compute expected values - def compute_value(id_val): - return float(id_val * 3.14159 + id_val**0.5) - - def compute_hash(id_val): - return f"hash_{id_val % 1000:03d}_{id_val}" - - # Insert 50K rows in batches - total_rows = 50000 - batch_size = 1000 - - print(f"\nInserting {total_rows} rows for large dataset test...") - - for batch_start in range(0, total_rows, batch_size): - tasks = [] - for i in range(batch_start, min(batch_start + batch_size, total_rows)): - tasks.append( - session.execute( - insert_stmt, - ( - i, - compute_value(i), - compute_hash(i), - ), - ) - ) - await asyncio.gather(*tasks) - - if (batch_start + batch_size) % 10000 == 0: - print(f" Inserted {batch_start + batch_size} rows...") - - # Export all data - operator = TokenAwareBulkOperator(session) - exported_ids = set() - value_mismatches = [] - hash_mismatches = [] - - print("\nExporting and verifying data...") - - async for row in operator.export_by_token_ranges( - keyspace="bulk_test", - table="large_integrity", - split_count=32, # Many splits to test range handling - ): - # Check for duplicates - if row.id in exported_ids: - pytest.fail(f"Duplicate ID exported: {row.id}") - exported_ids.add(row.id) - - # Verify computed values - expected_value = compute_value(row.id) - if abs(row.computed_value - expected_value) > 0.0001: # Float precision - value_mismatches.append((row.id, row.computed_value, expected_value)) - - expected_hash = compute_hash(row.id) - if row.hash_value != expected_hash: - hash_mismatches.append((row.id, row.hash_value, expected_hash)) - - # Verify completeness - assert ( - len(exported_ids) == total_rows - ), f"Missing rows: exported {len(exported_ids)} vs inserted {total_rows}" - - # Check for missing IDs - expected_ids = set(range(total_rows)) - missing_ids = expected_ids - exported_ids - if missing_ids: - pytest.fail(f"Missing IDs: {sorted(list(missing_ids))[:10]}...") # Show first 10 - - # Check for value mismatches - if value_mismatches: - pytest.fail(f"Value mismatches found: {value_mismatches[:5]}...") # Show first 5 - - if hash_mismatches: - pytest.fail(f"Hash mismatches found: {hash_mismatches[:5]}...") # Show first 5 - - print(f"\nโœ“ All {total_rows} rows verified - large dataset integrity maintained") - print(" - No missing rows") - print(" - No duplicate rows") - print(" - All computed values correct") - print(" - All hash values correct") - - @pytest.mark.asyncio - async def test_wraparound_range_data_integrity(self, session): - """ - Test data integrity specifically for wraparound token ranges. - - What this tests: - --------------- - 1. Insert data with known tokens that span wraparound - 2. Verify wraparound range handling preserves data - 3. No data lost at ring boundaries - 4. Prepared statements work correctly with wraparound - - Why this matters: - ---------------- - - Wraparound ranges are error-prone - - Must split into two queries correctly - - Data at ring boundaries is critical - - Common source of data loss bugs - """ - # Create table - await session.execute( - """ - CREATE TABLE IF NOT EXISTS bulk_test.wraparound_test ( - id INT PRIMARY KEY, - token_value BIGINT, - data TEXT - ) - """ - ) - - await session.execute("TRUNCATE bulk_test.wraparound_test") - - # First, let's find some IDs that hash to extreme token values - print("\nFinding IDs with extreme token values...") - - # Insert some data and check their tokens - insert_stmt = await session.prepare( - """ - INSERT INTO bulk_test.wraparound_test (id, token_value, data) - VALUES (?, ?, ?) 
- """ - ) - - # Try different IDs to find ones with extreme tokens - test_ids = [] - for i in range(100000, 200000): - # First insert a dummy row to query the token - await session.execute(insert_stmt, (i, 0, f"dummy_{i}")) - result = await session.execute( - f"SELECT token(id) as t FROM bulk_test.wraparound_test WHERE id = {i}" - ) - row = result.one() - if row: - token = row.t - # Remove the dummy row - await session.execute(f"DELETE FROM bulk_test.wraparound_test WHERE id = {i}") - - # Look for very high positive or very low negative tokens - if token > 9000000000000000000 or token < -9000000000000000000: - test_ids.append((i, token)) - await session.execute(insert_stmt, (i, token, f"data_{i}")) - - if len(test_ids) >= 20: - break - - print(f" Found {len(test_ids)} IDs with extreme tokens") - - # Export and verify - operator = TokenAwareBulkOperator(session) - exported_data = {} - - async for row in operator.export_by_token_ranges( - keyspace="bulk_test", - table="wraparound_test", - split_count=8, - ): - exported_data[row.id] = (row.token_value, row.data) - - # Verify all data was exported - for id_val, token_val in test_ids: - assert id_val in exported_data, f"Missing ID {id_val} with token {token_val}" - - exported_token, exported_data_val = exported_data[id_val] - assert ( - exported_token == token_val - ), f"Token mismatch for ID {id_val}: {exported_token} vs {token_val}" - assert ( - exported_data_val == f"data_{id_val}" - ), f"Data mismatch for ID {id_val}: {exported_data_val} vs data_{id_val}" - - print("\nโœ“ Wraparound range data integrity verified") - print(f" - All {len(test_ids)} extreme token rows exported correctly") - print(" - Token values preserved") - print(" - Data values preserved") diff --git a/libs/async-cassandra-bulk/examples/tests/integration/test_export_formats.py b/libs/async-cassandra-bulk/examples/tests/integration/test_export_formats.py deleted file mode 100644 index eedf0ee..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/test_export_formats.py +++ /dev/null @@ -1,449 +0,0 @@ -""" -Integration tests for export formats. - -What this tests: ---------------- -1. CSV export with real data -2. JSON export formats (JSONL and array) -3. Parquet export with schema mapping -4. Compression options -5. 
Data integrity across formats - -Why this matters: ----------------- -- Export formats are critical for data pipelines -- Each format has different use cases -- Parquet is foundation for Iceberg -- Must preserve data types correctly -""" - -import csv -import gzip -import json - -import pytest - -try: - import pyarrow.parquet as pq - - PYARROW_AVAILABLE = True -except ImportError: - PYARROW_AVAILABLE = False - -from async_cassandra import AsyncCluster -from bulk_operations.bulk_operator import TokenAwareBulkOperator - - -@pytest.mark.integration -class TestExportFormats: - """Test export to different formats.""" - - @pytest.fixture - async def cluster(self): - """Create connection to test cluster.""" - cluster = AsyncCluster( - contact_points=["localhost"], - port=9042, - ) - yield cluster - await cluster.shutdown() - - @pytest.fixture - async def session(self, cluster): - """Create test session with test data.""" - session = await cluster.connect() - - # Create test keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS export_test - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - # Create test table with various types - await session.execute( - """ - CREATE TABLE IF NOT EXISTS export_test.data_types ( - id INT PRIMARY KEY, - text_val TEXT, - int_val INT, - float_val FLOAT, - bool_val BOOLEAN, - list_val LIST, - set_val SET, - map_val MAP, - null_val TEXT - ) - """ - ) - - # Clear and insert test data - await session.execute("TRUNCATE export_test.data_types") - - insert_stmt = await session.prepare( - """ - INSERT INTO export_test.data_types - (id, text_val, int_val, float_val, bool_val, - list_val, set_val, map_val, null_val) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - """ - ) - - # Insert diverse test data - test_data = [ - (1, "test1", 100, 1.5, True, ["a", "b"], {1, 2}, {"k1": "v1"}, None), - (2, "test2", -50, -2.5, False, [], None, {}, None), - (3, "special'chars\"test", 0, 0.0, True, None, {0}, None, None), - (4, "unicode_test_ไฝ ๅฅฝ", 999, 3.14, False, ["x"], {-1}, {"k": "v"}, None), - ] - - for row in test_data: - await session.execute(insert_stmt, row) - - yield session - - @pytest.mark.asyncio - async def test_csv_export_basic(self, session, tmp_path): - """ - Test basic CSV export functionality. - - What this tests: - --------------- - 1. CSV export creates valid file - 2. All rows are exported - 3. Data types are properly serialized - 4. NULL values handled correctly - - Why this matters: - ---------------- - - CSV is most common export format - - Must work with Excel and other tools - - Data integrity is critical - """ - operator = TokenAwareBulkOperator(session) - output_path = tmp_path / "test.csv" - - # Export to CSV - result = await operator.export_to_csv( - keyspace="export_test", - table="data_types", - output_path=output_path, - ) - - # Verify file exists - assert output_path.exists() - assert result.rows_exported == 4 - - # Read and verify content - with open(output_path) as f: - reader = csv.DictReader(f) - rows = list(reader) - - assert len(rows) == 4 - - # Verify first row - row1 = rows[0] - assert row1["id"] == "1" - assert row1["text_val"] == "test1" - assert row1["int_val"] == "100" - assert row1["float_val"] == "1.5" - assert row1["bool_val"] == "true" - assert "[a, b]" in row1["list_val"] - assert row1["null_val"] == "" # Default NULL representation - - @pytest.mark.asyncio - async def test_csv_export_compressed(self, session, tmp_path): - """ - Test CSV export with compression. 
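The read-back side of this compressed-export check needs nothing beyond the standard library. A minimal sketch, assuming only the `.csv.gzip` suffix convention these tests use for the compressed output path; the exporter itself is exercised in the test body that follows.

```python
import csv
import gzip
from pathlib import Path


def read_compressed_csv(output_path: Path) -> list[dict]:
    """Read a gzip-compressed CSV export back into a list of row dicts."""
    # The tests expect the exporter to append the compression suffix,
    # e.g. test.csv -> test.csv.gzip (assumed convention from these tests).
    compressed_path = output_path.with_suffix(".csv.gzip")
    with gzip.open(compressed_path, "rt", newline="") as f:
        return list(csv.DictReader(f))


# Usage in a test:
#   rows = read_compressed_csv(tmp_path / "test.csv")
#   assert len(rows) == 4
```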
- - What this tests: - --------------- - 1. Gzip compression works - 2. File has correct extension - 3. Compressed data is valid - 4. Size reduction achieved - - Why this matters: - ---------------- - - Large exports need compression - - Network transfer efficiency - - Storage cost reduction - """ - operator = TokenAwareBulkOperator(session) - output_path = tmp_path / "test.csv" - - # Export with compression - await operator.export_to_csv( - keyspace="export_test", - table="data_types", - output_path=output_path, - compression="gzip", - ) - - # Verify compressed file - compressed_path = output_path.with_suffix(".csv.gzip") - assert compressed_path.exists() - - # Read compressed content - with gzip.open(compressed_path, "rt") as f: - reader = csv.DictReader(f) - rows = list(reader) - - assert len(rows) == 4 - - @pytest.mark.asyncio - async def test_json_export_line_delimited(self, session, tmp_path): - """ - Test JSON line-delimited export. - - What this tests: - --------------- - 1. JSONL format (one JSON per line) - 2. Each line is valid JSON - 3. Data types preserved - 4. Collections handled correctly - - Why this matters: - ---------------- - - JSONL works with streaming tools - - Each line can be processed independently - - Better for large datasets - """ - operator = TokenAwareBulkOperator(session) - output_path = tmp_path / "test.jsonl" - - # Export as JSONL - result = await operator.export_to_json( - keyspace="export_test", - table="data_types", - output_path=output_path, - format_mode="jsonl", - ) - - assert output_path.exists() - assert result.rows_exported == 4 - - # Read and verify JSONL - with open(output_path) as f: - lines = f.readlines() - - assert len(lines) == 4 - - # Parse each line - rows = [json.loads(line) for line in lines] - - # Verify data types - row1 = rows[0] - assert row1["id"] == 1 - assert row1["text_val"] == "test1" - assert row1["bool_val"] is True - assert row1["list_val"] == ["a", "b"] - assert row1["set_val"] == [1, 2] # Sets become lists in JSON - assert row1["map_val"] == {"k1": "v1"} - assert row1["null_val"] is None - - @pytest.mark.asyncio - async def test_json_export_array(self, session, tmp_path): - """ - Test JSON array export. - - What this tests: - --------------- - 1. Valid JSON array format - 2. Proper array structure - 3. Pretty printing option - 4. Complete document - - Why this matters: - ---------------- - - Some APIs expect JSON arrays - - Easier for small datasets - - Human readable with indent - """ - operator = TokenAwareBulkOperator(session) - output_path = tmp_path / "test.json" - - # Export as JSON array - await operator.export_to_json( - keyspace="export_test", - table="data_types", - output_path=output_path, - format_mode="array", - indent=2, - ) - - assert output_path.exists() - - # Read and parse JSON - with open(output_path) as f: - data = json.load(f) - - assert isinstance(data, list) - assert len(data) == 4 - - # Verify structure - assert all(isinstance(row, dict) for row in data) - - @pytest.mark.asyncio - @pytest.mark.skipif(not PYARROW_AVAILABLE, reason="PyArrow not installed") - async def test_parquet_export(self, session, tmp_path): - """ - Test Parquet export - foundation for Iceberg. - - What this tests: - --------------- - 1. Valid Parquet file created - 2. Schema correctly mapped - 3. Data types preserved - 4. 
Row groups created - - Why this matters: - ---------------- - - Parquet is THE format for Iceberg - - Columnar storage for analytics - - Schema evolution support - - Excellent compression - """ - operator = TokenAwareBulkOperator(session) - output_path = tmp_path / "test.parquet" - - # Export to Parquet - result = await operator.export_to_parquet( - keyspace="export_test", - table="data_types", - output_path=output_path, - row_group_size=2, # Small for testing - ) - - assert output_path.exists() - assert result.rows_exported == 4 - - # Read Parquet file - table = pq.read_table(output_path) - - # Verify schema - schema = table.schema - assert "id" in schema.names - assert "text_val" in schema.names - assert "bool_val" in schema.names - - # Verify data - df = table.to_pandas() - assert len(df) == 4 - - # Check data types preserved - assert df.loc[0, "id"] == 1 - assert df.loc[0, "text_val"] == "test1" - assert df.loc[0, "bool_val"] is True or df.loc[0, "bool_val"] == 1 # numpy bool comparison - - # Verify row groups - parquet_file = pq.ParquetFile(output_path) - assert parquet_file.num_row_groups == 2 # 4 rows / 2 per group - - @pytest.mark.asyncio - async def test_export_with_column_selection(self, session, tmp_path): - """ - Test exporting specific columns only. - - What this tests: - --------------- - 1. Column selection works - 2. Only selected columns exported - 3. Order preserved - 4. Works across all formats - - Why this matters: - ---------------- - - Reduce export size - - Privacy/security (exclude sensitive columns) - - Performance optimization - """ - operator = TokenAwareBulkOperator(session) - columns = ["id", "text_val", "bool_val"] - - # Test CSV - csv_path = tmp_path / "selected.csv" - await operator.export_to_csv( - keyspace="export_test", - table="data_types", - output_path=csv_path, - columns=columns, - ) - - with open(csv_path) as f: - reader = csv.DictReader(f) - row = next(reader) - assert set(row.keys()) == set(columns) - - # Test JSON - json_path = tmp_path / "selected.jsonl" - await operator.export_to_json( - keyspace="export_test", - table="data_types", - output_path=json_path, - columns=columns, - ) - - with open(json_path) as f: - row = json.loads(f.readline()) - assert set(row.keys()) == set(columns) - - @pytest.mark.asyncio - async def test_export_progress_tracking(self, session, tmp_path): - """ - Test progress tracking and resume capability. - - What this tests: - --------------- - 1. Progress callbacks invoked - 2. Progress saved to file - 3. Resume information correct - 4. 
Stats accurately tracked - - Why this matters: - ---------------- - - Long exports need monitoring - - Resume saves time on failures - - Users need feedback - """ - operator = TokenAwareBulkOperator(session) - output_path = tmp_path / "progress_test.csv" - - progress_updates = [] - - async def track_progress(progress): - progress_updates.append( - { - "rows": progress.rows_exported, - "bytes": progress.bytes_written, - "percentage": progress.progress_percentage, - } - ) - - # Export with progress tracking - result = await operator.export_to_csv( - keyspace="export_test", - table="data_types", - output_path=output_path, - progress_callback=track_progress, - ) - - # Verify progress was tracked - assert len(progress_updates) > 0 - assert result.rows_exported == 4 - assert result.bytes_written > 0 - - # Verify progress file - progress_file = output_path.with_suffix(".csv.progress") - assert progress_file.exists() - - # Load and verify progress - from bulk_operations.exporters import ExportProgress - - loaded = ExportProgress.load(progress_file) - assert loaded.rows_exported == 4 - assert loaded.is_complete diff --git a/libs/async-cassandra-bulk/examples/tests/integration/test_token_discovery.py b/libs/async-cassandra-bulk/examples/tests/integration/test_token_discovery.py deleted file mode 100644 index b99115f..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/test_token_discovery.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Integration tests for token range discovery with vnodes. - -What this tests: ---------------- -1. Token range discovery matches cluster vnodes configuration -2. Validation against nodetool describering output -3. Token distribution across nodes -4. Non-overlapping and complete token coverage - -Why this matters: ----------------- -- Vnodes create hundreds of non-contiguous ranges -- Token metadata must match cluster reality -- Incorrect discovery means data loss -- Production clusters always use vnodes -""" - -import subprocess -from collections import defaultdict - -import pytest - -from async_cassandra import AsyncCluster -from bulk_operations.token_utils import TOTAL_TOKEN_RANGE, discover_token_ranges - - -@pytest.mark.integration -class TestTokenDiscovery: - """Test token range discovery against real Cassandra cluster.""" - - @pytest.fixture - async def cluster(self): - """Create connection to test cluster.""" - # Connect to all three nodes - cluster = AsyncCluster( - contact_points=["localhost", "127.0.0.1", "127.0.0.2"], - port=9042, - ) - yield cluster - await cluster.shutdown() - - @pytest.fixture - async def session(self, cluster): - """Create test session with keyspace.""" - session = await cluster.connect() - - # Create test keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_test - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 3 - } - """ - ) - - yield session - - @pytest.mark.asyncio - async def test_token_range_discovery_with_vnodes(self, session): - """ - Test token range discovery matches cluster vnodes configuration. - - What this tests: - --------------- - 1. Number of ranges matches vnode configuration - 2. Each node owns approximately equal ranges - 3. All ranges have correct replica information - 4. 
Token ranges are non-overlapping and complete - - Why this matters: - ---------------- - - With 256 vnodes ร— 3 nodes = ~768 ranges expected - - Vnodes distribute ownership across the ring - - Incorrect discovery means data loss - - Must handle non-contiguous ownership correctly - """ - ranges = await discover_token_ranges(session, "bulk_test") - - # With 3 nodes and 256 vnodes each, expect many ranges - # Due to replication factor 3, each range has 3 replicas - assert len(ranges) > 100, f"Expected many ranges with vnodes, got {len(ranges)}" - - # Count ranges per node - ranges_per_node = defaultdict(int) - for r in ranges: - for replica in r.replicas: - ranges_per_node[replica] += 1 - - print(f"\nToken ranges discovered: {len(ranges)}") - print("Ranges per node:") - for node, count in sorted(ranges_per_node.items()): - print(f" {node}: {count} ranges") - - # Each node should own approximately the same number of ranges - counts = list(ranges_per_node.values()) - if len(counts) >= 3: - avg_count = sum(counts) / len(counts) - for count in counts: - # Allow 20% variance - assert ( - 0.8 * avg_count <= count <= 1.2 * avg_count - ), f"Uneven distribution: {ranges_per_node}" - - # Verify ranges cover the entire ring - sorted_ranges = sorted(ranges, key=lambda r: r.start) - - # With vnodes, tokens are randomly distributed, so the first range - # won't necessarily start at MIN_TOKEN. What matters is: - # 1. No gaps between consecutive ranges - # 2. The last range wraps around to the first range - # 3. Total coverage equals the token space - - # Check for gaps or overlaps between consecutive ranges - gaps = 0 - for i in range(len(sorted_ranges) - 1): - current = sorted_ranges[i] - next_range = sorted_ranges[i + 1] - - # Ranges should be contiguous - if current.end != next_range.start: - gaps += 1 - print(f"Gap found: {current.end} to {next_range.start}") - - assert gaps == 0, f"Found {gaps} gaps in token ranges" - - # Verify the last range wraps around to the first - assert sorted_ranges[-1].end == sorted_ranges[0].start, ( - f"Ring not closed: last range ends at {sorted_ranges[-1].end}, " - f"first range starts at {sorted_ranges[0].start}" - ) - - # Verify total coverage - total_size = sum(r.size for r in ranges) - # Allow for small rounding differences - assert abs(total_size - TOTAL_TOKEN_RANGE) <= len( - ranges - ), f"Total coverage {total_size} differs from expected {TOTAL_TOKEN_RANGE}" - - @pytest.mark.asyncio - async def test_compare_with_nodetool_describering(self, session): - """ - Compare discovered ranges with nodetool describering output. - - What this tests: - --------------- - 1. Our discovery matches nodetool output - 2. Token boundaries are correct - 3. Replica assignments match - 4. 
No missing or extra ranges - - Why this matters: - ---------------- - - nodetool is the source of truth - - Mismatches indicate bugs in discovery - - Critical for production reliability - - Validates driver metadata accuracy - """ - ranges = await discover_token_ranges(session, "bulk_test") - - # Get nodetool output from first node - try: - result = subprocess.run( - ["podman", "exec", "bulk-cassandra-1", "nodetool", "describering", "bulk_test"], - capture_output=True, - text=True, - check=True, - ) - nodetool_output = result.stdout - except subprocess.CalledProcessError: - # Try docker if podman fails - try: - result = subprocess.run( - ["docker", "exec", "bulk-cassandra-1", "nodetool", "describering", "bulk_test"], - capture_output=True, - text=True, - check=True, - ) - nodetool_output = result.stdout - except subprocess.CalledProcessError as e: - pytest.skip(f"Cannot run nodetool: {e}") - - print("\nNodetool describering output (first 20 lines):") - print("\n".join(nodetool_output.split("\n")[:20])) - - # Parse token count from nodetool output - token_ranges_in_output = nodetool_output.count("TokenRange") - - print("\nComparison:") - print(f" Discovered ranges: {len(ranges)}") - print(f" Nodetool ranges: {token_ranges_in_output}") - - # Should have same number of ranges (allowing small variance) - assert ( - abs(len(ranges) - token_ranges_in_output) <= 5 - ), f"Mismatch in range count: discovered {len(ranges)} vs nodetool {token_ranges_in_output}" diff --git a/libs/async-cassandra-bulk/examples/tests/integration/test_token_splitting.py b/libs/async-cassandra-bulk/examples/tests/integration/test_token_splitting.py deleted file mode 100644 index 72bc290..0000000 --- a/libs/async-cassandra-bulk/examples/tests/integration/test_token_splitting.py +++ /dev/null @@ -1,283 +0,0 @@ -""" -Integration tests for token range splitting functionality. - -What this tests: ---------------- -1. Token range splitting with different strategies -2. Proportional splitting based on range sizes -3. Handling of very small ranges (vnodes) -4. Replica-aware clustering - -Why this matters: ----------------- -- Efficient parallelism requires good splitting -- Vnodes create many small ranges that shouldn't be over-split -- Replica clustering improves coordinator efficiency -- Performance optimization foundation -""" - -import pytest - -from async_cassandra import AsyncCluster -from bulk_operations.token_utils import TokenRangeSplitter, discover_token_ranges - - -@pytest.mark.integration -class TestTokenSplitting: - """Test token range splitting strategies.""" - - @pytest.fixture - async def cluster(self): - """Create connection to test cluster.""" - cluster = AsyncCluster( - contact_points=["localhost"], - port=9042, - ) - yield cluster - await cluster.shutdown() - - @pytest.fixture - async def session(self, cluster): - """Create test session with keyspace.""" - session = await cluster.connect() - - # Create test keyspace - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_test - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 1 - } - """ - ) - - yield session - - @pytest.mark.asyncio - async def test_token_range_splitting_with_vnodes(self, session): - """ - Test that splitting handles vnode token ranges correctly. - - What this tests: - --------------- - 1. Natural ranges from vnodes are small - 2. Splitting respects range boundaries - 3. Very small ranges aren't over-split - 4. 
Large splits still cover all ranges - - Why this matters: - ---------------- - - Vnodes create many small ranges - - Over-splitting causes overhead - - Under-splitting reduces parallelism - - Must balance performance - """ - ranges = await discover_token_ranges(session, "bulk_test") - splitter = TokenRangeSplitter() - - # Test different split counts - for split_count in [10, 50, 100, 500]: - splits = splitter.split_proportionally(ranges, split_count) - - print(f"\nSplitting {len(ranges)} ranges into {split_count} splits:") - print(f" Actual splits: {len(splits)}") - - # Verify coverage - total_size = sum(r.size for r in ranges) - split_size = sum(s.size for s in splits) - - assert split_size == total_size, f"Split size mismatch: {split_size} vs {total_size}" - - # With vnodes, we might not achieve the exact split count - # because many ranges are too small to split - if split_count < len(ranges): - assert ( - len(splits) >= split_count * 0.5 - ), f"Too few splits: {len(splits)} (wanted ~{split_count})" - - @pytest.mark.asyncio - async def test_single_range_splitting(self, session): - """ - Test splitting of individual token ranges. - - What this tests: - --------------- - 1. Single range can be split evenly - 2. Last split gets remainder - 3. Small ranges aren't over-split - 4. Split boundaries are correct - - Why this matters: - ---------------- - - Foundation of proportional splitting - - Must handle edge cases correctly - - Affects query generation - - Performance depends on even distribution - """ - ranges = await discover_token_ranges(session, "bulk_test") - splitter = TokenRangeSplitter() - - # Find a reasonably large range to test - sorted_ranges = sorted(ranges, key=lambda r: r.size, reverse=True) - large_range = sorted_ranges[0] - - print("\nTesting single range splitting:") - print(f" Range size: {large_range.size}") - print(f" Range: {large_range.start} to {large_range.end}") - - # Test different split counts - for split_count in [1, 2, 5, 10]: - splits = splitter.split_single_range(large_range, split_count) - - print(f"\n Splitting into {split_count}:") - print(f" Actual splits: {len(splits)}") - - # Verify coverage - assert sum(s.size for s in splits) == large_range.size - - # Verify contiguous - for i in range(len(splits) - 1): - assert splits[i].end == splits[i + 1].start - - # Verify boundaries - assert splits[0].start == large_range.start - assert splits[-1].end == large_range.end - - # Verify replicas preserved - for s in splits: - assert s.replicas == large_range.replicas - - @pytest.mark.asyncio - async def test_replica_clustering(self, session): - """ - Test clustering ranges by replica sets. - - What this tests: - --------------- - 1. Ranges are correctly grouped by replicas - 2. All ranges are included in clusters - 3. No ranges are duplicated - 4. 
Replica sets are handled consistently - - Why this matters: - ---------------- - - Coordinator efficiency depends on replica locality - - Reduces network hops in multi-DC setups - - Improves cache utilization - - Foundation for topology-aware operations - """ - # For this test, use multi-node replication - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS bulk_test_replicated - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 3 - } - """ - ) - - ranges = await discover_token_ranges(session, "bulk_test_replicated") - splitter = TokenRangeSplitter() - - clusters = splitter.cluster_by_replicas(ranges) - - print("\nReplica clustering results:") - print(f" Total ranges: {len(ranges)}") - print(f" Replica clusters: {len(clusters)}") - - total_clustered = sum(len(ranges_list) for ranges_list in clusters.values()) - print(f" Total ranges in clusters: {total_clustered}") - - # Verify all ranges are clustered - assert total_clustered == len( - ranges - ), f"Not all ranges clustered: {total_clustered} vs {len(ranges)}" - - # Verify no duplicates - seen_ranges = set() - for _replica_set, range_list in clusters.items(): - for r in range_list: - range_key = (r.start, r.end) - assert range_key not in seen_ranges, f"Duplicate range: {range_key}" - seen_ranges.add(range_key) - - # Print cluster distribution - for replica_set, range_list in sorted(clusters.items()): - print(f" Replicas {replica_set}: {len(range_list)} ranges") - - @pytest.mark.asyncio - async def test_proportional_splitting_accuracy(self, session): - """ - Test that proportional splitting maintains relative sizes. - - What this tests: - --------------- - 1. Large ranges get more splits than small ones - 2. Total coverage is preserved - 3. Split distribution matches range distribution - 4. 
No ranges are lost or duplicated - - Why this matters: - ---------------- - - Even work distribution across ranges - - Prevents hotspots from uneven splitting - - Optimizes parallel execution - - Critical for performance - """ - ranges = await discover_token_ranges(session, "bulk_test") - splitter = TokenRangeSplitter() - - # Calculate range size distribution - total_size = sum(r.size for r in ranges) - range_fractions = [(r, r.size / total_size) for r in ranges] - - # Sort by size for analysis - range_fractions.sort(key=lambda x: x[1], reverse=True) - - print("\nRange size distribution:") - print(f" Largest range: {range_fractions[0][1]:.2%} of total") - print(f" Smallest range: {range_fractions[-1][1]:.2%} of total") - print(f" Median range: {range_fractions[len(range_fractions)//2][1]:.2%} of total") - - # Test proportional splitting - target_splits = 100 - splits = splitter.split_proportionally(ranges, target_splits) - - # Analyze split distribution - splits_per_range = {} - for split in splits: - # Find which original range this split came from - for orig_range in ranges: - if (split.start >= orig_range.start and split.end <= orig_range.end) or ( - orig_range.start == split.start and orig_range.end == split.end - ): - key = (orig_range.start, orig_range.end) - splits_per_range[key] = splits_per_range.get(key, 0) + 1 - break - - # Verify proportionality - print("\nProportional splitting results:") - print(f" Target splits: {target_splits}") - print(f" Actual splits: {len(splits)}") - print(f" Ranges that got splits: {len(splits_per_range)}") - - # Large ranges should get more splits - large_range = range_fractions[0][0] - large_range_key = (large_range.start, large_range.end) - large_range_splits = splits_per_range.get(large_range_key, 0) - - small_range = range_fractions[-1][0] - small_range_key = (small_range.start, small_range.end) - small_range_splits = splits_per_range.get(small_range_key, 0) - - print(f" Largest range got {large_range_splits} splits") - print(f" Smallest range got {small_range_splits} splits") - - # Large ranges should generally get more splits - # (unless they're still too small to split effectively) - if large_range.size > small_range.size * 10: - assert ( - large_range_splits >= small_range_splits - ), "Large range should get at least as many splits as small range" diff --git a/libs/async-cassandra-bulk/examples/tests/unit/__init__.py b/libs/async-cassandra-bulk/examples/tests/unit/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/libs/async-cassandra-bulk/examples/tests/unit/test_bulk_operator.py b/libs/async-cassandra-bulk/examples/tests/unit/test_bulk_operator.py deleted file mode 100644 index af03562..0000000 --- a/libs/async-cassandra-bulk/examples/tests/unit/test_bulk_operator.py +++ /dev/null @@ -1,381 +0,0 @@ -""" -Unit tests for TokenAwareBulkOperator. - -What this tests: ---------------- -1. Parallel execution of token range queries -2. Result aggregation and streaming -3. Progress tracking -4. Error handling and recovery - -Why this matters: ----------------- -- Ensures correct parallel processing -- Validates data completeness -- Confirms non-blocking async behavior -- Handles failures gracefully - -Additional context: ---------------------------------- -These tests mock the async-cassandra library to test -our bulk operation logic in isolation. 
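The isolation strategy described above comes down to two stubs: patch `discover_token_ranges` to return hand-built `TokenRange` objects, and give `session.execute` canned count results so only the operator's aggregation logic runs. A condensed sketch of that wiring, assuming the `mock_session` fixture defined in the test class below and a pytest-asyncio setup to drive the coroutine:

```python
from unittest.mock import AsyncMock, Mock, patch

import pytest

from bulk_operations.bulk_operator import TokenAwareBulkOperator
from bulk_operations.token_utils import TokenRange


@pytest.mark.asyncio
async def test_count_aggregates_mocked_ranges(mock_session):
    operator = TokenAwareBulkOperator(mock_session)

    with patch(
        "bulk_operations.bulk_operator.discover_token_ranges", new_callable=AsyncMock
    ) as mock_discover:
        # Two fake ranges owned by a single node.
        mock_discover.return_value = [
            TokenRange(start=-1000, end=0, replicas=["127.0.0.1"]),
            TokenRange(start=0, end=1000, replicas=["127.0.0.1"]),
        ]
        # One canned COUNT result per range.
        mock_session.execute.side_effect = [
            Mock(one=Mock(return_value=Mock(count=500))),
            Mock(one=Mock(return_value=Mock(count=300))),
        ]

        total = await operator.count_by_token_ranges(
            keyspace="test_ks", table="test_table", split_count=2
        )

    # The operator should simply sum the per-range counts.
    assert total == 800
```

The tests below apply this same wiring to parallel execution, partial failures, streaming export, and progress statistics.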
-""" - -import asyncio -from unittest.mock import AsyncMock, Mock, patch - -import pytest - -from bulk_operations.bulk_operator import ( - BulkOperationError, - BulkOperationStats, - TokenAwareBulkOperator, -) - - -class TestTokenAwareBulkOperator: - """Test the main bulk operator class.""" - - @pytest.fixture - def mock_cluster(self): - """Create a mock AsyncCluster.""" - cluster = Mock() - cluster.contact_points = ["127.0.0.1", "127.0.0.2", "127.0.0.3"] - return cluster - - @pytest.fixture - def mock_session(self, mock_cluster): - """Create a mock AsyncSession.""" - session = Mock() - # Mock the underlying sync session that has cluster attribute - session._session = Mock() - session._session.cluster = mock_cluster - session.execute = AsyncMock() - session.execute_stream = AsyncMock() - session.prepare = AsyncMock(return_value=Mock()) # Mock prepare method - - # Mock metadata structure - metadata = Mock() - - # Create proper column mock - partition_key_col = Mock() - partition_key_col.name = "id" # Set the name attribute properly - - keyspaces = { - "test_ks": Mock(tables={"test_table": Mock(partition_key=[partition_key_col])}) - } - metadata.keyspaces = keyspaces - mock_cluster.metadata = metadata - - return session - - @pytest.mark.unit - async def test_count_by_token_ranges_single_node(self, mock_session): - """ - Test counting rows with token ranges on single node. - - What this tests: - --------------- - 1. Token range discovery is called correctly - 2. Queries are generated for each token range - 3. Results are aggregated properly - 4. Single node operation works correctly - - Why this matters: - ---------------- - - Ensures basic counting functionality works - - Validates token range splitting logic - - Confirms proper result aggregation - - Foundation for more complex multi-node operations - """ - operator = TokenAwareBulkOperator(mock_session) - - # Mock token range discovery - with patch( - "bulk_operations.bulk_operator.discover_token_ranges", new_callable=AsyncMock - ) as mock_discover: - # Create proper TokenRange mocks - from bulk_operations.token_utils import TokenRange - - mock_ranges = [ - TokenRange(start=-1000, end=0, replicas=["127.0.0.1"]), - TokenRange(start=0, end=1000, replicas=["127.0.0.1"]), - ] - mock_discover.return_value = mock_ranges - - # Mock query results - mock_session.execute.side_effect = [ - Mock(one=Mock(return_value=Mock(count=500))), # First range - Mock(one=Mock(return_value=Mock(count=300))), # Second range - ] - - # Execute count - result = await operator.count_by_token_ranges( - keyspace="test_ks", table="test_table", split_count=2 - ) - - assert result == 800 - assert mock_session.execute.call_count == 2 - - @pytest.mark.unit - async def test_count_with_parallel_execution(self, mock_session): - """ - Test that counts are executed in parallel. - - What this tests: - --------------- - 1. Multiple token ranges are processed concurrently - 2. Parallelism limits are respected - 3. Total execution time reflects parallel processing - 4. 
Results are correctly aggregated from parallel tasks - - Why this matters: - ---------------- - - Parallel execution is critical for performance - - Must not block the event loop - - Resource limits must be respected - - Common pattern in production bulk operations - """ - operator = TokenAwareBulkOperator(mock_session) - - # Track execution times - execution_times = [] - - async def mock_execute_with_delay(stmt, params=None): - start = asyncio.get_event_loop().time() - await asyncio.sleep(0.1) # Simulate query time - execution_times.append(asyncio.get_event_loop().time() - start) - return Mock(one=Mock(return_value=Mock(count=100))) - - mock_session.execute = mock_execute_with_delay - - with patch( - "bulk_operations.bulk_operator.discover_token_ranges", new_callable=AsyncMock - ) as mock_discover: - # Create 4 ranges - from bulk_operations.token_utils import TokenRange - - mock_ranges = [ - TokenRange(start=i * 1000, end=(i + 1) * 1000, replicas=["node1"]) for i in range(4) - ] - mock_discover.return_value = mock_ranges - - # Execute count - start_time = asyncio.get_event_loop().time() - result = await operator.count_by_token_ranges( - keyspace="test_ks", table="test_table", split_count=4, parallelism=4 - ) - total_time = asyncio.get_event_loop().time() - start_time - - assert result == 400 # 4 ranges * 100 each - # If executed in parallel, total time should be ~0.1s, not 0.4s - assert total_time < 0.2 - - @pytest.mark.unit - async def test_count_with_error_handling(self, mock_session): - """ - Test error handling during count operations. - - What this tests: - --------------- - 1. Partial failures are handled gracefully - 2. BulkOperationError is raised with partial results - 3. Individual errors are collected and reported - 4. Operation continues despite individual failures - - Why this matters: - ---------------- - - Network issues can cause partial failures - - Users need visibility into what succeeded - - Partial results are often useful - - Critical for production reliability - """ - operator = TokenAwareBulkOperator(mock_session) - - with patch( - "bulk_operations.bulk_operator.discover_token_ranges", new_callable=AsyncMock - ) as mock_discover: - from bulk_operations.token_utils import TokenRange - - mock_ranges = [ - TokenRange(start=0, end=1000, replicas=["node1"]), - TokenRange(start=1000, end=2000, replicas=["node2"]), - ] - mock_discover.return_value = mock_ranges - - # First succeeds, second fails - mock_session.execute.side_effect = [ - Mock(one=Mock(return_value=Mock(count=500))), - Exception("Connection timeout"), - ] - - # Should raise BulkOperationError - with pytest.raises(BulkOperationError) as exc_info: - await operator.count_by_token_ranges( - keyspace="test_ks", table="test_table", split_count=2 - ) - - assert "Failed to count" in str(exc_info.value) - assert exc_info.value.partial_result == 500 - - @pytest.mark.unit - async def test_export_streaming(self, mock_session): - """ - Test streaming export functionality. - - What this tests: - --------------- - 1. Token ranges are discovered for export - 2. Results are streamed asynchronously - 3. Memory usage remains constant (streaming) - 4. 
All rows are yielded in order - - Why this matters: - ---------------- - - Streaming prevents memory exhaustion - - Essential for large dataset exports - - Async iteration must work correctly - - Foundation for Iceberg export functionality - """ - operator = TokenAwareBulkOperator(mock_session) - - # Mock token range discovery - with patch( - "bulk_operations.bulk_operator.discover_token_ranges", new_callable=AsyncMock - ) as mock_discover: - from bulk_operations.token_utils import TokenRange - - mock_ranges = [TokenRange(start=0, end=1000, replicas=["node1"])] - mock_discover.return_value = mock_ranges - - # Mock streaming results - async def mock_stream_results(): - for i in range(10): - row = Mock() - row.id = i - row.name = f"row_{i}" - yield row - - mock_stream_context = AsyncMock() - mock_stream_context.__aenter__.return_value = mock_stream_results() - mock_stream_context.__aexit__.return_value = None - - mock_session.execute_stream.return_value = mock_stream_context - - # Collect exported rows - exported_rows = [] - async for row in operator.export_by_token_ranges( - keyspace="test_ks", table="test_table", split_count=1 - ): - exported_rows.append(row) - - assert len(exported_rows) == 10 - assert exported_rows[0].id == 0 - assert exported_rows[9].name == "row_9" - - @pytest.mark.unit - async def test_progress_callback(self, mock_session): - """ - Test progress callback functionality. - - What this tests: - --------------- - 1. Progress callbacks are invoked during operation - 2. Statistics are updated correctly - 3. Progress percentage is calculated accurately - 4. Final statistics reflect complete operation - - Why this matters: - ---------------- - - Users need visibility into long-running operations - - Progress tracking enables better UX - - Statistics help with performance tuning - - Critical for production monitoring - """ - operator = TokenAwareBulkOperator(mock_session) - progress_updates = [] - - def progress_callback(stats: BulkOperationStats): - progress_updates.append( - { - "rows": stats.rows_processed, - "ranges": stats.ranges_completed, - "progress": stats.progress_percentage, - } - ) - - # Mock setup - with patch( - "bulk_operations.bulk_operator.discover_token_ranges", new_callable=AsyncMock - ) as mock_discover: - from bulk_operations.token_utils import TokenRange - - mock_ranges = [ - TokenRange(start=0, end=1000, replicas=["node1"]), - TokenRange(start=1000, end=2000, replicas=["node2"]), - ] - mock_discover.return_value = mock_ranges - - mock_session.execute.side_effect = [ - Mock(one=Mock(return_value=Mock(count=500))), - Mock(one=Mock(return_value=Mock(count=300))), - ] - - # Execute with progress callback - await operator.count_by_token_ranges( - keyspace="test_ks", - table="test_table", - split_count=2, - progress_callback=progress_callback, - ) - - assert len(progress_updates) >= 2 - # Check final progress - final_update = progress_updates[-1] - assert final_update["ranges"] == 2 - assert final_update["progress"] == 100.0 - - @pytest.mark.unit - async def test_operation_stats(self, mock_session): - """ - Test operation statistics collection. - - What this tests: - --------------- - 1. Statistics are collected during operations - 2. Duration is calculated correctly - 3. Rows per second metric is accurate - 4. 
All statistics fields are populated - - Why this matters: - ---------------- - - Performance metrics guide optimization - - Statistics enable capacity planning - - Benchmarking requires accurate metrics - - Production monitoring depends on these stats - """ - operator = TokenAwareBulkOperator(mock_session) - - with patch( - "bulk_operations.bulk_operator.discover_token_ranges", new_callable=AsyncMock - ) as mock_discover: - from bulk_operations.token_utils import TokenRange - - mock_ranges = [TokenRange(start=0, end=1000, replicas=["node1"])] - mock_discover.return_value = mock_ranges - - # Mock returns the same value for all calls (it's a single range) - mock_count_result = Mock() - mock_count_result.one.return_value = Mock(count=1000) - mock_session.execute.return_value = mock_count_result - - # Get stats after operation - count, stats = await operator.count_by_token_ranges_with_stats( - keyspace="test_ks", table="test_table", split_count=1 - ) - - assert count == 1000 - assert stats.rows_processed == 1000 - assert stats.ranges_completed == 1 - assert stats.duration_seconds > 0 - assert stats.rows_per_second > 0 diff --git a/libs/async-cassandra-bulk/examples/tests/unit/test_csv_exporter.py b/libs/async-cassandra-bulk/examples/tests/unit/test_csv_exporter.py deleted file mode 100644 index 9f17fff..0000000 --- a/libs/async-cassandra-bulk/examples/tests/unit/test_csv_exporter.py +++ /dev/null @@ -1,365 +0,0 @@ -"""Unit tests for CSV exporter. - -What this tests: ---------------- -1. CSV header generation -2. Row serialization with different data types -3. NULL value handling -4. Collection serialization -5. Compression support -6. Progress tracking - -Why this matters: ----------------- -- CSV is a common export format -- Data type handling must be consistent -- Resume capability is critical for large exports -- Compression saves disk space -""" - -import csv -import gzip -import io -import uuid -from datetime import datetime -from unittest.mock import Mock - -import pytest - -from bulk_operations.bulk_operator import TokenAwareBulkOperator -from bulk_operations.exporters import CSVExporter, ExportFormat, ExportProgress - - -class MockRow: - """Mock Cassandra row object.""" - - def __init__(self, **kwargs): - self._fields = list(kwargs.keys()) - for key, value in kwargs.items(): - setattr(self, key, value) - - -class TestCSVExporter: - """Test CSV export functionality.""" - - @pytest.fixture - def mock_operator(self): - """Create mock bulk operator.""" - operator = Mock(spec=TokenAwareBulkOperator) - operator.session = Mock() - operator.session._session = Mock() - operator.session._session.cluster = Mock() - operator.session._session.cluster.metadata = Mock() - return operator - - @pytest.fixture - def exporter(self, mock_operator): - """Create CSV exporter instance.""" - return CSVExporter(mock_operator) - - def test_csv_value_serialization(self, exporter): - """ - Test serialization of different value types to CSV. - - What this tests: - --------------- - 1. NULL values become empty strings - 2. Booleans become true/false - 3. Collections get formatted properly - 4. Bytes are hex encoded - 5. 
Timestamps use ISO format - - Why this matters: - ---------------- - - CSV needs consistent string representation - - Must be reversible for imports - - Standard tools should understand the format - """ - # NULL handling - assert exporter._serialize_csv_value(None) == "" - - # Primitives - assert exporter._serialize_csv_value(True) == "true" - assert exporter._serialize_csv_value(False) == "false" - assert exporter._serialize_csv_value(42) == "42" - assert exporter._serialize_csv_value(3.14) == "3.14" - assert exporter._serialize_csv_value("test") == "test" - - # UUID - test_uuid = uuid.uuid4() - assert exporter._serialize_csv_value(test_uuid) == str(test_uuid) - - # Datetime - test_dt = datetime(2024, 1, 1, 12, 0, 0) - assert exporter._serialize_csv_value(test_dt) == "2024-01-01T12:00:00" - - # Collections - assert exporter._serialize_csv_value([1, 2, 3]) == "[1, 2, 3]" - assert exporter._serialize_csv_value({"a", "b"}) == "[a, b]" or "[b, a]" - assert exporter._serialize_csv_value({"k1": "v1", "k2": "v2"}) in [ - "{k1: v1, k2: v2}", - "{k2: v2, k1: v1}", - ] - - # Bytes - assert exporter._serialize_csv_value(b"\x00\x01\x02") == "000102" - - def test_null_string_customization(self, mock_operator): - """ - Test custom NULL string representation. - - What this tests: - --------------- - 1. Default empty string for NULL - 2. Custom NULL strings like "NULL" or "\\N" - 3. Consistent handling across all types - - Why this matters: - ---------------- - - Different tools expect different NULL representations - - PostgreSQL uses \\N, MySQL uses NULL - - Must be configurable for compatibility - """ - # Default exporter uses empty string - default_exporter = CSVExporter(mock_operator) - assert default_exporter._serialize_csv_value(None) == "" - - # Custom NULL string - custom_exporter = CSVExporter(mock_operator, null_string="NULL") - assert custom_exporter._serialize_csv_value(None) == "NULL" - - # PostgreSQL style - pg_exporter = CSVExporter(mock_operator, null_string="\\N") - assert pg_exporter._serialize_csv_value(None) == "\\N" - - @pytest.mark.asyncio - async def test_write_header(self, exporter): - """ - Test CSV header writing. - - What this tests: - --------------- - 1. Header contains column names - 2. Proper delimiter usage - 3. Quoting when needed - - Why this matters: - ---------------- - - Headers enable column mapping - - Must match data row format - - Standard CSV compliance - """ - output = io.StringIO() - columns = ["id", "name", "created_at", "tags"] - - await exporter.write_header(output, columns) - output.seek(0) - - reader = csv.reader(output) - header = next(reader) - assert header == columns - - @pytest.mark.asyncio - async def test_write_row(self, exporter): - """ - Test writing data rows to CSV. - - What this tests: - --------------- - 1. Row data properly formatted - 2. Complex types serialized - 3. Byte count tracking - 4. 
Thread safety with lock - - Why this matters: - ---------------- - - Data integrity is critical - - Concurrent writes must be safe - - Progress tracking needs accurate bytes - """ - output = io.StringIO() - - # Create test row - row = MockRow( - id=1, - name="Test User", - active=True, - score=99.5, - tags=["tag1", "tag2"], - metadata={"key": "value"}, - created_at=datetime(2024, 1, 1, 12, 0, 0), - ) - - bytes_written = await exporter.write_row(output, row) - output.seek(0) - - # Verify output - reader = csv.reader(output) - values = next(reader) - - assert values[0] == "1" - assert values[1] == "Test User" - assert values[2] == "true" - assert values[3] == "99.5" - assert values[4] == "[tag1, tag2]" - assert values[5] == "{key: value}" - assert values[6] == "2024-01-01T12:00:00" - - # Verify byte count - assert bytes_written > 0 - - @pytest.mark.asyncio - async def test_export_with_compression(self, mock_operator, tmp_path): - """ - Test CSV export with compression. - - What this tests: - --------------- - 1. Gzip compression works - 2. File has correct extension - 3. Compressed data is valid - - Why this matters: - ---------------- - - Large exports need compression - - Must work with standard tools - - File naming conventions matter - """ - exporter = CSVExporter(mock_operator, compression="gzip") - output_path = tmp_path / "test.csv" - - # Mock the export stream - test_rows = [ - MockRow(id=1, name="Alice", score=95.5), - MockRow(id=2, name="Bob", score=87.3), - ] - - async def mock_export(*args, **kwargs): - for row in test_rows: - yield row - - mock_operator.export_by_token_ranges = mock_export - - # Mock metadata - mock_keyspace = Mock() - mock_table = Mock() - mock_table.columns = {"id": None, "name": None, "score": None} - mock_keyspace.tables = {"test_table": mock_table} - mock_operator.session._session.cluster.metadata.keyspaces = {"test_ks": mock_keyspace} - - # Export - await exporter.export( - keyspace="test_ks", - table="test_table", - output_path=output_path, - ) - - # Verify compressed file exists - compressed_path = output_path.with_suffix(".csv.gzip") - assert compressed_path.exists() - - # Verify content - with gzip.open(compressed_path, "rt") as f: - reader = csv.reader(f) - header = next(reader) - assert header == ["id", "name", "score"] - - row1 = next(reader) - assert row1 == ["1", "Alice", "95.5"] - - row2 = next(reader) - assert row2 == ["2", "Bob", "87.3"] - - @pytest.mark.asyncio - async def test_export_progress_tracking(self, mock_operator, tmp_path): - """ - Test progress tracking during export. - - What this tests: - --------------- - 1. Progress initialized correctly - 2. Row count tracked - 3. Progress saved to file - 4. 
Completion marked - - Why this matters: - ---------------- - - Long exports need monitoring - - Resume capability requires state - - Users need feedback - """ - exporter = CSVExporter(mock_operator) - output_path = tmp_path / "test.csv" - - # Mock export - test_rows = [MockRow(id=i, value=f"test{i}") for i in range(100)] - - async def mock_export(*args, **kwargs): - for row in test_rows: - yield row - - mock_operator.export_by_token_ranges = mock_export - - # Mock metadata - mock_keyspace = Mock() - mock_table = Mock() - mock_table.columns = {"id": None, "value": None} - mock_keyspace.tables = {"test_table": mock_table} - mock_operator.session._session.cluster.metadata.keyspaces = {"test_ks": mock_keyspace} - - # Track progress callbacks - progress_updates = [] - - async def progress_callback(progress): - progress_updates.append(progress.rows_exported) - - # Export - progress = await exporter.export( - keyspace="test_ks", - table="test_table", - output_path=output_path, - progress_callback=progress_callback, - ) - - # Verify progress - assert progress.keyspace == "test_ks" - assert progress.table == "test_table" - assert progress.format == ExportFormat.CSV - assert progress.rows_exported == 100 - assert progress.completed_at is not None - - # Verify progress file - progress_file = output_path.with_suffix(".csv.progress") - assert progress_file.exists() - - # Load and verify - loaded_progress = ExportProgress.load(progress_file) - assert loaded_progress.rows_exported == 100 - - def test_custom_delimiter_and_quoting(self, mock_operator): - """ - Test custom CSV formatting options. - - What this tests: - --------------- - 1. Tab delimiter - 2. Pipe delimiter - 3. Different quoting styles - - Why this matters: - ---------------- - - Different systems expect different formats - - Must handle data with delimiters - - Flexibility for integration - """ - # Tab-delimited - tab_exporter = CSVExporter(mock_operator, delimiter="\t") - assert tab_exporter.delimiter == "\t" - - # Pipe-delimited - pipe_exporter = CSVExporter(mock_operator, delimiter="|") - assert pipe_exporter.delimiter == "|" - - # Quote all - quote_all_exporter = CSVExporter(mock_operator, quoting=csv.QUOTE_ALL) - assert quote_all_exporter.quoting == csv.QUOTE_ALL diff --git a/libs/async-cassandra-bulk/examples/tests/unit/test_helpers.py b/libs/async-cassandra-bulk/examples/tests/unit/test_helpers.py deleted file mode 100644 index 8f06738..0000000 --- a/libs/async-cassandra-bulk/examples/tests/unit/test_helpers.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Helper utilities for unit tests. -""" - - -class MockToken: - """Mock token that supports comparison for sorting.""" - - def __init__(self, value): - self.value = value - - def __lt__(self, other): - return self.value < other.value - - def __eq__(self, other): - return self.value == other.value - - def __repr__(self): - return f"MockToken({self.value})" diff --git a/libs/async-cassandra-bulk/examples/tests/unit/test_iceberg_catalog.py b/libs/async-cassandra-bulk/examples/tests/unit/test_iceberg_catalog.py deleted file mode 100644 index c19a2cf..0000000 --- a/libs/async-cassandra-bulk/examples/tests/unit/test_iceberg_catalog.py +++ /dev/null @@ -1,241 +0,0 @@ -"""Unit tests for Iceberg catalog configuration. - -What this tests: ---------------- -1. Filesystem catalog creation -2. Warehouse directory setup -3. Custom catalog configuration -4. 
Catalog loading - -Why this matters: ----------------- -- Catalog is the entry point to Iceberg -- Proper configuration is critical -- Warehouse location affects data storage -- Supports multiple catalog types -""" - -import tempfile -import unittest -from pathlib import Path -from unittest.mock import Mock, patch - -from pyiceberg.catalog import Catalog - -from bulk_operations.iceberg.catalog import create_filesystem_catalog, get_or_create_catalog - - -class TestIcebergCatalog(unittest.TestCase): - """Test Iceberg catalog configuration.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.warehouse_path = Path(self.temp_dir) / "test_warehouse" - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir, ignore_errors=True) - - def test_create_filesystem_catalog_default_path(self): - """ - Test creating filesystem catalog with default path. - - What this tests: - --------------- - 1. Default warehouse path is created - 2. Catalog is properly configured - 3. SQLite URI is correct - - Why this matters: - ---------------- - - Easy setup for development - - Consistent default behavior - - No external dependencies - """ - with patch("bulk_operations.iceberg.catalog.Path.cwd") as mock_cwd: - mock_cwd.return_value = Path(self.temp_dir) - - catalog = create_filesystem_catalog("test_catalog") - - # Check catalog properties - self.assertEqual(catalog.name, "test_catalog") - - # Check warehouse directory was created - expected_warehouse = Path(self.temp_dir) / "iceberg_warehouse" - self.assertTrue(expected_warehouse.exists()) - - def test_create_filesystem_catalog_custom_path(self): - """ - Test creating filesystem catalog with custom path. - - What this tests: - --------------- - 1. Custom warehouse path is used - 2. Directory is created if missing - 3. Path objects are handled - - Why this matters: - ---------------- - - Flexibility in storage location - - Integration with existing infrastructure - - Path handling consistency - """ - catalog = create_filesystem_catalog( - name="custom_catalog", warehouse_path=self.warehouse_path - ) - - # Check catalog name - self.assertEqual(catalog.name, "custom_catalog") - - # Check warehouse directory exists - self.assertTrue(self.warehouse_path.exists()) - self.assertTrue(self.warehouse_path.is_dir()) - - def test_create_filesystem_catalog_string_path(self): - """ - Test creating catalog with string path. - - What this tests: - --------------- - 1. String paths are converted to Path objects - 2. Catalog works with string paths - - Why this matters: - ---------------- - - API flexibility - - Backward compatibility - - User convenience - """ - str_path = str(self.warehouse_path) - catalog = create_filesystem_catalog(name="string_path_catalog", warehouse_path=str_path) - - self.assertEqual(catalog.name, "string_path_catalog") - self.assertTrue(Path(str_path).exists()) - - def test_get_or_create_catalog_default(self): - """ - Test get_or_create_catalog with defaults. - - What this tests: - --------------- - 1. Default filesystem catalog is created - 2. 
Same parameters as create_filesystem_catalog - - Why this matters: - ---------------- - - Simplified API for common case - - Consistent behavior - """ - with patch("bulk_operations.iceberg.catalog.create_filesystem_catalog") as mock_create: - mock_catalog = Mock(spec=Catalog) - mock_create.return_value = mock_catalog - - result = get_or_create_catalog( - catalog_name="default_test", warehouse_path=self.warehouse_path - ) - - # Verify create_filesystem_catalog was called - mock_create.assert_called_once_with("default_test", self.warehouse_path) - self.assertEqual(result, mock_catalog) - - def test_get_or_create_catalog_custom_config(self): - """ - Test get_or_create_catalog with custom configuration. - - What this tests: - --------------- - 1. Custom config overrides defaults - 2. load_catalog is used for custom configs - - Why this matters: - ---------------- - - Support for different catalog types - - Flexibility for production deployments - - Integration with existing catalogs - """ - custom_config = { - "type": "rest", - "uri": "https://iceberg-catalog.example.com", - "credential": "token123", - } - - with patch("bulk_operations.iceberg.catalog.load_catalog") as mock_load: - mock_catalog = Mock(spec=Catalog) - mock_load.return_value = mock_catalog - - result = get_or_create_catalog(catalog_name="rest_catalog", config=custom_config) - - # Verify load_catalog was called with custom config - mock_load.assert_called_once_with("rest_catalog", **custom_config) - self.assertEqual(result, mock_catalog) - - def test_warehouse_directory_creation(self): - """ - Test that warehouse directory is created with proper permissions. - - What this tests: - --------------- - 1. Directory is created if missing - 2. Parent directories are created - 3. Existing directories are not affected - - Why this matters: - ---------------- - - Data needs a place to live - - Permissions affect data security - - Idempotent operation - """ - nested_path = self.warehouse_path / "nested" / "warehouse" - - # Ensure it doesn't exist - self.assertFalse(nested_path.exists()) - - # Create catalog - create_filesystem_catalog(name="nested_test", warehouse_path=nested_path) - - # Check all directories were created - self.assertTrue(nested_path.exists()) - self.assertTrue(nested_path.is_dir()) - self.assertTrue(nested_path.parent.exists()) - - # Create again - should not fail - create_filesystem_catalog(name="nested_test2", warehouse_path=nested_path) - self.assertTrue(nested_path.exists()) - - def test_catalog_properties(self): - """ - Test that catalog has expected properties. - - What this tests: - --------------- - 1. Catalog type is set correctly - 2. Warehouse location is set - 3. 
URI format is correct - - Why this matters: - ---------------- - - Properties affect catalog behavior - - Debugging and monitoring - - Integration requirements - """ - catalog = create_filesystem_catalog( - name="properties_test", warehouse_path=self.warehouse_path - ) - - # Check basic properties - self.assertEqual(catalog.name, "properties_test") - - # For SQL catalog, we'd check additional properties - # but they're not exposed in the base Catalog interface - - # Verify catalog can be used (basic smoke test) - # This would fail if catalog is misconfigured - namespaces = list(catalog.list_namespaces()) - self.assertIsInstance(namespaces, list) - - -if __name__ == "__main__": - unittest.main() diff --git a/libs/async-cassandra-bulk/examples/tests/unit/test_iceberg_schema_mapper.py b/libs/async-cassandra-bulk/examples/tests/unit/test_iceberg_schema_mapper.py deleted file mode 100644 index 9acc402..0000000 --- a/libs/async-cassandra-bulk/examples/tests/unit/test_iceberg_schema_mapper.py +++ /dev/null @@ -1,362 +0,0 @@ -"""Unit tests for Cassandra to Iceberg schema mapping. - -What this tests: ---------------- -1. CQL type to Iceberg type conversions -2. Collection type handling (list, set, map) -3. Field ID assignment -4. Primary key handling (required vs nullable) - -Why this matters: ----------------- -- Schema mapping is critical for data integrity -- Type mismatches can cause data loss -- Field IDs enable schema evolution -- Nullability affects query semantics -""" - -import unittest -from unittest.mock import Mock - -from pyiceberg.types import ( - BinaryType, - BooleanType, - DateType, - DecimalType, - DoubleType, - FloatType, - IntegerType, - ListType, - LongType, - MapType, - StringType, - TimestamptzType, -) - -from bulk_operations.iceberg.schema_mapper import CassandraToIcebergSchemaMapper - - -class TestCassandraToIcebergSchemaMapper(unittest.TestCase): - """Test schema mapping from Cassandra to Iceberg.""" - - def setUp(self): - """Set up test fixtures.""" - self.mapper = CassandraToIcebergSchemaMapper() - - def test_simple_type_mappings(self): - """ - Test mapping of simple CQL types to Iceberg types. - - What this tests: - --------------- - 1. String types (text, ascii, varchar) - 2. Numeric types (int, bigint, float, double) - 3. Boolean type - 4. Binary type (blob) - - Why this matters: - ---------------- - - Ensures basic data types are preserved - - Critical for data integrity - - Foundation for complex types - """ - test_cases = [ - # String types - ("text", StringType), - ("ascii", StringType), - ("varchar", StringType), - # Integer types - ("tinyint", IntegerType), - ("smallint", IntegerType), - ("int", IntegerType), - ("bigint", LongType), - ("counter", LongType), - # Floating point - ("float", FloatType), - ("double", DoubleType), - # Other types - ("boolean", BooleanType), - ("blob", BinaryType), - ("date", DateType), - ("timestamp", TimestamptzType), - ("uuid", StringType), - ("timeuuid", StringType), - ("inet", StringType), - ] - - for cql_type, expected_type in test_cases: - with self.subTest(cql_type=cql_type): - result = self.mapper._map_cql_type(cql_type) - self.assertIsInstance(result, expected_type) - - def test_decimal_type_mapping(self): - """ - Test decimal and varint type mappings. - - What this tests: - --------------- - 1. Decimal type with default precision - 2. 
Varint as decimal with 0 scale - - Why this matters: - ---------------- - - Financial data requires exact decimal representation - - Varint needs appropriate precision - """ - # Decimal - decimal_type = self.mapper._map_cql_type("decimal") - self.assertIsInstance(decimal_type, DecimalType) - self.assertEqual(decimal_type.precision, 38) - self.assertEqual(decimal_type.scale, 10) - - # Varint (arbitrary precision integer) - varint_type = self.mapper._map_cql_type("varint") - self.assertIsInstance(varint_type, DecimalType) - self.assertEqual(varint_type.precision, 38) - self.assertEqual(varint_type.scale, 0) - - def test_collection_type_mappings(self): - """ - Test mapping of collection types. - - What this tests: - --------------- - 1. List type with element type - 2. Set type (becomes list in Iceberg) - 3. Map type with key and value types - - Why this matters: - ---------------- - - Collections are common in Cassandra - - Iceberg has no native set type - - Nested types need proper handling - """ - # List - list_type = self.mapper._map_cql_type("list") - self.assertIsInstance(list_type, ListType) - self.assertIsInstance(list_type.element_type, StringType) - self.assertFalse(list_type.element_required) - - # Set (becomes List in Iceberg) - set_type = self.mapper._map_cql_type("set") - self.assertIsInstance(set_type, ListType) - self.assertIsInstance(set_type.element_type, IntegerType) - - # Map - map_type = self.mapper._map_cql_type("map") - self.assertIsInstance(map_type, MapType) - self.assertIsInstance(map_type.key_type, StringType) - self.assertIsInstance(map_type.value_type, DoubleType) - self.assertFalse(map_type.value_required) - - def test_nested_collection_types(self): - """ - Test mapping of nested collection types. - - What this tests: - --------------- - 1. List> - 2. Map> - - Why this matters: - ---------------- - - Cassandra supports nested collections - - Complex data structures need proper mapping - """ - # List> - nested_list = self.mapper._map_cql_type("list>") - self.assertIsInstance(nested_list, ListType) - self.assertIsInstance(nested_list.element_type, ListType) - self.assertIsInstance(nested_list.element_type.element_type, IntegerType) - - # Map> - nested_map = self.mapper._map_cql_type("map>") - self.assertIsInstance(nested_map, MapType) - self.assertIsInstance(nested_map.key_type, StringType) - self.assertIsInstance(nested_map.value_type, ListType) - self.assertIsInstance(nested_map.value_type.element_type, DoubleType) - - def test_frozen_type_handling(self): - """ - Test handling of frozen collections. - - What this tests: - --------------- - 1. Frozen> - 2. Frozen types are unwrapped - - Why this matters: - ---------------- - - Frozen is a Cassandra concept not in Iceberg - - Inner type should be preserved - """ - frozen_list = self.mapper._map_cql_type("frozen>") - self.assertIsInstance(frozen_list, ListType) - self.assertIsInstance(frozen_list.element_type, StringType) - - def test_field_id_assignment(self): - """ - Test unique field ID assignment. - - What this tests: - --------------- - 1. Sequential field IDs - 2. Unique IDs for nested fields - 3. 
ID counter reset - - Why this matters: - ---------------- - - Field IDs enable schema evolution - - Must be unique within schema - - IDs are permanent for a field - """ - # Reset counter - self.mapper.reset_field_ids() - - # Create mock column metadata - col1 = Mock() - col1.cql_type = "text" - col1.is_primary_key = True - - col2 = Mock() - col2.cql_type = "int" - col2.is_primary_key = False - - col3 = Mock() - col3.cql_type = "list" - col3.is_primary_key = False - - # Map columns - field1 = self.mapper._map_column("id", col1) - field2 = self.mapper._map_column("value", col2) - field3 = self.mapper._map_column("tags", col3) - - # Check field IDs - self.assertEqual(field1.field_id, 1) - self.assertEqual(field2.field_id, 2) - self.assertEqual(field3.field_id, 4) # ID 3 was used for list element - - # List type should have element ID too - self.assertEqual(field3.field_type.element_id, 3) - - def test_primary_key_required_fields(self): - """ - Test that primary key columns are marked as required. - - What this tests: - --------------- - 1. Primary key columns are required (not null) - 2. Non-primary columns are nullable - - Why this matters: - ---------------- - - Primary keys cannot be null in Cassandra - - Affects Iceberg query semantics - - Important for data validation - """ - # Primary key column - pk_col = Mock() - pk_col.cql_type = "text" - pk_col.is_primary_key = True - - pk_field = self.mapper._map_column("id", pk_col) - self.assertTrue(pk_field.required) - - # Regular column - reg_col = Mock() - reg_col.cql_type = "text" - reg_col.is_primary_key = False - - reg_field = self.mapper._map_column("name", reg_col) - self.assertFalse(reg_field.required) - - def test_table_schema_mapping(self): - """ - Test mapping of complete table schema. - - What this tests: - --------------- - 1. Multiple columns mapped correctly - 2. Schema contains all fields - 3. Field order preserved - - Why this matters: - ---------------- - - Complete schema mapping is the main use case - - All columns must be included - - Order affects data files - """ - # Mock table metadata - table_meta = Mock() - - # Mock columns - id_col = Mock() - id_col.cql_type = "uuid" - id_col.is_primary_key = True - - name_col = Mock() - name_col.cql_type = "text" - name_col.is_primary_key = False - - tags_col = Mock() - tags_col.cql_type = "set" - tags_col.is_primary_key = False - - table_meta.columns = { - "id": id_col, - "name": name_col, - "tags": tags_col, - } - - # Map schema - schema = self.mapper.map_table_schema(table_meta) - - # Verify schema - self.assertEqual(len(schema.fields), 3) - - # Check field names and types - field_names = [f.name for f in schema.fields] - self.assertEqual(field_names, ["id", "name", "tags"]) - - # Check types - self.assertIsInstance(schema.fields[0].field_type, StringType) - self.assertIsInstance(schema.fields[1].field_type, StringType) - self.assertIsInstance(schema.fields[2].field_type, ListType) - - def test_unknown_type_fallback(self): - """ - Test that unknown types fall back to string. - - What this tests: - --------------- - 1. Unknown CQL types become strings - 2. No exceptions thrown - - Why this matters: - ---------------- - - Future Cassandra versions may add types - - Graceful degradation is better than failure - """ - unknown_type = self.mapper._map_cql_type("future_type") - self.assertIsInstance(unknown_type, StringType) - - def test_time_type_mapping(self): - """ - Test time type mapping. - - What this tests: - --------------- - 1. Time type maps to LongType - 2. 
Represents nanoseconds since midnight - - Why this matters: - ---------------- - - Time representation differs between systems - - Precision must be preserved - """ - time_type = self.mapper._map_cql_type("time") - self.assertIsInstance(time_type, LongType) - - -if __name__ == "__main__": - unittest.main() diff --git a/libs/async-cassandra-bulk/examples/tests/unit/test_token_ranges.py b/libs/async-cassandra-bulk/examples/tests/unit/test_token_ranges.py deleted file mode 100644 index 1949b0e..0000000 --- a/libs/async-cassandra-bulk/examples/tests/unit/test_token_ranges.py +++ /dev/null @@ -1,320 +0,0 @@ -""" -Unit tests for token range operations. - -What this tests: ---------------- -1. Token range calculation and splitting -2. Proportional distribution of ranges -3. Handling of ring wraparound -4. Replica awareness - -Why this matters: ----------------- -- Correct token ranges ensure complete data coverage -- Proportional splitting ensures balanced workload -- Proper handling prevents missing or duplicate data -- Replica awareness enables data locality - -Additional context: ---------------------------------- -Token ranges in Cassandra use Murmur3 hash with range: --9223372036854775808 to 9223372036854775807 -""" - -from unittest.mock import MagicMock, Mock - -import pytest - -from bulk_operations.token_utils import ( - TokenRange, - TokenRangeSplitter, - discover_token_ranges, - generate_token_range_query, -) - - -class TestTokenRange: - """Test TokenRange data class.""" - - @pytest.mark.unit - def test_token_range_creation(self): - """Test creating a token range.""" - range = TokenRange(start=-9223372036854775808, end=0, replicas=["node1", "node2", "node3"]) - - assert range.start == -9223372036854775808 - assert range.end == 0 - assert range.size == 9223372036854775808 - assert range.replicas == ["node1", "node2", "node3"] - assert 0.49 < range.fraction < 0.51 # About 50% of ring - - @pytest.mark.unit - def test_token_range_wraparound(self): - """Test token range that wraps around the ring.""" - # Range from positive to negative (wraps around) - range = TokenRange(start=9223372036854775800, end=-9223372036854775800, replicas=["node1"]) - - # Size calculation should handle wraparound - expected_size = 16 # Small range wrapping around - assert range.size == expected_size - assert range.fraction < 0.001 # Very small fraction of ring - - @pytest.mark.unit - def test_token_range_full_ring(self): - """Test token range covering entire ring.""" - range = TokenRange( - start=-9223372036854775808, - end=9223372036854775807, - replicas=["node1", "node2", "node3"], - ) - - assert range.size == 18446744073709551615 # 2^64 - 1 - assert range.fraction == 1.0 # 100% of ring - - -class TestTokenRangeSplitter: - """Test token range splitting logic.""" - - @pytest.mark.unit - def test_split_single_range_evenly(self): - """Test splitting a single range into equal parts.""" - splitter = TokenRangeSplitter() - original = TokenRange(start=0, end=1000, replicas=["node1", "node2"]) - - splits = splitter.split_single_range(original, 4) - - assert len(splits) == 4 - # Check splits are contiguous and cover entire range - assert splits[0].start == 0 - assert splits[0].end == 250 - assert splits[1].start == 250 - assert splits[1].end == 500 - assert splits[2].start == 500 - assert splits[2].end == 750 - assert splits[3].start == 750 - assert splits[3].end == 1000 - - # All splits should have same replicas - for split in splits: - assert split.replicas == ["node1", "node2"] - - @pytest.mark.unit - def 
test_split_proportionally(self): - """Test proportional splitting based on range sizes.""" - splitter = TokenRangeSplitter() - - # Create ranges of different sizes - ranges = [ - TokenRange(start=0, end=1000, replicas=["node1"]), # 10% of total - TokenRange(start=1000, end=9000, replicas=["node2"]), # 80% of total - TokenRange(start=9000, end=10000, replicas=["node3"]), # 10% of total - ] - - # Request 10 splits total - splits = splitter.split_proportionally(ranges, 10) - - # Should get approximately 1, 8, 1 splits for each range - node1_splits = [s for s in splits if s.replicas == ["node1"]] - node2_splits = [s for s in splits if s.replicas == ["node2"]] - node3_splits = [s for s in splits if s.replicas == ["node3"]] - - assert len(node1_splits) == 1 - assert len(node2_splits) == 8 - assert len(node3_splits) == 1 - assert len(splits) == 10 - - @pytest.mark.unit - def test_split_with_minimum_size(self): - """Test that small ranges don't get over-split.""" - splitter = TokenRangeSplitter() - - # Very small range - small_range = TokenRange(start=0, end=10, replicas=["node1"]) - - # Request many splits - splits = splitter.split_single_range(small_range, 100) - - # Should not create more splits than makes sense - # (implementation should have minimum split size) - assert len(splits) <= 10 # Assuming minimum split size of 1 - - @pytest.mark.unit - def test_cluster_by_replicas(self): - """Test clustering ranges by their replica sets.""" - splitter = TokenRangeSplitter() - - ranges = [ - TokenRange(start=0, end=100, replicas=["node1", "node2"]), - TokenRange(start=100, end=200, replicas=["node2", "node3"]), - TokenRange(start=200, end=300, replicas=["node1", "node2"]), - TokenRange(start=300, end=400, replicas=["node2", "node3"]), - ] - - clustered = splitter.cluster_by_replicas(ranges) - - # Should have 2 clusters based on replica sets - assert len(clustered) == 2 - - # Find clusters - cluster1 = None - cluster2 = None - for replicas, cluster_ranges in clustered.items(): - if set(replicas) == {"node1", "node2"}: - cluster1 = cluster_ranges - elif set(replicas) == {"node2", "node3"}: - cluster2 = cluster_ranges - - assert cluster1 is not None - assert cluster2 is not None - assert len(cluster1) == 2 - assert len(cluster2) == 2 - - -class TestTokenRangeDiscovery: - """Test discovering token ranges from cluster metadata.""" - - @pytest.mark.unit - async def test_discover_token_ranges(self): - """ - Test discovering token ranges from cluster metadata. - - What this tests: - --------------- - 1. Extraction from Cassandra metadata - 2. All token ranges are discovered - 3. Replica information is captured - 4. 
Async operation works correctly - - Why this matters: - ---------------- - - Must discover all ranges for completeness - - Replica info enables local processing - - Integration point with driver metadata - - Foundation of token-aware operations - """ - # Mock cluster metadata - mock_session = Mock() - mock_cluster = Mock() - mock_metadata = Mock() - mock_token_map = Mock() - - # Set up mock relationships - mock_session._session = Mock() - mock_session._session.cluster = mock_cluster - mock_cluster.metadata = mock_metadata - mock_metadata.token_map = mock_token_map - - # Mock tokens in the ring - from .test_helpers import MockToken - - mock_token1 = MockToken(-9223372036854775808) - mock_token2 = MockToken(0) - mock_token3 = MockToken(9223372036854775807) - mock_token_map.ring = [mock_token1, mock_token2, mock_token3] - - # Mock replicas - mock_token_map.get_replicas = MagicMock( - side_effect=[ - [Mock(address="127.0.0.1"), Mock(address="127.0.0.2")], - [Mock(address="127.0.0.2"), Mock(address="127.0.0.3")], - [Mock(address="127.0.0.3"), Mock(address="127.0.0.1")], # For wraparound - ] - ) - - # Discover ranges - ranges = await discover_token_ranges(mock_session, "test_keyspace") - - assert len(ranges) == 3 # Three tokens create three ranges - assert ranges[0].start == -9223372036854775808 - assert ranges[0].end == 0 - assert ranges[0].replicas == ["127.0.0.1", "127.0.0.2"] - assert ranges[1].start == 0 - assert ranges[1].end == 9223372036854775807 - assert ranges[1].replicas == ["127.0.0.2", "127.0.0.3"] - assert ranges[2].start == 9223372036854775807 - assert ranges[2].end == -9223372036854775808 # Wraparound - assert ranges[2].replicas == ["127.0.0.3", "127.0.0.1"] - - -class TestTokenRangeQueryGeneration: - """Test generating CQL queries with token ranges.""" - - @pytest.mark.unit - def test_generate_basic_token_range_query(self): - """ - Test generating a basic token range query. - - What this tests: - --------------- - 1. Valid CQL syntax generation - 2. Token function usage is correct - 3. Range boundaries use proper operators - 4. 
Fully qualified table names - - Why this matters: - ---------------- - - Query syntax must be valid CQL - - Token function enables range scans - - Boundary operators prevent gaps/overlaps - - Production queries depend on this - """ - range = TokenRange(start=0, end=1000, replicas=["node1"]) - - query = generate_token_range_query( - keyspace="test_ks", table="test_table", partition_keys=["id"], token_range=range - ) - - expected = "SELECT * FROM test_ks.test_table " "WHERE token(id) > 0 AND token(id) <= 1000" - assert query == expected - - @pytest.mark.unit - def test_generate_query_with_multiple_partition_keys(self): - """Test query generation with composite partition key.""" - range = TokenRange(start=-1000, end=1000, replicas=["node1"]) - - query = generate_token_range_query( - keyspace="test_ks", - table="test_table", - partition_keys=["country", "city"], - token_range=range, - ) - - expected = ( - "SELECT * FROM test_ks.test_table " - "WHERE token(country, city) > -1000 AND token(country, city) <= 1000" - ) - assert query == expected - - @pytest.mark.unit - def test_generate_query_with_column_selection(self): - """Test query generation with specific columns.""" - range = TokenRange(start=0, end=1000, replicas=["node1"]) - - query = generate_token_range_query( - keyspace="test_ks", - table="test_table", - partition_keys=["id"], - token_range=range, - columns=["id", "name", "created_at"], - ) - - expected = ( - "SELECT id, name, created_at FROM test_ks.test_table " - "WHERE token(id) > 0 AND token(id) <= 1000" - ) - assert query == expected - - @pytest.mark.unit - def test_generate_query_with_min_token(self): - """Test query generation starting from minimum token.""" - range = TokenRange(start=-9223372036854775808, end=0, replicas=["node1"]) # Min token - - query = generate_token_range_query( - keyspace="test_ks", table="test_table", partition_keys=["id"], token_range=range - ) - - # First range should use >= instead of > - expected = ( - "SELECT * FROM test_ks.test_table " - "WHERE token(id) >= -9223372036854775808 AND token(id) <= 0" - ) - assert query == expected diff --git a/libs/async-cassandra-bulk/examples/tests/unit/test_token_utils.py b/libs/async-cassandra-bulk/examples/tests/unit/test_token_utils.py deleted file mode 100644 index 8fe2de9..0000000 --- a/libs/async-cassandra-bulk/examples/tests/unit/test_token_utils.py +++ /dev/null @@ -1,388 +0,0 @@ -""" -Unit tests for token range utilities. - -What this tests: ---------------- -1. Token range size calculations -2. Range splitting logic -3. Wraparound handling -4. Proportional distribution -5. Replica clustering - -Why this matters: ----------------- -- Ensures data completeness -- Prevents missing rows -- Maintains proper load distribution -- Enables efficient parallel processing - -Additional context: ---------------------------------- -Token ranges in Cassandra use Murmur3 hash which -produces 128-bit values from -2^63 to 2^63-1. -""" - -from unittest.mock import Mock - -import pytest - -from bulk_operations.token_utils import ( - MAX_TOKEN, - MIN_TOKEN, - TOTAL_TOKEN_RANGE, - TokenRange, - TokenRangeSplitter, - discover_token_ranges, - generate_token_range_query, -) - - -class TestTokenRange: - """Test the TokenRange dataclass.""" - - @pytest.mark.unit - def test_token_range_size_normal(self): - """ - Test size calculation for normal ranges. - - What this tests: - --------------- - 1. Size calculation for positive ranges - 2. Size calculation for negative ranges - 3. Basic arithmetic correctness - 4. 
No wraparound edge cases - - Why this matters: - ---------------- - - Token range sizes determine split proportions - - Incorrect sizes lead to unbalanced loads - - Foundation for all range splitting logic - - Critical for even data distribution - """ - range = TokenRange(start=0, end=1000, replicas=["node1"]) - assert range.size == 1000 - - range = TokenRange(start=-1000, end=0, replicas=["node1"]) - assert range.size == 1000 - - @pytest.mark.unit - def test_token_range_size_wraparound(self): - """ - Test size calculation for ranges that wrap around. - - What this tests: - --------------- - 1. Wraparound from MAX_TOKEN to MIN_TOKEN - 2. Correct size calculation across boundaries - 3. Edge case handling for ring topology - 4. Boundary arithmetic correctness - - Why this matters: - ---------------- - - Cassandra's token ring wraps around - - Last range often crosses the boundary - - Incorrect handling causes missing data - - Real clusters always have wraparound ranges - """ - # Range wraps from near max to near min - range = TokenRange(start=MAX_TOKEN - 1000, end=MIN_TOKEN + 1000, replicas=["node1"]) - expected_size = 1000 + 1000 + 1 # 1000 on each side plus the boundary - assert range.size == expected_size - - @pytest.mark.unit - def test_token_range_fraction(self): - """Test fraction calculation.""" - # Quarter of the ring - quarter_size = TOTAL_TOKEN_RANGE // 4 - range = TokenRange(start=0, end=quarter_size, replicas=["node1"]) - assert abs(range.fraction - 0.25) < 0.001 - - -class TestTokenRangeSplitter: - """Test the TokenRangeSplitter class.""" - - @pytest.fixture - def splitter(self): - """Create a TokenRangeSplitter instance.""" - return TokenRangeSplitter() - - @pytest.mark.unit - def test_split_single_range_no_split(self, splitter): - """Test that requesting 1 or 0 splits returns original range.""" - range = TokenRange(start=0, end=1000, replicas=["node1"]) - - result = splitter.split_single_range(range, 1) - assert len(result) == 1 - assert result[0].start == 0 - assert result[0].end == 1000 - - @pytest.mark.unit - def test_split_single_range_even_split(self, splitter): - """Test splitting a range into even parts.""" - range = TokenRange(start=0, end=1000, replicas=["node1"]) - - result = splitter.split_single_range(range, 4) - assert len(result) == 4 - - # Check splits - assert result[0].start == 0 - assert result[0].end == 250 - assert result[1].start == 250 - assert result[1].end == 500 - assert result[2].start == 500 - assert result[2].end == 750 - assert result[3].start == 750 - assert result[3].end == 1000 - - @pytest.mark.unit - def test_split_single_range_small_range(self, splitter): - """Test that very small ranges aren't split.""" - range = TokenRange(start=0, end=2, replicas=["node1"]) - - result = splitter.split_single_range(range, 10) - assert len(result) == 1 # Too small to split - - @pytest.mark.unit - def test_split_proportionally_empty(self, splitter): - """Test proportional splitting with empty input.""" - result = splitter.split_proportionally([], 10) - assert result == [] - - @pytest.mark.unit - def test_split_proportionally_single_range(self, splitter): - """Test proportional splitting with single range.""" - ranges = [TokenRange(start=0, end=1000, replicas=["node1"])] - - result = splitter.split_proportionally(ranges, 4) - assert len(result) == 4 - - @pytest.mark.unit - def test_split_proportionally_multiple_ranges(self, splitter): - """ - Test proportional splitting with ranges of different sizes. - - What this tests: - --------------- - 1. 
Proportional distribution based on size - 2. Larger ranges get more splits - 3. Rounding behavior is reasonable - 4. All input ranges are covered - - Why this matters: - ---------------- - - Uneven token distribution is common - - Load balancing requires proportional splits - - Prevents hotspots in processing - - Mimics real cluster token distributions - """ - ranges = [ - TokenRange(start=0, end=1000, replicas=["node1"]), # Size 1000 - TokenRange(start=1000, end=4000, replicas=["node2"]), # Size 3000 - ] - - result = splitter.split_proportionally(ranges, 4) - - # Should split proportionally: 1 split for first, 3 for second - # But implementation uses round(), so might be slightly different - assert len(result) >= 2 - assert len(result) <= 4 - - @pytest.mark.unit - def test_cluster_by_replicas(self, splitter): - """ - Test clustering ranges by replica sets. - - What this tests: - --------------- - 1. Ranges are grouped by replica nodes - 2. Replica order doesn't affect grouping - 3. All ranges are included in clusters - 4. Unique replica sets are identified - - Why this matters: - ---------------- - - Enables coordinator-local processing - - Reduces network traffic in operations - - Improves performance through locality - - Critical for multi-datacenter efficiency - """ - ranges = [ - TokenRange(start=0, end=100, replicas=["node1", "node2"]), - TokenRange(start=100, end=200, replicas=["node2", "node3"]), - TokenRange(start=200, end=300, replicas=["node1", "node2"]), - TokenRange(start=300, end=400, replicas=["node3", "node1"]), - ] - - clusters = splitter.cluster_by_replicas(ranges) - - # Should have 3 unique replica sets - assert len(clusters) == 3 - - # Check that ranges are properly grouped - key1 = tuple(sorted(["node1", "node2"])) - assert key1 in clusters - assert len(clusters[key1]) == 2 - - -class TestDiscoverTokenRanges: - """Test token range discovery from cluster metadata.""" - - @pytest.mark.unit - async def test_discover_token_ranges_success(self): - """ - Test successful token range discovery. - - What this tests: - --------------- - 1. Token ranges are extracted from metadata - 2. Replica information is preserved - 3. All ranges from token map are returned - 4. 
Async operation completes successfully - - Why this matters: - ---------------- - - Discovery is the foundation of token-aware ops - - Replica awareness enables local reads - - Must handle all Cassandra metadata structures - - Critical for multi-datacenter deployments - """ - # Mock session and cluster - mock_session = Mock() - mock_cluster = Mock() - mock_metadata = Mock() - mock_token_map = Mock() - - # Setup tokens in the ring - from .test_helpers import MockToken - - mock_token1 = MockToken(-1000) - mock_token2 = MockToken(0) - mock_token3 = MockToken(1000) - mock_token_map.ring = [mock_token1, mock_token2, mock_token3] - - # Setup replicas - mock_replica1 = Mock() - mock_replica1.address = "192.168.1.1" - mock_replica2 = Mock() - mock_replica2.address = "192.168.1.2" - - mock_token_map.get_replicas.side_effect = [ - [mock_replica1, mock_replica2], - [mock_replica2, mock_replica1], - [mock_replica1, mock_replica2], # For the third token range - ] - - mock_metadata.token_map = mock_token_map - mock_cluster.metadata = mock_metadata - mock_session._session = Mock() - mock_session._session.cluster = mock_cluster - - # Test discovery - ranges = await discover_token_ranges(mock_session, "test_ks") - - assert len(ranges) == 3 # Three tokens create three ranges - assert ranges[0].start == -1000 - assert ranges[0].end == 0 - assert ranges[0].replicas == ["192.168.1.1", "192.168.1.2"] - assert ranges[1].start == 0 - assert ranges[1].end == 1000 - assert ranges[1].replicas == ["192.168.1.2", "192.168.1.1"] - assert ranges[2].start == 1000 - assert ranges[2].end == -1000 # Wraparound range - assert ranges[2].replicas == ["192.168.1.1", "192.168.1.2"] - - @pytest.mark.unit - async def test_discover_token_ranges_no_token_map(self): - """Test error when token map is not available.""" - mock_session = Mock() - mock_cluster = Mock() - mock_metadata = Mock() - mock_metadata.token_map = None - mock_cluster.metadata = mock_metadata - mock_session._session = Mock() - mock_session._session.cluster = mock_cluster - - with pytest.raises(RuntimeError, match="Token map not available"): - await discover_token_ranges(mock_session, "test_ks") - - -class TestGenerateTokenRangeQuery: - """Test CQL query generation for token ranges.""" - - @pytest.mark.unit - def test_generate_query_all_columns(self): - """Test query generation with all columns.""" - query = generate_token_range_query( - keyspace="test_ks", - table="test_table", - partition_keys=["id"], - token_range=TokenRange(start=0, end=1000, replicas=["node1"]), - ) - - expected = "SELECT * FROM test_ks.test_table " "WHERE token(id) > 0 AND token(id) <= 1000" - assert query == expected - - @pytest.mark.unit - def test_generate_query_specific_columns(self): - """Test query generation with specific columns.""" - query = generate_token_range_query( - keyspace="test_ks", - table="test_table", - partition_keys=["id"], - token_range=TokenRange(start=0, end=1000, replicas=["node1"]), - columns=["id", "name", "value"], - ) - - expected = ( - "SELECT id, name, value FROM test_ks.test_table " - "WHERE token(id) > 0 AND token(id) <= 1000" - ) - assert query == expected - - @pytest.mark.unit - def test_generate_query_minimum_token(self): - """ - Test query generation for minimum token edge case. - - What this tests: - --------------- - 1. MIN_TOKEN uses >= instead of > - 2. Prevents missing first token value - 3. Query syntax is valid CQL - 4. 
Edge case is handled correctly - - Why this matters: - ---------------- - - MIN_TOKEN is a valid token value - - Using > would skip data at MIN_TOKEN - - Common source of missing data bugs - - DSBulk compatibility requires this behavior - """ - query = generate_token_range_query( - keyspace="test_ks", - table="test_table", - partition_keys=["id"], - token_range=TokenRange(start=MIN_TOKEN, end=0, replicas=["node1"]), - ) - - expected = ( - f"SELECT * FROM test_ks.test_table " - f"WHERE token(id) >= {MIN_TOKEN} AND token(id) <= 0" - ) - assert query == expected - - @pytest.mark.unit - def test_generate_query_compound_partition_key(self): - """Test query generation with compound partition key.""" - query = generate_token_range_query( - keyspace="test_ks", - table="test_table", - partition_keys=["id", "type"], - token_range=TokenRange(start=0, end=1000, replicas=["node1"]), - ) - - expected = ( - "SELECT * FROM test_ks.test_table " - "WHERE token(id, type) > 0 AND token(id, type) <= 1000" - ) - assert query == expected diff --git a/libs/async-cassandra-bulk/examples/visualize_tokens.py b/libs/async-cassandra-bulk/examples/visualize_tokens.py deleted file mode 100755 index 98c1c25..0000000 --- a/libs/async-cassandra-bulk/examples/visualize_tokens.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -""" -Visualize token distribution in the Cassandra cluster. - -This script helps understand how vnodes distribute tokens -across the cluster and validates our token range discovery. -""" - -import asyncio -from collections import defaultdict - -from rich.console import Console -from rich.table import Table - -from async_cassandra import AsyncCluster -from bulk_operations.token_utils import MAX_TOKEN, MIN_TOKEN, discover_token_ranges - -console = Console() - - -def analyze_node_distribution(ranges): - """Analyze and display token distribution by node.""" - primary_owner_count = defaultdict(int) - all_replica_count = defaultdict(int) - - for r in ranges: - # First replica is primary owner - if r.replicas: - primary_owner_count[r.replicas[0]] += 1 - for replica in r.replicas: - all_replica_count[replica] += 1 - - # Display node statistics - table = Table(title="Token Distribution by Node") - table.add_column("Node", style="cyan") - table.add_column("Primary Ranges", style="green") - table.add_column("Total Ranges (with replicas)", style="yellow") - table.add_column("Percentage of Ring", style="magenta") - - total_primary = sum(primary_owner_count.values()) - - for node in sorted(all_replica_count.keys()): - primary = primary_owner_count.get(node, 0) - total = all_replica_count.get(node, 0) - percentage = (primary / total_primary * 100) if total_primary > 0 else 0 - - table.add_row(node, str(primary), str(total), f"{percentage:.1f}%") - - console.print(table) - return primary_owner_count - - -def analyze_range_sizes(ranges): - """Analyze and display token range sizes.""" - console.print("\n[bold]Token Range Size Analysis[/bold]") - - range_sizes = [r.size for r in ranges] - avg_size = sum(range_sizes) / len(range_sizes) - min_size = min(range_sizes) - max_size = max(range_sizes) - - console.print(f"Average range size: {avg_size:,.0f}") - console.print(f"Smallest range: {min_size:,}") - console.print(f"Largest range: {max_size:,}") - console.print(f"Size ratio (max/min): {max_size/min_size:.2f}x") - - -def validate_ring_coverage(ranges): - """Validate token ring coverage for gaps.""" - console.print("\n[bold]Token Ring Coverage Validation[/bold]") - - sorted_ranges = sorted(ranges, key=lambda r: 
r.start) - - # Check for gaps - gaps = [] - for i in range(len(sorted_ranges) - 1): - current = sorted_ranges[i] - next_range = sorted_ranges[i + 1] - if current.end != next_range.start: - gaps.append((current.end, next_range.start)) - - if gaps: - console.print(f"[red]โš  Found {len(gaps)} gaps in token ring![/red]") - for gap_start, gap_end in gaps[:5]: # Show first 5 - console.print(f" Gap: {gap_start} to {gap_end}") - else: - console.print("[green]โœ“ No gaps found - complete ring coverage[/green]") - - # Check first and last ranges - if sorted_ranges[0].start == MIN_TOKEN: - console.print("[green]โœ“ First range starts at MIN_TOKEN[/green]") - else: - console.print(f"[red]โš  First range starts at {sorted_ranges[0].start}, not MIN_TOKEN[/red]") - - if sorted_ranges[-1].end == MAX_TOKEN: - console.print("[green]โœ“ Last range ends at MAX_TOKEN[/green]") - else: - console.print(f"[yellow]Last range ends at {sorted_ranges[-1].end}[/yellow]") - - return sorted_ranges - - -def display_sample_ranges(sorted_ranges): - """Display sample token ranges.""" - console.print("\n[bold]Sample Token Ranges (first 5)[/bold]") - sample_table = Table() - sample_table.add_column("Range #", style="cyan") - sample_table.add_column("Start", style="green") - sample_table.add_column("End", style="yellow") - sample_table.add_column("Size", style="magenta") - sample_table.add_column("Replicas", style="blue") - - for i, r in enumerate(sorted_ranges[:5]): - sample_table.add_row( - str(i + 1), str(r.start), str(r.end), f"{r.size:,}", ", ".join(r.replicas) - ) - - console.print(sample_table) - - -async def visualize_token_distribution(): - """Visualize how tokens are distributed across the cluster.""" - - console.print("[cyan]Connecting to Cassandra cluster...[/cyan]") - - async with AsyncCluster(contact_points=["localhost"]) as cluster, cluster.connect() as session: - # Create test keyspace if needed - await session.execute( - """ - CREATE KEYSPACE IF NOT EXISTS token_test - WITH replication = { - 'class': 'SimpleStrategy', - 'replication_factor': 3 - } - """ - ) - - console.print("[green]โœ“ Connected to cluster[/green]\n") - - # Discover token ranges - ranges = await discover_token_ranges(session, "token_test") - - # Analyze distribution - console.print("[bold]Token Range Analysis[/bold]") - console.print(f"Total ranges discovered: {len(ranges)}") - console.print("Expected with 3 nodes ร— 256 vnodes: ~768 ranges\n") - - # Analyze node distribution - primary_owner_count = analyze_node_distribution(ranges) - - # Analyze range sizes - analyze_range_sizes(ranges) - - # Validate ring coverage - sorted_ranges = validate_ring_coverage(ranges) - - # Display sample ranges - display_sample_ranges(sorted_ranges) - - # Vnode insight - console.print("\n[bold]Vnode Configuration Insight[/bold]") - console.print(f"With {len(primary_owner_count)} nodes and {len(ranges)} ranges:") - console.print(f"Average vnodes per node: {len(ranges) / len(primary_owner_count):.1f}") - console.print("This matches the expected 256 vnodes per node configuration.") - - -if __name__ == "__main__": - try: - asyncio.run(visualize_token_distribution()) - except KeyboardInterrupt: - console.print("\n[yellow]Visualization cancelled[/yellow]") - except Exception as e: - console.print(f"\n[red]Error: {e}[/red]") - import traceback - - traceback.print_exc() diff --git a/libs/async-cassandra/pyproject.toml b/libs/async-cassandra/pyproject.toml index d513837..4940021 100644 --- a/libs/async-cassandra/pyproject.toml +++ b/libs/async-cassandra/pyproject.toml 
@@ -62,6 +62,7 @@ test = [ "httpx>=0.24.0", "uvicorn>=0.23.0", "psutil>=5.9.0", + "pyarrow>=10.0.0", ] docs = [ "sphinx>=6.0.0", diff --git a/libs/async-cassandra/tests/integration/test_example_scripts.py b/libs/async-cassandra/tests/integration/test_example_scripts.py index 7ed2629..2b67a0f 100644 --- a/libs/async-cassandra/tests/integration/test_example_scripts.py +++ b/libs/async-cassandra/tests/integration/test_example_scripts.py @@ -91,13 +91,15 @@ async def test_streaming_basic_example(self, cassandra_cluster): # Verify expected output patterns # The examples use logging which outputs to stderr output = result.stderr if result.stderr else result.stdout - assert "Basic Streaming Example" in output + assert "BASIC STREAMING EXAMPLE" in output assert "Inserted 100000 test events" in output or "Inserted 100,000 test events" in output - assert "Streaming completed:" in output + assert "Streaming completed!" in output assert "Total events: 100,000" in output or "Total events: 100000" in output - assert "Filtered Streaming Example" in output - assert "Page-Based Streaming Example (True Async Paging)" in output - assert "Pages are fetched asynchronously" in output + assert "FILTERED STREAMING EXAMPLE" in output + assert "PAGE-BASED STREAMING EXAMPLE (True Async Paging)" in output + assert ( + "Pages are fetched ON-DEMAND" in output or "Pages were fetched asynchronously" in output + ) # Verify keyspace was cleaned up async with AsyncCluster(["localhost"]) as cluster: @@ -152,8 +154,8 @@ async def test_export_large_table_example(self, cassandra_cluster, tmp_path): # Verify expected output (might be in stdout or stderr due to logging) output = result.stdout + result.stderr - assert "Created 5000 sample products" in output - assert "Export completed:" in output + assert "Created 5,000 sample products" in output + assert "EXPORT COMPLETED SUCCESSFULLY!" in output assert "Rows exported: 5,000" in output assert f"Output directory: {export_dir}" in output @@ -235,16 +237,16 @@ async def test_context_manager_safety_demo(self, cassandra_cluster): # Verify all demonstrations ran (might be in stdout or stderr due to logging) output = result.stdout + result.stderr - assert "Demonstrating Query Error Safety" in output + assert "QUERY ERROR SAFETY DEMONSTRATION" in output assert "Query failed as expected" in output - assert "Session still works after error" in output + assert "Session is healthy!" 
in output - assert "Demonstrating Streaming Error Safety" in output + assert "STREAMING ERROR SAFETY DEMONSTRATION" in output assert "Streaming failed as expected" in output assert "Successfully streamed" in output - assert "Demonstrating Context Manager Isolation" in output - assert "Demonstrating Concurrent Safety" in output + assert "CONTEXT MANAGER ISOLATION DEMONSTRATION" in output + assert "CONCURRENT OPERATIONS SAFETY DEMONSTRATION" in output # Verify key takeaways are shown assert "Query errors don't close sessions" in output @@ -285,15 +287,19 @@ async def test_metrics_simple_example(self, cassandra_cluster): # Verify metrics output (might be in stdout or stderr due to logging) output = result.stdout + result.stderr - assert "Query Metrics Example" in output or "async-cassandra Metrics Example" in output - assert "Connection Health Monitoring" in output - assert "Error Tracking Example" in output or "Expected error recorded" in output - assert "Performance Summary" in output + assert "ASYNC-CASSANDRA METRICS COLLECTION EXAMPLE" in output + assert "CONNECTION HEALTH MONITORING" in output + assert "ERROR TRACKING DEMONSTRATION" in output or "Expected error captured" in output + assert "PERFORMANCE METRICS SUMMARY" in output # Verify statistics are shown assert "Total queries:" in output or "Query Metrics:" in output assert "Success rate:" in output or "Success Rate:" in output - assert "Average latency:" in output or "Average Duration:" in output + assert ( + "Average latency:" in output + or "Average Duration:" in output + or "Query Performance:" in output + ) @pytest.mark.timeout(240) # Override default timeout for this test (lots of data) async def test_realtime_processing_example(self, cassandra_cluster): @@ -333,15 +339,19 @@ async def test_realtime_processing_example(self, cassandra_cluster): output = result.stdout + result.stderr # Check that setup completed - assert "Setting up sensor data" in output - assert "Sample data inserted" in output + assert "Setting up IoT sensor data simulation" in output + assert "Sample data setup complete" in output # Check that processing occurred - assert "Processing Historical Data" in output or "Processing historical data" in output - assert "Processing completed" in output or "readings processed" in output + assert "PROCESSING HISTORICAL DATA" in output or "Processing Historical Data" in output + assert ( + "Processing completed" in output + or "readings processed" in output + or "Analysis complete!" in output + ) # Check that real-time simulation ran - assert "Simulating Real-Time Processing" in output or "Processing cycle" in output + assert "SIMULATING REAL-TIME PROCESSING" in output or "Processing cycle" in output # Verify cleanup assert "Cleaning up" in output @@ -436,11 +446,12 @@ async def test_export_to_parquet_example(self, cassandra_cluster, tmp_path): output = result.stderr if result.stderr else result.stdout assert "Setting up test data" in output assert "Test data setup complete" in output - assert "Example 1: Export Entire Table" in output - assert "Example 2: Export Filtered Data" in output - assert "Example 3: Export with Different Compression" in output - assert "Export completed successfully!" 
in output - assert "Verifying Exported Files" in output + assert "EXPORT SUMMARY" in output + assert "SNAPPY compression:" in output + assert "GZIP compression:" in output + assert "LZ4 compression:" in output + assert "Three exports completed:" in output + assert "VERIFYING EXPORTED PARQUET FILES" in output assert f"Output directory: {export_dir}" in output # Verify Parquet files were created (look recursively in subdirectories)
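
The `pyarrow>=10.0.0` test dependency added to `pyproject.toml` above is what lets this example test open the exported files directly rather than only matching log output, and Parquet exporters commonly split their output into per-compression or per-partition subdirectories, which is why the final check has to look recursively. A minimal sketch of what such a verification might look like, assuming pyarrow and a pathlib-based layout (the helper name, arguments, and column set below are illustrative, not taken from the actual test file):

```python
# Hypothetical helper: recursively locate and validate exported Parquet files.
from pathlib import Path

import pyarrow.parquet as pq


def verify_parquet_outputs(export_dir: Path, expected_columns: set[str]) -> int:
    """Return the total row count across every Parquet file under export_dir."""
    parquet_files = sorted(export_dir.rglob("*.parquet"))
    assert parquet_files, f"No Parquet files found under {export_dir}"

    total_rows = 0
    for path in parquet_files:
        table = pq.read_table(path)  # reading also validates the Parquet footer
        assert expected_columns <= set(table.column_names), f"Missing columns in {path}"
        total_rows += table.num_rows
    return total_rows


# A test could then assert on the aggregate, for example:
# assert verify_parquet_outputs(export_dir, {"id", "name"}) > 0
```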