5 changes: 5 additions & 0 deletions docs/content/pypaimon/data-evolution.md
@@ -196,3 +196,8 @@ commit.close()
- **Row order matters**: the batches you write must have the **same number of rows** as the batches you read, in the
same order for that shard.
- **Parallelism**: run multiple shards by calling `new_shard_updator(shard_idx, num_shards)` for each shard.
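A minimal parallelism sketch: only `new_shard_updator(shard_idx, num_shards)` comes from the API described above; the `updator_owner` parameter, the thread-pool wiring, and the per-shard body are placeholders to adapt to the write example earlier on this page.

```python
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def update_one_shard(updator_owner, num_shards: int, shard_idx: int) -> None:
    """Rewrite the new column for one shard; `updator_owner` stands in for
    whatever object exposes new_shard_updator in your setup (a placeholder)."""
    updator = updator_owner.new_shard_updator(shard_idx, num_shards)
    # Read the shard's batches, attach the new column, write them back with
    # the same row counts and order, then commit as shown above.
    ...

def update_all_shards(updator_owner, num_shards: int = 4) -> None:
    # One worker per shard; each shard is updated independently.
    with ThreadPoolExecutor(max_workers=num_shards) as pool:
        list(pool.map(partial(update_one_shard, updator_owner, num_shards),
                      range(num_shards)))
```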

## Read After Partial Shard Update

- **Full table read**: rows from updated shards have the new column; rows from other shards have null for that column.
- **Per-shard read** (`with_shard(shard_idx, num_shards)`): read only the shard(s) you need; the new column is populated where it was written and null elsewhere.
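A read sketch after updating only one of two shards. `with_shard(shard_idx, num_shards)` is from this section; the `table` object and the builder/scan/read calls follow the usual pypaimon read path and are assumptions that may differ in your version.

```python
# Full table read: rows from the updated shard carry the new column,
# rows from the other shard show null for it.
read_builder = table.new_read_builder()
splits = read_builder.new_scan().plan().splits()
full_table = read_builder.new_read().to_arrow(splits)

# Per-shard read: only shard 0 of 2 is returned, with the new column
# populated where it was written.
shard_builder = table.new_read_builder().with_shard(0, 2)
shard_splits = shard_builder.new_scan().plan().splits()
shard_table = shard_builder.new_read().to_arrow(shard_splits)
```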
69 changes: 69 additions & 0 deletions paimon-python/pypaimon/globalindex/data_evolution_batch_scan.py
@@ -0,0 +1,69 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################


from typing import Optional

from pypaimon.common.predicate import Predicate
from pypaimon.table.special_fields import SpecialFields


class DataEvolutionBatchScan:
@staticmethod
def remove_row_id_filter(predicate: Optional[Predicate]) -> Optional[Predicate]:
if predicate is None:
return None
return DataEvolutionBatchScan._remove(predicate)

@staticmethod
def _remove(predicate: Predicate) -> Optional[Predicate]:
if predicate.method == 'and':
new_children = []
for p in predicate.literals:
sub = DataEvolutionBatchScan._remove(p)
if sub is not None:
new_children.append(sub)
if not new_children:
return None
if len(new_children) == 1:
return new_children[0]
return Predicate(
method='and',
index=predicate.index,
field=predicate.field,
literals=new_children
)
if predicate.method == 'or':
new_children = []
for p in predicate.literals:
sub = DataEvolutionBatchScan._remove(p)
if sub is None:
return None
new_children.append(sub)
if len(new_children) == 1:
return new_children[0]
return Predicate(
method='or',
index=predicate.index,
field=predicate.field,
literals=new_children
)
# Leaf: remove if _ROW_ID
if predicate.field == SpecialFields.ROW_ID.name:
return None
return predicate
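To illustrate the intent of the helper above: in an AND, dropping the `_ROW_ID` leaf only loosens the pushed-down filter, which is safe; in an OR, dropping a branch would tighten it, so the whole OR is discarded. A rough sketch, where the leaf `method` and `index` values are illustrative assumptions rather than the library's real names:

```python
from pypaimon.common.predicate import Predicate
from pypaimon.globalindex.data_evolution_batch_scan import DataEvolutionBatchScan
from pypaimon.table.special_fields import SpecialFields

# Hypothetical leaves: one on _ROW_ID, one on an ordinary column.
row_id_leaf = Predicate(method='equal', index=0,
                        field=SpecialFields.ROW_ID.name, literals=[42])
price_leaf = Predicate(method='greaterThan', index=1,
                       field='price', literals=[100])

and_pred = Predicate(method='and', index=None, field=None,
                     literals=[row_id_leaf, price_leaf])
or_pred = Predicate(method='or', index=None, field=None,
                    literals=[row_id_leaf, price_leaf])

# AND: the _ROW_ID leaf is dropped and the remaining child is returned as-is.
assert DataEvolutionBatchScan.remove_row_id_filter(and_pred) is price_leaf
# OR: removing one branch would narrow the filter, so the whole OR is dropped.
assert DataEvolutionBatchScan.remove_row_id_filter(or_pred) is None
```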
8 changes: 8 additions & 0 deletions paimon-python/pypaimon/globalindex/range.py
@@ -153,6 +153,14 @@ def merge_sorted_as_possible(ranges: List['Range']) -> List['Range']:

return result

@staticmethod
def to_ranges(ids: List[int]) -> List['Range']:
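        """Collapse row ids into sorted ranges, deduplicating and merging
        adjacent ids (e.g. ids 1, 2, 3 collapse into a single range)."""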
if not ids:
return []
sorted_ids = sorted(set(ids))
ranges = [Range(i, i) for i in sorted_ids]
return Range.sort_and_merge_overlap(ranges, merge=True, adjacent=True)

@staticmethod
def sort_and_merge_overlap(ranges: List['Range'], merge: bool = True, adjacent: bool = True) -> List['Range']:
"""
3 changes: 2 additions & 1 deletion paimon-python/pypaimon/read/read_builder.py
@@ -67,7 +67,8 @@ def new_read(self) -> TableRead:
return TableRead(
table=self.table,
predicate=self._predicate,
read_type=self.read_type()
read_type=self.read_type(),
projection=self._projection,
)

def new_predicate_builder(self) -> PredicateBuilder:
30 changes: 27 additions & 3 deletions paimon-python/pypaimon/read/reader/concat_batch_reader.py
@@ -141,6 +141,8 @@ class DataEvolutionMergeReader(RecordBatchReader):
- The fourth field comes from batch1, and it is at offset 1 in batch1.
- The fifth field comes from batch2, and it is at offset 1 in batch2.
- The sixth field comes from batch1, and it is at offset 0 in batch1.

When row_offsets[i] == -1 (no file provides that field), the output column is filled with nulls, typed from the target schema.
"""

def __init__(
@@ -207,14 +209,36 @@ def read_arrow_batch(self) -> Optional[RecordBatch]:
for i in range(len(self.row_offsets)):
batch_index = self.row_offsets[i]
field_index = self.field_offsets[i]
field_name = self.schema.field(i).name if self.schema else None
column = None

if batch_index >= 0 and batches[batch_index] is not None:
columns.append(batches[batch_index].column(field_index).slice(0, min_rows))
else:
src_batch = batches[batch_index]
if field_name is not None and field_name in src_batch.schema.names:
column = src_batch.column(
src_batch.schema.get_field_index(field_name)
).slice(0, min_rows)
elif field_index < src_batch.num_columns:
column = src_batch.column(field_index).slice(0, min_rows)

if column is None and field_name is not None:
for b in batches:
if b is not None and field_name in b.schema.names:
column = b.column(b.schema.get_field_index(field_name)).slice(
0, min_rows
)
break

if column is not None:
columns.append(column)
elif self.schema is not None and i < len(self.schema):
columns.append(pa.nulls(min_rows, type=self.schema.field(i).type))

for i in range(len(self.readers)):
if batches[i] is not None and batches[i].num_rows > min_rows:
self._buffers[i] = batches[i].slice(min_rows, batches[i].num_rows - min_rows)
self._buffers[i] = batches[i].slice(
min_rows, batches[i].num_rows - min_rows
)

return pa.RecordBatch.from_arrays(columns, schema=self.schema)
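As a standalone pyarrow sketch of the stitching described in the docstring above (not the reader's own code): each output field either comes from one source batch, or, when no file provides it, becomes a null column typed from the target schema.

```python
import pyarrow as pa

# Two source batches, as if produced by two underlying file readers.
batch0 = pa.RecordBatch.from_arrays([pa.array([1, 2]), pa.array(["x", "y"])],
                                    names=["a", "b"])
batch1 = pa.RecordBatch.from_arrays([pa.array([True, False])], names=["c"])
batches = [batch0, batch1]

# Target schema: field "d" exists in no source file, so it maps to -1.
schema = pa.schema([("a", pa.int64()), ("c", pa.bool_()), ("d", pa.float64())])
row_offsets = [0, 1, -1]    # which batch each output field comes from (-1: none)
field_offsets = [0, 0, -1]  # column position inside that batch

columns = []
for i, (bi, fi) in enumerate(zip(row_offsets, field_offsets)):
    if bi >= 0:
        columns.append(batches[bi].column(fi))
    else:
        columns.append(pa.nulls(2, type=schema.field(i).type))

merged = pa.RecordBatch.from_arrays(columns, schema=schema)
```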
