apache · kevinjqliu · Aug 6, 2025 · Jul 23, 2025 · Jul 23, 2025 · Aug 2, 2025
diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py
@@ -78,6 +78,8 @@
 
 INITIAL_SCHEMA_ID = 0
 
+FIELD_ID_PROP = "field-id"  # key under which an Avro field dict stores the Iceberg field ID
+ICEBERG_FIELD_NAME_PROP = "iceberg-field-name"  # key storing the original field name when the Avro name was sanitized
 
 class Schema(IcebergBaseModel):
     """A table Schema.
@@ -1356,6 +1358,21 @@ def primitive(self, primitive: PrimitiveType) -> PrimitiveType:
 
 # Implementation copied from Apache Iceberg repo.
 def make_compatible_name(name: str) -> str:
+    """Make a field name compatible with Avro specification.
+
+    This function sanitizes field names to comply with Avro naming rules:
+    - Names must start with [A-Za-z_]
+    - Subsequent characters must be [A-Za-z0-9_]
+
+    Invalid characters are replaced with _x<HEX>, the uppercase hex code point without zero padding ('.' -> _x2E).
+    Names starting with digits get a leading underscore.
+
+    Args:
+        name: The original field name
+
+    Returns:
+        The name unchanged if it is already valid, otherwise a sanitized name
+    """
     if not _valid_avro_name(name):
         return _sanitize_name(name)
     return name
@@ -1391,7 +1408,9 @@ def _sanitize_name(name: str) -> str:
 
 
 def _sanitize_char(character: str) -> str:
-    return "_" + character if character.isdigit() else "_x" + hex(ord(character))[2:].upper()
+    if character.isdigit():
+        return "_" + character  # a digit is kept as-is, prefixed with an underscore
+    return "_x" + hex(ord(character))[2:].upper()  # any other character: "_x" + uppercase hex code point, not zero-padded
 
 
 def sanitize_column_names(schema: Schema) -> Schema:

diff --git a/pyiceberg/utils/schema_conversion.py b/pyiceberg/utils/schema_conversion.py
@@ -26,7 +26,7 @@
     Union,
 )
 
-from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
+from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit, ICEBERG_FIELD_NAME_PROP, FIELD_ID_PROP, make_compatible_name, _valid_avro_name
 from pyiceberg.types import (
     BinaryType,
     BooleanType,
@@ -225,13 +225,13 @@ def _convert_field(self, field: Dict[str, Any]) -> NestedField:
         Returns:
             The Iceberg equivalent field.
         """
-        if "field-id" not in field:
-            raise ValueError(f"Cannot convert field, missing field-id: {field}")
+        if FIELD_ID_PROP not in field:
+            raise ValueError(f"Cannot convert field, missing {FIELD_ID_PROP}: {field}")
 
         plain_type, required = self._resolve_union(field["type"])
 
         return NestedField(
-            field_id=field["field-id"],
+            field_id=field[FIELD_ID_PROP],
             name=field["name"],
             field_type=self._convert_schema(plain_type),
             required=required,
@@ -524,12 +524,19 @@ def field(self, field: NestedField, field_result: AvroType) -> AvroType:
         if isinstance(field_result, dict) and field_result.get("type") == "record":
             field_result["name"] = f"r{field.field_id}"
 
+        orig_field_name = field.name
+        is_valid_field_name = _valid_avro_name(orig_field_name)
+        field_name = orig_field_name if is_valid_field_name else make_compatible_name(orig_field_name)
+
         result = {
-            "name": field.name,
-            "field-id": field.field_id,
+            "name": field_name,
+            FIELD_ID_PROP: field.field_id,
             "type": field_result if field.required else ["null", field_result],
         }
 
+        if not is_valid_field_name:
+            result[ICEBERG_FIELD_NAME_PROP] = orig_field_name
+
         if field.write_default is not None:
             result["default"] = field.write_default
         elif field.optional:
@@ -564,8 +571,8 @@ def map(self, map_type: MapType, key_result: AvroType, value_result: AvroType) -
                     "type": "record",
                     "name": f"k{self.last_map_key_field_id}_v{self.last_map_value_field_id}",
                     "fields": [
-                        {"name": "key", "type": key_result, "field-id": self.last_map_key_field_id},
-                        {"name": "value", "type": value_result, "field-id": self.last_map_value_field_id},
+                        {"name": "key", "type": key_result, FIELD_ID_PROP: self.last_map_key_field_id},
+                        {"name": "value", "type": value_result, FIELD_ID_PROP: self.last_map_value_field_id},
                     ],
                 },
                 "logicalType": "map",