2020from copy import copy
2121from dataclasses import dataclass
2222from enum import Enum
23- from typing import TYPE_CHECKING , Dict , List , Optional , Set , Tuple , Union
23+ from typing import TYPE_CHECKING , Any , Dict , List , Optional , Set , Tuple , Union
2424
2525from pyiceberg .exceptions import ResolveError , ValidationError
26+ from pyiceberg .expressions import literal # type: ignore
2627from pyiceberg .schema import (
2728 PartnerAccessor ,
2829 Schema ,
@@ -153,7 +154,12 @@ def union_by_name(self, new_schema: Union[Schema, "pa.Schema"]) -> UpdateSchema:
153154 return self
154155
155156 def add_column (
156- self , path : Union [str , Tuple [str , ...]], field_type : IcebergType , doc : Optional [str ] = None , required : bool = False
157+ self ,
158+ path : Union [str , Tuple [str , ...]],
159+ field_type : IcebergType ,
160+ doc : Optional [str ] = None ,
161+ required : bool = False ,
162+ default_value : Optional [Any ] = None ,
157163 ) -> UpdateSchema :
158164 """Add a new column to a nested struct or Add a new top-level column.
159165
@@ -168,6 +174,7 @@ def add_column(
168174 field_type: Type for the new column.
169175 doc: Documentation string for the new column.
170176 required: Whether the new column is required.
177+ default_value: Default value for the new column. It is validated against field_type and used as both the initial default and the write default.
171178
172179 Returns:
173180 This for method chaining.
@@ -177,10 +184,6 @@ def add_column(
177184 raise ValueError (f"Cannot add column with ambiguous name: { path } , provide a tuple instead" )
178185 path = (path ,)
179186
180- if required and not self ._allow_incompatible_changes :
181- # Table format version 1 and 2 cannot add required column because there is no initial value
182- raise ValueError (f"Incompatible change: cannot add required column: { '.' .join (path )} " )
183-
184187 name = path [- 1 ]
185188 parent = path [:- 1 ]
186189
@@ -212,13 +215,34 @@ def add_column(
212215
213216 # assign new IDs in order
214217 new_id = self .assign_new_column_id ()
218+ new_type = assign_fresh_schema_ids (field_type , self .assign_new_column_id )
219+
220+ if default_value is not None :
221+ try :
222+ # To make sure that the value is valid for the type
223+ initial_default = literal (default_value ).to (new_type ).value
224+ except ValueError as e :
225+ raise ValueError (f"Invalid default value: { e } " ) from e
226+ else :
227+ initial_default = default_value
228+
229+ if (required and initial_default is None ) and not self ._allow_incompatible_changes :
230+ # Table format version 1 and 2 cannot add a required column without a default value because there is no initial value
231+ raise ValueError (f"Incompatible change: cannot add required column: { '.' .join (path )} " )
215232
216233 # update tracking for moves
217234 self ._added_name_to_id [full_name ] = new_id
218235 self ._id_to_parent [new_id ] = parent_full_path
219236
220- new_type = assign_fresh_schema_ids (field_type , self .assign_new_column_id )
221- field = NestedField (field_id = new_id , name = name , field_type = new_type , required = required , doc = doc )
237+ field = NestedField (
238+ field_id = new_id ,
239+ name = name ,
240+ field_type = new_type ,
241+ required = required ,
242+ doc = doc ,
243+ initial_default = initial_default ,
244+ write_default = initial_default ,
245+ )
222246
223247 if parent_id in self ._adds :
224248 self ._adds [parent_id ].append (field )
@@ -330,6 +354,7 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
330354 field_type = updated .field_type ,
331355 doc = updated .doc ,
332356 required = required ,
357+ initial_default = updated .initial_default ,
333358 )
334359 else :
335360 self ._updates [field .field_id ] = NestedField (
@@ -338,6 +363,7 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
338363 field_type = field .field_type ,
339364 doc = field .doc ,
340365 required = required ,
366+ initial_default = field .initial_default ,
341367 )
342368
343369 def update_column (
@@ -387,6 +413,7 @@ def update_column(
387413 field_type = field_type or updated .field_type ,
388414 doc = doc if doc is not None else updated .doc ,
389415 required = updated .required ,
416+ initial_default = updated .initial_default ,
390417 )
391418 else :
392419 self ._updates [field .field_id ] = NestedField (
@@ -395,6 +422,7 @@ def update_column(
395422 field_type = field_type or field .field_type ,
396423 doc = doc if doc is not None else field .doc ,
397424 required = field .required ,
425+ initial_default = field .initial_default ,
398426 )
399427
400428 if required is not None :
0 commit comments