@@ -228,6 +228,7 @@ class AvroOutputFile(Generic[D]):
228228 encoder : BinaryEncoder
229229 sync_bytes : bytes
230230 writer : Writer
231+ records_written : int
231232
232233 def __init__ (
233234 self ,
@@ -247,6 +248,7 @@ def __init__(
247248 else resolve_writer (record_schema = record_schema , file_schema = self .file_schema )
248249 )
249250 self .metadata = metadata
251+ self .records_written = 0
250252
251253 def __enter__ (self ) -> AvroOutputFile [D ]:
252254 """
@@ -266,6 +268,12 @@ def __exit__(
266268 self , exctype : Optional [Type [BaseException ]], excinst : Optional [BaseException ], exctb : Optional [TracebackType ]
267269 ) -> None :
268270 """Perform cleanup when exiting the scope of a 'with' statement."""
271+ if self .records_written == 0 :
272+ # This is very opinionated, as for Iceberg we should not write empty metadata.
273+ # The `write_block` method should be called at least once to make sure that we
274+ # write the number of blocks and more.
275+ raise ValueError ("No records have been written for this Avro file." )
276+
269277 self .output_stream .close ()
270278
271279 def _write_header (self ) -> None :
@@ -277,8 +285,16 @@ def _write_header(self) -> None:
277285 def write_block (self , objects : List [D ]) -> None :
278286 in_memory = io .BytesIO ()
279287 block_content_encoder = BinaryEncoder (output_stream = in_memory )
288+
289+ records_written_in_block = 0
280290 for obj in objects :
281291 self .writer .write (block_content_encoder , obj )
292+ records_written_in_block += 1
293+
294+ if records_written_in_block == 0 :
295+ raise ValueError ("No records have been written in this block." )
296+
297+ self .records_written += records_written_in_block
282298 block_content = in_memory .getvalue ()
283299
284300 self .encoder .write_int (len (objects ))
0 commit comments