@@ -280,7 +280,7 @@ tbl.overwrite(df)
280280
281281The data is written to the table, and when the table is read using `tbl.scan().to_arrow()`:
282282
283- ```
283+ ```python
284284pyarrow.Table
285285city: string
286286lat: double
@@ -303,7 +303,7 @@ tbl.append(df)
303303
304304When reading the table `tbl.scan().to_arrow()` you can see that `Groningen` is now also part of the table:
305305
306- ```
306+ ```python
307307pyarrow.Table
308308city: string
309309lat: double
@@ -342,7 +342,7 @@ tbl.delete(delete_filter="city == 'Paris'")
342342In the above example, any records where the city field value equals to `Paris` will be deleted.
343343Running `tbl.scan().to_arrow()` will now yield:
344344
345- ```
345+ ```python
346346pyarrow.Table
347347city: string
348348lat: double
@@ -362,7 +362,6 @@ To explore the table metadata, tables can be inspected.
362362!!! tip "Time Travel"
363363 To inspect a table's metadata with the time travel feature, call the inspect table method with the `snapshot_id` argument.
364364 Time travel is supported on all metadata tables except `snapshots` and `refs`.
365-
366365 ```python
367366 table.inspect.entries(snapshot_id=805611270568163028)
368367 ```
@@ -377,7 +376,7 @@ Inspect the snapshots of the table:
377376table.inspect.snapshots()
378377```
379378
380- ```
379+ ```python
381380pyarrow.Table
382381committed_at: timestamp[ms] not null
383382snapshot_id: int64 not null
@@ -405,7 +404,7 @@ Inspect the partitions of the table:
405404table.inspect.partitions()
406405```
407406
408- ```
407+ ```python
409408pyarrow.Table
410409partition: struct<dt_month: int32, dt_day: date32[day]> not null
411410 child 0, dt_month: int32
@@ -446,7 +445,7 @@ To show all the table's current manifest entries for both data and delete files.
446445table.inspect.entries()
447446```
448447
449- ```
448+ ```python
450449pyarrow.Table
451450status: int8 not null
452451snapshot_id: int64 not null
@@ -604,7 +603,7 @@ To show a table's known snapshot references:
604603table.inspect.refs()
605604```
606605
607- ```
606+ ```python
608607pyarrow.Table
609608name: string not null
610609type: string not null
@@ -629,7 +628,7 @@ To show a table's current file manifests:
629628table.inspect.manifests()
630629```
631630
632- ```
631+ ```python
633632pyarrow.Table
634633content: int8 not null
635634path: string not null
@@ -679,7 +678,7 @@ To show table metadata log entries:
679678table.inspect.metadata_log_entries()
680679```
681680
682- ```
681+ ```python
683682pyarrow.Table
684683timestamp: timestamp[ms] not null
685684file: string not null
@@ -702,7 +701,7 @@ To show a table's history:
702701table.inspect.history()
703702```
704703
705- ```
704+ ```python
706705pyarrow.Table
707706made_current_at: timestamp[ms] not null
708707snapshot_id: int64 not null
@@ -723,7 +722,7 @@ Inspect the data files in the current snapshot of the table:
723722table.inspect.files()
724723```
725724
726- ```
725+ ```python
727726pyarrow.Table
728727content: int8 not null
729728file_path: string not null
@@ -846,11 +845,16 @@ readable_metrics: [
846845[6.0989]]
847846```
848847
848+ !!! info
849+ Content refers to the type of content stored by the data file: `0` - `Data`, `1` - `Position Deletes`, `2` - `Equality Deletes`
850+
851+ To show only data files or delete files in the current snapshot, use `table.inspect.data_files()` and `table.inspect.delete_files()` respectively.
852+
849853## Add Files
850854
851855Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them.
852856
853- ```
857+ ```python
854858# Given that these parquet files have schema consistent with the Iceberg table
855859
856860file_paths = [
@@ -930,7 +934,7 @@ with table.update_schema() as update:
930934
931935Now the table has the union of the two schemas `print(table.schema())` :
932936
933- ```
937+ ```python
934938table {
935939 1: city: optional string
936940 2: lat: optional double
@@ -1180,7 +1184,7 @@ table.scan(
11801184
11811185This will return a PyArrow table:
11821186
1183- ```
1187+ ```python
11841188pyarrow.Table
11851189VendorID: int64
11861190tpep_pickup_datetime: timestamp[us, tz=+00:00]
@@ -1222,7 +1226,7 @@ table.scan(
12221226
12231227This will return a Pandas dataframe:
12241228
1225- ```
1229+ ```python
12261230 VendorID tpep_pickup_datetime tpep_dropoff_datetime
122712310 2 2021-04-01 00:28:05+00:00 2021-04-01 00:47:59+00:00
122812321 1 2021-04-01 00:39:01+00:00 2021-04-01 00:57:39+00:00
@@ -1295,7 +1299,7 @@ ray_dataset = table.scan(
12951299
12961300This will return a Ray dataset:
12971301
1298- ```
1302+ ```python
12991303Dataset(
13001304 num_blocks=1,
13011305 num_rows=1168798,
@@ -1346,7 +1350,7 @@ df = df.select("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime")
13461350
13471351This returns a Daft Dataframe which is lazily materialized. Printing `df` will display the schema:
13481352
1349- ```
1353+ ```python
13501354╭──────────┬───────────────────────────────┬───────────────────────────────╮
13511355│ VendorID ┆ tpep_pickup_datetime ┆ tpep_dropoff_datetime │
13521356│ --- ┆ --- ┆ --- │
@@ -1364,7 +1368,7 @@ This is correctly optimized to take advantage of Iceberg features such as hidden
13641368df.show(2)
13651369```
13661370
1367- ```
1371+ ```python
13681372╭──────────┬───────────────────────────────┬───────────────────────────────╮
13691373│ VendorID ┆ tpep_pickup_datetime ┆ tpep_dropoff_datetime │
13701374│ --- ┆ --- ┆ --- │
0 commit comments