apache
diff --git a/‎docs/source/user-guide/io/csv.rst‎
Lines changed: 19 additions & 0 deletions b/‎docs/source/user-guide/io/csv.rst‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎examples/csv-read-options.py‎
Lines changed: 96 additions & 0 deletions b/‎examples/csv-read-options.py‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎python/datafusion/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎python/datafusion/__init__.py‎
Lines changed: 3 additions & 0 deletions
@@ -36,3 +36,22 @@ An alternative is to use :py:func:`~datafusion.context.SessionContext.register_c
 
     ctx.register_csv("file", "file.csv")
     df = ctx.table("file")
+
+If you require additional control over how to read the CSV file, you can use
+:py:class:`~datafusion.options.CsvReadOptions` to set a variety of options.
+
+.. code-block:: python
+
+    from datafusion import CsvReadOptions
+    options = (
+        CsvReadOptions()
+        .with_has_header(True) # File contains a header row
+        .with_delimiter(";") # Use ; as the delimiter instead of ,
+        .with_comment("#")  # Skip lines starting with #
+        .with_escape("\\")  # Escape character
+        .with_null_regex(r"^(null|NULL|N/A)$")  # Treat these as NULL
+        .with_truncated_rows(True) # Allow rows to have incomplete columns
+        .with_file_compression_type("gzip")  # Read gzipped CSV
+        .with_file_extension(".gz") # File extension other than .csv
+    )
+    df = ctx.read_csv("data.csv.gz", options=options)
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Example demonstrating CsvReadOptions usage."""
+
+from datafusion import CsvReadOptions, SessionContext
+
+# Create a SessionContext
+ctx = SessionContext()
+
+# Example 1: Using CsvReadOptions with default values
+print("Example 1: Default CsvReadOptions")
+options = CsvReadOptions()
+df = ctx.read_csv("data.csv", options=options)
+
+# Example 2: Using CsvReadOptions with custom parameters
+print("\nExample 2: Custom CsvReadOptions")
+options = CsvReadOptions(
+    has_header=True,
+    delimiter=",",
+    quote='"',
+    schema_infer_max_records=1000,
+    file_extension=".csv",
+)
+df = ctx.read_csv("data.csv", options=options)
+
+# Example 3: Using the builder pattern (recommended for readability)
+print("\nExample 3: Builder pattern")
+options = (
+    CsvReadOptions()
+    .with_has_header(True)  # noqa: FBT003
+    .with_delimiter("|")
+    .with_quote("'")
+    .with_schema_infer_max_records(500)
+    .with_truncated_rows(False)  # noqa: FBT003
+    .with_newlines_in_values(True)  # noqa: FBT003
+)
+df = ctx.read_csv("data.csv", options=options)
+
+# Example 4: Advanced options
+print("\nExample 4: Advanced options")
+options = (
+    CsvReadOptions()
+    .with_has_header(True)  # noqa: FBT003
+    .with_delimiter(",")
+    .with_comment("#")  # Skip lines starting with #
+    .with_escape("\\")  # Escape character
+    .with_null_regex(r"^(null|NULL|N/A)$")  # Treat these as NULL
+    .with_truncated_rows(True)  # noqa: FBT003
+    .with_file_compression_type("gzip")  # Read gzipped CSV
+    .with_file_extension(".gz")
+)
+df = ctx.read_csv("data.csv.gz", options=options)
+
+# Example 5: Register CSV table with options
+print("\nExample 5: Register CSV table")
+options = CsvReadOptions().with_has_header(True).with_delimiter(",")  # noqa: FBT003
+ctx.register_csv("my_table", "data.csv", options=options)
+df = ctx.sql("SELECT * FROM my_table")
+
+# Example 6: Backward compatibility (without options)
+print("\nExample 6: Backward compatibility")
+# Still works the old way!
+df = ctx.read_csv("data.csv", has_header=True, delimiter=",")
+
+print("\nAll examples completed!")
+print("\nFor all available options, see the CsvReadOptions documentation:")
+print("  - has_header: bool")
+print("  - delimiter: str")
+print("  - quote: str")
+print("  - terminator: str | None")
+print("  - escape: str | None")
+print("  - comment: str | None")
+print("  - newlines_in_values: bool")
+print("  - schema: pa.Schema | None")
+print("  - schema_infer_max_records: int")
+print("  - file_extension: str")
+print("  - table_partition_cols: list[tuple[str, pa.DataType]]")
+print("  - file_compression_type: str")
+print("  - file_sort_order: list[list[SortExpr]]")
+print("  - null_regex: str | None")
+print("  - truncated_rows: bool")
@@ -54,6 +54,7 @@
 from .dataframe_formatter import configure_formatter
 from .expr import Expr, WindowFrame
 from .io import read_avro, read_csv, read_json, read_parquet
+from .options import CsvReadOptions
 from .plan import ExecutionPlan, LogicalPlan
 from .record_batch import RecordBatch, RecordBatchStream
 from .user_defined import (
@@ -75,6 +76,7 @@
     "AggregateUDF",
     "Catalog",
     "Config",
+    "CsvReadOptions",
     "DFSchema",
     "DataFrame",
     "DataFrameWriteOptions",
@@ -106,6 +108,7 @@
     "lit",
     "literal",
     "object_store",
+    "options",
     "read_avro",
     "read_csv",
     "read_json",