Skip to content

Commit 5ed9d36

Browse files
committed
docs: update deprecation warnings and references for TableProvider to use Table instead
1 parent cc1a230 commit 5ed9d36

File tree

10 files changed

+214
-133
lines changed

10 files changed

+214
-133
lines changed

docs/source/user-guide/data-sources.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,9 @@ work with custom table providers from Python libraries such as Delta Lake.
167167
:py:meth:`~datafusion.context.SessionContext.register_table_provider` is
168168
deprecated. Use
169169
:py:meth:`~datafusion.context.SessionContext.register_table` with a
170-
:py:class:`~datafusion.TableProvider` instead.
170+
:py:class:`~datafusion.Table` instead. The
171+
:py:class:`~datafusion.table_provider.TableProvider` compatibility shim continues
172+
to work but emits :py:class:`DeprecationWarning` when used.
171173

172174
On older versions of ``deltalake`` (prior to 0.22) you can use the
173175
`Arrow DataSet <https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Dataset.html>`_

python/datafusion/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
from .io import read_avro, read_csv, read_json, read_parquet
5151
from .plan import ExecutionPlan, LogicalPlan
5252
from .record_batch import RecordBatch, RecordBatchStream
53-
from .table_provider import TableProvider
53+
from .table_provider import TableProvider # Deprecated compatibility shim
5454
from .user_defined import (
5555
Accumulator,
5656
AggregateUDF,

python/datafusion/catalog.py

Lines changed: 98 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@
2020
from __future__ import annotations
2121

2222
from abc import ABC, abstractmethod
23-
from typing import TYPE_CHECKING, Protocol
23+
from typing import TYPE_CHECKING, Any, Protocol
24+
25+
import warnings
2426

2527
import datafusion._internal as df_internal
28+
from datafusion._internal import EXPECTED_PROVIDER_MSG
2629
from datafusion.utils import _normalize_table_provider
2730

2831
if TYPE_CHECKING:
@@ -136,7 +139,9 @@ def register_table(
136139
"""Register a table or table provider in this schema.
137140
138141
Objects implementing ``__datafusion_table_provider__`` are also supported
139-
and treated as :class:`TableProvider` instances.
142+
and treated as table provider instances. The deprecated
143+
:class:`~datafusion.table_provider.TableProvider` wrapper remains accepted
144+
for backwards compatibility.
140145
"""
141146
provider = _normalize_table_provider(table)
142147
return self._raw_schema.register_table(name, provider)
@@ -151,31 +156,108 @@ class Database(Schema):
151156
"""See `Schema`."""
152157

153158

159+
_InternalRawTable = df_internal.catalog.RawTable
160+
_InternalTableProvider = df_internal.TableProvider
161+
162+
# Keep in sync with ``datafusion._internal.TableProvider.from_view``.
163+
_FROM_VIEW_WARN_STACKLEVEL = 2
164+
165+
154166
class Table:
155-
"""DataFusion table."""
167+
"""DataFusion table or table provider wrapper."""
156168

157-
def __init__(self, table: df_internal.catalog.RawTable) -> None:
158-
"""This constructor is not typically called by the end user."""
159-
self.table = table
169+
__slots__ = ("_table",)
170+
171+
def __init__(
172+
self,
173+
table: _InternalRawTable | _InternalTableProvider | Table,
174+
) -> None:
175+
"""Wrap a low level table or table provider."""
176+
177+
if isinstance(table, Table):
178+
table = table.table
179+
180+
if not isinstance(table, (_InternalRawTable, _InternalTableProvider)):
181+
raise TypeError(EXPECTED_PROVIDER_MSG)
182+
183+
self._table = table
184+
185+
def __getattribute__(self, name: str) -> Any:
186+
"""Restrict provider-specific helpers to compatible tables."""
187+
188+
if name == "__datafusion_table_provider__":
189+
table = object.__getattribute__(self, "_table")
190+
if not hasattr(table, "__datafusion_table_provider__"):
191+
raise AttributeError(name)
192+
return object.__getattribute__(self, name)
160193

161194
def __repr__(self) -> str:
162195
"""Print a string representation of the table."""
163-
return self.table.__repr__()
196+
return repr(self._table)
164197

165-
@staticmethod
166-
def from_dataset(dataset: pa.dataset.Dataset) -> Table:
167-
"""Turn a pyarrow Dataset into a Table."""
168-
return Table(df_internal.catalog.RawTable.from_dataset(dataset))
198+
@property
199+
def table(self) -> _InternalRawTable | _InternalTableProvider:
200+
"""Return the wrapped low level table object."""
201+
return self._table
202+
203+
@classmethod
204+
def from_dataset(cls, dataset: pa.dataset.Dataset) -> Table:
205+
"""Turn a :mod:`pyarrow.dataset` ``Dataset`` into a :class:`Table`."""
206+
207+
return cls(_InternalRawTable.from_dataset(dataset))
208+
209+
@classmethod
210+
def from_capsule(cls, capsule: Any) -> Table:
211+
"""Create a :class:`Table` from a PyCapsule exported provider."""
212+
213+
provider = _InternalTableProvider.from_capsule(capsule)
214+
return cls(provider)
215+
216+
@classmethod
217+
def from_dataframe(cls, df: Any) -> Table:
218+
"""Create a :class:`Table` from tabular data."""
219+
220+
from datafusion.dataframe import DataFrame as DataFrameWrapper
221+
222+
dataframe = df if isinstance(df, DataFrameWrapper) else DataFrameWrapper(df)
223+
return dataframe.into_view()
224+
225+
@classmethod
226+
def from_view(cls, df: Any) -> Table:
227+
"""Deprecated helper for constructing tables from views."""
228+
229+
from datafusion.dataframe import DataFrame as DataFrameWrapper
230+
231+
if isinstance(df, DataFrameWrapper):
232+
df = df.df
233+
234+
provider = _InternalTableProvider.from_view(df)
235+
warnings.warn(
236+
"Table.from_view is deprecated; use DataFrame.into_view or "
237+
"Table.from_dataframe instead.",
238+
category=DeprecationWarning,
239+
stacklevel=_FROM_VIEW_WARN_STACKLEVEL,
240+
)
241+
return cls(provider)
169242

170243
@property
171244
def schema(self) -> pa.Schema:
172245
"""Returns the schema associated with this table."""
173-
return self.table.schema
246+
return self._table.schema
174247

175248
@property
176249
def kind(self) -> str:
177250
"""Returns the kind of table."""
178-
return self.table.kind
251+
return self._table.kind
252+
253+
def __datafusion_table_provider__(self) -> Any:
254+
"""Expose the wrapped provider for FFI integrations."""
255+
256+
exporter = getattr(self._table, "__datafusion_table_provider__", None)
257+
if exporter is None:
258+
msg = "Underlying object does not export __datafusion_table_provider__()"
259+
raise AttributeError(msg)
260+
return exporter()
179261

180262

181263
class CatalogProvider(ABC):
@@ -241,7 +323,9 @@ def register_table( # noqa: B027
241323
not need to implement this method.
242324
243325
Objects implementing ``__datafusion_table_provider__`` are also supported
244-
and treated as :class:`TableProvider` instances.
326+
and treated as table provider instances. The deprecated
327+
:class:`~datafusion.table_provider.TableProvider` wrapper remains accepted
328+
for backwards compatibility.
245329
"""
246330

247331
def deregister_table(self, name: str, cascade: bool) -> None: # noqa: B027

python/datafusion/context.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -754,23 +754,25 @@ def register_view(self, name: str, df: DataFrame) -> None:
754754
def register_table(
755755
self, name: str, table: Table | TableProvider | TableProviderExportable
756756
) -> None:
757-
"""Register a Table or TableProvider.
757+
"""Register a :py:class:`~datafusion.Table` with this context.
758758
759759
The registered table can be referenced from SQL statements executed against
760760
this context.
761761
762762
Plain :py:class:`~datafusion.dataframe.DataFrame` objects are not supported;
763763
convert them first with :meth:`datafusion.dataframe.DataFrame.into_view` or
764-
:meth:`datafusion.TableProvider.from_dataframe`.
764+
:meth:`datafusion.Table.from_dataframe`.
765765
766766
Objects implementing ``__datafusion_table_provider__`` are also supported
767-
and treated as :py:class:`~datafusion.TableProvider` instances.
767+
and treated as table provider instances. The deprecated
768+
:py:class:`~datafusion.table_provider.TableProvider` wrapper remains accepted
769+
for backwards compatibility.
768770
769771
Args:
770772
name: Name of the resultant table.
771-
table: DataFusion :class:`Table`, :class:`TableProvider`, or any object
772-
implementing ``__datafusion_table_provider__`` to add to the session
773-
context.
773+
table: DataFusion :class:`Table`, deprecated :class:`TableProvider`, or
774+
any object implementing ``__datafusion_table_provider__`` to add to
775+
the session context.
774776
"""
775777
provider = _normalize_table_provider(table)
776778
self.ctx.register_table(name, provider)
@@ -800,7 +802,7 @@ def register_table_provider(
800802
Deprecated: use :meth:`register_table` instead.
801803
802804
Objects implementing ``__datafusion_table_provider__`` are also supported
803-
and treated as :py:class:`~datafusion.TableProvider` instances.
805+
and treated as table provider instances.
804806
"""
805807
warnings.warn(
806808
"register_table_provider is deprecated; use register_table",

python/datafusion/dataframe.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
import polars as pl
6161
import pyarrow as pa
6262

63-
from datafusion.table_provider import TableProvider
63+
from datafusion.catalog import Table
6464

6565
from enum import Enum
6666

@@ -315,8 +315,8 @@ def __init__(self, df: DataFrameInternal) -> None:
315315
"""
316316
self.df = df
317317

318-
def into_view(self) -> TableProvider:
319-
"""Convert ``DataFrame`` into a ``TableProvider`` view for registration.
318+
def into_view(self) -> Table:
319+
"""Convert ``DataFrame`` into a :class:`~datafusion.Table` for registration.
320320
321321
This is the preferred way to obtain a view for
322322
:py:meth:`~datafusion.context.SessionContext.register_table` for several reasons:
@@ -325,13 +325,13 @@ def into_view(self) -> TableProvider:
325325
``DataFrame.into_view()`` method without intermediate delegations.
326326
2. **Clear semantics**: The ``into_`` prefix follows Rust conventions,
327327
indicating conversion from one type to another.
328-
3. **Canonical method**: Other approaches like ``TableProvider.from_dataframe``
328+
3. **Canonical method**: Other approaches like ``Table.from_dataframe``
329329
delegate to this method internally, making this the single source of truth.
330-
4. **Deprecated alternatives**: The older ``TableProvider.from_view`` helper
330+
4. **Deprecated alternatives**: The older ``Table.from_view`` helper
331331
is deprecated and issues warnings when used.
332332
333-
``datafusion.TableProvider.from_dataframe`` calls this method under the hood,
334-
and the older ``TableProvider.from_view`` helper is deprecated.
333+
``datafusion.Table.from_dataframe`` calls this method under the hood, and the
334+
older ``Table.from_view`` helper is deprecated.
335335
336336
The ``DataFrame`` remains valid after conversion, so it can still be used for
337337
additional queries alongside the returned view.
@@ -345,9 +345,9 @@ def into_view(self) -> TableProvider:
345345
>>> df.collect() # The DataFrame is still usable
346346
>>> ctx.sql("SELECT value FROM values_view").collect()
347347
"""
348-
from datafusion.table_provider import TableProvider as _TableProvider
348+
from datafusion.catalog import Table as _Table
349349

350-
return _TableProvider(self.df.into_view())
350+
return _Table(self.df.into_view())
351351

352352
def __getitem__(self, key: str | list[str]) -> DataFrame:
353353
"""Return a new :py:class`DataFrame` with the specified column or columns.

0 commit comments

Comments
 (0)