Skip to content

Commit 8fc3e1c

Browse files
committed
feat: enhance SessionContext to support automatic registration of Python objects via session config
1 parent db2d239 commit 8fc3e1c

File tree

3 files changed

+45
-39
lines changed

3 files changed

+45
-39
lines changed

docs/source/user-guide/dataframe/index.rst

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -228,10 +228,10 @@ Core Classes
228228
* :py:meth:`~datafusion.SessionContext.from_pandas` - Create from Pandas DataFrame
229229
* :py:meth:`~datafusion.SessionContext.from_arrow` - Create from Arrow data
230230

231-
``SessionContext`` automatically resolves SQL table names that match
232-
in-scope Python data objects. When ``auto_register_python_objects`` is
233-
enabled (the default), a query such as ``ctx.sql("SELECT * FROM pdf")``
234-
will register a pandas or PyArrow object named ``pdf`` without calling
231+
``SessionContext`` can automatically resolve SQL table names that match
232+
in-scope Python data objects. When automatic lookup is enabled, a query
233+
such as ``ctx.sql("SELECT * FROM pdf")`` will register a pandas or
234+
PyArrow object named ``pdf`` without calling
235235
:py:meth:`~datafusion.SessionContext.from_pandas` or
236236
:py:meth:`~datafusion.SessionContext.from_arrow` explicitly. This requires
237237
the corresponding library (``pandas`` for pandas objects, ``pyarrow`` for
@@ -242,16 +242,18 @@ Core Classes
242242
import pandas as pd
243243
from datafusion import SessionContext
244244
245-
ctx = SessionContext()
245+
ctx = SessionContext(auto_register_python_objects=True)
246246
pdf = pd.DataFrame({"value": [1, 2, 3]})
247247
248248
df = ctx.sql("SELECT SUM(value) AS total FROM pdf")
249249
print(df.to_pandas()) # automatically registers `pdf`
250250
251-
To opt out, either pass ``auto_register_python_objects=False`` when
252-
constructing the session, or call
253-
:py:meth:`~datafusion.SessionContext.set_python_table_lookup` with
254-
``False`` to require explicit registration.
251+
Automatic lookup is disabled by default. Enable it by passing
252+
``auto_register_python_objects=True`` when constructing the session or by
253+
configuring :py:class:`~datafusion.SessionConfig` with
254+
:py:meth:`~datafusion.SessionConfig.with_python_table_lookup`. Use
255+
:py:meth:`~datafusion.SessionContext.set_python_table_lookup` to toggle the
256+
behavior at runtime.
255257

256258
See: :py:class:`datafusion.SessionContext`
257259

python/datafusion/context.py

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,10 @@ def __init__(
524524
runtime: Runtime configuration options.
525525
auto_register_python_objects: Automatically register referenced
526526
Python objects (such as pandas or PyArrow data) when ``sql``
527-
queries reference them by name.
527+
queries reference them by name. When omitted, this defaults to
528+
the value configured via
529+
:py:meth:`~datafusion.SessionConfig.with_python_table_lookup`
530+
(``False`` unless explicitly enabled).
528531
auto_register_python_variables: Deprecated alias for
529532
``auto_register_python_objects``. It is used only when that argument
530533
is omitted; passing conflicting values for both raises ``ValueError``.
@@ -543,33 +546,34 @@ def __init__(
543546
config.config_internal if config is not None else None,
544547
runtime.config_internal if runtime is not None else None,
545548
)
549+
546550
if auto_register_python_variables is not None:
547551
warnings.warn(
548552
_AUTO_REGISTER_PYTHON_VARIABLES_DEPRECATED,
549553
DeprecationWarning,
550554
stacklevel=2,
551555
)
552556

553-
if (
554-
auto_register_python_objects is not None
555-
and auto_register_python_variables is not None
556-
and auto_register_python_objects != auto_register_python_variables
557-
):
558-
conflict_message = (
559-
"auto_register_python_objects and auto_register_python_variables "
560-
"were provided with conflicting values."
561-
)
562-
raise ValueError(conflict_message)
557+
if auto_register_python_variables is not None and auto_register_python_objects is not None:
558+
if auto_register_python_objects != auto_register_python_variables:
559+
conflict_message = (
560+
"auto_register_python_objects and auto_register_python_variables "
561+
"were provided with conflicting values."
562+
)
563+
raise ValueError(conflict_message)
563564

564-
if auto_register_python_objects is None:
565-
if auto_register_python_variables is None:
566-
auto_python_table_lookup = True
567-
else:
568-
auto_python_table_lookup = auto_register_python_variables
569-
else:
565+
# Determine the final value for python table lookup
566+
if auto_register_python_objects is not None:
570567
auto_python_table_lookup = auto_register_python_objects
568+
elif auto_register_python_variables is not None:
569+
auto_python_table_lookup = auto_register_python_variables
570+
else:
571+
# Default to session config value or False if not configured
572+
auto_python_table_lookup = getattr(
573+
config, "_python_table_lookup", False
574+
)
571575

572-
self._auto_python_table_lookup = auto_python_table_lookup
576+
self._auto_python_table_lookup = bool(auto_python_table_lookup)
573577

574578
def __repr__(self) -> str:
575579
"""Print a string representation of the Session Context."""
@@ -597,18 +601,18 @@ def enable_url_table(self) -> SessionContext:
597601
obj = klass.__new__(klass)
598602
obj.ctx = self.ctx.enable_url_table()
599603
obj._auto_python_table_lookup = getattr(
600-
self, "_auto_python_table_lookup", True
604+
self, "_auto_python_table_lookup", False
601605
)
602606
return obj
603607

604608
def set_python_table_lookup(self, enabled: bool = True) -> SessionContext:
605609
"""Enable or disable automatic registration of Python objects in SQL.
606610
607611
Args:
608-
enabled: When ``True`` (default), SQL queries automatically attempt
609-
to resolve missing table names by looking up Python objects in
610-
the caller's scope. When ``False``, missing tables will raise an
611-
error unless they have been explicitly registered.
612+
enabled: When ``True``, SQL queries automatically attempt to
613+
resolve missing table names by looking up Python objects in the
614+
caller's scope. Use ``False`` to require explicit registration
615+
of any referenced tables.
612616
613617
Returns:
614618
The current :py:class:`SessionContext` instance for chaining.
@@ -624,7 +628,7 @@ def auto_register_python_variables(self) -> bool:
624628
DeprecationWarning,
625629
stacklevel=2,
626630
)
627-
return getattr(self, "_auto_python_table_lookup", True)
631+
return bool(getattr(self, "_auto_python_table_lookup", False))
628632

629633
@auto_register_python_variables.setter
630634
def auto_register_python_variables(self, enabled: bool) -> None:
@@ -633,7 +637,7 @@ def auto_register_python_variables(self, enabled: bool) -> None:
633637
DeprecationWarning,
634638
stacklevel=2,
635639
)
636-
self.set_python_table_lookup(enabled)
640+
self.set_python_table_lookup(bool(enabled))
637641

638642
def register_object_store(
639643
self, schema: str, store: Any, host: str | None = None
@@ -709,7 +713,7 @@ def _execute_sql() -> DataFrame:
709713
try:
710714
return _execute_sql()
711715
except Exception as err:
712-
if not getattr(self, "_auto_python_table_lookup", True):
716+
if not getattr(self, "_auto_python_table_lookup", False):
713717
raise
714718

715719
missing_tables = self._extract_missing_table_names(err)

python/tests/test_context.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -772,10 +772,10 @@ def test_sql_with_options_no_statements(ctx):
772772
ctx.sql_with_options(sql, options=options)
773773

774774

775-
def test_sql_auto_register_pandas():
775+
def test_session_config_python_table_lookup_enables_auto_registration():
776776
pd = pytest.importorskip("pandas")
777777

778-
ctx = SessionContext()
778+
ctx = SessionContext(config=SessionConfig().with_python_table_lookup(True))
779779
pdf = pd.DataFrame({"value": [1, 2, 3]})
780780
assert len(pdf) == 3
781781

@@ -784,7 +784,7 @@ def test_sql_auto_register_pandas():
784784

785785

786786
def test_sql_auto_register_arrow():
787-
ctx = SessionContext()
787+
ctx = SessionContext(auto_register_python_objects=True)
788788
arrow_table = pa.table({"value": [1, 2, 3, 4]})
789789
assert arrow_table.num_rows == 4
790790

@@ -795,7 +795,7 @@ def test_sql_auto_register_arrow():
795795
def test_sql_auto_register_disabled():
796796
pd = pytest.importorskip("pandas")
797797

798-
ctx = SessionContext(auto_register_python_objects=False)
798+
ctx = SessionContext()
799799
pdf = pd.DataFrame({"value": [1, 2, 3]})
800800
assert len(pdf) == 3
801801

0 commit comments

Comments
 (0)