Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,19 @@ You can use the ``--json-cols`` option to automatically detect these JSON column
}
]

.. _cli_use_json_converters:

Automatic JSON deserialization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

You can use the ``--use-json-converters`` flag to automatically deserialize columns that are declared as ``JSON`` (or inferred as such during insertion).

.. code-block:: bash

sqlite-utils query dogs.db "select * from dogs" --use-json-converters

If you use this flag with ``insert``, ``upsert`` or ``bulk``, it will also cause nested Python dictionaries or lists to be stored in columns with a declared type of ``JSON`` rather than ``TEXT``.

.. _cli_query_csv:

Returning CSV or TSV
Expand Down Expand Up @@ -1935,7 +1948,7 @@ Most of the time creating tables by inserting example data is the quickest appro

This will create a table called ``mytable`` with two columns - an integer ``id`` column and a text ``name`` column. It will set the ``id`` column to be the primary key.

You can pass as many column-name column-type pairs as you like. Valid types are ``integer``, ``text``, ``float`` and ``blob``.
You can pass as many column-name column-type pairs as you like. Valid types are ``integer``, ``text``, ``float``, ``blob`` and ``json``.

Pass ``--pk`` more than once for a compound primary key that covers multiple columns.

Expand Down
8 changes: 8 additions & 0 deletions docs/python-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,14 @@ By default, any :ref:`sqlite-utils plugins <plugins>` that implement the :ref:`p

db = Database(memory=True, execute_plugins=False)

You can pass ``use_json_converters=True`` to enable automatic JSON conversion for columns declared as ``JSON``. This will register a custom converter with SQLite that uses ``json.loads()`` to deserialize values:

.. code-block:: python

db = Database("my_database.db", use_json_converters=True)

When this is enabled, Python ``dict``, ``list`` and ``tuple`` values will be stored in columns with a declared type of ``JSON``, and those columns will be automatically deserialized back into Python objects when you retrieve them from the database.

You can pass ``strict=True`` to enable `SQLite STRICT mode <https://www.sqlite.org/stricttables.html>`__ for all tables created using this database object:

.. code-block:: python
Expand Down
51 changes: 45 additions & 6 deletions sqlite_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _close_databases(ctx):
pass


VALID_COLUMN_TYPES = ("INTEGER", "TEXT", "FLOAT", "REAL", "BLOB")
VALID_COLUMN_TYPES = ("INTEGER", "TEXT", "FLOAT", "REAL", "BLOB", "JSON")

UNICODE_ERROR = """
{}
Expand Down Expand Up @@ -962,6 +962,12 @@ def inner(fn):
default=False,
help="Apply STRICT mode to created table",
),
click.option(
"--use-json-converters",
is_flag=True,
default=False,
help="Automatically use JSON columns for nested structures and register JSON converter",
),
)
):
fn = decorator(fn)
Expand Down Expand Up @@ -1006,8 +1012,9 @@ def insert_upsert_implementation(
bulk_sql=None,
functions=None,
strict=False,
use_json_converters=False,
):
db = sqlite_utils.Database(path)
db = sqlite_utils.Database(path, use_json_converters=use_json_converters)
_register_db_for_cleanup(db)
_load_extensions(db, load_extension)
_maybe_register_functions(db, functions)
Expand Down Expand Up @@ -1151,7 +1158,11 @@ def insert_upsert_implementation(

try:
db.table(table).insert_all(
docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs
docs,
pk=pk,
batch_size=batch_size,
alter=alter,
**extra_kwargs,
)
except Exception as e:
if (
Expand Down Expand Up @@ -1248,6 +1259,7 @@ def insert(
not_null,
default,
strict,
use_json_converters,
):
"""
Insert records from FILE into a table, creating the table if it
Expand Down Expand Up @@ -1328,6 +1340,7 @@ def insert(
not_null=not_null,
default=default,
strict=strict,
use_json_converters=use_json_converters,
)
except UnicodeDecodeError as ex:
raise click.ClickException(UNICODE_ERROR.format(ex))
Expand Down Expand Up @@ -1365,6 +1378,7 @@ def upsert(
load_extension,
silent,
strict,
use_json_converters,
):
"""
Upsert records based on their primary key. Works like 'insert' but if
Expand Down Expand Up @@ -1411,6 +1425,7 @@ def upsert(
load_extension=load_extension,
silent=silent,
strict=strict,
use_json_converters=use_json_converters,
)
except UnicodeDecodeError as ex:
raise click.ClickException(UNICODE_ERROR.format(ex))
Expand All @@ -1430,6 +1445,12 @@ def upsert(
help="Python code or file path defining custom SQL functions",
multiple=True,
)
@click.option(
"--use-json-converters",
is_flag=True,
default=False,
help="Automatically use JSON columns for nested structures and register JSON converter",
)
@import_options
@load_extension_option
def bulk(
Expand All @@ -1453,6 +1474,7 @@ def bulk(
no_headers,
encoding,
load_extension,
use_json_converters,
):
"""
Execute parameterized SQL against the provided list of documents.
Expand Down Expand Up @@ -1499,6 +1521,7 @@ def bulk(
silent=False,
bulk_sql=sql,
functions=functions,
use_json_converters=use_json_converters,
)
except (OperationalError, sqlite3.IntegrityError) as e:
raise click.ClickException(str(e))
Expand Down Expand Up @@ -1613,7 +1636,7 @@ def create_table(
height float \\
photo blob --pk id

Valid column types are text, integer, float and blob.
Valid column types are text, integer, float, blob and json.
"""
db = sqlite_utils.Database(path)
_register_db_for_cleanup(db)
Expand Down Expand Up @@ -1830,7 +1853,14 @@ def drop_view(path, view, ignore, load_extension):
multiple=True,
)
@load_extension_option
@click.option(
"--use-json-converters",
is_flag=True,
default=False,
help="Automatically use JSON columns for nested structures and register JSON converter",
)
def query(

path,
sql,
attach,
Expand All @@ -1847,6 +1877,7 @@ def query(
param,
load_extension,
functions,
use_json_converters,
):
"""Execute SQL query and return the results as JSON

Expand All @@ -1857,7 +1888,7 @@ def query(
"select * from chickens where age > :age" \\
-p age 1
"""
db = sqlite_utils.Database(path)
db = sqlite_utils.Database(path, use_json_converters=use_json_converters)
_register_db_for_cleanup(db)
for alias, attach_path in attach:
db.attach(alias, attach_path)
Expand Down Expand Up @@ -1939,7 +1970,14 @@ def query(
is_flag=True,
help="Analyze resulting tables and output results",
)
@click.option(
"--use-json-converters",
is_flag=True,
default=False,
help="Automatically use JSON columns for nested structures and register JSON converter",
)
@load_extension_option

def memory(
paths,
sql,
Expand All @@ -1964,6 +2002,7 @@ def memory(
save,
analyze,
load_extension,
use_json_converters,
return_db=False,
):
"""Execute SQL query against an in-memory database, optionally populated by imported data
Expand Down Expand Up @@ -1992,7 +2031,7 @@ def memory(
\b
sqlite-utils memory animals.csv --schema
"""
db = sqlite_utils.Database(memory=True)
db = sqlite_utils.Database(memory=True, use_json_converters=use_json_converters)
if not return_db:
_register_db_for_cleanup(db)

Expand Down
31 changes: 25 additions & 6 deletions sqlite_utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ class Default:
"real": "REAL",
"blob": "BLOB",
"bytes": "BLOB",
"JSON": "JSON",
}
# If numpy is available, add more types
if np:
Expand Down Expand Up @@ -326,6 +327,7 @@ class Database:
:param use_old_upsert: set to ``True`` to force the older upsert implementation. See
:ref:`python_api_old_upsert`
:param strict: Apply STRICT mode to all created tables (unless overridden)
:param use_json_converters: Automatically use JSON columns for nested structures and register JSON converter
"""

_counts_table_name = "_counts"
Expand All @@ -344,24 +346,33 @@ def __init__(
execute_plugins: bool = True,
use_old_upsert: bool = False,
strict: bool = False,
use_json_converters: bool = False,
):
self.memory_name = None
self.memory = False
self.use_old_upsert = use_old_upsert
self.use_json_converters = use_json_converters
assert (filename_or_conn is not None and (not memory and not memory_name)) or (
filename_or_conn is None and (memory or memory_name)
), "Either specify a filename_or_conn or pass memory=True"

detect_types = 0
if use_json_converters:
sqlite3.register_converter("JSON", json.loads)
detect_types = sqlite3.PARSE_DECLTYPES

if memory_name:
uri = "file:{}?mode=memory&cache=shared".format(memory_name)
self.conn = sqlite3.connect(
uri,
uri=True,
check_same_thread=False,
detect_types=detect_types,
)
self.memory = True
self.memory_name = memory_name
elif memory or filename_or_conn == ":memory:":
self.conn = sqlite3.connect(":memory:")
self.conn = sqlite3.connect(":memory:", detect_types=detect_types)
self.memory = True
elif isinstance(filename_or_conn, (str, pathlib.Path)):
if recreate and os.path.exists(filename_or_conn):
Expand All @@ -370,9 +381,9 @@ def __init__(
except OSError:
# Avoid mypy and __repr__ errors, see:
# https://github.com/simonw/sqlite-utils/issues/503
self.conn = sqlite3.connect(":memory:")
self.conn = sqlite3.connect(":memory:", detect_types=detect_types)
raise
self.conn = sqlite3.connect(str(filename_or_conn))
self.conn = sqlite3.connect(str(filename_or_conn), detect_types=detect_types)
else:
assert not recreate, "recreate cannot be used with connections, only paths"
self.conn = filename_or_conn
Expand Down Expand Up @@ -1015,6 +1026,8 @@ def sort_key(p):
)
)
column_type_str = COLUMN_TYPE_MAPPING[column_type]
if self.use_json_converters and column_type in (dict, list, tuple):
column_type_str = "JSON"
# Special case for strict tables to map FLOAT to REAL
# Refs https://github.com/simonw/sqlite-utils/issues/644
if strict and column_type_str == "FLOAT":
Expand Down Expand Up @@ -3557,9 +3570,13 @@ def insert_all(
if list_mode:
# Convert list records to dicts for type detection
chunk_as_dicts = [dict(zip(column_names, row)) for row in chunk]
column_types = suggest_column_types(chunk_as_dicts)
column_types = suggest_column_types(
chunk_as_dicts, json_converters=self.db.use_json_converters
)
else:
column_types = suggest_column_types(chunk) # type: ignore[arg-type]
column_types = suggest_column_types(
chunk, json_converters=self.db.use_json_converters
) # type: ignore[arg-type]
if extracts:
for col in extracts:
if col in column_types:
Expand Down Expand Up @@ -3746,7 +3763,9 @@ def upsert_all(
)

def add_missing_columns(self, records: Iterable[Dict[str, Any]]) -> "Table":
needed_columns = suggest_column_types(records)
needed_columns = suggest_column_types(
records, json_converters=self.db.use_json_converters
)
current_columns = {c.lower() for c in self.columns_dict}
for col_name, col_type in needed_columns.items():
if col_name.lower() not in current_columns:
Expand Down
21 changes: 16 additions & 5 deletions sqlite_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,18 @@ def find_spatialite() -> Optional[str]:

def suggest_column_types(
    records: Iterable[Dict[str, Any]],
    json_converters: bool = False,
) -> Dict[str, type]:
    """
    Suggest a Python type for each column across the given records.

    Scans every record and collects the set of Python types observed for each
    key, then delegates to ``types_for_column_types()`` to reduce each set to
    a single suggested type.

    :param records: iterable of dictionaries representing rows
    :param json_converters: when ``True``, nested container values
        (``list``/``dict``/``tuple``) are preserved as container types so the
        caller can map them to ``JSON`` columns; when ``False`` they are
        coerced to ``str`` by ``types_for_column_types()``
    :returns: mapping of column name to suggested Python type
    """
    # Accumulate the full set of observed types per key; a key missing from
    # some records simply contributes fewer entries to its set.
    all_column_types: Dict[str, Set[type]] = {}
    for record in records:
        for key, value in record.items():
            all_column_types.setdefault(key, set()).add(type(value))
    return types_for_column_types(all_column_types, json_converters=json_converters)


def types_for_column_types(
all_column_types: Dict[str, Set[type]],
json_converters: bool = False,
) -> Dict[str, type]:
column_types: Dict[str, type] = {}
for key, types in all_column_types.items():
Expand All @@ -153,10 +155,19 @@ def types_for_column_types(
t = str
elif len(types) == 1:
t = list(types)[0]
# But if it's a subclass of list / tuple / dict, use str
# instead as we will be storing it as JSON in the table
for superclass in (list, tuple, dict):
if issubclass(t, superclass):
if json_converters:
# Normalize subclasses of list / dict to the base class
# so they can be handled by the mapping in db.py
for superclass in (list, dict):
if issubclass(t, superclass):
t = superclass
break
elif issubclass(t, (list, dict)):
t = str
if issubclass(t, tuple):
if json_converters:
t = tuple
else:
t = str
elif {int, bool}.issuperset(types):
t = int
Expand Down
Loading