Skip to content

Commit 666e54c

Browse files
Add automatic JSON deserialization feature
1 parent 8d74ffc commit 666e54c

File tree

7 files changed

+374
-18
lines changed

7 files changed

+374
-18
lines changed

docs/cli.rst

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,19 @@ You can use the ``--json-cols`` option to automatically detect these JSON column
176176
}
177177
]
178178
179+
.. _cli_use_json_converters:
180+
181+
Automatic JSON deserialization
182+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183+
184+
You can use the ``--use-json-converters`` flag to automatically deserialize values stored in columns that are declared as ``JSON`` (or inferred as such during insertion).
185+
186+
.. code-block:: bash
187+
188+
sqlite-utils query dogs.db "select * from dogs" --use-json-converters
189+
190+
If you use this flag with ``insert``, ``upsert`` or ``bulk``, it will also cause nested Python dictionaries, lists or tuples to be stored in columns with a declared type of ``JSON`` rather than ``TEXT``.
191+
179192
.. _cli_query_csv:
180193

181194
Returning CSV or TSV
@@ -1935,7 +1948,7 @@ Most of the time creating tables by inserting example data is the quickest appro
19351948
19361949
This will create a table called ``mytable`` with two columns - an integer ``id`` column and a text ``name`` column. It will set the ``id`` column to be the primary key.
19371950

1938-
You can pass as many column-name column-type pairs as you like. Valid types are ``integer``, ``text``, ``float`` and ``blob``.
1951+
You can pass as many column-name column-type pairs as you like. Valid types are ``integer``, ``text``, ``float``, ``blob`` and ``json``.
19391952

19401953
Pass ``--pk`` more than once for a compound primary key that covers multiple columns.
19411954

docs/python-api.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,14 @@ By default, any :ref:`sqlite-utils plugins <plugins>` that implement the :ref:`p
117117
118118
db = Database(memory=True, execute_plugins=False)
119119
120+
You can pass ``use_json_converters=True`` to enable automatic JSON conversion for columns declared as ``JSON``. This registers a converter with SQLite that uses ``json.loads()`` to deserialize values read from those columns:
121+
122+
.. code-block:: python
123+
124+
db = Database("my_database.db", use_json_converters=True)
125+
126+
When this is enabled, Python ``dict``, ``list`` and ``tuple`` values will be stored in columns with a declared type of ``JSON``, and those columns will be automatically deserialized back into Python objects when you retrieve them from the database. Note that since JSON has no tuple type, a stored ``tuple`` will be deserialized back as a ``list``.
127+
120128
You can pass ``strict=True`` to enable `SQLite STRICT mode <https://www.sqlite.org/stricttables.html>`__ for all tables created using this database object:
121129

122130
.. code-block:: python

sqlite_utils/cli.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def _close_databases(ctx):
6666
pass
6767

6868

69-
VALID_COLUMN_TYPES = ("INTEGER", "TEXT", "FLOAT", "REAL", "BLOB")
69+
VALID_COLUMN_TYPES = ("INTEGER", "TEXT", "FLOAT", "REAL", "BLOB", "JSON")
7070

7171
UNICODE_ERROR = """
7272
{}
@@ -962,6 +962,12 @@ def inner(fn):
962962
default=False,
963963
help="Apply STRICT mode to created table",
964964
),
965+
click.option(
966+
"--use-json-converters",
967+
is_flag=True,
968+
default=False,
969+
help="Automatically use JSON columns for nested structures and register JSON converter",
970+
),
965971
)
966972
):
967973
fn = decorator(fn)
@@ -1006,8 +1012,9 @@ def insert_upsert_implementation(
10061012
bulk_sql=None,
10071013
functions=None,
10081014
strict=False,
1015+
use_json_converters=False,
10091016
):
1010-
db = sqlite_utils.Database(path)
1017+
db = sqlite_utils.Database(path, use_json_converters=use_json_converters)
10111018
_register_db_for_cleanup(db)
10121019
_load_extensions(db, load_extension)
10131020
_maybe_register_functions(db, functions)
@@ -1151,7 +1158,11 @@ def insert_upsert_implementation(
11511158

11521159
try:
11531160
db.table(table).insert_all(
1154-
docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs
1161+
docs,
1162+
pk=pk,
1163+
batch_size=batch_size,
1164+
alter=alter,
1165+
**extra_kwargs,
11551166
)
11561167
except Exception as e:
11571168
if (
@@ -1248,6 +1259,7 @@ def insert(
12481259
not_null,
12491260
default,
12501261
strict,
1262+
use_json_converters,
12511263
):
12521264
"""
12531265
Insert records from FILE into a table, creating the table if it
@@ -1328,6 +1340,7 @@ def insert(
13281340
not_null=not_null,
13291341
default=default,
13301342
strict=strict,
1343+
use_json_converters=use_json_converters,
13311344
)
13321345
except UnicodeDecodeError as ex:
13331346
raise click.ClickException(UNICODE_ERROR.format(ex))
@@ -1365,6 +1378,7 @@ def upsert(
13651378
load_extension,
13661379
silent,
13671380
strict,
1381+
use_json_converters,
13681382
):
13691383
"""
13701384
Upsert records based on their primary key. Works like 'insert' but if
@@ -1411,6 +1425,7 @@ def upsert(
14111425
load_extension=load_extension,
14121426
silent=silent,
14131427
strict=strict,
1428+
use_json_converters=use_json_converters,
14141429
)
14151430
except UnicodeDecodeError as ex:
14161431
raise click.ClickException(UNICODE_ERROR.format(ex))
@@ -1430,6 +1445,12 @@ def upsert(
14301445
help="Python code or file path defining custom SQL functions",
14311446
multiple=True,
14321447
)
1448+
@click.option(
1449+
"--use-json-converters",
1450+
is_flag=True,
1451+
default=False,
1452+
help="Automatically use JSON columns for nested structures and register JSON converter",
1453+
)
14331454
@import_options
14341455
@load_extension_option
14351456
def bulk(
@@ -1453,6 +1474,7 @@ def bulk(
14531474
no_headers,
14541475
encoding,
14551476
load_extension,
1477+
use_json_converters,
14561478
):
14571479
"""
14581480
Execute parameterized SQL against the provided list of documents.
@@ -1499,6 +1521,7 @@ def bulk(
14991521
silent=False,
15001522
bulk_sql=sql,
15011523
functions=functions,
1524+
use_json_converters=use_json_converters,
15021525
)
15031526
except (OperationalError, sqlite3.IntegrityError) as e:
15041527
raise click.ClickException(str(e))
@@ -1613,7 +1636,7 @@ def create_table(
16131636
height float \\
16141637
photo blob --pk id
16151638
1616-
Valid column types are text, integer, float and blob.
1639+
Valid column types are text, integer, float, blob and json.
16171640
"""
16181641
db = sqlite_utils.Database(path)
16191642
_register_db_for_cleanup(db)
@@ -1830,7 +1853,14 @@ def drop_view(path, view, ignore, load_extension):
18301853
multiple=True,
18311854
)
18321855
@load_extension_option
1856+
@click.option(
1857+
"--use-json-converters",
1858+
is_flag=True,
1859+
default=False,
1860+
help="Automatically use JSON columns for nested structures and register JSON converter",
1861+
)
18331862
def query(
1863+
18341864
path,
18351865
sql,
18361866
attach,
@@ -1847,6 +1877,7 @@ def query(
18471877
param,
18481878
load_extension,
18491879
functions,
1880+
use_json_converters,
18501881
):
18511882
"""Execute SQL query and return the results as JSON
18521883
@@ -1857,7 +1888,7 @@ def query(
18571888
"select * from chickens where age > :age" \\
18581889
-p age 1
18591890
"""
1860-
db = sqlite_utils.Database(path)
1891+
db = sqlite_utils.Database(path, use_json_converters=use_json_converters)
18611892
_register_db_for_cleanup(db)
18621893
for alias, attach_path in attach:
18631894
db.attach(alias, attach_path)
@@ -1939,7 +1970,14 @@ def query(
19391970
is_flag=True,
19401971
help="Analyze resulting tables and output results",
19411972
)
1973+
@click.option(
1974+
"--use-json-converters",
1975+
is_flag=True,
1976+
default=False,
1977+
help="Automatically use JSON columns for nested structures and register JSON converter",
1978+
)
19421979
@load_extension_option
1980+
19431981
def memory(
19441982
paths,
19451983
sql,
@@ -1964,6 +2002,7 @@ def memory(
19642002
save,
19652003
analyze,
19662004
load_extension,
2005+
use_json_converters,
19672006
return_db=False,
19682007
):
19692008
"""Execute SQL query against an in-memory database, optionally populated by imported data
@@ -1992,7 +2031,7 @@ def memory(
19922031
\b
19932032
sqlite-utils memory animals.csv --schema
19942033
"""
1995-
db = sqlite_utils.Database(memory=True)
2034+
db = sqlite_utils.Database(memory=True, use_json_converters=use_json_converters)
19962035
if not return_db:
19972036
_register_db_for_cleanup(db)
19982037

sqlite_utils/db.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ class Default:
222222
"real": "REAL",
223223
"blob": "BLOB",
224224
"bytes": "BLOB",
225+
"JSON": "JSON",
225226
}
226227
# If numpy is available, add more types
227228
if np:
@@ -326,6 +327,7 @@ class Database:
326327
:param use_old_upsert: set to ``True`` to force the older upsert implementation. See
327328
:ref:`python_api_old_upsert`
328329
:param strict: Apply STRICT mode to all created tables (unless overridden)
330+
:param use_json_converters: Automatically use JSON columns for nested structures and register JSON converter
329331
"""
330332

331333
_counts_table_name = "_counts"
@@ -344,24 +346,33 @@ def __init__(
344346
execute_plugins: bool = True,
345347
use_old_upsert: bool = False,
346348
strict: bool = False,
349+
use_json_converters: bool = False,
347350
):
348351
self.memory_name = None
349352
self.memory = False
350353
self.use_old_upsert = use_old_upsert
354+
self.use_json_converters = use_json_converters
351355
assert (filename_or_conn is not None and (not memory and not memory_name)) or (
352356
filename_or_conn is None and (memory or memory_name)
353357
), "Either specify a filename_or_conn or pass memory=True"
358+
359+
detect_types = 0
360+
if use_json_converters:
361+
sqlite3.register_converter("JSON", json.loads)
362+
detect_types = sqlite3.PARSE_DECLTYPES
363+
354364
if memory_name:
355365
uri = "file:{}?mode=memory&cache=shared".format(memory_name)
356366
self.conn = sqlite3.connect(
357367
uri,
358368
uri=True,
359369
check_same_thread=False,
370+
detect_types=detect_types,
360371
)
361372
self.memory = True
362373
self.memory_name = memory_name
363374
elif memory or filename_or_conn == ":memory:":
364-
self.conn = sqlite3.connect(":memory:")
375+
self.conn = sqlite3.connect(":memory:", detect_types=detect_types)
365376
self.memory = True
366377
elif isinstance(filename_or_conn, (str, pathlib.Path)):
367378
if recreate and os.path.exists(filename_or_conn):
@@ -370,9 +381,9 @@ def __init__(
370381
except OSError:
371382
# Avoid mypy and __repr__ errors, see:
372383
# https://github.com/simonw/sqlite-utils/issues/503
373-
self.conn = sqlite3.connect(":memory:")
384+
self.conn = sqlite3.connect(":memory:", detect_types=detect_types)
374385
raise
375-
self.conn = sqlite3.connect(str(filename_or_conn))
386+
self.conn = sqlite3.connect(str(filename_or_conn), detect_types=detect_types)
376387
else:
377388
assert not recreate, "recreate cannot be used with connections, only paths"
378389
self.conn = filename_or_conn
@@ -1015,6 +1026,8 @@ def sort_key(p):
10151026
)
10161027
)
10171028
column_type_str = COLUMN_TYPE_MAPPING[column_type]
1029+
if self.use_json_converters and column_type in (dict, list, tuple):
1030+
column_type_str = "JSON"
10181031
# Special case for strict tables to map FLOAT to REAL
10191032
# Refs https://github.com/simonw/sqlite-utils/issues/644
10201033
if strict and column_type_str == "FLOAT":
@@ -3557,9 +3570,13 @@ def insert_all(
35573570
if list_mode:
35583571
# Convert list records to dicts for type detection
35593572
chunk_as_dicts = [dict(zip(column_names, row)) for row in chunk]
3560-
column_types = suggest_column_types(chunk_as_dicts)
3573+
column_types = suggest_column_types(
3574+
chunk_as_dicts, json_converters=self.db.use_json_converters
3575+
)
35613576
else:
3562-
column_types = suggest_column_types(chunk) # type: ignore[arg-type]
3577+
column_types = suggest_column_types(
3578+
chunk, json_converters=self.db.use_json_converters
3579+
) # type: ignore[arg-type]
35633580
if extracts:
35643581
for col in extracts:
35653582
if col in column_types:
@@ -3746,7 +3763,9 @@ def upsert_all(
37463763
)
37473764

37483765
def add_missing_columns(self, records: Iterable[Dict[str, Any]]) -> "Table":
3749-
needed_columns = suggest_column_types(records)
3766+
needed_columns = suggest_column_types(
3767+
records, json_converters=self.db.use_json_converters
3768+
)
37503769
current_columns = {c.lower() for c in self.columns_dict}
37513770
for col_name, col_type in needed_columns.items():
37523771
if col_name.lower() not in current_columns:

sqlite_utils/utils.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,16 +132,18 @@ def find_spatialite() -> Optional[str]:
132132

133133
def suggest_column_types(
134134
records: Iterable[Dict[str, Any]],
135+
json_converters: bool = False,
135136
) -> Dict[str, type]:
136137
all_column_types: Dict[str, Set[type]] = {}
137138
for record in records:
138139
for key, value in record.items():
139140
all_column_types.setdefault(key, set()).add(type(value))
140-
return types_for_column_types(all_column_types)
141+
return types_for_column_types(all_column_types, json_converters=json_converters)
141142

142143

143144
def types_for_column_types(
144145
all_column_types: Dict[str, Set[type]],
146+
json_converters: bool = False,
145147
) -> Dict[str, type]:
146148
column_types: Dict[str, type] = {}
147149
for key, types in all_column_types.items():
@@ -153,10 +155,19 @@ def types_for_column_types(
153155
t = str
154156
elif len(types) == 1:
155157
t = list(types)[0]
156-
# But if it's a subclass of list / tuple / dict, use str
157-
# instead as we will be storing it as JSON in the table
158-
for superclass in (list, tuple, dict):
159-
if issubclass(t, superclass):
158+
if json_converters:
159+
# Normalize subclasses of list / dict to the base class
160+
# so they can be handled by the mapping in db.py
161+
for superclass in (list, dict):
162+
if issubclass(t, superclass):
163+
t = superclass
164+
break
165+
elif issubclass(t, (list, dict)):
166+
t = str
167+
if issubclass(t, tuple):
168+
if json_converters:
169+
t = tuple
170+
else:
160171
t = str
161172
elif {int, bool}.issuperset(types):
162173
t = int

0 commit comments

Comments
 (0)