From 55047db79851c45be935790988f00b87c9f9ef2b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 7 May 2025 17:08:49 -0700 Subject: [PATCH 01/11] supports_strict now caches on self._supports_strict --- sqlite_utils/db.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 144330a1b..927e6c676 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -671,16 +671,18 @@ def schema(self) -> str: @property def supports_strict(self) -> bool: "Does this database support STRICT mode?" - try: - table_name = "t{}".format(secrets.token_hex(16)) - with self.conn: - self.conn.execute( - "create table {} (name text) strict".format(table_name) - ) - self.conn.execute("drop table {}".format(table_name)) - return True - except Exception: - return False + if not hasattr(self, "_supports_strict"): + try: + table_name = "t{}".format(secrets.token_hex(16)) + with self.conn: + self.conn.execute( + "create table {} (name text) strict".format(table_name) + ) + self.conn.execute("drop table {}".format(table_name)) + self._supports_strict = True + except Exception: + self._supports_strict = False + return self._supports_strict @property def sqlite_version(self) -> Tuple[int, ...]: From fe3a3d4000c9825f323b83276a1f9b3396dbbe3b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 7 May 2025 17:10:22 -0700 Subject: [PATCH 02/11] WIP new upsert implementation, refs #652 --- docs/python-api.rst | 7 ++ sqlite_utils/db.py | 291 +++++++++++++++++++++++++++---------------- tests/test_cli.py | 7 +- tests/test_tracer.py | 2 +- 4 files changed, 195 insertions(+), 112 deletions(-) diff --git a/docs/python-api.rst b/docs/python-api.rst index c6bf77620..6893e1c47 100644 --- a/docs/python-api.rst +++ b/docs/python-api.rst @@ -927,6 +927,13 @@ An ``upsert_all()`` method is also available, which behaves like ``insert_all()` .. note:: ``.upsert()`` and ``.upsert_all()`` in sqlite-utils 1.x worked like ``.insert(..., replace=True)`` and ``.insert_all(..., replace=True)`` do in 2.x. See `issue #66 `__ for details of this change. +.. _python_api_old_upsert: + +Alternative upserts using INSERT OR IGNORE +------------------------------------------ + +Upserts use ``INSERT INTO ... ON CONFLICT SET``. Prior to ``sqlite-utils 3.x`` (TODO: fill in version) these used a sequence of ``INSERT OR IGNORE`` followed by an ``UPDATE``. This older method is still used for SQLite 3.23.1 and earlier. You can force the older implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor. + .. _python_api_convert: Converting data in columns diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 927e6c676..b960790a8 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -304,6 +304,8 @@ class Database: ``sql, parameters`` every time a SQL query is executed :param use_counts_table: set to ``True`` to use a cached counts table, if available. See :ref:`python_api_cached_table_counts` + :param use_old_upsert: set to ``True`` to force the older upsert implementation. See + :ref:`python_api_old_upsert` :param strict: Apply STRICT mode to all created tables (unless overridden) """ @@ -320,10 +322,12 @@ def __init__( tracer: Optional[Callable] = None, use_counts_table: bool = False, execute_plugins: bool = True, + use_old_upsert: bool = False, strict: bool = False, ): self.memory_name = None self.memory = False + self.use_old_upsert = use_old_upsert assert (filename_or_conn is not None and (not memory and not memory_name)) or ( filename_or_conn is None and (memory or memory_name) ), "Either specify a filename_or_conn or pass memory=True" @@ -684,6 +688,33 @@ def supports_strict(self) -> bool: self._supports_strict = False return self._supports_strict + @property + def supports_on_conflict(self) -> bool: + # SQLite's upsert is implemented as INSERT INTO ... ON CONFLICT DO ... + if not hasattr(self, "_supports_on_conflict"): + try: + table_name = "t{}".format(secrets.token_hex(16)) + with self.conn: + self.conn.execute( + "create table {} (id integer primary key, name text)".format( + table_name + ) + ) + self.conn.execute( + "insert into {} (id, name) values (1, 'one')".format(table_name) + ) + self.conn.execute( + ( + "insert into {} (id, name) values (1, 'two') " + "on conflict do update set name = 'two'" + ).format(table_name) + ) + self.conn.execute("drop table {}".format(table_name)) + self._supports_on_conflict = True + except Exception: + self._supports_on_conflict = False + return self._supports_on_conflict + @property def sqlite_version(self) -> Tuple[int, ...]: "Version of SQLite, as a tuple of integers for example ``(3, 36, 0)``." @@ -2968,102 +2999,128 @@ def build_insert_queries_and_params( replace, ignore, ): - # values is the list of insert data that is passed to the - # .execute() method - but some of them may be replaced by - # new primary keys if we are extracting any columns. - values = [] + """ + Given a list ``chunk`` of records that should be written to *this* table, + return a list of ``(sql, parameters)`` 2-tuples which, when executed in + order, perform the desired INSERT / UPSERT / REPLACE operation. + """ + if (upsert or replace) and not pk: + raise PrimaryKeyRequired("UPSERT / REPLACE needs a primary key") + if hash_id_columns and hash_id is None: hash_id = "id" + extracts = resolve_extracts(extracts) + + # Build a row-list ready for executemany-style flattening + values: list[list] = [] for record in chunk: - record_values = [] - for key in all_columns: - value = jsonify_if_needed( - record.get( - key, - ( - None - if key != hash_id - else hash_record(record, hash_id_columns) - ), - ) - ) - if key in extracts: - extract_table = extracts[key] - value = self.db[extract_table].lookup({"value": value}) - record_values.append(value) - values.append(record_values) - - queries_and_params = [] - if upsert: - if isinstance(pk, str): - pks = [pk] - else: - pks = pk - self.last_pk = None - for record_values in values: - record = dict(zip(all_columns, record_values)) - placeholders = list(pks) - # Need to populate not-null columns too, or INSERT OR IGNORE ignores - # them since it ignores the resulting integrity errors - if not_null: - placeholders.extend(not_null) - sql = "INSERT OR IGNORE INTO [{table}]({cols}) VALUES({placeholders});".format( - table=self.name, - cols=", ".join(["[{}]".format(p) for p in placeholders]), - placeholders=", ".join(["?" for p in placeholders]), - ) - queries_and_params.append( - (sql, [record[col] for col in pks] + ["" for _ in (not_null or [])]) - ) - # UPDATE [book] SET [name] = 'Programming' WHERE [id] = 1001; - set_cols = [col for col in all_columns if col not in pks] - if set_cols: - sql2 = "UPDATE [{table}] SET {pairs} WHERE {wheres}".format( - table=self.name, - pairs=", ".join( - "[{}] = {}".format(col, conversions.get(col, "?")) - for col in set_cols - ), - wheres=" AND ".join("[{}] = ?".format(pk) for pk in pks), - ) - queries_and_params.append( - ( - sql2, - [record[col] for col in set_cols] - + [record[pk] for pk in pks], - ) - ) - # We can populate .last_pk right here - if num_records_processed == 1: - self.last_pk = tuple(record[pk] for pk in pks) - if len(self.last_pk) == 1: - self.last_pk = self.last_pk[0] + row_vals = [] + for col in all_columns: + if col == hash_id: + row_vals.append(hash_record(record, hash_id_columns)) + continue - else: - or_what = "" - if replace: - or_what = "OR REPLACE " - elif ignore: - or_what = "OR IGNORE " - sql = """ - INSERT {or_what}INTO [{table}] ({columns}) VALUES {rows}; - """.strip().format( - or_what=or_what, - table=self.name, - columns=", ".join("[{}]".format(c) for c in all_columns), - rows=", ".join( - "({placeholders})".format( - placeholders=", ".join( - [conversions.get(col, "?") for col in all_columns] + val = record.get(col) + if val is None and not_null and col in not_null: + val = "" + row_vals.append(jsonify_if_needed(val)) + values.append(row_vals) + + columns_sql = ", ".join(f"[{c}]" for c in all_columns) + placeholder_expr = ", ".join(conversions.get(c, "?") for c in all_columns) + row_placeholders_sql = ", ".join(f"({placeholder_expr})" for _ in values) + flat_params = list(itertools.chain.from_iterable(values)) + + # replace=True mean INSERT OR REPLACE INTO + if replace: + sql = ( + f"INSERT OR REPLACE INTO [{self.name}] " + f"({columns_sql}) VALUES {row_placeholders_sql}" + ) + return [(sql, flat_params)] + + # If not an upsert it's an INSERT, maybe with OR IGNORE + if not upsert: + or_ignore = "" + if ignore: + or_ignore = " OR IGNORE" + sql = ( + f"INSERT{or_ignore} INTO [{self.name}] " + f"({columns_sql}) VALUES {row_placeholders_sql}" + ) + return [(sql, flat_params)] + + # Everything from here on is for upsert=True + pk_cols = [pk] if isinstance(pk, str) else list(pk) + non_pk_cols = [c for c in all_columns if c not in pk_cols] + conflict_sql = ", ".join(f"[{c}]" for c in pk_cols) + + if self.db.supports_on_conflict and not self.db.use_old_upsert: + if non_pk_cols: + # DO UPDATE + assignments = [] + for c in non_pk_cols: + if c in conversions: + assignments.append( + f"[{c}] = {conversions[c].replace('?', f'excluded.[{c}]')}" ) - ) - for record in chunk - ), + else: + assignments.append(f"[{c}] = excluded.[{c}]") + do_clause = "DO UPDATE SET " + ", ".join(assignments) + else: + # All columns are in the PK – nothing to update. + do_clause = "DO NOTHING" + + sql = ( + f"INSERT INTO [{self.name}] ({columns_sql}) " + f"VALUES {row_placeholders_sql} " + f"ON CONFLICT({conflict_sql}) {do_clause}" ) - flat_values = list(itertools.chain(*values)) - queries_and_params = [(sql, flat_values)] + return [(sql, flat_params)] + + # At this point we need compatibility UPSERT for SQLite < 3.24.0 + # (INSERT OR IGNORE + second UPDATE stage) + queries_and_params: list[tuple[str, list]] = [] + + insert_sql = ( + f"INSERT OR IGNORE INTO [{self.name}] " + f"({columns_sql}) VALUES {row_placeholders_sql}" + ) + queries_and_params.append((insert_sql, flat_params)) + + # If there is nothing to update we are done. + if not non_pk_cols: + return queries_and_params + # We can use UPDATE … FROM (VALUES …) on SQLite ≥ 3.33.0 + # Older SQLite versions will run this as one UPDATE per row + # – which is what sqlite-utils did prior to this refactor. + alias_cols_sql = ", ".join(pk_cols + non_pk_cols) + + assignments = [] + for c in non_pk_cols: + if c in conversions: + assignments.append(f"[{c}] = {conversions[c].replace('?', f'v.[{c}]')}") + else: + assignments.append(f"[{c}] = v.[{c}]") + assignments_sql = ", ".join(assignments) + + update_sql = ( + f"UPDATE [{self.name}] AS m SET {assignments_sql} " + f"FROM (VALUES {row_placeholders_sql}) " + f"AS v({alias_cols_sql}) " + f"WHERE " + " AND ".join(f"m.[{c}] = v.[{c}]" for c in pk_cols) + ) + + # Parameters for the UPDATE – pk cols first then non-pk cols + update_params: list = [] + for row in values: + row_dict = dict(zip(all_columns, row)) + ordered = [row_dict[c] for c in pk_cols + non_pk_cols] + update_params.extend(ordered) + + queries_and_params.append((update_sql, update_params)) return queries_and_params def insert_chunk( @@ -3081,7 +3138,7 @@ def insert_chunk( num_records_processed, replace, ignore, - ): + ) -> Optional[sqlite3.Cursor]: queries_and_params = self.build_insert_queries_and_params( extracts, chunk, @@ -3096,9 +3153,8 @@ def insert_chunk( replace, ignore, ) - + result = None with self.db.conn: - result = None for query, params in queries_and_params: try: result = self.db.execute(query, params) @@ -3127,7 +3183,7 @@ def insert_chunk( ignore, ) - self.insert_chunk( + result = self.insert_chunk( alter, extracts, second_half, @@ -3145,20 +3201,7 @@ def insert_chunk( else: raise - if num_records_processed == 1 and not upsert: - self.last_rowid = result.lastrowid - self.last_pk = self.last_rowid - # self.last_rowid will be 0 if a "INSERT OR IGNORE" happened - if (hash_id or pk) and self.last_rowid: - row = list(self.rows_where("rowid = ?", [self.last_rowid]))[0] - if hash_id: - self.last_pk = row[hash_id] - elif isinstance(pk, str): - self.last_pk = row[pk] - else: - self.last_pk = tuple(row[p] for p in pk) - - return + return result def insert( self, @@ -3309,6 +3352,7 @@ def insert_all( self.last_pk = None if truncate and self.exists(): self.db.execute("DELETE FROM [{}];".format(self.name)) + result = None for chunk in chunks(itertools.chain([first_record], records), batch_size): chunk = list(chunk) num_records_processed += len(chunk) @@ -3316,6 +3360,12 @@ def insert_all( if not self.exists(): # Use the first batch to derive the table names column_types = suggest_column_types(chunk) + if extracts: + for col in extracts: + if col in column_types: + column_types[col] = ( + int # This will be an integer foreign key + ) column_types.update(columns or {}) self.create( column_types, @@ -3343,7 +3393,7 @@ def insert_all( first = False - self.insert_chunk( + result = self.insert_chunk( alter, extracts, chunk, @@ -3359,6 +3409,33 @@ def insert_all( ignore, ) + # If we only handled a single row populate self.last_pk + if num_records_processed == 1: + # For an insert we need to use result.lastrowid + if not upsert: + self.last_rowid = result.lastrowid + if (hash_id or pk) and self.last_rowid: + # Set self.last_pk to the pk(s) for that rowid + row = list(self.rows_where("rowid = ?", [self.last_rowid]))[0] + if hash_id: + self.last_pk = row[hash_id] + elif isinstance(pk, str): + self.last_pk = row[pk] + else: + self.last_pk = tuple(row[p] for p in pk) + else: + self.last_pk = self.last_rowid + else: + # For an upsert use first_record from earlier + if hash_id: + self.last_pk = hash_record(first_record, hash_id_columns) + else: + self.last_pk = ( + first_record[pk] + if isinstance(pk, str) + else tuple(first_record[p] for p in pk) + ) + if analyze: self.analyze() diff --git a/tests/test_cli.py b/tests/test_cli.py index 4033af641..88763eecf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1117,9 +1117,8 @@ def test_upsert_alter(db_path, tmpdir): ) assert result.exit_code == 1 assert ( - "Error: no such column: age\n\n" - "sql = UPDATE [dogs] SET [age] = ? WHERE [id] = ?\n" - "parameters = [5, 1]" + "Error: table dogs has no column named age\n\n" + "Try using --alter to add additional columns" ) == result.output.strip() # Should succeed with --alter result = CliRunner().invoke( @@ -2248,7 +2247,7 @@ def test_integer_overflow_error(tmpdir): assert result.exit_code == 1 assert result.output == ( "Error: Python int too large to convert to SQLite INTEGER\n\n" - "sql = INSERT INTO [items] ([bignumber]) VALUES (?);\n" + "sql = INSERT INTO [items] ([bignumber]) VALUES (?)\n" "parameters = [34223049823094832094802398430298048240]\n" ) diff --git a/tests/test_tracer.py b/tests/test_tracer.py index 26318ae06..9dfb490d6 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -18,7 +18,7 @@ def test_tracer(): ("select name from sqlite_master where type = 'view'", None), ("CREATE TABLE [dogs] (\n [name] TEXT\n);\n ", None), ("select name from sqlite_master where type = 'view'", None), - ("INSERT INTO [dogs] ([name]) VALUES (?);", ["Cleopaws"]), + ("INSERT INTO [dogs] ([name]) VALUES (?)", ["Cleopaws"]), ("select name from sqlite_master where type = 'view'", None), ( "CREATE VIRTUAL TABLE [dogs_fts] USING FTS5 (\n [name],\n content=[dogs]\n)", From 2ab894dd73f0f14a30edcd643283824a04836570 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 19:15:46 -0700 Subject: [PATCH 03/11] Remove upsert() requires a pk checks --- sqlite_utils/db.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index b960790a8..2b43af617 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -3004,9 +3004,6 @@ def build_insert_queries_and_params( return a list of ``(sql, parameters)`` 2-tuples which, when executed in order, perform the desired INSERT / UPSERT / REPLACE operation. """ - if (upsert or replace) and not pk: - raise PrimaryKeyRequired("UPSERT / REPLACE needs a primary key") - if hash_id_columns and hash_id is None: hash_id = "id" @@ -3319,8 +3316,6 @@ def insert_all( if hash_id_columns and hash_id is None: hash_id = "id" - if upsert and (not pk and not hash_id): - raise PrimaryKeyRequired("upsert() requires a pk") assert not (hash_id and pk), "Use either pk= or hash_id=" if hash_id_columns and (hash_id is None): hash_id = "id" From ac9f8208c7d24b92c983ea7d0f23545904a20e1d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 19:19:43 -0700 Subject: [PATCH 04/11] Put PrimaryKeyRequired back in the right place --- sqlite_utils/db.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 2b43af617..89648e880 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -3316,6 +3316,9 @@ def insert_all( if hash_id_columns and hash_id is None: hash_id = "id" + if upsert and (not pk and not hash_id): + raise PrimaryKeyRequired("upsert() requires a pk") + assert not (hash_id and pk), "Use either pk= or hash_id=" if hash_id_columns and (hash_id is None): hash_id = "id" From 058a8dfa4cc8900dbd4cc31104ba3537bb1589ad Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 19:51:41 -0700 Subject: [PATCH 05/11] Fixed remaining broken tests --- sqlite_utils/db.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 89648e880..922f9fa76 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -3010,19 +3010,26 @@ def build_insert_queries_and_params( extracts = resolve_extracts(extracts) # Build a row-list ready for executemany-style flattening - values: list[list] = [] - for record in chunk: - row_vals = [] - for col in all_columns: - if col == hash_id: - row_vals.append(hash_record(record, hash_id_columns)) - continue + values: List[list] = [] - val = record.get(col) - if val is None and not_null and col in not_null: - val = "" - row_vals.append(jsonify_if_needed(val)) - values.append(row_vals) + for record in chunk: + record_values = [] + for key in all_columns: + value = jsonify_if_needed( + record.get( + key, + ( + None + if key != hash_id + else hash_record(record, hash_id_columns) + ), + ) + ) + if key in extracts: + extract_table = extracts[key] + value = self.db[extract_table].lookup({"value": value}) + record_values.append(value) + values.append(record_values) columns_sql = ", ".join(f"[{c}]" for c in all_columns) placeholder_expr = ", ".join(conversions.get(c, "?") for c in all_columns) From e9a6b4c176c28526de4db00c056ea52536c271df Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 19:56:47 -0700 Subject: [PATCH 06/11] Fixed mypy complaints --- sqlite_utils/db.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 922f9fa76..4a54dfafd 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -3010,7 +3010,7 @@ def build_insert_queries_and_params( extracts = resolve_extracts(extracts) # Build a row-list ready for executemany-style flattening - values: List[list] = [] + values = [] for record in chunk: record_values = [] @@ -3085,7 +3085,7 @@ def build_insert_queries_and_params( # At this point we need compatibility UPSERT for SQLite < 3.24.0 # (INSERT OR IGNORE + second UPDATE stage) - queries_and_params: list[tuple[str, list]] = [] + queries_and_params = [] insert_sql = ( f"INSERT OR IGNORE INTO [{self.name}] " @@ -3118,7 +3118,7 @@ def build_insert_queries_and_params( ) # Parameters for the UPDATE – pk cols first then non-pk cols - update_params: list = [] + update_params = [] for row in values: row_dict = dict(zip(all_columns, row)) ordered = [row_dict[c] for c in pk_cols + non_pk_cols] @@ -3417,7 +3417,7 @@ def insert_all( # If we only handled a single row populate self.last_pk if num_records_processed == 1: # For an insert we need to use result.lastrowid - if not upsert: + if not upsert and result is not None: self.last_rowid = result.lastrowid if (hash_id or pk) and self.last_rowid: # Set self.last_pk to the pk(s) for that rowid From efeba1f18c5688c4dcf152da6e9fa0ec2d4bfb34 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 20:00:00 -0700 Subject: [PATCH 07/11] Prior to sqlite-utils 4.0 Refs https://github.com/simonw/sqlite-utils/issues/652#issuecomment-2864945717 --- docs/python-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/python-api.rst b/docs/python-api.rst index 6893e1c47..2e81fa3f1 100644 --- a/docs/python-api.rst +++ b/docs/python-api.rst @@ -932,7 +932,7 @@ An ``upsert_all()`` method is also available, which behaves like ``insert_all()` Alternative upserts using INSERT OR IGNORE ------------------------------------------ -Upserts use ``INSERT INTO ... ON CONFLICT SET``. Prior to ``sqlite-utils 3.x`` (TODO: fill in version) these used a sequence of ``INSERT OR IGNORE`` followed by an ``UPDATE``. This older method is still used for SQLite 3.23.1 and earlier. You can force the older implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor. +Upserts use ``INSERT INTO ... ON CONFLICT SET``. Prior to ``sqlite-utils 4.0`` these used a sequence of ``INSERT OR IGNORE`` followed by an ``UPDATE``. This older method is still used for SQLite 3.23.1 and earlier. You can force the older implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor. .. _python_api_convert: From 41497c3210fd90a0f95cebdec54b8909a5fc2cc2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 20:10:17 -0700 Subject: [PATCH 08/11] Fix for use_old_upsert branch Refs https://github.com/simonw/sqlite-utils/pull/653#issuecomment-2864951112 --- sqlite_utils/db.py | 81 +++++++++++++++++++++++--------------------- tests/test_create.py | 8 +++-- tests/test_upsert.py | 7 ++-- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 4a54dfafd..9fc3d7403 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -3086,45 +3086,48 @@ def build_insert_queries_and_params( # At this point we need compatibility UPSERT for SQLite < 3.24.0 # (INSERT OR IGNORE + second UPDATE stage) queries_and_params = [] - - insert_sql = ( - f"INSERT OR IGNORE INTO [{self.name}] " - f"({columns_sql}) VALUES {row_placeholders_sql}" - ) - queries_and_params.append((insert_sql, flat_params)) - - # If there is nothing to update we are done. - if not non_pk_cols: - return queries_and_params - - # We can use UPDATE … FROM (VALUES …) on SQLite ≥ 3.33.0 - # Older SQLite versions will run this as one UPDATE per row - # – which is what sqlite-utils did prior to this refactor. - alias_cols_sql = ", ".join(pk_cols + non_pk_cols) - - assignments = [] - for c in non_pk_cols: - if c in conversions: - assignments.append(f"[{c}] = {conversions[c].replace('?', f'v.[{c}]')}") - else: - assignments.append(f"[{c}] = v.[{c}]") - assignments_sql = ", ".join(assignments) - - update_sql = ( - f"UPDATE [{self.name}] AS m SET {assignments_sql} " - f"FROM (VALUES {row_placeholders_sql}) " - f"AS v({alias_cols_sql}) " - f"WHERE " + " AND ".join(f"m.[{c}] = v.[{c}]" for c in pk_cols) - ) - - # Parameters for the UPDATE – pk cols first then non-pk cols - update_params = [] - for row in values: - row_dict = dict(zip(all_columns, row)) - ordered = [row_dict[c] for c in pk_cols + non_pk_cols] - update_params.extend(ordered) - - queries_and_params.append((update_sql, update_params)) + if isinstance(pk, str): + pks = [pk] + else: + pks = pk + self.last_pk = None + for record_values in values: + record = dict(zip(all_columns, record_values)) + placeholders = list(pks) + # Need to populate not-null columns too, or INSERT OR IGNORE ignores + # them since it ignores the resulting integrity errors + if not_null: + placeholders.extend(not_null) + sql = "INSERT OR IGNORE INTO [{table}]({cols}) VALUES({placeholders});".format( + table=self.name, + cols=", ".join(["[{}]".format(p) for p in placeholders]), + placeholders=", ".join(["?" for p in placeholders]), + ) + queries_and_params.append( + (sql, [record[col] for col in pks] + ["" for _ in (not_null or [])]) + ) + # UPDATE [book] SET [name] = 'Programming' WHERE [id] = 1001; + set_cols = [col for col in all_columns if col not in pks] + if set_cols: + sql2 = "UPDATE [{table}] SET {pairs} WHERE {wheres}".format( + table=self.name, + pairs=", ".join( + "[{}] = {}".format(col, conversions.get(col, "?")) + for col in set_cols + ), + wheres=" AND ".join("[{}] = ?".format(pk) for pk in pks), + ) + queries_and_params.append( + ( + sql2, + [record[col] for col in set_cols] + [record[pk] for pk in pks], + ) + ) + # We can populate .last_pk right here + if num_records_processed == 1: + self.last_pk = tuple(record[pk] for pk in pks) + if len(self.last_pk) == 1: + self.last_pk = self.last_pk[0] return queries_and_params def insert_chunk( diff --git a/tests/test_create.py b/tests/test_create.py index 7825198ac..a04c8b3d3 100644 --- a/tests/test_create.py +++ b/tests/test_create.py @@ -173,14 +173,16 @@ def test_create_table_from_example_with_compound_primary_keys(fresh_db): @pytest.mark.parametrize( "method_name", ("insert", "upsert", "insert_all", "upsert_all") ) -def test_create_table_with_custom_columns(fresh_db, method_name): - table = fresh_db["dogs"] +@pytest.mark.parametrize("use_old_upsert", (False, True)) +def test_create_table_with_custom_columns(method_name, use_old_upsert): + db = Database(memory=True, use_old_upsert=use_old_upsert) + table = db["dogs"] method = getattr(table, method_name) record = {"id": 1, "name": "Cleo", "age": "5"} if method_name.endswith("_all"): record = [record] method(record, pk="id", columns={"age": int, "weight": float}) - assert ["dogs"] == fresh_db.table_names() + assert ["dogs"] == db.table_names() expected_columns = [ {"name": "id", "type": "INTEGER"}, {"name": "name", "type": "TEXT"}, diff --git a/tests/test_upsert.py b/tests/test_upsert.py index 8a34d4ebe..de0e362b5 100644 --- a/tests/test_upsert.py +++ b/tests/test_upsert.py @@ -1,9 +1,12 @@ from sqlite_utils.db import PrimaryKeyRequired +from sqlite_utils import Database import pytest -def test_upsert(fresh_db): - table = fresh_db["table"] +@pytest.mark.parametrize("use_old_upsert", (False, True)) +def test_upsert(use_old_upsert): + db = Database(memory=True, use_old_upsert=use_old_upsert) + table = db["table"] table.insert({"id": 1, "name": "Cleo"}, pk="id") table.upsert({"id": 1, "age": 5}, pk="id", alter=True) assert list(table.rows) == [{"id": 1, "name": "Cleo", "age": 5}] From 494075584f42e9a1a9b99caf593e8ca881fd6bed Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 20:15:07 -0700 Subject: [PATCH 09/11] Drop table in finally Refs https://github.com/simonw/sqlite-utils/pull/653#issuecomment-2864963011 --- sqlite_utils/db.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 9fc3d7403..a471bbdf0 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -692,8 +692,8 @@ def supports_strict(self) -> bool: def supports_on_conflict(self) -> bool: # SQLite's upsert is implemented as INSERT INTO ... ON CONFLICT DO ... if not hasattr(self, "_supports_on_conflict"): + table_name = "t{}".format(secrets.token_hex(16)) try: - table_name = "t{}".format(secrets.token_hex(16)) with self.conn: self.conn.execute( "create table {} (id integer primary key, name text)".format( @@ -709,10 +709,11 @@ def supports_on_conflict(self) -> bool: "on conflict do update set name = 'two'" ).format(table_name) ) - self.conn.execute("drop table {}".format(table_name)) - self._supports_on_conflict = True + self._supports_on_conflict = True except Exception: self._supports_on_conflict = False + finally: + self.conn.execute("drop table {}".format(table_name)) return self._supports_on_conflict @property From fa6ae2b3328dc60966c28e7239a1cfb3aada84b7 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 20:16:56 -0700 Subject: [PATCH 10/11] Use drop table if exists --- sqlite_utils/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index a471bbdf0..363b069d8 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -713,7 +713,7 @@ def supports_on_conflict(self) -> bool: except Exception: self._supports_on_conflict = False finally: - self.conn.execute("drop table {}".format(table_name)) + self.conn.execute("drop table if exists {}".format(table_name)) return self._supports_on_conflict @property From 79913af28a7a68eb98725df3343ccdcbe86a30fa Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 8 May 2025 20:20:27 -0700 Subject: [PATCH 11/11] Handle different SQLite version error messages Refs https://github.com/simonw/sqlite-utils/pull/653#issuecomment-2864967364 --- sqlite_utils/cli.py | 4 +++- tests/test_cli.py | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index 795e30de0..a19ee4306 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -1098,7 +1098,9 @@ def insert_upsert_implementation( if ( isinstance(e, OperationalError) and e.args - and "has no column named" in e.args[0] + and ( + "has no column named" in e.args[0] or "no such column" in e.args[0] + ) ): raise click.ClickException( "{}\n\nTry using --alter to add additional columns".format( diff --git a/tests/test_cli.py b/tests/test_cli.py index 88763eecf..17e5ed6f6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1116,10 +1116,8 @@ def test_upsert_alter(db_path, tmpdir): cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"] ) assert result.exit_code == 1 - assert ( - "Error: table dogs has no column named age\n\n" - "Try using --alter to add additional columns" - ) == result.output.strip() + # Could be one of two errors depending on SQLite version + assert ("Try using --alter to add additional columns") in result.output.strip() # Should succeed with --alter result = CliRunner().invoke( cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id", "--alter"]