From 55047db79851c45be935790988f00b87c9f9ef2b Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Wed, 7 May 2025 17:08:49 -0700
Subject: [PATCH 01/11] supports_strict now caches on self._supports_strict

---
 sqlite_utils/db.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 144330a1b..927e6c676 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -671,16 +671,18 @@ def schema(self) -> str:
     @property
     def supports_strict(self) -> bool:
         "Does this database support STRICT mode?"
-        try:
-            table_name = "t{}".format(secrets.token_hex(16))
-            with self.conn:
-                self.conn.execute(
-                    "create table {} (name text) strict".format(table_name)
-                )
-                self.conn.execute("drop table {}".format(table_name))
-            return True
-        except Exception:
-            return False
+        if not hasattr(self, "_supports_strict"):
+            try:
+                table_name = "t{}".format(secrets.token_hex(16))
+                with self.conn:
+                    self.conn.execute(
+                        "create table {} (name text) strict".format(table_name)
+                    )
+                    self.conn.execute("drop table {}".format(table_name))
+                self._supports_strict = True
+            except Exception:
+                self._supports_strict = False
+        return self._supports_strict
 
     @property
     def sqlite_version(self) -> Tuple[int, ...]:

From fe3a3d4000c9825f323b83276a1f9b3396dbbe3b Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Wed, 7 May 2025 17:10:22 -0700
Subject: [PATCH 02/11] WIP new upsert implementation, refs #652

---
 docs/python-api.rst  |   7 ++
 sqlite_utils/db.py   | 291 +++++++++++++++++++++++++++----------------
 tests/test_cli.py    |   7 +-
 tests/test_tracer.py |   2 +-
 4 files changed, 195 insertions(+), 112 deletions(-)

diff --git a/docs/python-api.rst b/docs/python-api.rst
index c6bf77620..6893e1c47 100644
--- a/docs/python-api.rst
+++ b/docs/python-api.rst
@@ -927,6 +927,13 @@ An ``upsert_all()`` method is also available, which behaves like ``insert_all()`
 .. note::
     ``.upsert()`` and ``.upsert_all()`` in sqlite-utils 1.x worked like ``.insert(..., replace=True)`` and ``.insert_all(..., replace=True)`` do in 2.x. See `issue #66 <https://github.com/simonw/sqlite-utils/issues/66>`__ for details of this change.
 
+.. _python_api_old_upsert:
+
+Alternative upserts using INSERT OR IGNORE
+------------------------------------------
+
+Upserts use ``INSERT INTO ... ON CONFLICT SET``. Prior to ``sqlite-utils 3.x`` (TODO: fill in version) these used a sequence of ``INSERT OR IGNORE`` followed by an ``UPDATE``. This older method is still used for SQLite 3.23.1 and earlier. You can force the older implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor.
+
 .. _python_api_convert:
 
 Converting data in columns
diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 927e6c676..b960790a8 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -304,6 +304,8 @@ class Database:
       ``sql, parameters`` every time a SQL query is executed
     :param use_counts_table: set to ``True`` to use a cached counts table, if available. See
       :ref:`python_api_cached_table_counts`
+    :param use_old_upsert: set to ``True`` to force the older upsert implementation. See
+      :ref:`python_api_old_upsert`
     :param strict: Apply STRICT mode to all created tables (unless overridden)
     """
 
@@ -320,10 +322,12 @@ def __init__(
         tracer: Optional[Callable] = None,
         use_counts_table: bool = False,
         execute_plugins: bool = True,
+        use_old_upsert: bool = False,
         strict: bool = False,
     ):
         self.memory_name = None
         self.memory = False
+        self.use_old_upsert = use_old_upsert
         assert (filename_or_conn is not None and (not memory and not memory_name)) or (
             filename_or_conn is None and (memory or memory_name)
         ), "Either specify a filename_or_conn or pass memory=True"
@@ -684,6 +688,33 @@ def supports_strict(self) -> bool:
                 self._supports_strict = False
         return self._supports_strict
 
+    @property
+    def supports_on_conflict(self) -> bool:
+        # SQLite's upsert is implemented as INSERT INTO ... ON CONFLICT DO ...
+        if not hasattr(self, "_supports_on_conflict"):
+            try:
+                table_name = "t{}".format(secrets.token_hex(16))
+                with self.conn:
+                    self.conn.execute(
+                        "create table {} (id integer primary key, name text)".format(
+                            table_name
+                        )
+                    )
+                    self.conn.execute(
+                        "insert into {} (id, name) values (1, 'one')".format(table_name)
+                    )
+                    self.conn.execute(
+                        (
+                            "insert into {} (id, name) values (1, 'two') "
+                            "on conflict do update set name = 'two'"
+                        ).format(table_name)
+                    )
+                    self.conn.execute("drop table {}".format(table_name))
+                self._supports_on_conflict = True
+            except Exception:
+                self._supports_on_conflict = False
+        return self._supports_on_conflict
+
     @property
     def sqlite_version(self) -> Tuple[int, ...]:
         "Version of SQLite, as a tuple of integers for example ``(3, 36, 0)``."
@@ -2968,102 +2999,128 @@ def build_insert_queries_and_params(
         replace,
         ignore,
     ):
-        # values is the list of insert data that is passed to the
-        # .execute() method - but some of them may be replaced by
-        # new primary keys if we are extracting any columns.
-        values = []
+        """
+        Given a list ``chunk`` of records that should be written to *this* table,
+        return a list of ``(sql, parameters)`` 2-tuples which, when executed in
+        order, perform the desired INSERT / UPSERT / REPLACE operation.
+        """
+        if (upsert or replace) and not pk:
+            raise PrimaryKeyRequired("UPSERT / REPLACE needs a primary key")
+
         if hash_id_columns and hash_id is None:
             hash_id = "id"
+
         extracts = resolve_extracts(extracts)
+
+        # Build a row-list ready for executemany-style flattening
+        values: list[list] = []
         for record in chunk:
-            record_values = []
-            for key in all_columns:
-                value = jsonify_if_needed(
-                    record.get(
-                        key,
-                        (
-                            None
-                            if key != hash_id
-                            else hash_record(record, hash_id_columns)
-                        ),
-                    )
-                )
-                if key in extracts:
-                    extract_table = extracts[key]
-                    value = self.db[extract_table].lookup({"value": value})
-                record_values.append(value)
-            values.append(record_values)
-
-        queries_and_params = []
-        if upsert:
-            if isinstance(pk, str):
-                pks = [pk]
-            else:
-                pks = pk
-            self.last_pk = None
-            for record_values in values:
-                record = dict(zip(all_columns, record_values))
-                placeholders = list(pks)
-                # Need to populate not-null columns too, or INSERT OR IGNORE ignores
-                # them since it ignores the resulting integrity errors
-                if not_null:
-                    placeholders.extend(not_null)
-                sql = "INSERT OR IGNORE INTO [{table}]({cols}) VALUES({placeholders});".format(
-                    table=self.name,
-                    cols=", ".join(["[{}]".format(p) for p in placeholders]),
-                    placeholders=", ".join(["?" for p in placeholders]),
-                )
-                queries_and_params.append(
-                    (sql, [record[col] for col in pks] + ["" for _ in (not_null or [])])
-                )
-                # UPDATE [book] SET [name] = 'Programming' WHERE [id] = 1001;
-                set_cols = [col for col in all_columns if col not in pks]
-                if set_cols:
-                    sql2 = "UPDATE [{table}] SET {pairs} WHERE {wheres}".format(
-                        table=self.name,
-                        pairs=", ".join(
-                            "[{}] = {}".format(col, conversions.get(col, "?"))
-                            for col in set_cols
-                        ),
-                        wheres=" AND ".join("[{}] = ?".format(pk) for pk in pks),
-                    )
-                    queries_and_params.append(
-                        (
-                            sql2,
-                            [record[col] for col in set_cols]
-                            + [record[pk] for pk in pks],
-                        )
-                    )
-                # We can populate .last_pk right here
-                if num_records_processed == 1:
-                    self.last_pk = tuple(record[pk] for pk in pks)
-                    if len(self.last_pk) == 1:
-                        self.last_pk = self.last_pk[0]
+            row_vals = []
+            for col in all_columns:
+                if col == hash_id:
+                    row_vals.append(hash_record(record, hash_id_columns))
+                    continue
 
-        else:
-            or_what = ""
-            if replace:
-                or_what = "OR REPLACE "
-            elif ignore:
-                or_what = "OR IGNORE "
-            sql = """
-                INSERT {or_what}INTO [{table}] ({columns}) VALUES {rows};
-            """.strip().format(
-                or_what=or_what,
-                table=self.name,
-                columns=", ".join("[{}]".format(c) for c in all_columns),
-                rows=", ".join(
-                    "({placeholders})".format(
-                        placeholders=", ".join(
-                            [conversions.get(col, "?") for col in all_columns]
+                val = record.get(col)
+                if val is None and not_null and col in not_null:
+                    val = ""
+                row_vals.append(jsonify_if_needed(val))
+            values.append(row_vals)
+
+        columns_sql = ", ".join(f"[{c}]" for c in all_columns)
+        placeholder_expr = ", ".join(conversions.get(c, "?") for c in all_columns)
+        row_placeholders_sql = ", ".join(f"({placeholder_expr})" for _ in values)
+        flat_params = list(itertools.chain.from_iterable(values))
+
+        # replace=True mean INSERT OR REPLACE INTO
+        if replace:
+            sql = (
+                f"INSERT OR REPLACE INTO [{self.name}] "
+                f"({columns_sql}) VALUES {row_placeholders_sql}"
+            )
+            return [(sql, flat_params)]
+
+        # If not an upsert it's an INSERT, maybe with OR IGNORE
+        if not upsert:
+            or_ignore = ""
+            if ignore:
+                or_ignore = " OR IGNORE"
+            sql = (
+                f"INSERT{or_ignore} INTO [{self.name}] "
+                f"({columns_sql}) VALUES {row_placeholders_sql}"
+            )
+            return [(sql, flat_params)]
+
+        # Everything from here on is for upsert=True
+        pk_cols = [pk] if isinstance(pk, str) else list(pk)
+        non_pk_cols = [c for c in all_columns if c not in pk_cols]
+        conflict_sql = ", ".join(f"[{c}]" for c in pk_cols)
+
+        if self.db.supports_on_conflict and not self.db.use_old_upsert:
+            if non_pk_cols:
+                # DO UPDATE
+                assignments = []
+                for c in non_pk_cols:
+                    if c in conversions:
+                        assignments.append(
+                            f"[{c}] = {conversions[c].replace('?', f'excluded.[{c}]')}"
                         )
-                    )
-                    for record in chunk
-                ),
+                    else:
+                        assignments.append(f"[{c}] = excluded.[{c}]")
+                do_clause = "DO UPDATE SET " + ", ".join(assignments)
+            else:
+                # All columns are in the PK – nothing to update.
+                do_clause = "DO NOTHING"
+
+            sql = (
+                f"INSERT INTO [{self.name}] ({columns_sql}) "
+                f"VALUES {row_placeholders_sql} "
+                f"ON CONFLICT({conflict_sql}) {do_clause}"
             )
-            flat_values = list(itertools.chain(*values))
-            queries_and_params = [(sql, flat_values)]
+            return [(sql, flat_params)]
+
+        # At this point we need compatibility UPSERT for SQLite < 3.24.0
+        # (INSERT OR IGNORE + second UPDATE stage)
+        queries_and_params: list[tuple[str, list]] = []
+
+        insert_sql = (
+            f"INSERT OR IGNORE INTO [{self.name}] "
+            f"({columns_sql}) VALUES {row_placeholders_sql}"
+        )
+        queries_and_params.append((insert_sql, flat_params))
+
+        # If there is nothing to update we are done.
+        if not non_pk_cols:
+            return queries_and_params
 
+        # We can use UPDATE … FROM (VALUES …) on SQLite ≥ 3.33.0
+        # Older SQLite versions will run this as one UPDATE per row
+        # – which is what sqlite-utils did prior to this refactor.
+        alias_cols_sql = ", ".join(pk_cols + non_pk_cols)
+
+        assignments = []
+        for c in non_pk_cols:
+            if c in conversions:
+                assignments.append(f"[{c}] = {conversions[c].replace('?', f'v.[{c}]')}")
+            else:
+                assignments.append(f"[{c}] = v.[{c}]")
+        assignments_sql = ", ".join(assignments)
+
+        update_sql = (
+            f"UPDATE [{self.name}] AS m SET {assignments_sql} "
+            f"FROM (VALUES {row_placeholders_sql}) "
+            f"AS v({alias_cols_sql}) "
+            f"WHERE " + " AND ".join(f"m.[{c}] = v.[{c}]" for c in pk_cols)
+        )
+
+        # Parameters for the UPDATE – pk cols first then non-pk cols
+        update_params: list = []
+        for row in values:
+            row_dict = dict(zip(all_columns, row))
+            ordered = [row_dict[c] for c in pk_cols + non_pk_cols]
+            update_params.extend(ordered)
+
+        queries_and_params.append((update_sql, update_params))
         return queries_and_params
 
     def insert_chunk(
@@ -3081,7 +3138,7 @@ def insert_chunk(
         num_records_processed,
         replace,
         ignore,
-    ):
+    ) -> Optional[sqlite3.Cursor]:
         queries_and_params = self.build_insert_queries_and_params(
             extracts,
             chunk,
@@ -3096,9 +3153,8 @@ def insert_chunk(
             replace,
             ignore,
         )
-
+        result = None
         with self.db.conn:
-            result = None
             for query, params in queries_and_params:
                 try:
                     result = self.db.execute(query, params)
@@ -3127,7 +3183,7 @@ def insert_chunk(
                             ignore,
                         )
 
-                        self.insert_chunk(
+                        result = self.insert_chunk(
                             alter,
                             extracts,
                             second_half,
@@ -3145,20 +3201,7 @@ def insert_chunk(
 
                     else:
                         raise
-            if num_records_processed == 1 and not upsert:
-                self.last_rowid = result.lastrowid
-                self.last_pk = self.last_rowid
-                # self.last_rowid will be 0 if a "INSERT OR IGNORE" happened
-                if (hash_id or pk) and self.last_rowid:
-                    row = list(self.rows_where("rowid = ?", [self.last_rowid]))[0]
-                    if hash_id:
-                        self.last_pk = row[hash_id]
-                    elif isinstance(pk, str):
-                        self.last_pk = row[pk]
-                    else:
-                        self.last_pk = tuple(row[p] for p in pk)
-
-        return
+        return result
 
     def insert(
         self,
@@ -3309,6 +3352,7 @@ def insert_all(
         self.last_pk = None
         if truncate and self.exists():
             self.db.execute("DELETE FROM [{}];".format(self.name))
+        result = None
         for chunk in chunks(itertools.chain([first_record], records), batch_size):
             chunk = list(chunk)
             num_records_processed += len(chunk)
@@ -3316,6 +3360,12 @@ def insert_all(
                 if not self.exists():
                     # Use the first batch to derive the table names
                     column_types = suggest_column_types(chunk)
+                    if extracts:
+                        for col in extracts:
+                            if col in column_types:
+                                column_types[col] = (
+                                    int  # This will be an integer foreign key
+                                )
                     column_types.update(columns or {})
                     self.create(
                         column_types,
@@ -3343,7 +3393,7 @@ def insert_all(
 
             first = False
 
-            self.insert_chunk(
+            result = self.insert_chunk(
                 alter,
                 extracts,
                 chunk,
@@ -3359,6 +3409,33 @@ def insert_all(
                 ignore,
             )
 
+        # If we only handled a single row populate self.last_pk
+        if num_records_processed == 1:
+            # For an insert we need to use result.lastrowid
+            if not upsert:
+                self.last_rowid = result.lastrowid
+                if (hash_id or pk) and self.last_rowid:
+                    # Set self.last_pk to the pk(s) for that rowid
+                    row = list(self.rows_where("rowid = ?", [self.last_rowid]))[0]
+                    if hash_id:
+                        self.last_pk = row[hash_id]
+                    elif isinstance(pk, str):
+                        self.last_pk = row[pk]
+                    else:
+                        self.last_pk = tuple(row[p] for p in pk)
+                else:
+                    self.last_pk = self.last_rowid
+            else:
+                # For an upsert use first_record from earlier
+                if hash_id:
+                    self.last_pk = hash_record(first_record, hash_id_columns)
+                else:
+                    self.last_pk = (
+                        first_record[pk]
+                        if isinstance(pk, str)
+                        else tuple(first_record[p] for p in pk)
+                    )
+
         if analyze:
             self.analyze()
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 4033af641..88763eecf 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1117,9 +1117,8 @@ def test_upsert_alter(db_path, tmpdir):
     )
     assert result.exit_code == 1
     assert (
-        "Error: no such column: age\n\n"
-        "sql = UPDATE [dogs] SET [age] = ? WHERE [id] = ?\n"
-        "parameters = [5, 1]"
+        "Error: table dogs has no column named age\n\n"
+        "Try using --alter to add additional columns"
     ) == result.output.strip()
     # Should succeed with --alter
     result = CliRunner().invoke(
@@ -2248,7 +2247,7 @@ def test_integer_overflow_error(tmpdir):
     assert result.exit_code == 1
     assert result.output == (
         "Error: Python int too large to convert to SQLite INTEGER\n\n"
-        "sql = INSERT INTO [items] ([bignumber]) VALUES (?);\n"
+        "sql = INSERT INTO [items] ([bignumber]) VALUES (?)\n"
         "parameters = [34223049823094832094802398430298048240]\n"
     )
 
diff --git a/tests/test_tracer.py b/tests/test_tracer.py
index 26318ae06..9dfb490d6 100644
--- a/tests/test_tracer.py
+++ b/tests/test_tracer.py
@@ -18,7 +18,7 @@ def test_tracer():
         ("select name from sqlite_master where type = 'view'", None),
         ("CREATE TABLE [dogs] (\n   [name] TEXT\n);\n        ", None),
         ("select name from sqlite_master where type = 'view'", None),
-        ("INSERT INTO [dogs] ([name]) VALUES (?);", ["Cleopaws"]),
+        ("INSERT INTO [dogs] ([name]) VALUES (?)", ["Cleopaws"]),
         ("select name from sqlite_master where type = 'view'", None),
         (
             "CREATE VIRTUAL TABLE [dogs_fts] USING FTS5 (\n    [name],\n    content=[dogs]\n)",

From 2ab894dd73f0f14a30edcd643283824a04836570 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 19:15:46 -0700
Subject: [PATCH 03/11] Remove upsert() requires a pk checks

---
 sqlite_utils/db.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index b960790a8..2b43af617 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -3004,9 +3004,6 @@ def build_insert_queries_and_params(
         return a list of ``(sql, parameters)`` 2-tuples which, when executed in
         order, perform the desired INSERT / UPSERT / REPLACE operation.
         """
-        if (upsert or replace) and not pk:
-            raise PrimaryKeyRequired("UPSERT / REPLACE needs a primary key")
-
         if hash_id_columns and hash_id is None:
             hash_id = "id"
 
@@ -3319,8 +3316,6 @@ def insert_all(
         if hash_id_columns and hash_id is None:
             hash_id = "id"
 
-        if upsert and (not pk and not hash_id):
-            raise PrimaryKeyRequired("upsert() requires a pk")
         assert not (hash_id and pk), "Use either pk= or hash_id="
         if hash_id_columns and (hash_id is None):
             hash_id = "id"

From ac9f8208c7d24b92c983ea7d0f23545904a20e1d Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 19:19:43 -0700
Subject: [PATCH 04/11] Put PrimaryKeyRequired back in the right place

---
 sqlite_utils/db.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 2b43af617..89648e880 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -3316,6 +3316,9 @@ def insert_all(
         if hash_id_columns and hash_id is None:
             hash_id = "id"
 
+        if upsert and (not pk and not hash_id):
+            raise PrimaryKeyRequired("upsert() requires a pk")
+
         assert not (hash_id and pk), "Use either pk= or hash_id="
         if hash_id_columns and (hash_id is None):
             hash_id = "id"

From 058a8dfa4cc8900dbd4cc31104ba3537bb1589ad Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 19:51:41 -0700
Subject: [PATCH 05/11] Fixed remaining broken tests

---
 sqlite_utils/db.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 89648e880..922f9fa76 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -3010,19 +3010,26 @@ def build_insert_queries_and_params(
         extracts = resolve_extracts(extracts)
 
         # Build a row-list ready for executemany-style flattening
-        values: list[list] = []
-        for record in chunk:
-            row_vals = []
-            for col in all_columns:
-                if col == hash_id:
-                    row_vals.append(hash_record(record, hash_id_columns))
-                    continue
+        values: List[list] = []
 
-                val = record.get(col)
-                if val is None and not_null and col in not_null:
-                    val = ""
-                row_vals.append(jsonify_if_needed(val))
-            values.append(row_vals)
+        for record in chunk:
+            record_values = []
+            for key in all_columns:
+                value = jsonify_if_needed(
+                    record.get(
+                        key,
+                        (
+                            None
+                            if key != hash_id
+                            else hash_record(record, hash_id_columns)
+                        ),
+                    )
+                )
+                if key in extracts:
+                    extract_table = extracts[key]
+                    value = self.db[extract_table].lookup({"value": value})
+                record_values.append(value)
+            values.append(record_values)
 
         columns_sql = ", ".join(f"[{c}]" for c in all_columns)
         placeholder_expr = ", ".join(conversions.get(c, "?") for c in all_columns)

From e9a6b4c176c28526de4db00c056ea52536c271df Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 19:56:47 -0700
Subject: [PATCH 06/11] Fixed mypy complaints

---
 sqlite_utils/db.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 922f9fa76..4a54dfafd 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -3010,7 +3010,7 @@ def build_insert_queries_and_params(
         extracts = resolve_extracts(extracts)
 
         # Build a row-list ready for executemany-style flattening
-        values: List[list] = []
+        values = []
 
         for record in chunk:
             record_values = []
@@ -3085,7 +3085,7 @@ def build_insert_queries_and_params(
 
         # At this point we need compatibility UPSERT for SQLite < 3.24.0
         # (INSERT OR IGNORE + second UPDATE stage)
-        queries_and_params: list[tuple[str, list]] = []
+        queries_and_params = []
 
         insert_sql = (
             f"INSERT OR IGNORE INTO [{self.name}] "
@@ -3118,7 +3118,7 @@ def build_insert_queries_and_params(
         )
 
         # Parameters for the UPDATE – pk cols first then non-pk cols
-        update_params: list = []
+        update_params = []
         for row in values:
             row_dict = dict(zip(all_columns, row))
             ordered = [row_dict[c] for c in pk_cols + non_pk_cols]
@@ -3417,7 +3417,7 @@ def insert_all(
         # If we only handled a single row populate self.last_pk
         if num_records_processed == 1:
             # For an insert we need to use result.lastrowid
-            if not upsert:
+            if not upsert and result is not None:
                 self.last_rowid = result.lastrowid
                 if (hash_id or pk) and self.last_rowid:
                     # Set self.last_pk to the pk(s) for that rowid

From efeba1f18c5688c4dcf152da6e9fa0ec2d4bfb34 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 20:00:00 -0700
Subject: [PATCH 07/11] Prior to sqlite-utils 4.0

Refs https://github.com/simonw/sqlite-utils/issues/652#issuecomment-2864945717
---
 docs/python-api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/python-api.rst b/docs/python-api.rst
index 6893e1c47..2e81fa3f1 100644
--- a/docs/python-api.rst
+++ b/docs/python-api.rst
@@ -932,7 +932,7 @@ An ``upsert_all()`` method is also available, which behaves like ``insert_all()`
 Alternative upserts using INSERT OR IGNORE
 ------------------------------------------
 
-Upserts use ``INSERT INTO ... ON CONFLICT SET``. Prior to ``sqlite-utils 3.x`` (TODO: fill in version) these used a sequence of ``INSERT OR IGNORE`` followed by an ``UPDATE``. This older method is still used for SQLite 3.23.1 and earlier. You can force the older implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor.
+Upserts use ``INSERT INTO ... ON CONFLICT SET``. Prior to ``sqlite-utils 4.0`` these used a sequence of ``INSERT OR IGNORE`` followed by an ``UPDATE``. This older method is still used for SQLite 3.23.1 and earlier. You can force the older implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor.
 
 .. _python_api_convert:
 

From 41497c3210fd90a0f95cebdec54b8909a5fc2cc2 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 20:10:17 -0700
Subject: [PATCH 08/11] Fix for use_old_upsert branch

Refs https://github.com/simonw/sqlite-utils/pull/653#issuecomment-2864951112
---
 sqlite_utils/db.py   | 81 +++++++++++++++++++++++---------------------
 tests/test_create.py |  8 +++--
 tests/test_upsert.py |  7 ++--
 3 files changed, 52 insertions(+), 44 deletions(-)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 4a54dfafd..9fc3d7403 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -3086,45 +3086,48 @@ def build_insert_queries_and_params(
         # At this point we need compatibility UPSERT for SQLite < 3.24.0
         # (INSERT OR IGNORE + second UPDATE stage)
         queries_and_params = []
-
-        insert_sql = (
-            f"INSERT OR IGNORE INTO [{self.name}] "
-            f"({columns_sql}) VALUES {row_placeholders_sql}"
-        )
-        queries_and_params.append((insert_sql, flat_params))
-
-        # If there is nothing to update we are done.
-        if not non_pk_cols:
-            return queries_and_params
-
-        # We can use UPDATE … FROM (VALUES …) on SQLite ≥ 3.33.0
-        # Older SQLite versions will run this as one UPDATE per row
-        # – which is what sqlite-utils did prior to this refactor.
-        alias_cols_sql = ", ".join(pk_cols + non_pk_cols)
-
-        assignments = []
-        for c in non_pk_cols:
-            if c in conversions:
-                assignments.append(f"[{c}] = {conversions[c].replace('?', f'v.[{c}]')}")
-            else:
-                assignments.append(f"[{c}] = v.[{c}]")
-        assignments_sql = ", ".join(assignments)
-
-        update_sql = (
-            f"UPDATE [{self.name}] AS m SET {assignments_sql} "
-            f"FROM (VALUES {row_placeholders_sql}) "
-            f"AS v({alias_cols_sql}) "
-            f"WHERE " + " AND ".join(f"m.[{c}] = v.[{c}]" for c in pk_cols)
-        )
-
-        # Parameters for the UPDATE – pk cols first then non-pk cols
-        update_params = []
-        for row in values:
-            row_dict = dict(zip(all_columns, row))
-            ordered = [row_dict[c] for c in pk_cols + non_pk_cols]
-            update_params.extend(ordered)
-
-        queries_and_params.append((update_sql, update_params))
+        if isinstance(pk, str):
+            pks = [pk]
+        else:
+            pks = pk
+        self.last_pk = None
+        for record_values in values:
+            record = dict(zip(all_columns, record_values))
+            placeholders = list(pks)
+            # Need to populate not-null columns too, or INSERT OR IGNORE ignores
+            # them since it ignores the resulting integrity errors
+            if not_null:
+                placeholders.extend(not_null)
+            sql = "INSERT OR IGNORE INTO [{table}]({cols}) VALUES({placeholders});".format(
+                table=self.name,
+                cols=", ".join(["[{}]".format(p) for p in placeholders]),
+                placeholders=", ".join(["?" for p in placeholders]),
+            )
+            queries_and_params.append(
+                (sql, [record[col] for col in pks] + ["" for _ in (not_null or [])])
+            )
+            # UPDATE [book] SET [name] = 'Programming' WHERE [id] = 1001;
+            set_cols = [col for col in all_columns if col not in pks]
+            if set_cols:
+                sql2 = "UPDATE [{table}] SET {pairs} WHERE {wheres}".format(
+                    table=self.name,
+                    pairs=", ".join(
+                        "[{}] = {}".format(col, conversions.get(col, "?"))
+                        for col in set_cols
+                    ),
+                    wheres=" AND ".join("[{}] = ?".format(pk) for pk in pks),
+                )
+                queries_and_params.append(
+                    (
+                        sql2,
+                        [record[col] for col in set_cols] + [record[pk] for pk in pks],
+                    )
+                )
+            # We can populate .last_pk right here
+            if num_records_processed == 1:
+                self.last_pk = tuple(record[pk] for pk in pks)
+                if len(self.last_pk) == 1:
+                    self.last_pk = self.last_pk[0]
         return queries_and_params
 
     def insert_chunk(
diff --git a/tests/test_create.py b/tests/test_create.py
index 7825198ac..a04c8b3d3 100644
--- a/tests/test_create.py
+++ b/tests/test_create.py
@@ -173,14 +173,16 @@ def test_create_table_from_example_with_compound_primary_keys(fresh_db):
 @pytest.mark.parametrize(
     "method_name", ("insert", "upsert", "insert_all", "upsert_all")
 )
-def test_create_table_with_custom_columns(fresh_db, method_name):
-    table = fresh_db["dogs"]
+@pytest.mark.parametrize("use_old_upsert", (False, True))
+def test_create_table_with_custom_columns(method_name, use_old_upsert):
+    db = Database(memory=True, use_old_upsert=use_old_upsert)
+    table = db["dogs"]
     method = getattr(table, method_name)
     record = {"id": 1, "name": "Cleo", "age": "5"}
     if method_name.endswith("_all"):
         record = [record]
     method(record, pk="id", columns={"age": int, "weight": float})
-    assert ["dogs"] == fresh_db.table_names()
+    assert ["dogs"] == db.table_names()
     expected_columns = [
         {"name": "id", "type": "INTEGER"},
         {"name": "name", "type": "TEXT"},
diff --git a/tests/test_upsert.py b/tests/test_upsert.py
index 8a34d4ebe..de0e362b5 100644
--- a/tests/test_upsert.py
+++ b/tests/test_upsert.py
@@ -1,9 +1,12 @@
 from sqlite_utils.db import PrimaryKeyRequired
+from sqlite_utils import Database
 import pytest
 
 
-def test_upsert(fresh_db):
-    table = fresh_db["table"]
+@pytest.mark.parametrize("use_old_upsert", (False, True))
+def test_upsert(use_old_upsert):
+    db = Database(memory=True, use_old_upsert=use_old_upsert)
+    table = db["table"]
     table.insert({"id": 1, "name": "Cleo"}, pk="id")
     table.upsert({"id": 1, "age": 5}, pk="id", alter=True)
     assert list(table.rows) == [{"id": 1, "name": "Cleo", "age": 5}]

From 494075584f42e9a1a9b99caf593e8ca881fd6bed Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 20:15:07 -0700
Subject: [PATCH 09/11] Drop table in finally

Refs https://github.com/simonw/sqlite-utils/pull/653#issuecomment-2864963011
---
 sqlite_utils/db.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index 9fc3d7403..a471bbdf0 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -692,8 +692,8 @@ def supports_strict(self) -> bool:
     def supports_on_conflict(self) -> bool:
         # SQLite's upsert is implemented as INSERT INTO ... ON CONFLICT DO ...
         if not hasattr(self, "_supports_on_conflict"):
+            table_name = "t{}".format(secrets.token_hex(16))
             try:
-                table_name = "t{}".format(secrets.token_hex(16))
                 with self.conn:
                     self.conn.execute(
                         "create table {} (id integer primary key, name text)".format(
@@ -709,10 +709,11 @@ def supports_on_conflict(self) -> bool:
                             "on conflict do update set name = 'two'"
                         ).format(table_name)
                     )
-                    self.conn.execute("drop table {}".format(table_name))
-                self._supports_on_conflict = True
+                    self._supports_on_conflict = True
             except Exception:
                 self._supports_on_conflict = False
+            finally:
+                self.conn.execute("drop table {}".format(table_name))
         return self._supports_on_conflict
 
     @property

From fa6ae2b3328dc60966c28e7239a1cfb3aada84b7 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 20:16:56 -0700
Subject: [PATCH 10/11] Use drop table if exists

---
 sqlite_utils/db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py
index a471bbdf0..363b069d8 100644
--- a/sqlite_utils/db.py
+++ b/sqlite_utils/db.py
@@ -713,7 +713,7 @@ def supports_on_conflict(self) -> bool:
             except Exception:
                 self._supports_on_conflict = False
             finally:
-                self.conn.execute("drop table {}".format(table_name))
+                self.conn.execute("drop table if exists {}".format(table_name))
         return self._supports_on_conflict
 
     @property

From 79913af28a7a68eb98725df3343ccdcbe86a30fa Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Thu, 8 May 2025 20:20:27 -0700
Subject: [PATCH 11/11] Handle different SQLite version error messages

Refs https://github.com/simonw/sqlite-utils/pull/653#issuecomment-2864967364
---
 sqlite_utils/cli.py | 4 +++-
 tests/test_cli.py   | 6 ++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py
index 795e30de0..a19ee4306 100644
--- a/sqlite_utils/cli.py
+++ b/sqlite_utils/cli.py
@@ -1098,7 +1098,9 @@ def insert_upsert_implementation(
             if (
                 isinstance(e, OperationalError)
                 and e.args
-                and "has no column named" in e.args[0]
+                and (
+                    "has no column named" in e.args[0] or "no such column" in e.args[0]
+                )
             ):
                 raise click.ClickException(
                     "{}\n\nTry using --alter to add additional columns".format(
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 88763eecf..17e5ed6f6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1116,10 +1116,8 @@ def test_upsert_alter(db_path, tmpdir):
         cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
     )
     assert result.exit_code == 1
-    assert (
-        "Error: table dogs has no column named age\n\n"
-        "Try using --alter to add additional columns"
-    ) == result.output.strip()
+    # Could be one of two errors depending on SQLite version
+    assert ("Try using --alter to add additional columns") in result.output.strip()
     # Should succeed with --alter
     result = CliRunner().invoke(
         cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id", "--alter"]