diff --git a/docs/changelog.rst b/docs/changelog.rst index cd694d619..321621496 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,7 @@ Unreleased ---------- - The ``table.insert_all()`` and ``table.upsert_all()`` methods can now accept an iterator of lists or tuples as an alternative to dictionaries. The first item should be a list/tuple of column names. See :ref:`python_api_insert_lists` for details. (:issue:`672`) +- **Breaking change:** The default floating point column type has been changed from ``FLOAT`` to ``REAL``, which is the correct SQLite type for floating point values. This affects auto-detected columns when inserting data, as well as columns declared using the Python ``float`` type (for example with ``db.create_table()``) or the ``float`` type of the ``sqlite-utils add-column`` command. The explicit ``"FLOAT"`` type string is still accepted. (:issue:`645`) .. _v4_0a0: diff --git a/docs/cli-reference.rst b/docs/cli-reference.rst index 9e14c1abf..007fb11f6 100644 --- a/docs/cli-reference.rst +++ b/docs/cli-reference.rst @@ -457,8 +457,8 @@ See :ref:`cli_transform_table`. --rename column2 column_renamed Options: - --type ... Change column type to INTEGER, TEXT, FLOAT or - BLOB + --type ... Change column type to INTEGER, TEXT, FLOAT, + REAL or BLOB --drop TEXT Drop this column --rename ... Rename this column to X -o, --column-order TEXT Reorder columns @@ -1142,7 +1142,7 @@ See :ref:`cli_add_column`. :: Usage: sqlite-utils add-column [OPTIONS] PATH TABLE COL_NAME - [[integer|int|float|text|str|blob|bytes]] + [[integer|int|float|real|text|str|blob|bytes]] Add a column to the specified table diff --git a/docs/cli.rst b/docs/cli.rst index 88756ba93..39c2bf1e8 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1247,7 +1247,7 @@ To stop inserting after a specified number of records - useful for getting a fas A progress bar is displayed when inserting data from a file. You can hide the progress bar using the ``--silent`` option. -By default every column inserted from a CSV or TSV file will be of type ``TEXT``. 
To automatically detect column types - resulting in a mix of ``TEXT``, ``INTEGER`` and ``FLOAT`` columns, use the ``--detect-types`` option (or its shortcut ``-d``). +By default every column inserted from a CSV or TSV file will be of type ``TEXT``. To automatically detect column types - resulting in a mix of ``TEXT``, ``INTEGER`` and ``REAL`` columns, use the ``--detect-types`` option (or its shortcut ``-d``). For example, given a ``creatures.csv`` file containing this: @@ -1274,7 +1274,7 @@ Will produce this schema: CREATE TABLE "creatures" ( "name" TEXT, "age" INTEGER, - "weight" FLOAT + "weight" REAL ); You can set the ``SQLITE_UTILS_DETECT_TYPES`` environment variable if you want ``--detect-types`` to be the default behavior: @@ -1589,7 +1589,7 @@ This will result in the following schema: CREATE TABLE "images" ( "path" TEXT PRIMARY KEY, "md5" TEXT, - "mtime" FLOAT + "mtime" REAL ); Note that there's no ``content`` column here at all - if you specify custom columns using ``-c`` you need to include ``-c content`` to create that column. @@ -1888,8 +1888,8 @@ The type of the returned values will be taken into account when creating the new CREATE TABLE "places" ( "location" TEXT, - "latitude" FLOAT, - "longitude" FLOAT + "latitude" REAL, + "longitude" REAL ); The code function can also return ``None``, in which case its output will be ignored. You can drop the original column at the end of the operation by adding ``--drop``. diff --git a/docs/python-api.rst b/docs/python-api.rst index e5a7f6e63..f9371d594 100644 --- a/docs/python-api.rst +++ b/docs/python-api.rst @@ -548,7 +548,7 @@ This will create a table with the following schema: "id" INTEGER PRIMARY KEY, "name" TEXT, "age" INTEGER, - "weight" FLOAT + "weight" REAL ) .. _python_api_explicit_create: @@ -1271,11 +1271,11 @@ You can specify the ``col_type`` argument either using a SQLite type as a string The ``col_type`` is optional - if you omit it the type of ``TEXT`` will be used. 
-SQLite types you can specify are ``"TEXT"``, ``"INTEGER"``, ``"FLOAT"`` or ``"BLOB"``. +SQLite types you can specify are ``"TEXT"``, ``"INTEGER"``, ``"FLOAT"``, ``"REAL"`` or ``"BLOB"``. If you pass a Python type, it will be mapped to SQLite types as shown here:: - float: "FLOAT" + float: "REAL" int: "INTEGER" bool: "INTEGER" str: "TEXT" @@ -1294,12 +1294,9 @@ If you pass a Python type, it will be mapped to SQLite types as shown here:: np.uint16: "INTEGER" np.uint32: "INTEGER" np.uint64: "INTEGER" - np.float16: "FLOAT" - np.float32: "FLOAT" - np.float64: "FLOAT" - -.. note:: - In sqlite-utils 3.x ``FLOAT`` is used for floating point columns when the correct column type is actually ``REAL``. If you specify ``strict=True`` tables created in strict mode will use the correct column type of ``REAL`` instead. We plan to change this behavior in ``sqlite-utils`` 4.x to always use ``REAL``, but this will represent a minor breaking change and so is being held for the next major release, see issue :issue:`645`. 
+ np.float16: "REAL" + np.float32: "REAL" + np.float64: "REAL" You can also add a column that is a foreign key reference to another table using the ``fk`` parameter: diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index 81548bcd4..f4fc43b00 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -44,7 +44,7 @@ CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) -VALID_COLUMN_TYPES = ("INTEGER", "TEXT", "FLOAT", "BLOB") +VALID_COLUMN_TYPES = ("INTEGER", "TEXT", "FLOAT", "REAL", "BLOB") UNICODE_ERROR = """ {} @@ -419,7 +419,7 @@ def dump(path, load_extension): @click.argument( "col_type", type=click.Choice( - ["integer", "int", "float", "text", "str", "blob", "bytes"], + ["integer", "int", "float", "real", "text", "str", "blob", "bytes"], case_sensitive=False, ), required=False, @@ -2425,10 +2425,12 @@ def schema( "--type", type=( str, - click.Choice(["INTEGER", "TEXT", "FLOAT", "BLOB"], case_sensitive=False), + click.Choice( + ["INTEGER", "TEXT", "FLOAT", "REAL", "BLOB"], case_sensitive=False + ), ), multiple=True, - help="Change column type to INTEGER, TEXT, FLOAT or BLOB", + help="Change column type to INTEGER, TEXT, FLOAT, REAL or BLOB", ) @click.option("--drop", type=str, multiple=True, help="Drop this column") @click.option( diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index b1b6088e9..c057eabe4 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -189,7 +189,7 @@ class Default: DEFAULT = Default() COLUMN_TYPE_MAPPING = { - float: "FLOAT", + float: "REAL", int: "INTEGER", bool: "INTEGER", str: "TEXT", @@ -203,19 +203,21 @@ class Default: datetime.date: "TEXT", datetime.time: "TEXT", datetime.timedelta: "TEXT", - decimal.Decimal: "FLOAT", + decimal.Decimal: "REAL", None.__class__: "TEXT", uuid.UUID: "TEXT", # SQLite explicit types "TEXT": "TEXT", "INTEGER": "INTEGER", "FLOAT": "FLOAT", + "REAL": "REAL", "BLOB": "BLOB", "text": "TEXT", "str": "TEXT", "integer": "INTEGER", "int": "INTEGER", - "float": "FLOAT", + "float": "REAL", 
+ "real": "REAL", "blob": "BLOB", "bytes": "BLOB", } @@ -232,9 +234,9 @@ class Default: np.uint16: "INTEGER", np.uint32: "INTEGER", np.uint64: "INTEGER", - np.float16: "FLOAT", - np.float32: "FLOAT", - np.float64: "FLOAT", + np.float16: "REAL", + np.float32: "REAL", + np.float64: "REAL", } ) except AttributeError: diff --git a/tests/test_cli.py b/tests/test_cli.py index bcf3e9832..0b7a7def9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -284,7 +284,7 @@ def test_create_index_desc(db_path): "int", 'CREATE TABLE "dogs" (\n "name" TEXT\n, "integer" INTEGER)', ), - ("float", "FLOAT", 'CREATE TABLE "dogs" (\n "name" TEXT\n, "float" FLOAT)'), + ("float", "FLOAT", 'CREATE TABLE "dogs" (\n "name" TEXT\n, "float" REAL)'), ("blob", "blob", 'CREATE TABLE "dogs" (\n "name" TEXT\n, "blob" BLOB)'), ("blob", "BLOB", 'CREATE TABLE "dogs" (\n "name" TEXT\n, "blob" BLOB)'), ("blob", "bytes", 'CREATE TABLE "dogs" (\n "name" TEXT\n, "blob" BLOB)'), @@ -2240,6 +2240,28 @@ def test_upsert_detect_types(tmpdir, option): ] +def test_csv_detect_types_creates_real_columns(tmpdir): + """Test that CSV import with --detect-types creates REAL columns for floats""" + db_path = str(tmpdir / "test.db") + data = "name,age,weight\nCleo,6,45.5\nDori,1,3.5" + result = CliRunner().invoke( + cli.cli, + ["insert", db_path, "creatures", "-", "--csv", "--detect-types"], + catch_exceptions=False, + input=data, + ) + assert result.exit_code == 0 + db = Database(db_path) + # Check that the schema uses REAL for the weight column + assert db["creatures"].schema == ( + 'CREATE TABLE "creatures" (\n' + ' "name" TEXT,\n' + ' "age" INTEGER,\n' + ' "weight" REAL\n' + ")" + ) + + def test_integer_overflow_error(tmpdir): db_path = str(tmpdir / "test.db") result = CliRunner().invoke( diff --git a/tests/test_cli_convert.py b/tests/test_cli_convert.py index 45e6b7bf7..4a4d3b181 100644 --- a/tests/test_cli_convert.py +++ b/tests/test_cli_convert.py @@ -408,7 +408,7 @@ def 
test_convert_multi_complex_column_types(fresh_db_and_path): assert db["rows"].schema == ( 'CREATE TABLE "rows" (\n' ' "id" INTEGER PRIMARY KEY\n' - ', "is_str" TEXT, "is_float" FLOAT, "is_int" INTEGER, "is_bytes" BLOB)' + ', "is_str" TEXT, "is_float" REAL, "is_int" INTEGER, "is_bytes" BLOB)' ) diff --git a/tests/test_create.py b/tests/test_create.py index 25015e774..ea09c4595 100644 --- a/tests/test_create.py +++ b/tests/test_create.py @@ -43,7 +43,7 @@ def test_create_table(fresh_db): assert ["test_table"] == fresh_db.table_names() assert [ {"name": "text_col", "type": "TEXT"}, - {"name": "float_col", "type": "FLOAT"}, + {"name": "float_col", "type": "REAL"}, {"name": "int_col", "type": "INTEGER"}, {"name": "bool_col", "type": "INTEGER"}, {"name": "bytes_col", "type": "BLOB"}, @@ -52,7 +52,7 @@ def test_create_table(fresh_db): assert ( 'CREATE TABLE "test_table" (\n' ' "text_col" TEXT,\n' - ' "float_col" FLOAT,\n' + ' "float_col" REAL,\n' ' "int_col" INTEGER,\n' ' "bool_col" INTEGER,\n' ' "bytes_col" BLOB,\n' @@ -143,7 +143,7 @@ def test_create_table_with_not_null(fresh_db): [{"name": "create", "type": "TEXT"}, {"name": "table", "type": "TEXT"}], ), ({"day": datetime.time(11, 0)}, [{"name": "day", "type": "TEXT"}]), - ({"decimal": decimal.Decimal("1.2")}, [{"name": "decimal", "type": "FLOAT"}]), + ({"decimal": decimal.Decimal("1.2")}, [{"name": "decimal", "type": "REAL"}]), ( {"memoryview": memoryview(b"hello")}, [{"name": "memoryview", "type": "BLOB"}], @@ -193,7 +193,7 @@ def test_create_table_with_custom_columns(method_name, use_old_upsert): {"name": "id", "type": "INTEGER"}, {"name": "name", "type": "TEXT"}, {"name": "age", "type": "INTEGER"}, - {"name": "weight", "type": "FLOAT"}, + {"name": "weight", "type": "REAL"}, ] assert expected_columns == [ {"name": col.name, "type": col.type} for col in table.columns @@ -357,7 +357,7 @@ def test_create_error_if_invalid_self_referential_foreign_keys(fresh_db): "weight", float, None, - 'CREATE TABLE "dogs" (\n "name" 
TEXT\n, "weight" FLOAT)', + 'CREATE TABLE "dogs" (\n "name" TEXT\n, "weight" REAL)', ), ("text", "TEXT", None, 'CREATE TABLE "dogs" (\n "name" TEXT\n, "text" TEXT)'), ( @@ -561,7 +561,7 @@ def test_index_foreign_keys_if_index_name_is_already_used(fresh_db): ), ( {"hats": 5, "rating": 3.5}, - [{"name": "hats", "type": "INTEGER"}, {"name": "rating", "type": "FLOAT"}], + [{"name": "hats", "type": "INTEGER"}, {"name": "rating", "type": "REAL"}], ), ], ) @@ -1197,7 +1197,7 @@ def test_create(fresh_db): assert fresh_db["t"].schema == ( 'CREATE TABLE "t" (\n' ' "id" INTEGER PRIMARY KEY,\n' - ' "float" FLOAT NOT NULL,\n' + ' "float" REAL NOT NULL,\n' ' "text" TEXT,\n' ' "integer" INTEGER NOT NULL DEFAULT 0,\n' ' "bytes" BLOB\n' @@ -1360,7 +1360,7 @@ def test_insert_upsert_strict(fresh_db, method_name, strict): def test_create_table_strict(fresh_db, strict): table = fresh_db.create_table("t", {"id": int, "f": float}, strict=strict) assert table.strict == strict or not fresh_db.supports_strict - expected_schema = 'CREATE TABLE "t" (\n' ' "id" INTEGER,\n' ' "f" FLOAT\n' ")" + expected_schema = 'CREATE TABLE "t" (\n' ' "id" INTEGER,\n' ' "f" REAL\n' ")" if strict and not fresh_db.supports_strict: return if strict: