From 45e330c6d811f8368d25eface80bf5fa3217d114 Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Sat, 29 Nov 2025 12:05:57 +0100 Subject: [PATCH] Introduce per geometry and overall limits on number of expire tiles The number of tiles to be expired can be quite large if the input geometries are large or if there are many geometries. Numbers of tiles in the billions can crash osm2pgsql because it runs out of memory. Such large numbers can also overwhelm any kind of re-rendering mechanism run after osm2pgsql to bring tiles up to date. In day-to-day processing this should not happen, but it can happen due to vandalism or misconfiguration. To protect against this problem, this change introduces limits on the number of tiles that can be affected by a single geometry and the overall number of tiles that an expire output will generate for each run of osm2pgsql. * If a single geometry would result in the expire of more than `max_tiles_geometry` this geometry will be ignored for the purposes of expiry. Note that the geometry will still be written to the database, but no tiles will be added to the expire output. * If the number of tiles generated during a single run of osm2pgsql for an expire output grows beyond `max_tiles_overall`, no further tiles will be written to this output. Limits are per expire output of which you can have several. The limits can be set in the flex expire output configuration but sensible defaults are provided. For the (legacy) expire output configured on the command line with the `-e` and `-o` options, the settings can not be changed, you will always get the default values. To choose the default values for these settings I looked at real-world values as follows: * Russia has one of the largest boundaries in the planet. Expiry (boundary only) on zoom level 14 affects 94144 tiles, on z15 190168 tiles, on z16 383465 tiles. For typical raster tiles using 8x8 meta tiles expiry on z16 is equivalent to showing z19 tiles. So 500,000 tiles seems to be a useful limit for `max_tiles_geometry`. * For expiring the area I looked at the Greenland icesheet, which needs more than 8 million tiles on z14. At least for vector tiles this is good enough, for raster tiles we might need more though. * For `max_tiles_overall`: Paul Norman analyzed the number of tiles expired by typical minutely updates in https://www.openstreetmap.org/user/pnorman/diary/403266. For zoom level 14 the most he got was 119801 tiles. The same analysis also shows that for longer time frames (checked were 2 minutes and 5 minutes, but the same should be true for larger intervals) the number of tiles doesn't go up because these huge numbers only happen very rarely. Rounding these numbers and adding a safety factor, values of 10,000,000 and 50,000,000 seem reasonable for the single geometry and the overall number of tiles per run. Memory use in osm2pgsql is about 32 bytes per tile, so this will need 1.6 GB max which should be no problem at all. The numbers are chosen so they will practically never be triggered so that users upgrading from existing versions of osm2pgsql will not be suddenly affected. It is recommended that users tune their settings according to their own needs. Once we have some more operational experience with this, we can adjust the defaults. I considered using different default max values for different zoom levels, but this will make configuration more complicated. Change file processing in osm2pgsql runs in parallel threads. The old code stored the to-be-expired tiles in one list per thread and merged them later. This has two problems: a) because the lists might contain some of the same tiles, all lists together can use a much larger amount than a single list would take b) we can not easily check the number of tiles in those lists against the configured maximum. So this commit changes the way the list is kept: We only keep a single list in the expire_output_t and use a mutex to control access to this list. (There might still be overlapping lists if you have more than one expire output, but that's by design.) Objects of expire_tiles_t class now only keep a temporary list for each geometry added. Once all tiles affected by a single geometry are identified, this list is added to the overall list in expire_output_t and the temporary list is cleared. Fixes #2190 --- src/expire-output.cpp | 59 ++++++++- src/expire-output.hpp | 73 ++++++++++- src/expire-tiles.cpp | 46 +++---- src/expire-tiles.hpp | 19 +-- src/flex-lua-expire-output.cpp | 42 +++++- src/flex-lua-expire-output.hpp | 2 + src/flex-table-column.cpp | 13 +- src/flex-table-column.hpp | 3 +- src/flex-write.cpp | 7 +- src/flex-write.hpp | 3 +- src/lua-utils.cpp | 24 ++++ src/lua-utils.hpp | 5 + src/osm2pgsql-expire.cpp | 41 ++++-- src/osmdata.cpp | 12 -- src/output-flex.cpp | 27 ++-- src/output-flex.hpp | 2 - src/output-pgsql.cpp | 25 ++-- src/output-pgsql.hpp | 3 +- src/output.hpp | 2 - tests/bdd/flex/expire-limit.feature | 98 ++++++++++++++ .../lua-expire-output-definitions.feature | 34 +++++ tests/data/test_expire_limit.lua | 20 +++ tests/test-expire-tiles.cpp | 121 +----------------- 23 files changed, 443 insertions(+), 238 deletions(-) create mode 100644 tests/bdd/flex/expire-limit.feature create mode 100644 tests/data/test_expire_limit.lua diff --git a/src/expire-output.cpp b/src/expire-output.cpp index 737e059f3..7babda0e1 100644 --- a/src/expire-output.cpp +++ b/src/expire-output.cpp @@ -17,16 +17,67 @@ #include #include +void expire_output_t::add_tiles( + std::unordered_set const &dirty_tiles) +{ + std::lock_guard const guard{*m_tiles_mutex}; + + if (m_overall_tile_limit_reached) { + return; + } + + if (dirty_tiles.size() > m_max_tiles_geometry) { + log_warn("Tile limit {} reached for single geometry!", + m_max_tiles_geometry); + return; + } + + /** + * This check is not quite correct, because some tiles could be in both, + * the dirty_list and in m_tiles, which means we might not reach + * m_max_tiles_overall if we join those in. But this check is much + * easier and cheaper than trying to add all the tiles into the dirty_list, + * checking each time whether we reached the limit. And with the number + * of tiles involved in doesn't matter that much anyway. + */ + if (dirty_tiles.size() + m_tiles.size() > m_max_tiles_overall) { + m_overall_tile_limit_reached = true; + log_warn("Overall tile limit {} reached for this run!", + m_max_tiles_overall); + return; + } + + m_tiles.insert(dirty_tiles.cbegin(), dirty_tiles.cend()); +} + +bool expire_output_t::empty() noexcept +{ + std::lock_guard const guard{*m_tiles_mutex}; + return m_tiles.empty(); +} + +quadkey_list_t expire_output_t::get_tiles() +{ + std::lock_guard const guard{*m_tiles_mutex}; + quadkey_list_t tile_list; + + tile_list.reserve(m_tiles.size()); + tile_list.assign(m_tiles.cbegin(), m_tiles.cend()); + std::sort(tile_list.begin(), tile_list.end()); + m_tiles.clear(); + + return tile_list; +} + std::size_t -expire_output_t::output(quadkey_list_t const &tile_list, - connection_params_t const &connection_params) const +expire_output_t::output(connection_params_t const &connection_params) { std::size_t num = 0; if (!m_filename.empty()) { - num = output_tiles_to_file(tile_list); + num = output_tiles_to_file(get_tiles()); } if (!m_table.empty()) { - num = output_tiles_to_table(tile_list, connection_params); + num = output_tiles_to_table(get_tiles(), connection_params); } return num; } diff --git a/src/expire-output.hpp b/src/expire-output.hpp index 59de5f3fb..312ed7de4 100644 --- a/src/expire-output.hpp +++ b/src/expire-output.hpp @@ -15,9 +15,15 @@ #include #include #include +#include +#include #include +#include #include +constexpr std::size_t DEFAULT_MAX_TILES_GEOMETRY = 10'000'000; +constexpr std::size_t DEFAULT_MAX_TILES_OVERALL = 50'000'000; + class pg_conn_t; class connection_params_t; @@ -53,9 +59,45 @@ class expire_output_t uint32_t maxzoom() const noexcept { return m_maxzoom; } void set_maxzoom(uint32_t maxzoom) noexcept { m_maxzoom = maxzoom; } - std::size_t output(quadkey_list_t const &tile_list, - connection_params_t const &connection_params) const; + std::size_t max_tiles_geometry() const noexcept + { + return m_max_tiles_geometry; + } + + void set_max_tiles_geometry(std::size_t max_tiles_geometry) noexcept + { + m_max_tiles_geometry = max_tiles_geometry; + } + + std::size_t max_tiles_overall() const noexcept + { + return m_max_tiles_overall; + } + + void set_max_tiles_overall(std::size_t max_tiles_overall) noexcept + { + m_max_tiles_overall = max_tiles_overall; + } + + bool empty() noexcept; + + void add_tiles(std::unordered_set const &dirty_tiles); + + quadkey_list_t get_tiles(); + + /** + * Write the list of tiles to a database table or file. + * + * \param connection_params Database connection parameters + */ + std::size_t output(connection_params_t const &connection_params); + + /** + * Create table for tiles. + */ + void create_output_table(pg_conn_t const &db_connection) const; +private: /** * Write the list of tiles to a file. * @@ -75,11 +117,16 @@ class expire_output_t connection_params_t const &connection_params) const; /** - * Create table for tiles. + * Access to the m_tiles collection of expired tiles must go through + * this mutex, because it can happend from several threads at the same + * time. Mutex is wrapped in a shared_ptr to make this class movable so + * we can store instances in std::vector. */ - void create_output_table(pg_conn_t const &db_connection) const; + std::shared_ptr m_tiles_mutex = std::make_shared(); + + /// This is where we collect all the expired tiles. + std::unordered_set m_tiles; -private: /// The filename (if any) for output std::string m_filename; @@ -95,6 +142,22 @@ class expire_output_t /// Zoom level we capture tiles on uint32_t m_maxzoom = 0; + /** + * The following two settings are for protecting osm2pgsql from overload as + * well as downstream tile expiry mechanisms in case of large changes to + * OSM data (possibly from vandalism). They should be large enough to not + * trigger in normal use. + */ + + /// Maximum number of tiles that can be affected by a single geometry. + std::size_t m_max_tiles_geometry = DEFAULT_MAX_TILES_GEOMETRY; + + /// Maximum number of tiles that can be affected per run. + std::size_t m_max_tiles_overall = DEFAULT_MAX_TILES_OVERALL; + + /// Has the overall tile limit been reached already. + bool m_overall_tile_limit_reached = false; + }; // class expire_output_t #endif // OSM2PGSQL_EXPIRE_OUTPUT_HPP diff --git a/src/expire-tiles.cpp b/src/expire-tiles.cpp index 0686804ea..076148571 100644 --- a/src/expire-tiles.cpp +++ b/src/expire-tiles.cpp @@ -27,20 +27,30 @@ #include "wkb.hpp" expire_tiles_t::expire_tiles_t(uint32_t max_zoom, - std::shared_ptr projection) -: m_projection(std::move(projection)), m_maxzoom(max_zoom), - m_map_width(static_cast(1U << m_maxzoom)) -{} + std::shared_ptr projection, + std::size_t max_tiles_geometry) +: m_projection(std::move(projection)), m_max_tiles_geometry(max_tiles_geometry), + m_maxzoom(max_zoom), m_map_width(static_cast(1U << m_maxzoom)) +{ +} void expire_tiles_t::expire_tile(uint32_t x, uint32_t y) { - // Only try to insert to tile into the set if the last inserted tile - // is different from this tile. + if (m_dirty_tiles.size() > m_max_tiles_geometry) { + return; + } + tile_t const new_tile{m_maxzoom, x, y}; - if (!m_prev_tile.valid() || m_prev_tile != new_tile) { - m_dirty_tiles.insert(new_tile.quadkey()); - m_prev_tile = new_tile; + m_dirty_tiles.insert(new_tile.quadkey()); +} + +void expire_tiles_t::commit_tiles(expire_output_t *expire_output) +{ + if (!expire_output || m_dirty_tiles.empty()) { + return; } + expire_output->add_tiles(m_dirty_tiles); + m_dirty_tiles.clear(); } uint32_t expire_tiles_t::normalise_tile_x_coord(int x) const @@ -281,24 +291,6 @@ quadkey_list_t expire_tiles_t::get_tiles() return tiles; } -void expire_tiles_t::merge_and_destroy(expire_tiles_t *other) -{ - if (m_map_width != other->m_map_width) { - throw fmt_error("Unable to merge tile expiry sets when " - "map_width does not match: {} != {}.", - m_map_width, other->m_map_width); - } - - if (m_dirty_tiles.empty()) { - using std::swap; - swap(m_dirty_tiles, other->m_dirty_tiles); - } else { - m_dirty_tiles.insert(other->m_dirty_tiles.cbegin(), - other->m_dirty_tiles.cend()); - other->m_dirty_tiles.clear(); - } -} - int expire_from_result(expire_tiles_t *expire, pg_result_t const &result, expire_config_t const &expire_config) { diff --git a/src/expire-tiles.hpp b/src/expire-tiles.hpp index a372d3759..73ee1add8 100644 --- a/src/expire-tiles.hpp +++ b/src/expire-tiles.hpp @@ -18,6 +18,7 @@ #include #include "expire-config.hpp" +#include "expire-output.hpp" #include "geom.hpp" #include "geom-box.hpp" #include "logging.hpp" @@ -31,9 +32,8 @@ class expire_tiles_t { public: expire_tiles_t(uint32_t max_zoom, - std::shared_ptr projection); - - bool empty() const noexcept { return m_dirty_tiles.empty(); } + std::shared_ptr projection, + std::size_t max_tiles_geometry = DEFAULT_MAX_TILES_GEOMETRY); bool enabled() const noexcept { return m_maxzoom != 0; } @@ -83,10 +83,13 @@ class expire_tiles_t quadkey_list_t get_tiles(); /** - * Merge the list of expired tiles in the other object into this - * object, destroying the list in the other object. + * Must be called after calling expire_tile() one or more times for a + * single geometry to "commit" all tiles to be expired for that geometry. + * + * \param expire_output The expire output to write tiles to. If this is + * the nullptr, nothing is done. */ - void merge_and_destroy(expire_tiles_t *other); + void commit_tiles(expire_output_t* expire_output); private: /** @@ -113,11 +116,9 @@ class expire_tiles_t /// This is where we collect all the expired tiles. std::unordered_set m_dirty_tiles; - /// The tile which has been added last to the unordered set. - tile_t m_prev_tile; - std::shared_ptr m_projection; + std::size_t m_max_tiles_geometry; uint32_t m_maxzoom; int m_map_width; diff --git a/src/flex-lua-expire-output.cpp b/src/flex-lua-expire-output.cpp index 8248d30d6..172f29e8b 100644 --- a/src/flex-lua-expire-output.cpp +++ b/src/flex-lua-expire-output.cpp @@ -62,6 +62,26 @@ create_expire_output(lua_State *lua_state, std::string const &default_schema, } lua_pop(lua_state, 1); // "minzoom" + // optional "max_tiles_geometry" field + auto const max_tiles_geometry = luaX_get_table_optional_uint64( + lua_state, "max_tiles_geometry", -1, + "The 'max_tiles_geometry' field in a expire output", 1, (4ULL << 20ULL), + "1 and 4 << 20"); + if (max_tiles_geometry > 0) { + new_expire_output.set_max_tiles_geometry(max_tiles_geometry); + } + lua_pop(lua_state, 1); // "max_tiles_geometry" + + // optional "max_tiles_overall" field + auto const max_tiles_overall = luaX_get_table_optional_uint64( + lua_state, "max_tiles_overall", -1, + "The 'max_tiles_overall' field in a expire output", 1, (4ULL << 20ULL), + "1 and 4 << 20"); + if (max_tiles_overall > 0) { + new_expire_output.set_max_tiles_overall(max_tiles_overall); + } + lua_pop(lua_state, 1); // "max_tiles_overall" + return new_expire_output; } @@ -71,6 +91,8 @@ TRAMPOLINE_WRAPPED_OBJECT(expire_output, maxzoom) TRAMPOLINE_WRAPPED_OBJECT(expire_output, minzoom) TRAMPOLINE_WRAPPED_OBJECT(expire_output, schema) TRAMPOLINE_WRAPPED_OBJECT(expire_output, table) +TRAMPOLINE_WRAPPED_OBJECT(expire_output, max_tiles_geometry) +TRAMPOLINE_WRAPPED_OBJECT(expire_output, max_tiles_overall) } // anonymous namespace @@ -106,7 +128,11 @@ void lua_wrapper_expire_output_t::init(lua_State *lua_state) {"maxzoom", lua_trampoline_expire_output_maxzoom}, {"minzoom", lua_trampoline_expire_output_minzoom}, {"schema", lua_trampoline_expire_output_schema}, - {"table", lua_trampoline_expire_output_table}}); + {"table", lua_trampoline_expire_output_table}, + {"max_tiles_geometry", + lua_trampoline_expire_output_max_tiles_geometry}, + {"max_tiles_overall", + lua_trampoline_expire_output_max_tiles_overall}}); } int lua_wrapper_expire_output_t::tostring() const @@ -150,3 +176,17 @@ int lua_wrapper_expire_output_t::table() const noexcept luaX_pushstring(lua_state(), self().table()); return 1; } + +int lua_wrapper_expire_output_t::max_tiles_geometry() const noexcept +{ + lua_pushinteger(lua_state(), + static_cast(self().max_tiles_geometry())); + return 1; +} + +int lua_wrapper_expire_output_t::max_tiles_overall() const noexcept +{ + lua_pushinteger(lua_state(), + static_cast(self().max_tiles_overall())); + return 1; +} diff --git a/src/flex-lua-expire-output.hpp b/src/flex-lua-expire-output.hpp index 9e71d6471..437023ea8 100644 --- a/src/flex-lua-expire-output.hpp +++ b/src/flex-lua-expire-output.hpp @@ -42,6 +42,8 @@ class lua_wrapper_expire_output_t : public lua_wrapper_base_t int minzoom() const noexcept; int schema() const noexcept; int table() const noexcept; + int max_tiles_geometry() const noexcept; + int max_tiles_overall() const noexcept; }; // class lua_wrapper_expire_output_t diff --git a/src/flex-table-column.cpp b/src/flex-table-column.cpp index 121fd485e..4ba1ac273 100644 --- a/src/flex-table-column.cpp +++ b/src/flex-table-column.cpp @@ -202,13 +202,18 @@ void flex_table_column_t::add_expire(expire_config_t const &config) m_expires.push_back(config); } -void flex_table_column_t::do_expire(geom::geometry_t const &geom, - std::vector *expire) const +void flex_table_column_t::do_expire( + geom::geometry_t const &geom, std::vector *expire, + std::vector *expire_outputs) const { assert(expire); + assert(expire_outputs); + for (auto const &expire_config : m_expires) { assert(expire_config.expire_output < expire->size()); - (*expire)[expire_config.expire_output].from_geometry(geom, - expire_config); + auto &expire_tiles = expire->at(expire_config.expire_output); + expire_tiles.from_geometry(geom, expire_config); + expire_tiles.commit_tiles( + &expire_outputs->at(expire_config.expire_output)); } } diff --git a/src/flex-table-column.hpp b/src/flex-table-column.hpp index 213bd9726..966e73d2e 100644 --- a/src/flex-table-column.hpp +++ b/src/flex-table-column.hpp @@ -132,7 +132,8 @@ class flex_table_column_t } void do_expire(geom::geometry_t const &geom, - std::vector *expire) const; + std::vector *expire, + std::vector *expire_outputs) const; private: std::vector m_expires; diff --git a/src/flex-write.cpp b/src/flex-write.cpp index 688b66b76..b023cb5fd 100644 --- a/src/flex-write.cpp +++ b/src/flex-write.cpp @@ -258,7 +258,8 @@ bool is_compatible(geom::geometry_t const &geom, void flex_write_column(lua_State *lua_state, db_copy_mgr_t *copy_mgr, flex_table_column_t const &column, - std::vector *expire) + std::vector *expire, + std::vector *expire_outputs) { lua_getfield(lua_state, -1, column.name().c_str()); int const ltype = lua_type(lua_state, -1); @@ -423,12 +424,12 @@ void flex_write_column(lua_State *lua_state, type == table_column_type::multilinestring || type == table_column_type::multipolygon); if (geom->srid() == column.srid()) { - column.do_expire(*geom, expire); + column.do_expire(*geom, expire, expire_outputs); copy_mgr->add_hex_geom(geom_to_ewkb(*geom, wrap_multi)); } else { auto const &proj = get_projection(column.srid()); auto const tgeom = geom::transform(*geom, proj); - column.do_expire(tgeom, expire); + column.do_expire(tgeom, expire, expire_outputs); copy_mgr->add_hex_geom(geom_to_ewkb(tgeom, wrap_multi)); } } else { diff --git a/src/flex-write.hpp b/src/flex-write.hpp index 6150aace8..bef1b45c0 100644 --- a/src/flex-write.hpp +++ b/src/flex-write.hpp @@ -37,6 +37,7 @@ class not_null_exception_t : public std::runtime_error void flex_write_column(lua_State *lua_state, db_copy_mgr_t *copy_mgr, flex_table_column_t const &column, - std::vector *expire); + std::vector *expire, + std::vector *expire_outputs); #endif // OSM2PGSQL_FLEX_WRITE_HPP diff --git a/src/lua-utils.cpp b/src/lua-utils.cpp index 4734dd4a7..a50ea9f8d 100644 --- a/src/lua-utils.cpp +++ b/src/lua-utils.cpp @@ -217,6 +217,30 @@ uint32_t luaX_get_table_optional_uint32(lua_State *lua_state, char const *key, return static_cast(num); } +uint64_t luaX_get_table_optional_uint64(lua_State *lua_state, char const *key, + int table_index, char const *error_msg, + uint64_t min, uint64_t max, + char const *range) +{ + assert(lua_state); + assert(key); + assert(error_msg); + lua_getfield(lua_state, table_index, key); + if (lua_isnil(lua_state, -1)) { + return 0; + } + if (!lua_isnumber(lua_state, -1)) { + throw fmt_error("{} must contain an integer.", error_msg); + } + + auto const num = lua_tonumber(lua_state, -1); + if (num < static_cast(min) || num > static_cast(max)) { + throw fmt_error("{} must be between {}.", error_msg, range); + } + + return static_cast(num); +} + // Lua 5.1 doesn't support luaL_traceback, unless LuaJIT is used #if LUA_VERSION_NUM < 502 && !defined(HAVE_LUAJIT) diff --git a/src/lua-utils.hpp b/src/lua-utils.hpp index 8030935b5..979481026 100644 --- a/src/lua-utils.hpp +++ b/src/lua-utils.hpp @@ -70,6 +70,11 @@ uint32_t luaX_get_table_optional_uint32(lua_State *lua_state, char const *key, uint32_t min, uint32_t max, char const *range); +uint64_t luaX_get_table_optional_uint64(lua_State *lua_state, char const *key, + int table_index, char const *error_msg, + uint64_t min, uint64_t max, + char const *range); + bool luaX_get_table_bool(lua_State *lua_state, char const *key, int table_index, char const *error_msg, bool default_value); diff --git a/src/osm2pgsql-expire.cpp b/src/osm2pgsql-expire.cpp index 36f9a4163..1859cbed7 100644 --- a/src/osm2pgsql-expire.cpp +++ b/src/osm2pgsql-expire.cpp @@ -83,14 +83,14 @@ class output_expire_t : public output_t void way_delete(osmium::Way * /*way*/) override {} void relation_delete(osmium::Relation const & /*rel*/) override {} - void merge_expire_trees(output_t * /*other*/) override {} - void print(std::string const &format); private: + void create_expire_tiles(geom::geometry_t const &geom); + config_t m_config; - expire_tiles_t m_expire_tiles; expire_output_t m_expire_output; + expire_tiles_t m_expire_tiles; }; // class output_expire_t std::shared_ptr output_expire_t::clone( @@ -106,20 +106,26 @@ output_expire_t::output_expire_t(std::shared_ptr const &mid, : output_t(mid, std::move(thread_pool), options), m_config(cfg), m_expire_tiles(cfg.zoom, cfg.projection) { + m_expire_output.set_minzoom(cfg.zoom); + m_expire_output.set_maxzoom(cfg.zoom); } output_expire_t::~output_expire_t() = default; +void output_expire_t::create_expire_tiles(geom::geometry_t const &geom) +{ + auto const geom_merc = geom::transform(geom, *m_config.projection); + m_expire_tiles.from_geometry(geom_merc, m_config.expire_config); + m_expire_tiles.commit_tiles(&m_expire_output); +} + void output_expire_t::node_add(osmium::Node const &node) { if (node.tags().empty()) { return; } - auto const geom_merc = - geom::transform(geom::create_point(node), *m_config.projection); - - m_expire_tiles.from_geometry(geom_merc, m_config.expire_config); + create_expire_tiles(geom::create_point(node)); } void output_expire_t::way_add(osmium::Way *way) @@ -152,9 +158,7 @@ void output_expire_t::way_add(osmium::Way *way) return; } - auto const geom_merc = geom::transform(geom, *m_config.projection); - - m_expire_tiles.from_geometry(geom_merc, m_config.expire_config); + create_expire_tiles(geom); } void output_expire_t::relation_add(osmium::Relation const &relation) @@ -207,9 +211,7 @@ void output_expire_t::relation_add(osmium::Relation const &relation) return; } - auto const geom_merc = geom::transform(geom, *m_config.projection); - - m_expire_tiles.from_geometry(geom_merc, m_config.expire_config); + create_expire_tiles(geom); } std::string tile_to_json(tile_t const &tile) @@ -251,6 +253,8 @@ std::string geojson_end() { return "]}\n"; } void print_tiles(std::vector const &tiles) { + log_debug("Writing out {} tiles in 'geojson' format...", tiles.size()); + fmt::print("{}\n", geojson_start()); bool first = true; for (auto const &tile : tiles) { @@ -334,7 +338,9 @@ config_t parse_command_line(int argc, char *argv[]) void output_expire_t::print(std::string const &format) { - auto const tiles = m_expire_tiles.get_tiles(); + auto const tiles = m_expire_output.get_tiles(); + log_debug("Writing out {} tiles in '{}' format...", tiles.size(), format); + if (format == "tiles") { for (auto const &qk : tiles) { auto const tile = tile_t::from_quadkey(qk, m_config.zoom); @@ -398,6 +404,12 @@ int main(int argc, char *argv[]) auto const &suffix = input.back(); if (suffix == "osm" || suffix == "pbf" || suffix == "opl") { // input is an OSM file + + if (cfg.zoom == 0) { + throw std::runtime_error{ + "You have to set the zoom level with -z, --zoom"}; + } + auto thread_pool = std::make_shared(1U); log_debug("Started pool with {} threads.", thread_pool->num_threads()); @@ -427,6 +439,7 @@ int main(int argc, char *argv[]) } print_tiles(tiles); } + log_debug("Done."); } catch (std::exception const &e) { log_error("{}", e.what()); return 1; diff --git a/src/osmdata.cpp b/src/osmdata.cpp index 3d87dd175..3f0a5a105 100644 --- a/src/osmdata.cpp +++ b/src/osmdata.cpp @@ -252,17 +252,6 @@ class multithreaded_processor_t &output_t::pending_relation_stage1c); } - /** - * Collect expiry tree information from all clones and merge it back - * into the original output. - */ - void merge_expire_trees() - { - for (auto const &clone : m_clones) { - m_output->merge_expire_trees(clone.get()); - } - } - private: /// Get the next id from the queue. static osmid_t pop_id(idlist_t *queue, std::mutex *mutex) @@ -392,7 +381,6 @@ void osmdata_t::process_dependents() m_rels_pending_tracker.sort_unique(); proc.process_relations(std::move(m_rels_pending_tracker)); } - proc.merge_expire_trees(); } // stage 1c processing: mark parent relations of marked objects as changed diff --git a/src/output-flex.cpp b/src/output-flex.cpp index c3d78f9e6..5541da3be 100644 --- a/src/output-flex.cpp +++ b/src/output-flex.cpp @@ -809,7 +809,7 @@ int output_flex_t::table_insert() copy_mgr->add_column(id); } else { flex_write_column(lua_state(), copy_mgr, column, - &m_expire_tiles); + &m_expire_tiles, m_expire_outputs.get()); } } table_connection.increment_insert_counter(); @@ -1020,12 +1020,11 @@ void output_flex_t::stop() assert(m_expire_outputs->size() == m_expire_tiles.size()); for (std::size_t i = 0; i < m_expire_outputs->size(); ++i) { - if (!m_expire_tiles[i].empty()) { - auto const &eo = (*m_expire_outputs)[i]; + if (!(*m_expire_outputs)[i].empty()) { + auto &eo = (*m_expire_outputs)[i]; std::size_t const count = - eo.output(m_expire_tiles[i].get_tiles(), - get_options()->connection_params); + eo.output(get_options()->connection_params); log_info("Wrote {} entries to expire output [{}].", count, i); } @@ -1113,7 +1112,8 @@ void output_flex_t::delete_from_table(table_connection_t *table_connection, if (column.has_expire()) { for (int i = 0; i < num_tuples; ++i) { auto const geom = ewkb_to_geom(result.get(i, col)); - column.do_expire(geom, &m_expire_tiles); + column.do_expire(geom, &m_expire_tiles, + m_expire_outputs.get()); } ++col; } @@ -1242,7 +1242,8 @@ output_flex_t::output_flex_t(output_flex_t const *other, for (auto &expire_output : *m_expire_outputs) { m_expire_tiles.emplace_back( expire_output.maxzoom(), - reprojection_t::create_projection(PROJ_SPHERE_MERC)); + reprojection_t::create_projection(PROJ_SPHERE_MERC), + expire_output.max_tiles_geometry()); } } @@ -1308,7 +1309,8 @@ output_flex_t::output_flex_t(std::shared_ptr const &mid, for (auto const &expire_output : *m_expire_outputs) { m_expire_tiles.emplace_back( expire_output.maxzoom(), - reprojection_t::create_projection(PROJ_SPHERE_MERC)); + reprojection_t::create_projection(PROJ_SPHERE_MERC), + expire_output.max_tiles_geometry()); } create_expire_tables(*m_expire_outputs, get_options()->connection_params); @@ -1515,12 +1517,3 @@ void output_flex_t::reprocess_marked() // We don't need these any more so can free the memory. m_stage2_way_ids->clear(); } - -void output_flex_t::merge_expire_trees(output_t *other) -{ - auto *const opgsql = dynamic_cast(other); - assert(m_expire_tiles.size() == opgsql->m_expire_tiles.size()); - for (std::size_t i = 0; i < m_expire_tiles.size(); ++i) { - m_expire_tiles[i].merge_and_destroy(&opgsql->m_expire_tiles[i]); - } -} diff --git a/src/output-flex.hpp b/src/output-flex.hpp index 446273a13..427225208 100644 --- a/src/output-flex.hpp +++ b/src/output-flex.hpp @@ -151,8 +151,6 @@ class output_flex_t : public output_t void way_delete(osmium::Way *way) override; void relation_delete(osmium::Relation const &rel) override; - void merge_expire_trees(output_t *other) override; - int app_as_point(); int app_as_linestring(); int app_as_polygon(); diff --git a/src/output-pgsql.cpp b/src/output-pgsql.cpp index 97316b1b0..de1f51275 100644 --- a/src/output-pgsql.cpp +++ b/src/output-pgsql.cpp @@ -102,6 +102,7 @@ void output_pgsql_t::pgsql_out_way(osmium::Way const &way, taglist_t *tags, auto const wkb = geom_to_ewkb(projected_geom); if (!wkb.empty()) { m_expire.from_geometry_if_3857(projected_geom, m_expire_config); + m_expire.commit_tiles(&m_expire_output); if (m_enable_way_area) { double const area = calculate_area( get_options()->reproject_area, geom, projected_geom); @@ -117,6 +118,7 @@ void output_pgsql_t::pgsql_out_way(osmium::Way const &way, taglist_t *tags, geom::transform(geom::create_linestring(way), *m_proj), split_at)); for (auto const &sgeom : geoms) { m_expire.from_geometry_if_3857(sgeom, m_expire_config); + m_expire.commit_tiles(&m_expire_output); auto const wkb = geom_to_ewkb(sgeom); m_tables[t_line]->write_row(way.id(), *tags, wkb); if (roads) { @@ -180,12 +182,7 @@ void output_pgsql_t::stop() } if (get_options()->expire_tiles_zoom_min > 0) { - expire_output_t expire_out; - expire_out.set_filename(get_options()->expire_tiles_filename); - expire_out.set_minzoom(get_options()->expire_tiles_zoom_min); - expire_out.set_maxzoom(get_options()->expire_tiles_zoom); - auto const count = - expire_out.output_tiles_to_file(m_expire.get_tiles()); + auto const count = m_expire_output.output(connection_params_t{}); log_info("Wrote {} entries to expired tiles list", count); } } @@ -218,6 +215,7 @@ void output_pgsql_t::node_add(osmium::Node const &node) auto const geom = geom::transform(geom::create_point(node), *m_proj); m_expire.from_geometry_if_3857(geom, m_expire_config); + m_expire.commit_tiles(&m_expire_output); auto const wkb = geom_to_ewkb(geom); m_tables[t_point]->write_row(node.id(), outtags, wkb); } @@ -296,6 +294,7 @@ void output_pgsql_t::pgsql_process_relation(osmium::Relation const &rel) auto const geoms = geom::split_multi(std::move(projected_geom)); for (auto const &sgeom : geoms) { m_expire.from_geometry_if_3857(sgeom, m_expire_config); + m_expire.commit_tiles(&m_expire_output); auto const wkb = geom_to_ewkb(sgeom); m_tables[t_line]->write_row(-rel.id(), outtags, wkb); if (roads) { @@ -312,6 +311,7 @@ void output_pgsql_t::pgsql_process_relation(osmium::Relation const &rel) for (auto const &sgeom : geoms) { auto const projected_geom = geom::transform(sgeom, *m_proj); m_expire.from_geometry_if_3857(projected_geom, m_expire_config); + m_expire.commit_tiles(&m_expire_output); auto const wkb = geom_to_ewkb(projected_geom); if (m_enable_way_area) { double const area = calculate_area( @@ -461,6 +461,10 @@ output_pgsql_t::output_pgsql_t(std::shared_ptr const &mid, "https://osm2pgsql.org/doc/" "faq.html#the-pgsql-output-is-deprecated-what-does-that-mean"); + m_expire_output.set_filename(options.expire_tiles_filename); + m_expire_output.set_minzoom(options.expire_tiles_zoom_min); + m_expire_output.set_maxzoom(options.expire_tiles_zoom); + m_expire_config.full_area_limit = get_options()->expire_tiles_max_bbox; if (get_options()->expire_tiles_max_bbox > 0.0) { m_expire_config.mode = expire_mode::hybrid; @@ -524,6 +528,7 @@ output_pgsql_t::output_pgsql_t( m_enable_way_area(other->m_enable_way_area), m_ignore_untagged_objects(other->m_ignore_untagged_objects), m_proj(get_options()->projection), m_expire_config(other->m_expire_config), + m_expire_output(other->m_expire_output), m_expire(get_options()->expire_tiles_zoom, get_options()->projection), m_buffer(1024, osmium::memory::Buffer::auto_grow::yes), m_rels_buffer(1024, osmium::memory::Buffer::auto_grow::yes), @@ -537,11 +542,3 @@ output_pgsql_t::output_pgsql_t( } output_pgsql_t::~output_pgsql_t() = default; - -void output_pgsql_t::merge_expire_trees(output_t *other) -{ - auto *const opgsql = dynamic_cast(other); - if (opgsql) { - m_expire.merge_and_destroy(&opgsql->m_expire); - } -} diff --git a/src/output-pgsql.hpp b/src/output-pgsql.hpp index dd2679b66..b0cdc45d3 100644 --- a/src/output-pgsql.hpp +++ b/src/output-pgsql.hpp @@ -81,8 +81,6 @@ class output_pgsql_t : public output_t void way_delete(osmium::Way *way) override; void relation_delete(osmium::Relation const &rel) override; - void merge_expire_trees(output_t *other) override; - private: void pgsql_out_way(osmium::Way const &way, taglist_t *tags, bool polygon, bool roads); @@ -108,6 +106,7 @@ class output_pgsql_t : public output_t std::shared_ptr m_proj; expire_config_t m_expire_config; + expire_output_t m_expire_output; expire_tiles_t m_expire; osmium::memory::Buffer m_buffer; diff --git a/src/output.hpp b/src/output.hpp index f7d3b85bd..54c6b1323 100644 --- a/src/output.hpp +++ b/src/output.hpp @@ -102,8 +102,6 @@ class output_t virtual void way_delete(osmium::Way *way) = 0; virtual void relation_delete(osmium::Relation const &rel) = 0; - virtual void merge_expire_trees(output_t * /*other*/) {} - output_requirements const &get_requirements() const noexcept { return m_output_requirements; diff --git a/tests/bdd/flex/expire-limit.feature b/tests/bdd/flex/expire-limit.feature new file mode 100644 index 000000000..b40f37062 --- /dev/null +++ b/tests/bdd/flex/expire-limit.feature @@ -0,0 +1,98 @@ +Feature: Changes with limited expire on zoom 2 + + Background: + Given the style file 'test_expire_limit.lua' + + And the OSM data + """ + n10 v1 dV x10 y10 + n11 v1 dV x100 y10 + n12 v1 dV x10 y70 + n13 v1 dV x100 y70 + """ + When running osm2pgsql flex with parameters + | --slim | + + Then table osm2pgsql_test_t1 contains exactly + | way_id | + Then table osm2pgsql_test_expire contains exactly + | zoom | x | y | + + + Scenario: short ways are okay + Given the OSM data + """ + w20 v1 dV Ta=b Nn10,n11 + w21 v1 dV Ta=b Nn10,n12 + """ + + When running osm2pgsql flex with parameters + | --slim | -a | + + Then table osm2pgsql_test_t1 contains exactly + | way_id | + | 20 | + | 21 | + Then table osm2pgsql_test_expire contains exactly + | zoom | x | y | + | 2 | 2 | 1 | + | 2 | 3 | 1 | + | 2 | 2 | 0 | + + + Scenario: long way is not okay + Given the OSM data + """ + w20 v1 dV Ta=b Nn10,n11,n13 + """ + + When running osm2pgsql flex with parameters + | --slim | -a | + + Then table osm2pgsql_test_t1 contains exactly + | way_id | + | 20 | + Then table osm2pgsql_test_expire contains exactly + | zoom | x | y | + And the error output contains + """ + Tile limit 2 reached for single geometry! + """ + + Scenario: too many tiles overall is not okay + Given the OSM data + """ + n14 v1 dV x100 y-10 + n15 v1 dV x100 y-70 + n16 v1 dV x10 y-70 + n17 v1 dV x-10 y-70 + n18 v1 dV x-100 y-70 + w20 v1 dV Ta=b Nn13,n11 + w21 v1 dV Ta=b Nn14,n15 + w22 v1 dV Ta=b Nn15,n16 + w23 v1 dV Ta=b Nn16,n17 + w24 v1 dV Ta=b Nn17,n18 + """ + + When running osm2pgsql flex with parameters + | --slim | -a | + + Then table osm2pgsql_test_t1 contains exactly + | way_id | + | 20 | + | 21 | + | 22 | + | 23 | + | 24 | + Then table osm2pgsql_test_expire contains exactly + | zoom | x | y | + | 2 | 3 | 0 | + | 2 | 3 | 1 | + | 2 | 3 | 2 | + | 2 | 2 | 3 | + | 2 | 3 | 3 | + And the error output contains + """ + Overall tile limit 6 reached for this run! + """ + diff --git a/tests/bdd/flex/lua-expire-output-definitions.feature b/tests/bdd/flex/lua-expire-output-definitions.feature index 500328c72..1a88dddb0 100644 --- a/tests/bdd/flex/lua-expire-output-definitions.feature +++ b/tests/bdd/flex/lua-expire-output-definitions.feature @@ -142,3 +142,37 @@ Feature: Expire output definitions in Lua file The 'minzoom' field in a expire output must be between 1 and 'maxzoom'. """ + Scenario: max_tiles_geometry in expire output definition has to be in range + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + osm2pgsql.define_expire_output({ + maxzoom = 12, + filename = 'somewhere', + max_tiles_geometry = 12345678901234567890 + }) + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + The 'max_tiles_geometry' field in a expire output must be between 1 and 4 << 20. + """ + + Scenario: max_tiles_overall in expire output definition has to be in range + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + osm2pgsql.define_expire_output({ + maxzoom = 12, + filename = 'somewhere', + max_tiles_overall = -1 + }) + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + The 'max_tiles_overall' field in a expire output must be between 1 and 4 << 20. + """ + diff --git a/tests/data/test_expire_limit.lua b/tests/data/test_expire_limit.lua new file mode 100644 index 000000000..8dbe62d39 --- /dev/null +++ b/tests/data/test_expire_limit.lua @@ -0,0 +1,20 @@ + +local eo = osm2pgsql.define_expire_output({ + table = 'osm2pgsql_test_expire', + maxzoom = 2, + max_tiles_geometry = 2, + max_tiles_overall = 6, +}) + +local the_table = osm2pgsql.define_way_table('osm2pgsql_test_t1', { + { column = 'tags', type = 'hstore' }, + { column = 'geom', type = 'linestring', expire = {{ output = eo }} }, +}) + +function osm2pgsql.process_way(object) + the_table:insert{ + tags = object.tags, + geom = object:as_linestring() + } +end + diff --git a/tests/test-expire-tiles.cpp b/tests/test-expire-tiles.cpp index 48486f717..22bdfbadf 100644 --- a/tests/test-expire-tiles.cpp +++ b/tests/test-expire-tiles.cpp @@ -97,7 +97,7 @@ TEST_CASE("simple expire z1", "[NoDB]") et.from_bbox({-10000, -10000, 10000, 10000}, expire_config_t{}); auto const tiles = get_tiles_ordered(&et, zoom, zoom); - CHECK(tiles.size() == 4); + REQUIRE(tiles.size() == 4); auto itr = tiles.cbegin(); CHECK(*(itr++) == tile_t(1, 0, 0)); @@ -376,122 +376,3 @@ TEST_CASE("expire centroids", "[NoDB]") CHECK(set == check_set); } } - -/** - * After expiring a random set of tiles in one expire_tiles_t object - * and a different set in another, when they are merged together they are the - * same as if the union of the sets of tiles had been expired. - */ -TEST_CASE("merge expire sets", "[NoDB]") -{ - uint32_t const zoom = 18; - - for (int i = 0; i < 100; ++i) { - expire_tiles_t et{zoom, defproj}; - expire_tiles_t et1{zoom, defproj}; - expire_tiles_t et2{zoom, defproj}; - - auto check_set1 = generate_random(zoom, 100); - expire_centroids(&et1, check_set1); - - auto check_set2 = generate_random(zoom, 100); - expire_centroids(&et2, check_set2); - - et.merge_and_destroy(&et1); - et.merge_and_destroy(&et2); - - check_set1.merge(check_set2); - - auto const set = get_tiles_unordered(&et, zoom); - - CHECK(set == check_set1); - } -} - -/** - * Merging two identical sets results in the same set. This guarantees that - * we check some pathways of the merging which possibly could be skipped by - * the random tile set in the previous test. - */ -TEST_CASE("merge identical expire sets", "[NoDB]") -{ - uint32_t const zoom = 18; - - for (int i = 0; i < 100; ++i) { - expire_tiles_t et{zoom, defproj}; - expire_tiles_t et1{zoom, defproj}; - expire_tiles_t et2{zoom, defproj}; - - auto const check_set = generate_random(zoom, 100); - expire_centroids(&et1, check_set); - expire_centroids(&et2, check_set); - - et.merge_and_destroy(&et1); - et.merge_and_destroy(&et2); - - auto const set = get_tiles_unordered(&et, zoom); - - CHECK(set == check_set); - } -} - -/** - * Make sure that we're testing the case where some tiles are in both. - */ -TEST_CASE("merge overlapping expire sets", "[NoDB]") -{ - uint32_t const zoom = 18; - - for (int i = 0; i < 100; ++i) { - expire_tiles_t et{zoom, defproj}; - expire_tiles_t et1{zoom, defproj}; - expire_tiles_t et2{zoom, defproj}; - - auto check_set1 = generate_random(zoom, 100); - expire_centroids(&et1, check_set1); - - auto check_set2 = generate_random(zoom, 100); - expire_centroids(&et2, check_set2); - - auto check_set3 = generate_random(zoom, 100); - expire_centroids(&et1, check_set3); - expire_centroids(&et2, check_set3); - - et.merge_and_destroy(&et1); - et.merge_and_destroy(&et2); - - check_set1.merge(check_set2); - check_set1.merge(check_set3); - - auto const set = get_tiles_unordered(&et, zoom); - - CHECK(set == check_set1); - } -} - -/** - * The set union still works when we expire large contiguous areas of tiles - * (i.e: ensure that we handle the "complete" flag correctly) - */ -TEST_CASE("merge with complete flag", "[NoDB]") -{ - uint32_t const zoom = 18; - - expire_tiles_t et{zoom, defproj}; - expire_tiles_t et0{zoom, defproj}; - expire_tiles_t et1{zoom, defproj}; - expire_tiles_t et2{zoom, defproj}; - - // et1&2 are two halves of et0's box - et0.from_bbox({-10000, -10000, 10000, 10000}, expire_config_t{}); - et1.from_bbox({-10000, -10000, 0, 10000}, expire_config_t{}); - et2.from_bbox({0, -10000, 10000, 10000}, expire_config_t{}); - - et.merge_and_destroy(&et1); - et.merge_and_destroy(&et2); - - auto const set = get_tiles_unordered(&et, zoom); - auto const set0 = get_tiles_unordered(&et0, zoom); - - CHECK(set == set0); -}