diff --git a/flex-config/turning-circles.lua b/flex-config/turning-circles.lua new file mode 100644 index 000000000..47b31255f --- /dev/null +++ b/flex-config/turning-circles.lua @@ -0,0 +1,69 @@ +-- This config example file is released into the Public Domain. + +-- Create a table with turning circles that can be styled in sync with the +-- highway they are on. + +local turning_circles = osm2pgsql.define_table({ + name = 'turning_circles', + ids = { type = 'node', id_column = 'node_id', cache = true }, + columns = { + { column = 'geom', type = 'point', not_null = true }, + } +}) + +local highways = osm2pgsql.define_table({ + name = 'highways', + ids = { type = 'way', id_column = 'way_id' }, + columns = { + { column = 'htype', type = 'text', not_null = true }, + { column = 'geom', type = 'linestring', not_null = true }, + } +}) + +-- This table will contain entries for all node/way combinations where the way +-- is tagged as "highway" and the node is tagged as "highway=turning_circle". +-- The "htype" column contains the highway type, the "geom" the geometry of +-- the node. This can be used, for instance, to draw the point in a style that +-- fits with the style of the highway. +-- +-- Note that you might have multiple entries for the same node in this table +-- if it is in several ways. In that case you might have to decide at rendering +-- time which of them to render. +local highway_ends = osm2pgsql.define_table({ + name = 'highway_ends', + ids = { type = 'way', id_column = 'way_id' }, + columns = { + { column = 'htype', type = 'text', not_null = true }, + { column = 'node_id', type = 'int8', not_null = true }, + { column = 'geom', type = 'point', not_null = true }, + } +}) + +function osm2pgsql.process_node(object) + if object.tags.highway == 'turning_circle' then + -- This insert will add the entry to the id cache later read with + -- in_id_cache(). 
+ turning_circles:insert({ + geom = object:as_point(), + }) + end +end + +function osm2pgsql.process_way(object) + local t = object.tags.highway + if t then + highways:insert({ + htype = t, + geom = object:as_linestring(), + }) + local c = turning_circles:in_id_cache(object.nodes) + for _, n in ipairs(c) do + highway_ends:insert({ + htype = t, + node_id = object.nodes[n], + geom = object:as_point(n), + }) + end + end +end + diff --git a/src/debug-output.cpp b/src/debug-output.cpp index 6e90fdf0e..ec46a49e3 100644 --- a/src/debug-output.cpp +++ b/src/debug-output.cpp @@ -61,6 +61,7 @@ void write_table_list_to_debug_log(std::vector const &tables) log_debug(" - data_tablespace={}", table.data_tablespace()); log_debug(" - index_tablespace={}", table.index_tablespace()); log_debug(" - cluster={}", table.cluster_by_geom()); + log_debug(" - id_cache={}", table.with_id_cache()); for (auto const &index : table.indexes()) { log_debug(" - INDEX USING {}", index.method()); if (index.name().empty()) { diff --git a/src/flex-lua-table.cpp b/src/flex-lua-table.cpp index f50fef6ab..abb55ed1f 100644 --- a/src/flex-lua-table.cpp +++ b/src/flex-lua-table.cpp @@ -174,6 +174,17 @@ void setup_flex_table_id_columns(lua_State *lua_state, flex_table_t *table) throw fmt_error("Unknown ids type: {}.", type); } + bool const cache = + luaX_get_table_bool(lua_state, "cache", -1, "The ids", false); + lua_pop(lua_state, 1); // "cache" + if (cache) { + if (type == "node") { + table->enable_id_cache(); + } else { + throw std::runtime_error{"ID cache only available for node ids."}; + } + } + std::string const name = luaX_get_table_string(lua_state, "id_column", -1, "The ids field"); lua_pop(lua_state, 1); // "id_column" @@ -459,6 +470,7 @@ void lua_wrapper_table_t::init(lua_State *lua_state) luaX_set_up_metatable(lua_state, "Table", OSM2PGSQL_TABLE_CLASS, {{"__tostring", lua_trampoline_table_tostring}, {"insert", lua_trampoline_table_insert}, + {"in_id_cache", lua_trampoline_table_in_id_cache}, 
{"name", lua_trampoline_table_name}, {"schema", lua_trampoline_table_schema}, {"cluster", lua_trampoline_table_cluster}, diff --git a/src/flex-table.cpp b/src/flex-table.cpp index 0204b766d..263a9cedc 100644 --- a/src/flex-table.cpp +++ b/src/flex-table.cpp @@ -263,6 +263,10 @@ void flex_table_t::analyze(pg_conn_t const &db_connection) const analyze_table(db_connection, schema(), name()); } +void flex_table_t::enable_id_cache() noexcept { m_with_id_cache = true; } + +bool flex_table_t::with_id_cache() const noexcept { return m_with_id_cache; } + namespace { void enable_check_trigger(pg_conn_t const &db_connection, diff --git a/src/flex-table.hpp b/src/flex-table.hpp index d917ddacb..663375327 100644 --- a/src/flex-table.hpp +++ b/src/flex-table.hpp @@ -215,6 +215,10 @@ class flex_table_t void analyze(pg_conn_t const &db_connection) const; + void enable_id_cache() noexcept; + + bool with_id_cache() const noexcept; + private: /// The schema this table is in std::string m_schema; @@ -271,6 +275,9 @@ class flex_table_t /// Index should be a primary key. bool m_primary_key_index = false; + /// Do we want an ID cache for this table? + bool m_with_id_cache = false; + }; // class flex_table_t class table_connection_t diff --git a/src/idlist.cpp b/src/idlist.cpp index 6877959d1..21d647940 100644 --- a/src/idlist.cpp +++ b/src/idlist.cpp @@ -22,6 +22,11 @@ osmid_t idlist_t::pop_id() return id; } +bool idlist_t::contains(osmid_t id) const +{ + return std::binary_search(m_list.begin(), m_list.end(), id); +} + void idlist_t::sort_unique() { std::sort(m_list.begin(), m_list.end()); diff --git a/src/idlist.hpp b/src/idlist.hpp index 5ae6a1b60..0b6068e7b 100644 --- a/src/idlist.hpp +++ b/src/idlist.hpp @@ -62,6 +62,13 @@ class idlist_t void reserve(std::size_t size) { m_list.reserve(size); } + /** + * Is the specified id in the list? + * + * You must have called sort_unique() before calling this. 
+ */ + bool contains(osmid_t id) const; + /** * Remove id at the end of the list and return it. * diff --git a/src/output-flex.cpp b/src/output-flex.cpp index 8be5af027..fc138981b 100644 --- a/src/output-flex.cpp +++ b/src/output-flex.cpp @@ -89,6 +89,7 @@ TRAMPOLINE(app_as_geometrycollection, as_geometrycollection) } // anonymous namespace TRAMPOLINE(table_insert, insert) +TRAMPOLINE(table_in_id_cache, in_id_cache) prepared_lua_function_t::prepared_lua_function_t(lua_State *lua_state, calling_context context, @@ -270,6 +271,9 @@ void flush_tables(std::vector &table_connections) for (auto &table : table_connections) { table.flush(); } + for (auto &table : table_connections) { + table.sync(); + } } void create_expire_tables(std::vector const &expire_outputs, @@ -789,6 +793,10 @@ int output_flex_t::table_insert() auto const &object = check_and_get_context_object(table); osmid_t const id = table.map_id(object.type(), object.id()); + if (table.with_id_cache()) { + get_id_cache(table).push_back(id); + } + table_connection.new_line(); auto *copy_mgr = table_connection.copy_mgr(); @@ -823,6 +831,56 @@ int output_flex_t::table_insert() return 1; } +int output_flex_t::table_in_id_cache() +{ + if (m_calling_context != calling_context::process_way) { + throw std::runtime_error{ + "The function in_id_cache() can only be called (directly or " + "indirectly) from the process_[untagged_]way() function."}; + } + + auto const num_params = lua_gettop(lua_state()); + if (num_params != 2) { + throw std::runtime_error{ + "Need two parameters: The osm2pgsql.Table and the id(s)."}; + } + + // The first parameter is the table object. 
+ auto &table_connection = m_table_connections.at( + idx_from_param(lua_state(), OSM2PGSQL_TABLE_CLASS)); + lua_remove(lua_state(), 1); // table + + if (!table_connection.table().with_id_cache()) { + throw fmt_error("No ID cache on table {}.", + table_connection.table().name()); + } + + std::vector ids; + int const type = lua_type(lua_state(), 1); + if (type == LUA_TTABLE && luaX_is_array(lua_state())) { + luaX_for_each(lua_state(), + [&]() { ids.push_back(lua_tointeger(lua_state(), -1)); }); + } else { + throw std::runtime_error{"Second parameter must be an array of ids."}; + } + + auto const &cache = get_id_cache(table_connection.table()); + lua_createtable(lua_state(), 0, 0); + + lua_Integer n = 0; + lua_Integer idx = 1; + for (auto const id : ids) { + if (cache.contains(id)) { + lua_pushinteger(lua_state(), ++n); + lua_pushinteger(lua_state(), idx); + lua_rawset(lua_state(), -3); + } + ++idx; + } + + return 1; +} + void output_flex_t::call_lua_function(prepared_lua_function_t func) { lua_pushvalue(lua_state(), func.index()); @@ -980,6 +1038,28 @@ void output_flex_t::after_nodes() } flush_tables(m_table_connections); + + for (auto &table : *m_tables) { + if (table.with_id_cache()) { + auto &cache = get_id_cache(table); + if (get_options()->append) { + log_debug("Initializing cache for table '{}' from database...", + table.name()); + auto const result = m_db_connection.exec( + "SELECT \"{}\" FROM {}", table.id_column_names(), + table.full_name()); + + cache.reserve(result.num_tuples()); + for (int i = 0; i < result.num_tuples(); ++i) { + cache.push_back( + osmium::string_to_object_id(result.get_value(i, 0))); + } + } + cache.sort_unique(); + log_debug("Cache for table '{}' initialized with {} entries.", + table.name(), cache.size()); + } + } } void output_flex_t::after_ways() @@ -1199,6 +1279,10 @@ void output_flex_t::relation_modify(osmium::Relation const &rel) void output_flex_t::start() { for (auto &table : m_table_connections) { + if 
(table.table().with_id_cache()) { + log_debug("Enable cache for table '{}'.", table.table().name()); + create_id_cache(table.table()); + } table.start(m_db_connection, get_options()->append); } @@ -1212,6 +1296,7 @@ output_flex_t::output_flex_t(output_flex_t const *other, std::shared_ptr copy_thread) : output_t(other, std::move(mid)), m_locators(other->m_locators), m_tables(other->m_tables), m_expire_outputs(other->m_expire_outputs), + m_id_caches(other->m_id_caches), m_db_connection(get_options()->connection_params, "out.flex.thread"), m_stage2_way_ids(other->m_stage2_way_ids), m_copy_thread(std::move(copy_thread)), m_lua_state(other->m_lua_state), diff --git a/src/output-flex.hpp b/src/output-flex.hpp index 427225208..7bbb6bef3 100644 --- a/src/output-flex.hpp +++ b/src/output-flex.hpp @@ -165,6 +165,7 @@ class output_flex_t : public output_t int app_get_bbox(); int table_insert(); + int table_in_id_cache(); // Get the flex table that is as first parameter on the Lua stack. flex_table_t &get_table_from_param(); @@ -216,6 +217,21 @@ class output_flex_t : public output_t lua_State *lua_state() noexcept { return m_lua_state.get(); } + void create_id_cache(flex_table_t const &table) + { + if (table.num() >= m_id_caches.size()) { + m_id_caches.resize(table.num() + 1); + } + m_id_caches[table.num()] = std::make_shared(); + } + + idlist_t &get_id_cache(flex_table_t const &table) + { + auto& c = m_id_caches[table.num()]; + assert(c); + return *c; + } + class way_cache_t { public: @@ -273,6 +289,8 @@ class output_flex_t : public output_t std::shared_ptr> m_expire_outputs = std::make_shared>(); + std::vector> m_id_caches; + std::vector m_table_connections; /// The connection to the database server. 
@@ -325,5 +343,6 @@ class output_flex_t : public output_t }; int lua_trampoline_table_insert(lua_State *lua_state); +int lua_trampoline_table_in_id_cache(lua_State *lua_state); #endif // OSM2PGSQL_OUTPUT_FLEX_HPP diff --git a/tests/bdd/flex/id-cache.feature b/tests/bdd/flex/id-cache.feature new file mode 100644 index 000000000..4eb634c3a --- /dev/null +++ b/tests/bdd/flex/id-cache.feature @@ -0,0 +1,132 @@ +Feature: Id cache + + Background: + Given the 0.1 grid + | | 10 | 11 | 12 | + | 14 | 15 | | 16 | + And the lua style + """ + local barriers = osm2pgsql.define_table({ + name = 'barriers', + ids = { type = 'node', id_column = 'node_id', cache = true }, + columns = { + { column = 'btype', type = 'text', not_null = true }, + { column = 'geom', type = 'point', projection = 4326, not_null = true }, + } + }) + local highways = osm2pgsql.define_table({ + name = 'highways', + ids = { type = 'way', id_column = 'way_id' }, + columns = { + { column = 'htype', type = 'text', not_null = true }, + { column = 'geom', type = 'linestring', projection = 4326, not_null = true }, + } + }) + local b_on_h = osm2pgsql.define_table({ + name = 'b_on_h', + ids = { type = 'way', id_column = 'way_id' }, + columns = { + { column = 'node_id', type = 'int8', not_null = true }, + { column = 'htype', type = 'text', not_null = true }, + { column = 'hgeom', type = 'linestring', projection = 4326, not_null = true }, + { column = 'bgeom', type = 'point', projection = 4326, not_null = true }, + } + }) + + function osm2pgsql.process_node(object) + local t = object.tags.barrier + if t then + barriers:insert({ + btype = t, + geom = object:as_point(), + }) + end + end + + function osm2pgsql.process_way(object) + local t = object.tags.highway + if t then + highways:insert({ + htype = t, + geom = object:as_linestring(), + }) + local bidx = barriers:in_id_cache(object.nodes) + for _, idx in ipairs(bidx) do + b_on_h:insert({ + node_id = object.nodes[idx], + htype = t, + hgeom = object:as_linestring(), + 
bgeom = object:as_point(idx), + }) + end + end + end + """ + + Scenario: Id cache works with simple import + Given the OSM data + """ + n10 v1 dV Tbarrier=gate + n16 v1 dV Tbarrier=lift_gate + w20 v1 dV Thighway=residential Nn10,n11,n12,n16 + w21 v1 dV Thighway=residential Nn14,n15,n10 + """ + When running osm2pgsql flex + Then table barriers contains exactly + | node_id | btype | geom!geo | + | 10 | gate | 10 | + | 16 | lift_gate | 16 | + Then table highways contains exactly + | way_id | htype | geom!geo | + | 20 | residential | 10,11,12,16 | + | 21 | residential | 14,15,10 | + Then table b_on_h contains exactly + | way_id | node_id | htype | bgeom!geo | hgeom!geo | + | 20 | 10 | residential | 10 | 10,11,12,16 | + | 20 | 16 | residential | 16 | 10,11,12,16 | + | 21 | 10 | residential | 10 | 14,15,10 | + + Scenario: Id cache works with updates + Given the OSM data + """ + n10 v1 dV Tbarrier=gate + n16 v1 dV Tbarrier=lift_gate + w20 v1 dV Thighway=residential Nn10,n11,n12,n16 + w21 v1 dV Thighway=residential Nn14,n15,n10 + """ + When running osm2pgsql flex with parameters + | --slim | + Then table barriers contains exactly + | node_id | btype | geom!geo | + | 10 | gate | 10 | + | 16 | lift_gate | 16 | + Then table highways contains exactly + | way_id | htype | geom!geo | + | 20 | residential | 10,11,12,16 | + | 21 | residential | 14,15,10 | + Then table b_on_h contains exactly + | way_id | node_id | htype | bgeom!geo | hgeom!geo | + | 20 | 10 | residential | 10 | 10,11,12,16 | + | 20 | 16 | residential | 16 | 10,11,12,16 | + | 21 | 10 | residential | 10 | 14,15,10 | + + Given the OSM data + """ + n10 v2 dV Tno=barrier + n11 v2 dV Tbarrier=gate + """ + When running osm2pgsql flex with parameters + | --slim | -a | + Then table barriers contains exactly + | node_id | btype | geom!geo | + | 11 | gate | 11 | + | 16 | lift_gate | 16 | + Then table highways contains exactly + | way_id | htype | geom!geo | + | 20 | residential | 10,11,12,16 | + | 21 | residential | 14,15,10 
| + Then table b_on_h contains exactly + | way_id | node_id | htype | bgeom!geo | hgeom!geo | + | 20 | 11 | residential | 11 | 10,11,12,16 | + | 20 | 16 | residential | 16 | 10,11,12,16 | + diff --git a/tests/bdd/flex/lua-table-ids.feature b/tests/bdd/flex/lua-table-ids.feature new file mode 100644 index 000000000..fab461183 --- /dev/null +++ b/tests/bdd/flex/lua-table-ids.feature @@ -0,0 +1,254 @@ +Feature: Ids in table definitions in Lua file + + Scenario: Table definition without ids is okay + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then table foo has 1562 rows + + Scenario: Table definition with empty ids is not allowed + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = {}, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + The ids field must contain a 'type' string field. + """ + + Scenario: Table ids definition must contain a text id_column field + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 123 }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + Unknown ids type: 123. 
+ """ + + Scenario: Table ids definition must contain an id_column field + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'node' }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + The ids field must contain a 'id_column' string field. + """ + + Scenario: Table ids definition must contain an id_column field + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'node', ids_column = false }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + The ids field must contain a 'id_column' string field. 
+ """ + + Scenario: Table ids definition with type and id_column fields is okay + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'node', id_column = 'abc' }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then table foo has 1562 rows + + Scenario Outline: + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = '', id_column = 'abc' }, + columns = {{ column = 'bar', type = 'text' }} + }) + """ + When running osm2pgsql flex + Then execution is successful + + Examples: + | idtype | + | node | + | way | + | relation | + | area | + | any | + | tile | + + Scenario: Table ids definition checks for special characters in column names + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'node', id_column = 'a"b"c' }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + Special characters are not allowed in column names: 'a"b"c'. + """ + + Scenario: Table ids definition can contain cache field but needs right type + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'node', id_column = 'abc', cache = 'xxx' }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + The ids field 'cache' must be a boolean field. 
+ """ + + Scenario: Table ids definition can contain boolean cache field (false) + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'node', id_column = 'abc', cache = false }, + columns = {{ column = 'bar', type = 'text' }} }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then table foo has 1562 rows + + Scenario: Table ids definition can contain boolean cache field (true) + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'node', id_column = 'abc', cache = true }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_node(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then table foo has 1562 rows + + Scenario: Table ids definition can contain false cache field for a way + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'way', id_column = 'abc', cache = false }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_way(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then table foo has 7105 rows + + Scenario: Table ids definition can contain cache field only for nodes + Given the input file 'liechtenstein-2013-08-03.osm.pbf' + And the lua style + """ + local t = osm2pgsql.define_table({ + name = 'foo', + ids = { type = 'way', id_column = 'abc', cache = true }, + columns = {{ column = 'bar', type = 'text' }} + }) + + function osm2pgsql.process_way(object) + t:insert({}) + end + """ + When running osm2pgsql flex + Then execution fails + And the error output contains + """ + ID cache only available for node ids. + """ +