diff --git a/.gitignore b/.gitignore index 8def1549..10fcc112 100644 --- a/.gitignore +++ b/.gitignore @@ -278,3 +278,6 @@ BUCKAROO_DEPS # Vim *.swp *.swo + +# clangd cache +/.cache/clangd diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index 460d66fa..47e3a06e 100644 --- a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -162,16 +162,26 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti case TypeAst::Tuple: { std::vector columns; + std::vector names; columns.reserve(ast.elements.size()); + names.reserve(ast.elements.size()); + bool any_named = false; for (const auto& elem : ast.elements) { if (auto col = CreateColumnFromAst(elem, settings)) { columns.push_back(col); + names.push_back(elem.element_name); + if (!elem.element_name.empty()) { + any_named = true; + } } else { return nullptr; } } + if (any_named) { + return std::make_shared(columns, std::move(names)); + } return std::make_shared(columns); } diff --git a/clickhouse/columns/tuple.cpp b/clickhouse/columns/tuple.cpp index 56858590..72d206b6 100644 --- a/clickhouse/columns/tuple.cpp +++ b/clickhouse/columns/tuple.cpp @@ -1,8 +1,9 @@ #include "tuple.h" namespace clickhouse { +namespace { -static std::vector CollectTypes(const std::vector& columns) { +std::vector CollectTypes(const std::vector& columns) { std::vector types; for (const auto& col : columns) { types.push_back(col->Type()); @@ -10,12 +11,45 @@ static std::vector CollectTypes(const std::vector& columns) return types; } +/// Tuple types can be appended if they have the same shape. +bool CanAppendType(const TypeRef& destination_type, const TypeRef& source_type) { + if (destination_type->GetCode() != Type::Tuple || source_type->GetCode() != Type::Tuple) { + return destination_type->IsEqual(source_type); + } + + const auto* destination_tuple = destination_type->As(); + const auto* source_tuple = source_type->As(); + + const auto destination_item_types = destination_tuple->GetTupleType(); + const auto source_item_types = source_tuple->GetTupleType(); + if (destination_item_types.size() != source_item_types.size()) { + return false; + } + + for (size_t i = 0; i < destination_item_types.size(); ++i) { + if (!CanAppendType(destination_item_types[i], source_item_types[i])) { + return false; + } + } + + return true; +} + +} + ColumnTuple::ColumnTuple(const std::vector& columns) : Column(Type::CreateTuple(CollectTypes(columns))) , columns_(columns) { } +ColumnTuple::ColumnTuple(const std::vector& columns, + std::vector names) + : Column(Type::CreateTuple(CollectTypes(columns), std::move(names))) + , columns_(columns) +{ +} + size_t ColumnTuple::TupleSize() const { return columns_.size(); } @@ -23,11 +57,11 @@ size_t ColumnTuple::TupleSize() const { void ColumnTuple::Reserve(size_t new_cap) { for (auto& column : columns_) { column->Reserve(new_cap); - } + } } void ColumnTuple::Append(ColumnRef column) { - if (!this->Type()->IsEqual(column->Type())) { + if (!CanAppendType(this->Type(), column->Type())) { throw ValidationError( "can't append column of type " + column->Type()->GetName() + " " "to column type " + this->Type()->GetName()); @@ -37,6 +71,7 @@ void ColumnTuple::Append(ColumnRef column) { columns_[ci]->Append((*source_tuple_column)[ci]); } } + size_t ColumnTuple::Size() const { return columns_.empty() ? 0 : columns_[0]->Size(); } @@ -48,7 +83,11 @@ ColumnRef ColumnTuple::Slice(size_t begin, size_t len) const { sliced_columns.push_back(column->Slice(begin, len)); } - return std::make_shared(sliced_columns); + const auto& names = this->Type()->As()->GetItemNames(); + if (names.empty()) { + return std::make_shared(sliced_columns); + } + return std::make_shared(sliced_columns, names); } ColumnRef ColumnTuple::CloneEmpty() const { @@ -59,7 +98,11 @@ ColumnRef ColumnTuple::CloneEmpty() const { result_columns.push_back(column->CloneEmpty()); } - return std::make_shared(result_columns); + const auto& names = this->Type()->As()->GetItemNames(); + if (names.empty()) { + return std::make_shared(result_columns); + } + return std::make_shared(result_columns, names); } bool ColumnTuple::LoadPrefix(InputStream* input, size_t rows) { diff --git a/clickhouse/columns/tuple.h b/clickhouse/columns/tuple.h index ebc1b895..5bf3b0d6 100644 --- a/clickhouse/columns/tuple.h +++ b/clickhouse/columns/tuple.h @@ -13,6 +13,8 @@ namespace clickhouse { class ColumnTuple : public Column { public: ColumnTuple(const std::vector& columns); + ColumnTuple(const std::vector& columns, + std::vector names); /// Returns count of columns in the tuple. size_t TupleSize() const; diff --git a/clickhouse/types/type_parser.cpp b/clickhouse/types/type_parser.cpp index d488a079..385f4e90 100644 --- a/clickhouse/types/type_parser.cpp +++ b/clickhouse/types/type_parser.cpp @@ -22,7 +22,9 @@ bool TypeAst::operator==(const TypeAst & other) const { return meta == other.meta && code == other.code && name == other.name + && element_name == other.element_name && value == other.value + && value_string == other.value_string && std::equal(elements.begin(), elements.end(), other.elements.begin(), other.elements.end()); } @@ -167,6 +169,12 @@ bool TypeParser::Parse(TypeAst* type) { break; } case Token::Name: + if (!type_->name.empty()) { + // A second Name token on the same element means the + // previous one was a field name in a named-tuple element + // (e.g. "a" in "Tuple(a Int32, …)"). + type_->element_name = std::move(type_->name); + } type_->meta = GetTypeMeta(token.value); type_->name = token.value.to_string(); type_->code = GetTypeCode(type_->name); diff --git a/clickhouse/types/type_parser.h b/clickhouse/types/type_parser.h index 2f8f2f6f..9cc29512 100644 --- a/clickhouse/types/type_parser.h +++ b/clickhouse/types/type_parser.h @@ -31,6 +31,9 @@ struct TypeAst { /// Type's name. /// Need to cache TypeAst, so can't use StringView for name. std::string name; + /// Name of this element inside its parent (e.g. field name inside a named + /// Tuple). Empty for unnamed elements. + std::string element_name; /// Value associated with the node, /// used for fixed-width types and enum values. int64_t value = 0; diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp index a5588c68..c0d14a18 100644 --- a/clickhouse/types/types.cpp +++ b/clickhouse/types/types.cpp @@ -239,8 +239,9 @@ TypeRef Type::CreateString(size_t n) { return TypeRef(new FixedStringType(n)); } -TypeRef Type::CreateTuple(const std::vector& item_types) { - return TypeRef(new TupleType(item_types)); +TypeRef Type::CreateTuple(const std::vector& item_types, + std::vector item_names) { + return TypeRef(new TupleType(item_types, std::move(item_names))); } TypeRef Type::CreateEnum8(const std::vector& enum_items) { @@ -442,9 +443,17 @@ FixedStringType::FixedStringType(size_t n) : Type(FixedString), size_(n) { NullableType::NullableType(TypeRef nested_type) : Type(Nullable), nested_type_(nested_type) { } -/// class TupleType - -TupleType::TupleType(const std::vector& item_types) : Type(Tuple), item_types_(item_types) { +TupleType::TupleType(const std::vector& item_types, + std::vector item_names) + : Type(Tuple), item_types_(item_types), item_names_(std::move(item_names)) { + if (!item_names_.empty() && item_names_.size() != item_types_.size()) { + throw ValidationError("Tuple field names count doesn't match tuple element count"); + } + for (const auto& item_name : item_names_) { + if (item_name.empty()) { + throw ValidationError("Tuple field names can't be empty"); + } + } } /// class LowCardinalityType @@ -456,13 +465,22 @@ LowCardinalityType::~LowCardinalityType() { std::string TupleType::GetName() const { std::string result("Tuple("); + bool has_complete_names = !item_names_.empty(); if (!item_types_.empty()) { - result += item_types_[0]->GetName(); + if (has_complete_names) { + result += item_names_[0] + " " + item_types_[0]->GetName(); + } else { + result += item_types_[0]->GetName(); + } } for (size_t i = 1; i < item_types_.size(); ++i) { - result += ", " + item_types_[i]->GetName(); + if (has_complete_names) { + result += ", " + item_names_[i] + " " + item_types_[i]->GetName(); + } else { + result += ", " + item_types_[i]->GetName(); + } } result += ")"; diff --git a/clickhouse/types/types.h b/clickhouse/types/types.h index 2275cfba..9720eee1 100644 --- a/clickhouse/types/types.h +++ b/clickhouse/types/types.h @@ -124,7 +124,8 @@ class Type { static TypeRef CreateString(size_t n); - static TypeRef CreateTuple(const std::vector& item_types); + static TypeRef CreateTuple(const std::vector& item_types, + std::vector item_names = {}); static TypeRef CreateEnum8(const std::vector& enum_items); @@ -292,15 +293,21 @@ class NullableType : public Type { class TupleType : public Type { public: - explicit TupleType(const std::vector& item_types); + explicit TupleType(const std::vector& item_types, + std::vector item_names = {}); std::string GetName() const; /// Type of nested Tuple element type. std::vector GetTupleType() const { return item_types_; } + /// Field names for named tuples. Same length as GetTupleType() when + /// populated, or empty when the tuple has no field names. + const std::vector& GetItemNames() const { return item_names_; } + private: std::vector item_types_; + std::vector item_names_; }; class LowCardinalityType : public Type { diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index f799cb55..82511cf3 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -154,6 +154,89 @@ TEST(ColumnsCase, TupleAppend){ ASSERT_EQ((*tuple2)[1]->As()->At(0), "2"); } +TEST(ColumnsCase, TupleAppendWithSameFieldNames){ + auto tuple1 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + }), std::vector{"a", "b"}); + auto tuple2 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + }), std::vector{"a", "b"}); + (*tuple1)[0]->As()->Append(2u); + (*tuple1)[1]->As()->Append("2"); + tuple2->Append(tuple1); + + ASSERT_EQ((*tuple2)[0]->As()->At(0), 2u); + ASSERT_EQ((*tuple2)[1]->As()->At(0), "2"); +} + +TEST(ColumnsCase, TupleAppendUnnamedSourceIntoNamedDestination){ + auto tuple1 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + })); + auto tuple2 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + }), std::vector{"a", "b"}); + (*tuple1)[0]->As()->Append(2u); + (*tuple1)[1]->As()->Append("2"); + tuple2->Append(tuple1); + + ASSERT_EQ((*tuple2)[0]->As()->At(0), 2u); + ASSERT_EQ((*tuple2)[1]->As()->At(0), "2"); +} + +TEST(ColumnsCase, TupleAppendWithDifferentFieldNames){ + auto tuple1 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + }), std::vector{"x", "y"}); + auto tuple2 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + }), std::vector{"a", "b"}); + + (*tuple1)[0]->As()->Append(2u); + (*tuple1)[1]->As()->Append("2"); + tuple2->Append(tuple1); + + ASSERT_EQ((*tuple2)[0]->As()->At(0), 2u); + ASSERT_EQ((*tuple2)[1]->As()->At(0), "2"); +} + +TEST(ColumnsCase, TupleAppendNamedSourceIntoUnnamedDestination){ + auto tuple1 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + }), std::vector{"a", "b"}); + auto tuple2 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + })); + + (*tuple1)[0]->As()->Append(2u); + (*tuple1)[1]->As()->Append("2"); + tuple2->Append(tuple1); + + ASSERT_EQ((*tuple2)[0]->As()->At(0), 2u); + ASSERT_EQ((*tuple2)[1]->As()->At(0), "2"); +} + +TEST(ColumnsCase, TupleAppendRejectsIncompatibleStructure){ + auto tuple1 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + })); + auto tuple2 = std::make_shared(std::vector({ + std::make_shared(), + std::make_shared() + })); + + EXPECT_THROW(tuple2->Append(tuple1), ValidationError); +} + TEST(ColumnsCase, TupleSlice){ auto tuple1 = std::make_shared(std::vector({ std::make_shared(), diff --git a/ut/type_parser_ut.cpp b/ut/type_parser_ut.cpp index 4cff5237..d4012f82 100644 --- a/ut/type_parser_ut.cpp +++ b/ut/type_parser_ut.cpp @@ -89,10 +89,28 @@ TEST(TypeParserCase, ParseTuple) { auto element = ast.elements.begin(); for (size_t i = 0; i < 2; ++i) { ASSERT_EQ(element->name, names[i]); + ASSERT_TRUE(element->element_name.empty()); ++element; } } +TEST(TypeParserCase, ParseNamedTuple) { + TypeAst ast; + TypeParser("Tuple(a UInt8, b String)").Parse(&ast); + ASSERT_EQ(ast.meta, TypeAst::Tuple); + ASSERT_EQ(ast.name, "Tuple"); + ASSERT_EQ(ast.code, Type::Tuple); + ASSERT_EQ(ast.elements.size(), 2u); + + ASSERT_EQ(ast.elements[0].element_name, "a"); + ASSERT_EQ(ast.elements[0].name, "UInt8"); + ASSERT_EQ(ast.elements[0].code, Type::UInt8); + + ASSERT_EQ(ast.elements[1].element_name, "b"); + ASSERT_EQ(ast.elements[1].name, "String"); + ASSERT_EQ(ast.elements[1].code, Type::String); +} + TEST(TypeParserCase, ParseDecimal) { TypeAst ast; TypeParser("Decimal(12, 5)").Parse(&ast); @@ -167,6 +185,20 @@ TEST(TypeParserCase, ParseDateTime_MINSK_TIMEZONE) { ASSERT_EQ(ast.elements[0].meta, TypeAst::Terminal); } +TEST(TypeParserCase, EqualityIncludesValueString) { + TypeAst utc; + TypeAst minsk; + ASSERT_TRUE(TypeParser("DateTime('UTC')").Parse(&utc)); + ASSERT_TRUE(TypeParser("DateTime('Europe/Minsk')").Parse(&minsk)); + ASSERT_NE(utc, minsk); + + TypeAst enum_one; + TypeAst enum_two; + ASSERT_TRUE(TypeParser("Enum8('ONE' = 1)").Parse(&enum_one)); + ASSERT_TRUE(TypeParser("Enum8('TWO' = 1)").Parse(&enum_two)); + ASSERT_NE(enum_one, enum_two); +} + TEST(TypeParserCase, LowCardinality_String) { TypeAst ast; ASSERT_TRUE(TypeParser("LowCardinality(String)").Parse(&ast)); @@ -194,7 +226,7 @@ TEST(TypeParserCase, LowCardinality_FixedString) { ASSERT_EQ(ast.elements[0].name, "FixedString"); ASSERT_EQ(ast.elements[0].value, 0); ASSERT_EQ(ast.elements[0].elements.size(), 1u); - auto param = TypeAst{TypeAst::Number, Type::Void, "", 10, {}, {}}; + auto param = TypeAst{TypeAst::Number, Type::Void, "", "", 10, {}, {}}; ASSERT_EQ(ast.elements[0].elements[0], param); } diff --git a/ut/types_ut.cpp b/ut/types_ut.cpp index 7af343b5..f517bf41 100644 --- a/ut/types_ut.cpp +++ b/ut/types_ut.cpp @@ -41,6 +41,68 @@ TEST(TypesCase, NullableType) { ASSERT_EQ(Type::CreateNullable(nested)->As()->GetNestedType(), nested); } +TEST(TypesCase, TupleTypeItemNames) { + auto unnamed = Type::CreateTuple({ + Type::CreateSimple(), + Type::CreateString()}); + ASSERT_TRUE(unnamed->As()->GetItemNames().empty()); + + auto named = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", "b"}); + const auto& names = named->As()->GetItemNames(); + ASSERT_EQ(names.size(), 2u); + ASSERT_EQ(names[0], "a"); + ASSERT_EQ(names[1], "b"); +} + +TEST(TypesCase, TupleTypeNameIncludesFieldNames) { + auto named = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", "b"}); + ASSERT_EQ(named->GetName(), "Tuple(a UInt8, b String)"); + + ASSERT_THROW( + Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", ""}), + ValidationError); + + ASSERT_THROW( + Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a"}), + ValidationError); +} + +TEST(TypesCase, TupleTypeNamesFromFactory) { + auto col = CreateColumnByType("Tuple(a UInt8, b String)"); + ASSERT_NE(col, nullptr); + const auto& names = col->Type()->As()->GetItemNames(); + ASSERT_EQ(names.size(), 2u); + ASSERT_EQ(names[0], "a"); + ASSERT_EQ(names[1], "b"); + + auto col_unnamed = CreateColumnByType("Tuple(UInt8, String)"); + ASSERT_NE(col_unnamed, nullptr); + ASSERT_TRUE(col_unnamed->Type()->As()->GetItemNames().empty()); +} + +TEST(TypesCase, TupleTypeEqualityIncludesFieldNames) { + auto unnamed = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}); + auto named_ab = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"a", "b"}); + auto named_xy = Type::CreateTuple( + {Type::CreateSimple(), Type::CreateString()}, + {"x", "y"}); + + ASSERT_TRUE(named_ab->IsEqual(named_ab)); + ASSERT_FALSE(named_ab->IsEqual(unnamed)); + ASSERT_FALSE(named_ab->IsEqual(named_xy)); +} + TEST(TypesCase, EnumTypes) { auto enum8 = Type::CreateEnum8({{"One", 1}, {"Two", 2}}); ASSERT_EQ(enum8->GetName(), "Enum8('One' = 1, 'Two' = 2)");