diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 8c72d47f0865..ff0ba613e042 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -851,7 +851,6 @@ std::optional<ColumnDefault> ColumnsDescription::getDefault(const String & colum
     return {};
 }
 
-
 bool ColumnsDescription::hasCompressionCodec(const String & column_name) const
 {
     const auto it = columns.get<1>().find(column_name);
diff --git a/src/Storages/MergeTree/ExportPartTask.cpp b/src/Storages/MergeTree/ExportPartTask.cpp
index ad737fedcb21..1dc417b45e54 100644
--- a/src/Storages/MergeTree/ExportPartTask.cpp
+++ b/src/Storages/MergeTree/ExportPartTask.cpp
@@ -4,17 +4,20 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 
 namespace ProfileEvents
 {
@@ -58,7 +61,11 @@ bool ExportPartTask::executeStep()
 
     const auto & metadata_snapshot = manifest.metadata_snapshot;
 
+    // Read only physical columns from the part
     Names columns_to_read = metadata_snapshot->getColumns().getNamesOfPhysical();
+
+    // But we want all columns (including aliases) in the output
+    NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAll();
 
     MergeTreeSequentialSourceType read_type = MergeTreeSequentialSourceType::Export;
 
@@ -146,6 +153,34 @@ bool ExportPartTask::executeStep()
         local_context,
         getLogger("ExportPartition"));
 
+    // Add expression step to compute alias and other default columns for export.
+    // This materializes virtual columns (like ALIAS) so they can be written to output.
+    const auto & current_header = plan_for_part.getCurrentHeader();
+
+    // Enable all experimental settings for default expressions
+    // (same pattern as in IMergeTreeReader::evaluateMissingDefaults).
+    auto context_for_defaults = Context::createCopy(local_context);
+    enableAllExperimentalSettings(context_for_defaults);
+
+    auto defaults_dag = evaluateMissingDefaults(
+        *current_header,
+        all_columns,
+        metadata_snapshot->getColumns(),
+        context_for_defaults);
+
+    if (defaults_dag)
+    {
+        // Ensure columns are in the correct order, matching all_columns
+        defaults_dag->removeUnusedActions(all_columns.getNames(), false);
+        defaults_dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);
+
+        auto expression_step = std::make_unique<ExpressionStep>(
+            current_header,
+            std::move(*defaults_dag));
+        expression_step->setStepDescription("Compute alias and default expressions for export");
+        plan_for_part.addStep(std::move(expression_step));
+    }
+
     ThreadGroupSwitcher switcher((*exports_list_entry)->thread_group, "");
 
     QueryPlanOptimizationSettings optimization_settings(local_context);
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index e90a6e3ffc0b..d2d573b0a86a 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -6242,7 +6242,11 @@ void MergeTreeData::exportPartToTable(
     auto source_metadata_ptr = getInMemoryMetadataPtr();
     auto destination_metadata_ptr = dest_storage->getInMemoryMetadataPtr();
 
-    if (destination_metadata_ptr->getColumns().getAllPhysical().sizeOfDifference(source_metadata_ptr->getColumns().getAllPhysical()))
+    const auto & source_columns = source_metadata_ptr->getColumns();
+
+    const auto & destination_columns = destination_metadata_ptr->getColumns();
+
+    if (destination_columns.getAll().sizeOfDifference(source_columns.getAll()))
         throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure");
 
     if (query_to_string(source_metadata_ptr->getPartitionKeyAST()) != query_to_string(destination_metadata_ptr->getPartitionKeyAST()))
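Note on the compatibility check in MergeTreeData::exportPartToTable: switching from getAllPhysical() to getAll() means the destination table must declare a counterpart for every source column, including ALIAS and MATERIALIZED ones. A minimal SQL sketch of a compatible source/destination pair, mirroring the table shapes used in the test below (on the S3 side, arr_1 is an ordinary column so the computed value can actually be stored):

-- Source: arr_1 is computed from arr[1], not stored in the part
CREATE TABLE mt_alias (a UInt32, arr Array(UInt64), arr_1 UInt64 ALIAS arr[1])
ENGINE = MergeTree() PARTITION BY a ORDER BY (a, arr[1]);

-- Destination: arr_1 is a plain physical column that receives the materialized value;
-- getAll() sees the same (name, type) pairs on both sides, so the check passes
CREATE TABLE s3_alias_export (a UInt32, arr Array(UInt64), arr_1 UInt64)
ENGINE = S3(s3_conn, filename='s3_alias_export', format=Parquet, partition_strategy='hive') PARTITION BY a;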
diff --git a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference
index d11773c3c9cd..3d9ee8308464 100644
--- a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference
+++ b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference
@@ -42,3 +42,17 @@
 ---- Count rows in big_table and big_destination_max_rows
 4194304
 4194304
+---- Test ALIAS columns export
+---- Verify ALIAS column data in source table (arr_1 computed from arr[1])
+1 [1,2,3] 1
+1 [10,20,30] 10
+---- Verify ALIAS column data exported to S3 (should match source)
+1 [1,2,3] 1
+1 [10,20,30] 10
+---- Test MATERIALIZED columns export
+---- Verify MATERIALIZED column data in source table (arr_1 computed from arr[1])
+1 [1,2,3] 1
+1 [10,20,30] 10
+---- Verify MATERIALIZED column data exported to S3 (should match source)
+1 [1,2,3] 1
+1 [10,20,30] 10
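For background on why the expected output (and the test queries below) name a, arr, arr_1 explicitly: both ALIAS and MATERIALIZED columns are hidden from SELECT * by default; ALIAS values are computed at read time, while MATERIALIZED values are stored in the part. A quick sketch of the semantics, assuming a scratch session and hypothetical table names:

CREATE TABLE t_alias (arr Array(UInt64), arr_1 UInt64 ALIAS arr[1]) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE t_mat (arr Array(UInt64), arr_1 UInt64 MATERIALIZED arr[1]) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_alias VALUES ([1,2,3]);  -- only arr is insertable; arr_1 is derived
INSERT INTO t_mat VALUES ([1,2,3]);
SELECT * FROM t_alias;            -- returns arr only: arr_1 is not expanded by *
SELECT arr, arr_1 FROM t_alias;   -- arr_1 computed on read
SELECT arr, arr_1 FROM t_mat;     -- arr_1 read from storage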
+query "INSERT INTO $mt_alias VALUES (1, [1, 2, 3]), (1, [10, 20, 30])" + +alias_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$mt_alias' AND partition_id = '1' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n') + +query "ALTER TABLE $mt_alias EXPORT PART '$alias_part' TO TABLE $s3_alias_export SETTINGS allow_experimental_export_merge_tree_part = 1" + +sleep 3 + +echo "---- Verify ALIAS column data in source table (arr_1 computed from arr[1])" +query "SELECT a, arr, arr_1 FROM $mt_alias ORDER BY arr" + +echo "---- Verify ALIAS column data exported to S3 (should match source)" +query "SELECT a, arr, arr_1 FROM $s3_alias_export ORDER BY arr" + +echo "---- Test MATERIALIZED columns export" +query "CREATE TABLE $mt_materialized (a UInt32, arr Array(UInt64), arr_1 UInt64 MATERIALIZED arr[1]) ENGINE = MergeTree() PARTITION BY a ORDER BY (a, arr_1) SETTINGS index_granularity = 1" +query "CREATE TABLE $s3_materialized_export (a UInt32, arr Array(UInt64), arr_1 UInt64) ENGINE = S3(s3_conn, filename='$s3_materialized_export', format=Parquet, partition_strategy='hive') PARTITION BY a" + +query "INSERT INTO $mt_materialized VALUES (1, [1, 2, 3]), (1, [10, 20, 30])" + +materialized_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$mt_materialized' AND partition_id = '1' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n') + +query "ALTER TABLE $mt_materialized EXPORT PART '$materialized_part' TO TABLE $s3_materialized_export SETTINGS allow_experimental_export_merge_tree_part = 1" + +sleep 3 + +echo "---- Verify MATERIALIZED column data in source table (arr_1 computed from arr[1])" +query "SELECT a, arr, arr_1 FROM $mt_materialized ORDER BY arr" + +echo "---- Verify MATERIALIZED column data exported to S3 (should match source)" +query "SELECT a, arr, arr_1 FROM $s3_materialized_export ORDER BY arr" + +query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, $mt_table_partition_expression_with_function, $big_table, $big_destination_max_bytes, $big_destination_max_rows, $mt_alias, $mt_materialized, $s3_alias_export, $s3_materialized_export"