-
Notifications
You must be signed in to change notification settings - Fork 12
export part - skip or throw on pending mutations and patch parts #1294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: antalya-25.8
Are you sure you want to change the base?
Changes from all commits
28e1246
dd10dda
506a8ab
8254d0b
87bf725
d4f1609
4c59e82
d1d1f86
8eaadcf
3ee9ce3
6b6be9e
fd3dee8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -217,6 +217,9 @@ namespace Setting | |
| extern const SettingsMergeTreePartExportFileAlreadyExistsPolicy export_merge_tree_part_file_already_exists_policy; | ||
| extern const SettingsBool output_format_parallel_formatting; | ||
| extern const SettingsBool output_format_parquet_parallel_encoding; | ||
| extern const SettingsBool export_merge_tree_part_throw_on_pending_mutations; | ||
| extern const SettingsBool export_merge_tree_part_throw_on_pending_patch_parts; | ||
| extern const SettingsBool export_merge_tree_part_allow_outdated_parts; | ||
| } | ||
|
|
||
| namespace MergeTreeSetting | ||
|
|
@@ -336,6 +339,7 @@ namespace ErrorCodes | |
| extern const int TOO_LARGE_LIGHTWEIGHT_UPDATES; | ||
| extern const int UNKNOWN_TABLE; | ||
| extern const int FILE_ALREADY_EXISTS; | ||
| extern const int PENDING_MUTATIONS_NOT_ALLOWED; | ||
| } | ||
|
|
||
| static void checkSuspiciousIndices(const ASTFunction * index_function) | ||
|
|
@@ -6254,6 +6258,45 @@ void MergeTreeData::exportPartToTable( | |
| throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No such data part '{}' to export in table '{}'", | ||
| part_name, getStorageID().getFullTableName()); | ||
|
|
||
| const bool allow_outdated_parts = query_context->getSettingsRef()[Setting::export_merge_tree_part_allow_outdated_parts]; | ||
|
|
||
| if (part->getState() == MergeTreeDataPartState::Outdated && !allow_outdated_parts) | ||
| throw Exception( | ||
| ErrorCodes::BAD_ARGUMENTS, | ||
| "Part {} is in the outdated state and cannot be exported. Set `export_merge_tree_part_allow_outdated_parts` to true to allow exporting outdated parts", | ||
| part_name); | ||
|
|
||
| const bool throw_on_pending_mutations = query_context->getSettingsRef()[Setting::export_merge_tree_part_throw_on_pending_mutations]; | ||
| const bool throw_on_pending_patch_parts = query_context->getSettingsRef()[Setting::export_merge_tree_part_throw_on_pending_patch_parts]; | ||
|
|
||
| MergeTreeData::IMutationsSnapshot::Params mutations_snapshot_params | ||
| { | ||
| .metadata_version = source_metadata_ptr->getMetadataVersion(), | ||
| .min_part_metadata_version = part->getMetadataVersion(), | ||
| .need_data_mutations = throw_on_pending_mutations, | ||
| .need_alter_mutations = throw_on_pending_mutations || throw_on_pending_patch_parts, | ||
| .need_patch_parts = throw_on_pending_patch_parts, | ||
| }; | ||
|
|
||
| const auto mutations_snapshot = getMutationsSnapshot(mutations_snapshot_params); | ||
|
|
||
| const auto alter_conversions = getAlterConversionsForPart(part, mutations_snapshot, query_context); | ||
|
|
||
| /// re-check `throw_on_pending_mutations` because `pending_mutations` might have been filled due to `throw_on_pending_patch_parts` | ||
| if (throw_on_pending_mutations && alter_conversions->hasMutations()) | ||
| { | ||
| throw Exception(ErrorCodes::PENDING_MUTATIONS_NOT_ALLOWED, | ||
| "Part {} can not be exported because there are pending mutations. Either wait for the mutations to be applied or set `export_merge_tree_part_throw_on_pending_mutations` to false", | ||
| part_name); | ||
| } | ||
|
|
||
| if (alter_conversions->hasPatches()) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to check
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No because of #1294 (comment)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can add the check if you think it is more clear, but it is not needed at all |
||
| { | ||
| throw Exception(ErrorCodes::PENDING_MUTATIONS_NOT_ALLOWED, | ||
| "Part {} can not be exported because there are pending patch parts. Either wait for the patch parts to be applied or set `export_merge_tree_part_throw_on_pending_patch_parts` to false", | ||
| part_name); | ||
| } | ||
|
|
||
| { | ||
| const auto format_settings = getFormatSettings(query_context); | ||
| MergeTreePartExportManifest manifest( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -202,6 +202,8 @@ namespace Setting | |
| extern const SettingsMergeTreePartExportFileAlreadyExistsPolicy export_merge_tree_part_file_already_exists_policy; | ||
| extern const SettingsUInt64 export_merge_tree_part_max_bytes_per_file; | ||
| extern const SettingsUInt64 export_merge_tree_part_max_rows_per_file; | ||
| extern const SettingsBool export_merge_tree_part_throw_on_pending_mutations; | ||
| extern const SettingsBool export_merge_tree_part_throw_on_pending_patch_parts; | ||
| } | ||
|
|
||
| namespace MergeTreeSetting | ||
|
|
@@ -305,6 +307,7 @@ namespace ErrorCodes | |
| extern const int CANNOT_FORGET_PARTITION; | ||
| extern const int TIMEOUT_EXCEEDED; | ||
| extern const int INVALID_SETTING_VALUE; | ||
| extern const int PENDING_MUTATIONS_NOT_ALLOWED; | ||
| } | ||
|
|
||
| namespace ServerSetting | ||
|
|
@@ -8192,18 +8195,54 @@ void StorageReplicatedMergeTree::exportPartitionToTable(const PartitionCommand & | |
|
|
||
| ops.emplace_back(zkutil::makeCreateRequest(partition_exports_path, "", zkutil::CreateMode::Persistent)); | ||
|
|
||
| auto data_parts_lock = lockParts(); | ||
| DataPartsVector parts; | ||
|
|
||
| const auto parts = getDataPartsVectorInPartitionForInternalUsage(MergeTreeDataPartState::Active, partition_id, &data_parts_lock); | ||
| { | ||
| auto data_parts_lock = lockParts(); | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Either |
||
| parts = getDataPartsVectorInPartitionForInternalUsage(MergeTreeDataPartState::Active, partition_id, &data_parts_lock); | ||
| } | ||
|
|
||
| if (parts.empty()) | ||
| { | ||
| throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition {} doesn't exist", partition_id); | ||
| } | ||
|
|
||
| const bool throw_on_pending_mutations = query_context->getSettingsRef()[Setting::export_merge_tree_part_throw_on_pending_mutations]; | ||
| const bool throw_on_pending_patch_parts = query_context->getSettingsRef()[Setting::export_merge_tree_part_throw_on_pending_patch_parts]; | ||
|
|
||
| MergeTreeData::IMutationsSnapshot::Params mutations_snapshot_params | ||
| { | ||
| .metadata_version = getInMemoryMetadataPtr()->getMetadataVersion(), | ||
| .min_part_metadata_version = MergeTreeData::getMinMetadataVersion(parts), | ||
| .need_data_mutations = throw_on_pending_mutations, | ||
| .need_alter_mutations = throw_on_pending_mutations || throw_on_pending_patch_parts, | ||
| .need_patch_parts = throw_on_pending_patch_parts, | ||
| }; | ||
|
|
||
| const auto mutations_snapshot = getMutationsSnapshot(mutations_snapshot_params); | ||
|
|
||
| std::vector<String> part_names; | ||
| for (const auto & part : parts) | ||
| { | ||
| const auto alter_conversions = getAlterConversionsForPart(part, mutations_snapshot, query_context); | ||
|
|
||
| /// re-check `throw_on_pending_mutations` because `pending_mutations` might have been filled due to `throw_on_pending_patch_parts` | ||
| if (alter_conversions->hasMutations() && throw_on_pending_mutations) | ||
| { | ||
| throw Exception(ErrorCodes::PENDING_MUTATIONS_NOT_ALLOWED, | ||
| "Partition {} can not be exported because the part {} has pending mutations. Either wait for the mutations to be applied or set `export_merge_tree_part_throw_on_pending_mutations` to false", | ||
| partition_id, | ||
| part->name); | ||
| } | ||
|
|
||
| if (alter_conversions->hasPatches()) | ||
| { | ||
| throw Exception(ErrorCodes::PENDING_MUTATIONS_NOT_ALLOWED, | ||
| "Partition {} can not be exported because the part {} has pending patch parts. Either wait for the patch parts to be applied or set `export_merge_tree_part_throw_on_pending_patch_parts` to false", | ||
| partition_id, | ||
| part->name); | ||
| } | ||
|
|
||
| part_names.push_back(part->name); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does ClickHouse throw some exception when this flag is set? made only fast check, but looks like only some parts are skipped.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should not, at least in this context. The code you are reviewing is the code that process the export query request, not the code that actually exports the data. The
mutations_snapshot_paramsis used to createmutations_snapshotandalter_conversions. None is used for anything but to check if there are pending mutations within this function.I did not understand this, can you elaborate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unclean. why flags "need_something" are filled with values from "throw_on_some_condition".
When I see "throw_on_some_condition", I expect that setting causes exception, but here it changes other behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TLDR: To throw, I need to know if it exists. To know if it exists, I need to set it to true.
Ah, I see. The
need_somethingflags dictacte if the mutation snapshot contains the mutations / patch parts and etc. If I hard code it to false, the snapshot will never include information about pending mutations / patch parts. In this case, I will not be able to check if they exist and will not be able to throw.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does throw, look at https://github.com/Altinity/ClickHouse/pull/1294/changes#diff-b7d33a7abda5f3365d5e4fb58ca98b756f6401372b2e2d5eb449a2bd3444fb5eR6286 and https://github.com/Altinity/ClickHouse/pull/1294/changes#diff-b7d33a7abda5f3365d5e4fb58ca98b756f6401372b2e2d5eb449a2bd3444fb5eR6293
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that it throw exception. I mean, setting does something more than only throw exception.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What else does it do in this case? I don't think so.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I say about naming, not about code logic :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Setting
export_merge_tree_part_throw_on_pending_mutationschange value ofneed_data_mutationsfield inmutations_snapshot_params.need_data_mutationschanges behavior. So setting name is confusing, something like "export_merge_tree_part_do_not_export_on_pending_mutations" (I'm not good in naming) is more clean.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not in this case. Like I said here, in this code snippet the only effect of setting
need_data_mutationsis that the snapshot and alter conversions object will contain information about mutations.And if it does, we throw. Because the only reason for that object to have mutations is: 1. there are pending mutations; 2. the user set
throw = true