From 0bd619b934d34489f001fdc72dbe456824d24542 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 24 Feb 2026 13:33:10 +0100 Subject: [PATCH 01/32] Analyzer: fix __aliasMarker Signed-off-by: Mikhail Filimonov --- src/Analyzer/Utils.cpp | 170 +++++++++++++++ src/Analyzer/Utils.h | 9 + .../createUniqueAliasesIfNecessary.cpp | 1 + src/Functions/identity.cpp | 2 +- src/Functions/identity.h | 26 ++- src/Planner/PlannerActionsVisitor.cpp | 36 ++-- src/Planner/Utils.cpp | 51 +---- src/Storages/StorageDistributed.cpp | 57 +++-- src/Storages/buildQueryTreeForShard.cpp | 11 +- ...lias_marker_with_mergeable_state.reference | 2 + ...03648_alias_marker_with_mergeable_state.sh | 21 +- .../03842_hybrid_alias_issue_1424.reference | 42 ++++ .../03842_hybrid_alias_issue_1424.sql | 202 ++++++++++++++++++ ...istributed_alias_same_expression.reference | 7 + ...3843_distributed_alias_same_expression.sql | 44 ++++ ..._distributed_nested_alias_marker.reference | 4 + .../03844_distributed_nested_alias_marker.sql | 34 +++ ...buted_global_in_join_alias_chain.reference | 8 + ...distributed_global_in_join_alias_chain.sql | 34 +++ ...global_in_alias_marker_collision.reference | 2 + ...buted_global_in_alias_marker_collision.sql | 56 +++++ 21 files changed, 722 insertions(+), 97 deletions(-) create mode 100644 tests/queries/0_stateless/03842_hybrid_alias_issue_1424.reference create mode 100644 tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql create mode 100644 tests/queries/0_stateless/03843_distributed_alias_same_expression.reference create mode 100644 tests/queries/0_stateless/03843_distributed_alias_same_expression.sql create mode 100644 tests/queries/0_stateless/03844_distributed_nested_alias_marker.reference create mode 100644 tests/queries/0_stateless/03844_distributed_nested_alias_marker.sql create mode 100644 tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.reference create mode 100644 
tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.sql create mode 100644 tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.reference create mode 100644 tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.sql diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 7b1193e92a28..72245f974ff7 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -49,6 +49,8 @@ #include +#include + #include namespace DB { @@ -979,6 +981,174 @@ void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const Stri function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } +namespace +{ + +/// `materialized_only`: +/// false = strip any marker. +/// true = strip only markers that arrived in the query already materialized (arg2 is String). +/// Markers injected in the current rewrite keep arg2 as ColumnNode until finalization. +bool stripAliasMarker(QueryTreeNodePtr & node, bool materialized_only) +{ + auto * function_node = node->as(); + if (!function_node || function_node->getFunctionName() != "__aliasMarker") + return false; + + auto & arguments = function_node->getArguments().getNodes(); + if (arguments.size() != 2 || !arguments[0] || !arguments[1]) + return false; + + if (materialized_only) + { + const auto * marker_id_node = arguments[1]->as(); + if (!marker_id_node || !isString(marker_id_node->getResultType())) + return false; + } + + auto replacement = arguments[0]; + if (!replacement->hasAlias() && function_node->hasAlias()) + replacement->setAlias(function_node->getAlias()); + + node = std::move(replacement); + return true; +} + +String buildDeterministicFallbackAliasMarkerId(const ColumnNode & marker_column_node, const QueryTreeNodePtr & marker_expression_node) +{ + IQueryTreeNode::CompareOptions compare_options + { + .compare_aliases = false, + .compare_types = false, + .ignore_cte = true, + }; + + String alias_id = marker_column_node.getColumnName(); + + 
if (const auto & marker_source = marker_column_node.getColumnSourceOrNull()) + { + /// Keep fallback ids deterministic and source-specific when table aliases are not available yet. + alias_id += "__src_" + getHexUIntLowercase(marker_source->getTreeHash(compare_options)); + } + + /// Add expression hash to avoid collapsing different marker payloads with the same column name. + alias_id += "__expr_" + getHexUIntLowercase(marker_expression_node->getTreeHash(compare_options)); + + return alias_id; +} + +void stripAliasMarkersFromPayloadSubtree(QueryTreeNodePtr & node) +{ + while (stripAliasMarker(node, false)) + {} + + for (auto & child : node->getChildren()) + { + if (child) + stripAliasMarkersFromPayloadSubtree(child); + } +} + +/// Finalize __aliasMarker nodes right before distributed SQL boundaries. +/// This pass strips nested markers from arg0 payload and materializes arg2 to String constant. +class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit FinalizeAliasMarkersForDistributedSerializationVisitor(ContextPtr context_) + : context(std::move(context_)) + {} + + bool shouldTraverseTopToBottom() const + { + return false; + } + + static bool needChildVisit(const QueryTreeNodePtr & parent_node, const QueryTreeNodePtr &) + { + auto * function_node = parent_node->as(); + if (!function_node || function_node->getFunctionName() != "__aliasMarker") + return true; + + /// __aliasMarker subtrees are processed explicitly in visitImpl: + /// arg0 is recursively cleaned from nested wrappers and arg2 is materialized in place. 
+ return false; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || function_node->getFunctionName() != "__aliasMarker") + return; + + auto & arguments = function_node->getArguments().getNodes(); + if (arguments.size() != 2 || !arguments[0] || !arguments[1]) + return; + + /// Remove nested marker wrappers in payload subtree; keep only this node as the boundary marker. + stripAliasMarkersFromPayloadSubtree(arguments[0]); + + String alias_id; + if (const auto * marker_column_node = arguments[1]->as()) + { + if (const auto & marker_source = marker_column_node->getColumnSourceOrNull(); + marker_source && marker_source->hasAlias()) + { + alias_id = marker_source->getAlias() + "." + marker_column_node->getColumnName(); + } + else + { + /// In some distributed subquery execution paths marker ids are materialized + /// before alias uniquification assigns source aliases. + alias_id = buildDeterministicFallbackAliasMarkerId(*marker_column_node, arguments[0]); + } + } + else if (const auto * marker_id_node = arguments[1]->as(); + marker_id_node && isString(marker_id_node->getResultType())) + { + alias_id = marker_id_node->getValue().safeGet(); + } + + if (alias_id.empty()) + return; + + arguments[1] = std::make_shared(std::move(alias_id), std::make_shared()); + resolveOrdinaryFunctionNodeByName(*function_node, "__aliasMarker", context); + } + +private: + ContextPtr context; +}; + +/// Strip incoming __aliasMarker wrappers between distributed hops. +/// This keeps marker lifecycle hop-local and avoids forwarding stale previous-hop ids. 
+class StripMaterializedAliasMarkersVisitor : public InDepthQueryTreeVisitor +{ +public: + bool shouldTraverseTopToBottom() const + { + return false; + } + + void visitImpl(QueryTreeNodePtr & node) + { + while (stripAliasMarker(node, true)) + {} + } +}; + +} + +void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context) +{ + FinalizeAliasMarkersForDistributedSerializationVisitor visitor(context); + visitor.visit(node); +} + +void stripMaterializedAliasMarkers(QueryTreeNodePtr & node) +{ + StripMaterializedAliasMarkersVisitor visitor; + visitor.visit(node); +} + std::pair getExpressionSource(const QueryTreeNodePtr & node) { if (const auto * column = node->as()) diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 9a19af2b4e0d..0ace391dc488 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -157,6 +157,15 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin /// Arguments and parameters are taken from the node. void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); +/// Finalize __aliasMarker nodes before distributed SQL boundaries: +/// 1) collapse nested wrappers to keep only current-hop marker id; +/// 2) materialize marker id (arg2) to String ConstantNode. +void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context); + +/// Remove incoming/materialized __aliasMarker wrappers (arg2 is String ConstantNode), +/// preserving wrapped expressions. +void stripMaterializedAliasMarkers(QueryTreeNodePtr & node); + /// Returns single source of expression node. /// First element of pair is source node, can be nullptr if there are no sources or multiple sources. /// Second element of pair is true if there is at most one source, false if there are multiple sources. 
diff --git a/src/Analyzer/createUniqueAliasesIfNecessary.cpp b/src/Analyzer/createUniqueAliasesIfNecessary.cpp index 2846eb28443a..1235f865b170 100644 --- a/src/Analyzer/createUniqueAliasesIfNecessary.cpp +++ b/src/Analyzer/createUniqueAliasesIfNecessary.cpp @@ -229,6 +229,7 @@ void createUniqueAliasesIfNecessary(QueryTreeNodePtr & node, const ContextPtr & * It's required to create a valid AST for distributed query. */ CreateUniqueArrayJoinAliasesVisitor(context).visit(node); + } } diff --git a/src/Functions/identity.cpp b/src/Functions/identity.cpp index 05d2ef870601..3e1c2903a3f9 100644 --- a/src/Functions/identity.cpp +++ b/src/Functions/identity.cpp @@ -38,7 +38,7 @@ REGISTER_FUNCTION(AliasMarker) { factory.registerFunction(FunctionDocumentation{ .description = R"( -Internal function that marks ALIAS column expressions for the analyzer. Not intended for direct use. +Internal function. Not for direct use. )", .syntax = {"__aliasMarker(expr, alias_name)"}, .arguments = { diff --git a/src/Functions/identity.h b/src/Functions/identity.h index f74b92280f65..ed2886fcc034 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -108,6 +108,27 @@ struct AliasMarkerName static constexpr auto name = "__aliasMarker"; }; +/** + * __aliasMarker is a transport-time alias preservation hint for distributed SQL paths. + * + * Why it exists: + * - When a distributed query is planned and mergeable-state flows are used, the final aliasing step + * is intentionally skipped. + * - That is desired, but it also prevents preserving/injecting initiator-side expression names + * (for example, names coming from ALIAS columns or certain CAST expressions). + * - This becomes especially problematic when shard schemas differ slightly. + * - Some injected alias columns must preserve a specific output name; otherwise remote headers may diverge + * from initiator expectations (header mismatch, wrong column association, and similar inconsistencies). 
+ * + * Lifecycle/invariants: + * 1) Injected only around rewritten alias expressions that require stable output identity. + * 2) Materialized before SQL serialization: the marker id is converted to a String alias identifier. + * 3) Consumed by analyzer/planner on receiver to enforce alias naming in actions. + * 4) Removed/stripped before forwarding to the next hop, then (if needed) re-injected for that hop only. + * + * This is a temporary bridge while distributed plan transport still relies on SQL text in these paths. + * As query plan serialization fully replaces that boundary, this marker path should become unnecessary. + */ class FunctionAliasMarker : public IFunction { public: @@ -116,7 +137,7 @@ class FunctionAliasMarker : public IFunction String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isSuitableForConstantFolding() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } @@ -125,9 +146,6 @@ class FunctionAliasMarker : public IFunction if (arguments.size() != 2) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments"); - if (!WhichDataType(arguments[1]).isString()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly"); - return arguments.front(); } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7636b41d06a9..4c7ee14cb082 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -93,6 +94,17 @@ String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_nod return 
buffer.str(); } +String tryExtractAliasMarkerIdFromSecondArgument(const QueryTreeNodePtr & argument) +{ + if (const auto * second_argument_constant = argument->as(); + second_argument_constant && isString(second_argument_constant->getResultType())) + { + return second_argument_constant->getValue().safeGet(); + } + + return {}; +} + class ActionNodeNameHelper { public: @@ -189,14 +201,12 @@ class ActionNodeNameHelper { /// Perform sanity check, because user may call this function with unexpected arguments const auto & function_argument_nodes = function_node.getArguments().getNodes(); - if (function_argument_nodes.size() == 2) - { - if (const auto * second_argument = function_argument_nodes.at(1)->as()) - { - if (isString(second_argument->getResultType())) - result = second_argument->getValue().safeGet(); - } - } + if (function_argument_nodes.size() != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments"); + + result = tryExtractAliasMarkerIdFromSecondArgument(function_argument_nodes.at(1)); + if (result.empty()) + result = calculateActionNodeName(function_argument_nodes.at(0)); /// Empty node name is not allowed and leads to logical errors if (result.empty()) @@ -1139,15 +1149,11 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi if (function_arguments.size() != 2) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments"); - const auto * alias_id_node = function_arguments.at(1)->as(); - if (!alias_id_node || !isString(alias_id_node->getResultType())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly"); - - const auto & alias_id = alias_id_node->getValue().safeGet(); + auto [child_name, levels] = visitImpl(function_arguments.at(0)); + auto alias_id = tryExtractAliasMarkerIdFromSecondArgument(function_arguments.at(1)); if (alias_id.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function 
__aliasMarker is internal and should not be used directly"); + alias_id = child_name; - auto [child_name, levels] = visitImpl(function_arguments.at(0)); if (alias_id == child_name) return {child_name, levels}; diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 5357e3d93dba..660a33ed5d7e 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -199,62 +199,13 @@ ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node, bool set_subq return result_ast; } -namespace -{ -class NormalizeAliasMarkerVisitor : public InDepthQueryTreeVisitor -{ -public: - void visitImpl(QueryTreeNodePtr & node) - { - auto * function_node = node->as(); - if (!function_node || function_node->getFunctionName() != "__aliasMarker") - return; - - auto & arguments = function_node->getArguments().getNodes(); - if (arguments.size() != 2) - return; - - while (true) - { - auto * inner_function = arguments.front()->as(); - if (!inner_function || inner_function->getFunctionName() != "__aliasMarker") - break; - - auto & inner_arguments = inner_function->getArguments().getNodes(); - if (inner_arguments.size() != 2) - break; - - arguments.front() = inner_arguments.front(); - } - } - - bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child) - { - auto * parent_function = parent->as(); - if (parent_function && parent_function->getFunctionName() == "__aliasMarker") - return false; - - auto child_node_type = child->getNodeType(); - return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION); - } -}; - -void normalizeAliasMarkersInQueryTree(QueryTreeNodePtr & node) -{ - NormalizeAliasMarkerVisitor visitor; - visitor.visit(node); -} -} - ASTPtr queryNodeToDistributedSelectQuery(const QueryTreeNodePtr & query_node) { /// Remove CTEs information from distributed queries. /// Now, if cte_name is set for subquery node, AST -> String serialization will only print cte name. 
/// But CTE is defined only for top-level query part, so may not be sent. /// Removing cte_name forces subquery to be always printed. - auto query_node_to_convert = query_node->clone(); - normalizeAliasMarkersInQueryTree(query_node_to_convert); - auto ast = queryNodeToSelectQuery(query_node_to_convert, /*set_subquery_cte_name=*/false); + auto ast = queryNodeToSelectQuery(query_node, /*set_subquery_cte_name=*/false); return ast; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6566aabdad97..5370a852fae1 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -55,7 +55,6 @@ #include #include -#include #include #include #include @@ -212,6 +211,7 @@ namespace Setting extern const SettingsUInt64 allow_experimental_parallel_reading_from_replicas; extern const SettingsBool prefer_global_in_and_join; extern const SettingsBool skip_unavailable_shards; + extern const SettingsBool serialize_query_plan; extern const SettingsBool enable_global_with_statement; extern const SettingsBool allow_experimental_hybrid_table; extern const SettingsBool enable_alias_marker; @@ -840,6 +840,8 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshot(const StorageMetadataP namespace { +/// Rebuild alias ColumnNode references into expression nodes and optionally +/// wrap them with __aliasMarker for distributed SQL transport. class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitor { QueryTreeNodePtr getColumnNodeAliasExpression(const QueryTreeNodePtr & node) const @@ -855,40 +857,39 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitorgetExpression(); - const auto & column_name = column_node->getColumnName(); + const String original_expression_alias = column_expression->hasAlias() ? 
column_expression->getAlias() : String{}; - if (!context->getSettingsRef()[Setting::enable_alias_marker]) + const auto & settings = context->getSettingsRef(); + /// With serialized query plans we transfer actions directly and do not need SQL-only alias markers. + const bool use_alias_marker = settings[Setting::enable_alias_marker] && !settings[Setting::serialize_query_plan]; + if (!use_alias_marker) { - column_expression->setAlias(column_name); return column_expression; } - String alias_id; - const auto & source_alias = column_source->getAlias(); - if (!source_alias.empty()) - alias_id = source_alias + "." + column_name; - else - alias_id = column_name; - if (auto * function_node = column_expression->as(); function_node && function_node->getFunctionName() == "__aliasMarker") { auto & arguments = function_node->getArguments().getNodes(); - if (arguments.size() == 2) - arguments[1] = std::make_shared(alias_id, std::make_shared()); - - column_expression->setAlias(column_name); - return column_expression; + if (!arguments.empty() && arguments[0]) + column_expression = arguments[0]; } QueryTreeNodes arguments; arguments.reserve(2); + /// Preserve the original column reference in arg2 so normal analyzer passes + /// (alias/source uniquification) can still transform it consistently. + /// Before query is sent to shard this ColumnNode is materialized to String ConstantNode. 
arguments.emplace_back(std::move(column_expression)); - arguments.emplace_back(std::make_shared(alias_id, std::make_shared())); + arguments.emplace_back(std::make_shared(column_node->getColumn(), column_source)); auto alias_marker_node = std::make_shared("__aliasMarker"); alias_marker_node->getArguments().getNodes() = std::move(arguments); - alias_marker_node->setAlias(column_name); + if (!original_expression_alias.empty()) + { + alias_marker_node->getArguments().getNodes()[0]->removeAlias(); + alias_marker_node->setAlias(original_expression_alias); + } resolveOrdinaryFunctionNodeByName(*alias_marker_node, "__aliasMarker", context); return alias_marker_node; @@ -903,6 +904,22 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitoras(); + if (!function_node || function_node->getFunctionName() != "__aliasMarker") + return true; + + const auto & arguments = function_node->getArguments().getNodes(); + if (arguments.size() < 2) + return true; + + /// Do not recurse into __aliasMarker arg2. + /// It is an internal column-reference payload used only for later id materialization, + /// and visiting it here can re-expand aliases or create recursive rewrites. 
+ return child_node.get() != arguments[1].get(); + } + private: ContextPtr context; }; @@ -1215,7 +1232,9 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, rewriteJoinToGlobalJoinIfNeeded(query_node.getJoinTree()); } - return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify, /*allow_global_join_for_right_table*/ false); + auto shard_query_tree = buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify, /*allow_global_join_for_right_table*/ false); + finalizeAliasMarkersForDistributedSerialization(shard_query_tree, query_context); + return shard_query_tree; } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index efb7d426b4fe..a26eef9a0d95 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -404,7 +404,10 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, ContextMutablePtr & mutable_context, size_t subquery_depth) { - const auto subquery_hash = subquery_node->getTreeHash(); + auto subquery_node_to_execute = subquery_node->clone(); + finalizeAliasMarkersForDistributedSerialization(subquery_node_to_execute, mutable_context); + + const auto subquery_hash = subquery_node_to_execute->getTreeHash(); const auto temporary_table_name = fmt::format("_data_{}", toString(subquery_hash)); const auto & external_tables = mutable_context->getExternalTables(); @@ -422,7 +425,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, auto context_copy = Context::createCopy(mutable_context); updateContextForSubqueryExecution(context_copy); - InterpreterSelectQueryAnalyzer interpreter(subquery_node, context_copy, subquery_options); + InterpreterSelectQueryAnalyzer interpreter(subquery_node_to_execute, context_copy, subquery_options); auto & query_plan = interpreter.getQueryPlan(); auto sample_block_with_unique_names = *query_plan.getCurrentHeader(); @@ -560,6 +563,10 @@ 
QueryTreeNodePtr getSubqueryFromTableExpression( QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify, bool allow_global_join_for_right_table) { + /// Incoming materialized markers are hop-local metadata. + /// Strip them before this node prepares/executes subqueries for the next hop. + stripMaterializedAliasMarkers(query_tree_to_modify); + CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor; collect_column_source_to_columns_visitor.visit(query_tree_to_modify); diff --git a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference index 58bf6a7ec74b..5f061a829b23 100644 --- a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference +++ b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.reference @@ -2,6 +2,8 @@ Header: sum(foo) AggregateFunction(sum, Int64) ---- stage: with_mergeable_state (analyzer=0) ---- Expected error: Function __aliasMarker is internal and supported only with the analyzer +---- explicit __aliasMarker in user query (analyzer=1) ---- +Explicit __aliasMarker call is allowed ---- stage: complete (analyzer=1) ---- Header: x Int64 ---- stage: fetch_columns (analyzer=1) ---- diff --git a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh index 66974be38517..fb0580e796f0 100755 --- a/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh +++ b/tests/queries/0_stateless/03648_alias_marker_with_mergeable_state.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh echo "---- stage: with_mergeable_state (analyzer=1, setting=enable_alias_marker=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state --multiquery 2>&1 <<'EOF' | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage with_mergeable_state --multiquery 2>&1 <<'EOF' | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' SET enable_alias_marker=1; EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x @@ -22,27 +22,36 @@ else echo "${alias_marker_error_output}" fi +echo "---- explicit __aliasMarker in user query (analyzer=1) ----" +if $CLICKHOUSE_CLIENT --enable_analyzer=1 --query \ + "SELECT __aliasMarker(number*2-3,'foo') FROM numbers(1)" >/dev/null 2>&1 +then + echo "Explicit __aliasMarker call is allowed" +else + echo "Unexpected error for explicit __aliasMarker call" +fi + echo "---- stage: complete (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage complete --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage complete --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- stage: fetch_columns (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage fetch_columns --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage fetch_columns --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- stage: with_mergeable_state (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage with_mergeable_state --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- 
stage: with_mergeable_state_after_aggregation (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state_after_aggregation --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage with_mergeable_state_after_aggregation --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10)" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' echo "---- stage: with_mergeable_state_after_aggregation_and_limit (analyzer=1) ----" -$CLICKHOUSE_CLIENT --enable_analyzer=1 --stage with_mergeable_state_after_aggregation_and_limit --query \ +$CLICKHOUSE_CLIENT --enable_analyzer=1 --query_kind secondary_query --stage with_mergeable_state_after_aggregation_and_limit --query \ "EXPLAIN header=1 SELECT sum(__aliasMarker(number*2-3,'foo')) AS x FROM numbers(10) GROUP BY intDiv(number,10) AS y ORDER BY y LIMIT 10" \ 2>&1 | sed -n '/^Header:/,/^ [^ ]/p' | sed '$d' diff --git a/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.reference b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.reference new file mode 100644 index 000000000000..6f78da4c4f59 --- /dev/null +++ b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.reference @@ -0,0 +1,42 @@ +max in subquery +4294967294 +sum in subquery +-4921211434 +cte min with predicate +679772422 +cte with limit +-2147483648 -4294967296 +-1762862292 -574613778 +-1329695183 -1573638336 +-221724287 679772422 +0 0 +550067609 -3048000734 +1084637461 3417479706 +1169291374 -3082049462 +1899628504 -740161250 +2147483647 4294967294 +cte without limit +-2147483648 -4294967296 +-1762862292 -574613778 +-1329695183 -1573638336 +-221724287 679772422 +0 0 +550067609 -3048000734 +1084637461 3417479706 +1169291374 -3082049462 +1899628504 -740161250 +2147483647 4294967294 +group by in subquery +10 10 +intersect with order by +-221724287 679772422 +1084637461 3417479706 +2147483647 4294967294 +intersect without order by +-221724287 679772422 +1084637461 
3417479706 +2147483647 4294967294 +constant alias in subquery +9 7 32 +constant alias predicate +2 diff --git a/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql new file mode 100644 index 000000000000..3d2f527eb534 --- /dev/null +++ b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql @@ -0,0 +1,202 @@ +SET allow_experimental_hybrid_table = 1, enable_analyzer = 1; + +DROP TABLE IF EXISTS test_hybrid_issue_1424; +DROP TABLE IF EXISTS test_hybrid_issue_1424_left; +DROP TABLE IF EXISTS test_hybrid_issue_1424_right; +DROP TABLE IF EXISTS test_hybrid_issue_1424_const; +DROP TABLE IF EXISTS test_hybrid_issue_1424_const_left; +DROP TABLE IF EXISTS test_hybrid_issue_1424_const_right; + +CREATE TABLE test_hybrid_issue_1424_left +( + id Int32, + value Int32, + date_col Date, + computed ALIAS value * 2 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_left VALUES + (toInt32(2147483647), toInt32(2147483647), toDate('2149-06-06')), + (toInt32(-2147483648), toInt32(-2147483648), toDate('1970-01-01')), + (toInt32(0), toInt32(0), '1970-01-01'), + (toInt32(1084637461), toInt32(1708739853), toDate(1335613783)), + (toInt32(-221724287), toInt32(339886211), toDate(1294089763)), + (toInt32(-1762862292), toInt32(-287306889), toDate(1375707465)), + (toInt32(1169291374), toInt32(-1541024731), toDate(1082126480)), + (toInt32(-1329695183), toInt32(-786819168), toDate(1226000164)), + (toInt32(1899628504), toInt32(-370080625), toDate(1179050966)), + (toInt32(550067609), toInt32(-1524000367), toDate(1410654931)); + +CREATE TABLE test_hybrid_issue_1424_right +( + id Int32, + value Int32, + date_col Date, + computed ALIAS value * 2 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_right VALUES + (toInt32(2147483647), toInt32(2147483647), toDate('2149-06-06')), + 
(toInt32(-2147483648), toInt32(-2147483648), toDate('1970-01-01')), + (toInt32(0), toInt32(0), '1970-01-01'), + (toInt32(1084637461), toInt32(1708739853), toDate(1335613783)), + (toInt32(-221724287), toInt32(339886211), toDate(1294089763)), + (toInt32(-1762862292), toInt32(-287306889), toDate(1375707465)), + (toInt32(1169291374), toInt32(-1541024731), toDate(1082126480)), + (toInt32(-1329695183), toInt32(-786819168), toDate(1226000164)), + (toInt32(1899628504), toInt32(-370080625), toDate(1179050966)), + (toInt32(550067609), toInt32(-1524000367), toDate(1410654931)); + +CREATE TABLE test_hybrid_issue_1424 +( + id Int32, + value Int32, + date_col Date, + computed Int64 +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_left'), date_col >= '2025-01-15', + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_right'), date_col < '2025-01-15' +); + +SELECT 'max in subquery'; +SELECT max_computed FROM (SELECT max(computed) AS max_computed FROM test_hybrid_issue_1424); + +SELECT 'sum in subquery'; +SELECT sum_computed FROM (SELECT sum(computed) AS sum_computed FROM test_hybrid_issue_1424); + +SELECT 'cte min with predicate'; +WITH cte AS +( + SELECT min(computed) AS min_computed + FROM test_hybrid_issue_1424 + WHERE computed > 50 +) +SELECT * FROM cte; + +SELECT 'cte with limit'; +WITH ranked AS +( + SELECT id, computed + FROM test_hybrid_issue_1424 + LIMIT 10 +) +SELECT * +FROM ranked +ORDER BY id ASC; + +SELECT 'cte without limit'; +WITH ranked AS +( + SELECT id, computed + FROM test_hybrid_issue_1424 +) +SELECT * +FROM ranked +ORDER BY id ASC; + +SELECT 'group by in subquery'; +WITH monthly AS +( + SELECT count() AS cnt + FROM test_hybrid_issue_1424 + GROUP BY computed +) +SELECT sum(cnt), count() FROM monthly; + +SELECT 'intersect with order by'; +SELECT * +FROM +( + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE computed > 100 + INTERSECT + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE 
value > 50 +) +ORDER BY id; + +SELECT 'intersect without order by'; +SELECT * +FROM +( + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE computed > 100 + INTERSECT + SELECT id, computed + FROM test_hybrid_issue_1424 + WHERE value > 50 +) +ORDER BY id; + +CREATE TABLE test_hybrid_issue_1424_const_left +( + id Int32, + value Int32, + date_col Date, + computed ALIAS toInt64(7) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_const_left VALUES + (1, 1, toDate('2025-01-15')), + (2, 2, toDate('2025-02-01')); + +CREATE TABLE test_hybrid_issue_1424_const_right +( + id Int32, + value Int32, + date_col Date, + computed ALIAS toInt64(9) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(date_col) +ORDER BY (date_col, id); + +INSERT INTO test_hybrid_issue_1424_const_right VALUES + (3, 3, toDate('2024-12-31')), + (4, 4, toDate('2020-01-01')); + +CREATE TABLE test_hybrid_issue_1424_const +( + id Int32, + value Int32, + date_col Date, + computed Int64 +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_const_left'), date_col >= '2025-01-15', + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1424_const_right'), date_col < '2025-01-15' +); + +SELECT 'constant alias in subquery'; +SELECT max_computed, min_computed, sum_computed +FROM +( + SELECT + max(computed) AS max_computed, + min(computed) AS min_computed, + sum(computed) AS sum_computed + FROM test_hybrid_issue_1424_const +); + +SELECT 'constant alias predicate'; +SELECT count() FROM test_hybrid_issue_1424_const WHERE computed = 9; + +DROP TABLE test_hybrid_issue_1424; +DROP TABLE test_hybrid_issue_1424_left; +DROP TABLE test_hybrid_issue_1424_right; +DROP TABLE test_hybrid_issue_1424_const; +DROP TABLE test_hybrid_issue_1424_const_left; +DROP TABLE test_hybrid_issue_1424_const_right; diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference 
b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference new file mode 100644 index 000000000000..18bd6916995a --- /dev/null +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference @@ -0,0 +1,7 @@ +first +1999-03-29 01:15:33.000 +second +1999-03-29 01:15:33.000 +third +1999-03-29 01:15:33.000 +fourth diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql new file mode 100644 index 000000000000..1767e932b6b7 --- /dev/null +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql @@ -0,0 +1,44 @@ +-- Regression coverage for distributed ORDER BY + ALIAS columns with identical expressions. +-- Related issue: https://github.com/ClickHouse/ClickHouse/issues/79916 + +DROP TABLE IF EXISTS test_alias_same_expr_remote; + +CREATE TABLE test_alias_same_expr_remote +( + dt DateTime64(3), + String_7 String, + alias_String_7_0 String ALIAS String_7, + alias_String_7_1 String ALIAS String_7 +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_alias_same_expr_remote VALUES ('1999-03-29T01:15:33', ''); + +SELECT 'first'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +LIMIT 1; + +SELECT 'second'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 0; + +SELECT 'third'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1; + +SELECT 'fourth'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 0; -- { serverError NUMBER_OF_COLUMNS_DOESNT_MATCH } + 
+DROP TABLE test_alias_same_expr_remote; diff --git a/tests/queries/0_stateless/03844_distributed_nested_alias_marker.reference b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.reference new file mode 100644 index 000000000000..7b05cb1e81a0 --- /dev/null +++ b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.reference @@ -0,0 +1,4 @@ +analyzer +x x +legacy +x x diff --git a/tests/queries/0_stateless/03844_distributed_nested_alias_marker.sql b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.sql new file mode 100644 index 000000000000..b725acf38949 --- /dev/null +++ b/tests/queries/0_stateless/03844_distributed_nested_alias_marker.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_nested_alias_dist; +DROP TABLE IF EXISTS test_nested_alias_local; + +CREATE TABLE test_nested_alias_local +( + dt DateTime64(3), + base String, + a String ALIAS base, + b String ALIAS a +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_nested_alias_local VALUES ('1999-03-29T01:15:33', 'x'); + +CREATE TABLE test_nested_alias_dist AS test_nested_alias_local +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_nested_alias_local, rand()); + +SELECT 'analyzer'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1; + +SELECT 'legacy'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 0; + +DROP TABLE test_nested_alias_dist; +DROP TABLE test_nested_alias_local; diff --git a/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.reference b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.reference new file mode 100644 index 000000000000..325078d71cc1 --- /dev/null +++ b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.reference @@ -0,0 +1,8 @@ +rewrite_in +1 +1 +rewrite_join +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.sql 
b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.sql new file mode 100644 index 000000000000..9bd95d72fd20 --- /dev/null +++ b/tests/queries/0_stateless/03845_distributed_global_in_join_alias_chain.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_global_alias_chain_dist; +DROP TABLE IF EXISTS test_global_alias_chain_local; + +CREATE TABLE test_global_alias_chain_local +( + id UInt64, + base UInt64, + a UInt64 ALIAS base, + b UInt64 ALIAS a +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_global_alias_chain_local VALUES (1, 1); + +CREATE TABLE test_global_alias_chain_dist AS test_global_alias_chain_local +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), test_global_alias_chain_local, rand()); + +SELECT 'rewrite_in'; +SELECT id +FROM test_global_alias_chain_dist +WHERE id IN (SELECT b FROM test_global_alias_chain_dist) +ORDER BY id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +SELECT 'rewrite_join'; +SELECT l.id +FROM test_global_alias_chain_dist AS l +INNER JOIN (SELECT b FROM test_global_alias_chain_dist) AS r ON l.id = r.b +ORDER BY l.id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +DROP TABLE test_global_alias_chain_dist; +DROP TABLE test_global_alias_chain_local; diff --git a/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.reference b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.reference new file mode 100644 index 000000000000..9a3a29a69ce8 --- /dev/null +++ b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.reference @@ -0,0 +1,2 @@ +global_in_collision_check +1 diff --git a/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.sql b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.sql new file mode 100644 index 000000000000..d47e6a304ba1 --- /dev/null +++ 
b/tests/queries/0_stateless/03846_distributed_global_in_alias_marker_collision.sql @@ -0,0 +1,56 @@ +DROP TABLE IF EXISTS test_marker_collision_dist; +DROP TABLE IF EXISTS test_marker_collision_main; +DROP TABLE IF EXISTS test_marker_collision_left; +DROP TABLE IF EXISTS test_marker_collision_right; + +CREATE TABLE test_marker_collision_main +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_collision_main VALUES (1); + +CREATE TABLE test_marker_collision_left +( + id UInt64, + x UInt64, + b UInt64 ALIAS x +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_marker_collision_right +( + id UInt64, + y UInt64, + b UInt64 ALIAS y +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_collision_left VALUES (1, 1); +INSERT INTO test_marker_collision_right VALUES (1, 20); + +CREATE TABLE test_marker_collision_dist AS test_marker_collision_main +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_collision_main, rand()); + +SELECT 'global_in_collision_check'; +SELECT id +FROM test_marker_collision_dist +WHERE id GLOBAL IN +( + SELECT test_marker_collision_left.id + FROM test_marker_collision_left + INNER JOIN test_marker_collision_right + ON test_marker_collision_left.id = test_marker_collision_right.id + WHERE test_marker_collision_left.b + test_marker_collision_right.b = 21 +) +ORDER BY id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1; + +DROP TABLE test_marker_collision_dist; +DROP TABLE test_marker_collision_main; +DROP TABLE test_marker_collision_left; +DROP TABLE test_marker_collision_right; From 610d2970a38ca4c8591c9bfb39ad6a0bfa2e2221 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 10 Mar 2026 10:40:31 +0100 Subject: [PATCH 02/32] Add distributed alias marker regressions Signed-off-by: Mikhail Filimonov --- ...istributed_alias_same_expression.reference | 2 + ...3843_distributed_alias_same_expression.sql | 7 +++ ...replicas_second_hop_alias_marker.reference | 4 ++ 
...allel_replicas_second_hop_alias_marker.sql | 51 +++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.reference create mode 100644 tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.sql diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference index 18bd6916995a..1a74c27dc4c5 100644 --- a/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference @@ -5,3 +5,5 @@ second third 1999-03-29 01:15:33.000 fourth +fifth +1999-03-29 01:15:33.000 diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql index 1767e932b6b7..3a6d7b19011f 100644 --- a/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql @@ -41,4 +41,11 @@ ORDER BY dt LIMIT 1 SETTINGS enable_analyzer = 1, enable_alias_marker = 0; -- { serverError NUMBER_OF_COLUMNS_DOESNT_MATCH } +SELECT 'fifth'; +SELECT dt, alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1; + DROP TABLE test_alias_same_expr_remote; diff --git a/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.reference b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.reference new file mode 100644 index 000000000000..fbdae0d35623 --- /dev/null +++ b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.reference @@ -0,0 +1,4 @@ +single_replica_second_hop +1999-03-29 01:15:33.000 x x +parallel_replicas_second_hop 
+1999-03-29 01:15:33.000 x x diff --git a/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.sql b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.sql new file mode 100644 index 000000000000..5500c4904f82 --- /dev/null +++ b/tests/queries/0_stateless/03847_parallel_replicas_second_hop_alias_marker.sql @@ -0,0 +1,51 @@ +-- Regression coverage for materialized __aliasMarker metadata across +-- remote -> Distributed -> parallel replicas fanout. + +DROP TABLE IF EXISTS test_alias_pr_second_hop_dist; +DROP TABLE IF EXISTS test_alias_pr_second_hop_local; + +CREATE TABLE test_alias_pr_second_hop_local +( + dt DateTime64(3), + base String, + alias_base_0 String ALIAS base, + alias_base_1 String ALIAS base +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_alias_pr_second_hop_local VALUES + ('1999-03-29T01:15:33', 'x'), + ('1999-03-29T01:15:34', 'y'); + +CREATE TABLE test_alias_pr_second_hop_dist AS test_alias_pr_second_hop_local +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_alias_pr_second_hop_local); + +SELECT 'single_replica_second_hop'; +SELECT dt, alias_base_0, alias_base_1 +FROM remote('127.0.0.2', currentDatabase(), test_alias_pr_second_hop_dist) +ORDER BY dt +LIMIT 1 +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + enable_parallel_replicas = 1, + max_parallel_replicas = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + parallel_replicas_for_non_replicated_merge_tree = 1; + +SELECT 'parallel_replicas_second_hop'; +SELECT dt, alias_base_0, alias_base_1 +FROM remote('127.0.0.2', currentDatabase(), test_alias_pr_second_hop_dist) +ORDER BY dt +LIMIT 1 +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + enable_parallel_replicas = 1, + max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + parallel_replicas_for_non_replicated_merge_tree = 1; + +DROP 
TABLE test_alias_pr_second_hop_dist; +DROP TABLE test_alias_pr_second_hop_local; From 439ab815178e048863e0c3fb16fc6810792ea6c5 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 10 Mar 2026 15:21:10 +0100 Subject: [PATCH 03/32] Consolidate distributed alias regression tests Signed-off-by: Mikhail Filimonov --- ...3_distributed_alias_same_expression.reference | 6 ++++++ .../03843_distributed_alias_same_expression.sql | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference index 1a74c27dc4c5..279f11b9bf19 100644 --- a/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.reference @@ -7,3 +7,9 @@ third fourth fifth 1999-03-29 01:15:33.000 +sixth +query_alias_0 query_alias_1 + +seventh +alias_String_7_0 alias_String_7_1 + diff --git a/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql index 3a6d7b19011f..8aa596998571 100644 --- a/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql +++ b/tests/queries/0_stateless/03843_distributed_alias_same_expression.sql @@ -48,4 +48,20 @@ ORDER BY dt LIMIT 1 SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1; +SELECT 'sixth'; +SELECT alias_String_7_0 AS query_alias_0, alias_String_7_1 AS query_alias_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'seventh'; +SELECT alias_String_7_0, alias_String_7_1 +FROM remote('127.0.0.{1,2}', currentDatabase(), test_alias_same_expr_remote) +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + DROP TABLE 
test_alias_same_expr_remote; From 205cd7211e3e282e4250c46f1bba9a0e9b145f5e Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 10 Mar 2026 15:35:22 +0100 Subject: [PATCH 04/32] Fix distributed alias marker handling Signed-off-by: Mikhail Filimonov --- src/Functions/identity.h | 51 ++++++++++++++----- src/Storages/StorageDistributed.cpp | 19 +++---- src/Storages/buildQueryTreeForShard.cpp | 9 ++-- .../03842_hybrid_alias_issue_1424.sql | 2 +- 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/Functions/identity.h b/src/Functions/identity.h index ed2886fcc034..980b58690028 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -109,22 +109,47 @@ struct AliasMarkerName }; /** - * __aliasMarker is a transport-time alias preservation hint for distributed SQL paths. + * __aliasMarker is an internal function used to enforce an alias-preserving projection step exactly + * where it appears in distributed SQL transport. * - * Why it exists: - * - When a distributed query is planned and mergeable-state flows are used, the final aliasing step - * is intentionally skipped. - * - That is desired, but it also prevents preserving/injecting initiator-side expression names - * (for example, names coming from ALIAS columns or certain CAST expressions). - * - This becomes especially problematic when shard schemas differ slightly. - * - Some injected alias columns must preserve a specific output name; otherwise remote headers may diverge - * from initiator expectations (header mismatch, wrong column association, and similar inconsistencies). + * It is injected only when a pushed-down expression must still behave like a real column from the + * initiator's point of view, rather than as an arbitrary expression produced on the initiator. This + * typically happens after expanding an ALIAS column to its underlying expression for distributed SQL + * transport. 
Conceptually, if the initiator has `SELECT foo AS bar FROM distr` and `foo` is an ALIAS + * column such as `1 + x`, the remote query should look like + * `SELECT __aliasMarker(1 + x, 'table1.foo') AS bar FROM local AS table1`. + * + * The user-facing SQL alias (`bar` in the example above) is separate and must stay untouched. + * __aliasMarker carries only the low-level column identity that says "treat this expression as the + * expanded form of that logical column". Preserving that identity is important because otherwise remote + * headers may diverge from initiator expectations, leading to header mismatch, wrong column association, + * or column-count mismatch. + * + * This must not be confused with normal SQL aliases that appear in the query text: those participate + * in user-visible query semantics and may or may not be materialized depending on the execution stage. + * A normal SQL alias is not enough here because it may interfere with user query logic, clash with + * existing names, and in the mergeable-state path the final projection step that normally assigns + * aliases is intentionally skipped (see the conditional createComputeAliasColumnsStep(...) path in + * PlannerJoinTree::buildQueryPlanForTableExpression()). + * + * This is also why __aliasMarker is not the same as __actionName. For this use case we need the + * wrapper to be consumed into an alias/projection boundary on top of the child expression, so the + * expression keeps behaving like a distinct logical column. __actionName would instead survive as a + * normal function node with a forced result name, which is a different semantic contract and a worse + * fit for distributed alias transport. + * + * The marker also prevents distinct logical columns with the same expression from collapsing into one + * transport column. 
For example, `SELECT 2 * x AS x, 2 * x AS y` must still travel as two columns; + * otherwise both expressions may collapse to a single `multiply(2, x)` output and break distributed + * header reconciliation. * * Lifecycle/invariants: - * 1) Injected only around rewritten alias expressions that require stable output identity. - * 2) Materialized before SQL serialization: the marker id is converted to a String alias identifier. - * 3) Consumed by analyzer/planner on receiver to enforce alias naming in actions. - * 4) Removed/stripped before forwarding to the next hop, then (if needed) re-injected for that hop only. + * 1) Injected around rewritten alias expressions that need stable transport identity. + * 2) Materialized before the query is sent to the shard in serialized form: the marker id is converted + * to a String alias identifier. + * 3) Consumed on the receiver by adding a projection step where it appears, so that identity is enforced + * in actions without changing the user-facing aliasing logic. + * 4) Removed before forwarding to the next hop, then re-injected there only if that hop still needs it. * * This is a temporary bridge while distributed plan transport still relies on SQL text in these paths. * As query plan serialization fully replaces that boundary, this marker path should become unnecessary. diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 5370a852fae1..ea1f0ff9661b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -857,14 +857,17 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitorgetExpression(); - const String original_expression_alias = column_expression->hasAlias() ? column_expression->getAlias() : String{}; + const String output_alias = column_node->hasAlias() ? 
column_node->getAlias() : String{}; const auto & settings = context->getSettingsRef(); - /// With serialized query plans we transfer actions directly and do not need SQL-only alias markers. - const bool use_alias_marker = settings[Setting::enable_alias_marker] && !settings[Setting::serialize_query_plan]; + const bool use_alias_marker = settings[Setting::enable_alias_marker]; if (!use_alias_marker) { - return column_expression; + auto column_expression_with_alias = column_expression->clone(); + column_expression_with_alias->removeAlias(); + if (!output_alias.empty()) + column_expression_with_alias->setAlias(output_alias); + return column_expression_with_alias; } if (auto * function_node = column_expression->as(); @@ -885,11 +888,9 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitor("__aliasMarker"); alias_marker_node->getArguments().getNodes() = std::move(arguments); - if (!original_expression_alias.empty()) - { - alias_marker_node->getArguments().getNodes()[0]->removeAlias(); - alias_marker_node->setAlias(original_expression_alias); - } + alias_marker_node->getArguments().getNodes()[0]->removeAlias(); + if (!output_alias.empty()) + alias_marker_node->setAlias(output_alias); resolveOrdinaryFunctionNodeByName(*alias_marker_node, "__aliasMarker", context); return alias_marker_node; diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index a26eef9a0d95..be8c48577779 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -39,6 +39,7 @@ namespace Setting extern const SettingsDistributedProductMode distributed_product_mode; extern const SettingsUInt64 min_external_table_block_size_rows; extern const SettingsUInt64 min_external_table_block_size_bytes; + extern const SettingsNonZeroUInt64 max_parallel_replicas; extern const SettingsBool parallel_replicas_prefer_local_join; extern const SettingsBool prefer_global_in_and_join; extern const SettingsBool 
enable_add_distinct_to_in_subqueries; @@ -563,9 +564,11 @@ QueryTreeNodePtr getSubqueryFromTableExpression( QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify, bool allow_global_join_for_right_table) { - /// Incoming materialized markers are hop-local metadata. - /// Strip them before this node prepares/executes subqueries for the next hop. - stripMaterializedAliasMarkers(query_tree_to_modify); + /// Incoming materialized markers are usually hop-local metadata. + /// Keep them when this shard is about to fan out again via parallel replicas, + /// because that path reuses the already materialized markers and does not re-inject them. + if (planner_context->getQueryContext()->getSettingsRef()[Setting::max_parallel_replicas] <= 1) + stripMaterializedAliasMarkers(query_tree_to_modify); CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor; collect_column_source_to_columns_visitor.visit(query_tree_to_modify); diff --git a/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql index 3d2f527eb534..8b9cf9182896 100644 --- a/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql +++ b/tests/queries/0_stateless/03842_hybrid_alias_issue_1424.sql @@ -1,4 +1,4 @@ -SET allow_experimental_hybrid_table = 1, enable_analyzer = 1; +SET allow_experimental_hybrid_table = 1, enable_analyzer = 1, enable_alias_marker = 1; DROP TABLE IF EXISTS test_hybrid_issue_1424; DROP TABLE IF EXISTS test_hybrid_issue_1424_left; From 1ef98aee9f0401513057a348fa5c31babcb5ec27 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 10 Mar 2026 22:14:57 +0100 Subject: [PATCH 05/32] Tighten alias marker serialization contract Signed-off-by: Mikhail Filimonov --- src/Analyzer/Utils.cpp | 31 +- ...buted_global_alias_marker_matrix.reference | 35 +++ ...distributed_global_alias_marker_matrix.sql | 297 ++++++++++++++++++ 3 files changed, 337 
insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.reference create mode 100644 tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.sql diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 72245f974ff7..b0e2ead03be4 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -1013,29 +1013,6 @@ bool stripAliasMarker(QueryTreeNodePtr & node, bool materialized_only) return true; } -String buildDeterministicFallbackAliasMarkerId(const ColumnNode & marker_column_node, const QueryTreeNodePtr & marker_expression_node) -{ - IQueryTreeNode::CompareOptions compare_options - { - .compare_aliases = false, - .compare_types = false, - .ignore_cte = true, - }; - - String alias_id = marker_column_node.getColumnName(); - - if (const auto & marker_source = marker_column_node.getColumnSourceOrNull()) - { - /// Keep fallback ids deterministic and source-specific when table aliases are not available yet. - alias_id += "__src_" + getHexUIntLowercase(marker_source->getTreeHash(compare_options)); - } - - /// Add expression hash to avoid collapsing different marker payloads with the same column name. - alias_id += "__expr_" + getHexUIntLowercase(marker_expression_node->getTreeHash(compare_options)); - - return alias_id; -} - void stripAliasMarkersFromPayloadSubtree(QueryTreeNodePtr & node) { while (stripAliasMarker(node, false)) @@ -1096,9 +1073,11 @@ class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQue } else { - /// In some distributed subquery execution paths marker ids are materialized - /// before alias uniquification assigns source aliases. - alias_id = buildDeterministicFallbackAliasMarkerId(*marker_column_node, arguments[0]); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "__aliasMarker expects the second argument to resolve to a column with a source alias before distributed serialization. 
" + "Column '{}' has an unnamed or missing source", + marker_column_node->getColumnName()); } } else if (const auto * marker_id_node = arguments[1]->as(); diff --git a/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.reference b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.reference new file mode 100644 index 000000000000..8c2ebec53ff8 --- /dev/null +++ b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.reference @@ -0,0 +1,35 @@ +case1_global_in_unnamed_identical_derived_subqueries +1 +case2_global_join_unnamed_identical_derived_subqueries +id left_b0 right_b0 +1 10 20 +case3_global_join_unnamed_identical_derived_subqueries_serialize_query_plan +id left_b0 right_b0 +1 10 20 +case4_global_join_unnamed_remote_over_distributed_subqueries +id left_b0 right_b0 +1 10 20 +case5_global_join_unnamed_identical_dual_alias_columns +id left_b0 right_b1 +1 10 20 +case6_local_join_unnamed_identical_derived_subqueries +id left_b0 right_b0 +1 10 20 +case7_local_join_unnamed_identical_derived_subqueries_serialize_query_plan +id left_b0 right_b0 +1 10 20 +case8_global_join_direct_distributed_serialize_query_plan +id b0 b1 +1 10 10 +2 20 20 +case9_global_join_direct_remote_over_distributed_serialize_query_plan +id b0 b1 +1 10 10 +2 20 20 +case10_wrapper_alias_subquery_serialize_query_plan +id left_foo right_foo +1 1 20 +case11_wrapper_constant_alias_subquery_serialize_query_plan +id left_foo right_foo +1 foo foo +2 foo foo diff --git a/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.sql b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.sql new file mode 100644 index 000000000000..2ac9a8a65fe3 --- /dev/null +++ b/tests/queries/0_stateless/03920_distributed_global_alias_marker_matrix.sql @@ -0,0 +1,297 @@ +DROP TABLE IF EXISTS test_marker_suite_main_dist; +DROP TABLE IF EXISTS test_marker_suite_side_dist; +DROP TABLE IF EXISTS test_marker_suite_main; +DROP TABLE IF EXISTS 
test_marker_suite_side; +DROP TABLE IF EXISTS test_wrapper_alias_a_dist; +DROP TABLE IF EXISTS test_wrapper_alias_b_dist; +DROP TABLE IF EXISTS test_wrapper_alias_a_local; +DROP TABLE IF EXISTS test_wrapper_alias_b_local; +DROP TABLE IF EXISTS test_wrapper_const_alias_a_dist; +DROP TABLE IF EXISTS test_wrapper_const_alias_b_dist; +DROP TABLE IF EXISTS test_wrapper_const_alias_a_local; +DROP TABLE IF EXISTS test_wrapper_const_alias_b_local; + +CREATE TABLE test_marker_suite_main +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_suite_main VALUES (1), (2); + +CREATE TABLE test_marker_suite_side +( + id UInt64, + x UInt64, + b0 UInt64 ALIAS x, + b1 UInt64 ALIAS x +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_suite_side VALUES (1, 10), (2, 20); + +CREATE TABLE test_marker_suite_main_dist AS test_marker_suite_main +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_suite_main, rand()); + +CREATE TABLE test_marker_suite_side_dist AS test_marker_suite_side +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_suite_side, rand()); + +SELECT 'case1_global_in_unnamed_identical_derived_subqueries'; +SELECT id +FROM test_marker_suite_main_dist +WHERE id GLOBAL IN +( + SELECT left_id + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) +ORDER BY id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1; + +SELECT 'case2_global_join_unnamed_identical_derived_subqueries'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM 
test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'case3_global_join_unnamed_identical_derived_subqueries_serialize_query_plan'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'case4_global_join_unnamed_remote_over_distributed_subqueries'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM remote('127.0.0.2', currentDatabase(), test_marker_suite_side_dist)) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM remote('127.0.0.2', currentDatabase(), test_marker_suite_side_dist)) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'case5_global_join_unnamed_identical_dual_alias_columns'; +SELECT m.id, j.left_b0, j.right_b1 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN +( + SELECT left_id AS id, left_b0, right_b1 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b1 AS right_b1 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b1 = 30 + 
SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1 +FORMAT TSVWithNames; + +SELECT 'case6_local_join_unnamed_identical_derived_subqueries'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, distributed_product_mode = 'local' +FORMAT TSVWithNames; + +SELECT 'case7_local_join_unnamed_identical_derived_subqueries_serialize_query_plan'; +SELECT m.id, j.left_b0, j.right_b0 +FROM test_marker_suite_main_dist AS m +INNER JOIN +( + SELECT left_id AS id, left_b0, right_b0 + FROM + (SELECT id AS left_id, b0 AS left_b0 FROM test_marker_suite_side_dist) + INNER JOIN + (SELECT id AS right_id, b0 AS right_b0 FROM test_marker_suite_side_dist) + ON left_id < right_id + WHERE left_b0 + right_b0 = 30 + SETTINGS joined_subquery_requires_alias = 0 +) AS j +ON m.id = j.id +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, distributed_product_mode = 'local', serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'case8_global_join_direct_distributed_serialize_query_plan'; +SELECT m.id, b0, b1 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN test_marker_suite_side_dist USING (id) +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, asterisk_include_alias_columns = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'case9_global_join_direct_remote_over_distributed_serialize_query_plan'; +SELECT m.id, b0, b1 +FROM test_marker_suite_main_dist AS m +GLOBAL INNER JOIN remote('127.0.0.2', currentDatabase(), 
test_marker_suite_side_dist) USING (id) +ORDER BY m.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, asterisk_include_alias_columns = 1, joined_subquery_requires_alias = 0, serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_marker_suite_main_dist; +DROP TABLE test_marker_suite_side_dist; +DROP TABLE test_marker_suite_main; +DROP TABLE test_marker_suite_side; + +CREATE TABLE test_wrapper_alias_a_local +( + id UInt64, + x UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_wrapper_alias_b_local +( + id UInt64, + x UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_wrapper_alias_a_local VALUES (1, 1), (2, 20); +INSERT INTO test_wrapper_alias_b_local VALUES (1, 1), (2, 20); + +CREATE TABLE test_wrapper_alias_a_dist +( + id UInt64, + x UInt64, + foo UInt64 ALIAS x +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_alias_a_local, rand()); + +CREATE TABLE test_wrapper_alias_b_dist +( + id UInt64, + x UInt64, + foo UInt64 ALIAS x +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_alias_b_local, rand()); + +SELECT 'case10_wrapper_alias_subquery_serialize_query_plan'; +SELECT a.id, j.left_foo, j.right_foo +FROM test_wrapper_alias_a_dist AS a +GLOBAL INNER JOIN +( + SELECT l.id, l.foo AS left_foo, r.foo AS right_foo + FROM test_wrapper_alias_a_dist AS l + INNER JOIN test_wrapper_alias_b_dist AS r ON l.id < r.id + WHERE l.foo + r.foo = 21 +) AS j +ON a.id = j.id +ORDER BY a.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_wrapper_alias_a_dist; +DROP TABLE test_wrapper_alias_b_dist; +DROP TABLE test_wrapper_alias_a_local; +DROP TABLE test_wrapper_alias_b_local; + +CREATE TABLE test_wrapper_const_alias_a_local +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_wrapper_const_alias_b_local +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO 
test_wrapper_const_alias_a_local VALUES (1), (2); +INSERT INTO test_wrapper_const_alias_b_local VALUES (1), (2); + +CREATE TABLE test_wrapper_const_alias_a_dist +( + id UInt64, + foo String ALIAS 'foo' +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_const_alias_a_local, rand()); + +CREATE TABLE test_wrapper_const_alias_b_dist +( + id UInt64, + foo String ALIAS 'foo' +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_wrapper_const_alias_b_local, rand()); + +SELECT 'case11_wrapper_constant_alias_subquery_serialize_query_plan'; +SELECT a.id, j.left_foo, j.right_foo +FROM test_wrapper_const_alias_a_dist AS a +GLOBAL INNER JOIN +( + SELECT l.id, l.foo AS left_foo, r.foo AS right_foo + FROM test_wrapper_const_alias_a_dist AS l + INNER JOIN test_wrapper_const_alias_b_dist AS r ON l.id = r.id + WHERE l.foo = 'foo' AND r.foo = 'foo' +) AS j +ON a.id = j.id +ORDER BY a.id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_wrapper_const_alias_a_dist; +DROP TABLE test_wrapper_const_alias_b_dist; +DROP TABLE test_wrapper_const_alias_a_local; +DROP TABLE test_wrapper_const_alias_b_local; From 4f6d06cdf71bcfcf122acd67062f187ac612a47b Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 11 Mar 2026 20:02:57 +0100 Subject: [PATCH 06/32] Preserve nested __aliasMarker chains across hops Signed-off-by: Mikhail Filimonov --- src/Analyzer/Utils.cpp | 85 +++---------------------- src/Analyzer/Utils.h | 9 +-- src/Functions/identity.h | 3 +- src/Storages/StorageDistributed.cpp | 8 --- src/Storages/buildQueryTreeForShard.cpp | 7 -- 5 files changed, 12 insertions(+), 100 deletions(-) diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index b0e2ead03be4..eeb128f1ece0 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -984,49 +984,9 @@ void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const Stri namespace { -/// 
`materialized_only`: -/// false = strip any marker. -/// true = strip only markers that arrived in the query already materialized (arg2 is String). -/// Markers injected in the current rewrite keep arg2 as ColumnNode until finalization. -bool stripAliasMarker(QueryTreeNodePtr & node, bool materialized_only) -{ - auto * function_node = node->as(); - if (!function_node || function_node->getFunctionName() != "__aliasMarker") - return false; - - auto & arguments = function_node->getArguments().getNodes(); - if (arguments.size() != 2 || !arguments[0] || !arguments[1]) - return false; - - if (materialized_only) - { - const auto * marker_id_node = arguments[1]->as(); - if (!marker_id_node || !isString(marker_id_node->getResultType())) - return false; - } - - auto replacement = arguments[0]; - if (!replacement->hasAlias() && function_node->hasAlias()) - replacement->setAlias(function_node->getAlias()); - - node = std::move(replacement); - return true; -} - -void stripAliasMarkersFromPayloadSubtree(QueryTreeNodePtr & node) -{ - while (stripAliasMarker(node, false)) - {} - - for (auto & child : node->getChildren()) - { - if (child) - stripAliasMarkersFromPayloadSubtree(child); - } -} - /// Finalize __aliasMarker nodes right before distributed SQL boundaries. -/// This pass strips nested markers from arg0 payload and materializes arg2 to String constant. +/// This pass preserves nested markers and materializes arg2 to String constant +/// only when arg2 is ColumnNode. 
class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQueryTreeVisitor { public: @@ -1039,15 +999,11 @@ class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQue return false; } - static bool needChildVisit(const QueryTreeNodePtr & parent_node, const QueryTreeNodePtr &) + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr &) { - auto * function_node = parent_node->as(); - if (!function_node || function_node->getFunctionName() != "__aliasMarker") - return true; - - /// __aliasMarker subtrees are processed explicitly in visitImpl: - /// arg0 is recursively cleaned from nested wrappers and arg2 is materialized in place. - return false; + /// Keep traversing marker payload recursively so nested chains are preserved + /// and each marker can materialize its own arg2 when needed. + return true; } void visitImpl(QueryTreeNodePtr & node) @@ -1060,9 +1016,6 @@ class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQue if (arguments.size() != 2 || !arguments[0] || !arguments[1]) return; - /// Remove nested marker wrappers in payload subtree; keep only this node as the boundary marker. - stripAliasMarkersFromPayloadSubtree(arguments[0]); - String alias_id; if (const auto * marker_column_node = arguments[1]->as()) { @@ -1083,7 +1036,8 @@ class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQue else if (const auto * marker_id_node = arguments[1]->as(); marker_id_node && isString(marker_id_node->getResultType())) { - alias_id = marker_id_node->getValue().safeGet(); + /// Already materialized marker id from a previous hop. Keep as is. + return; } if (alias_id.empty()) @@ -1097,23 +1051,6 @@ class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQue ContextPtr context; }; -/// Strip incoming __aliasMarker wrappers between distributed hops. -/// This keeps marker lifecycle hop-local and avoids forwarding stale previous-hop ids. 
-class StripMaterializedAliasMarkersVisitor : public InDepthQueryTreeVisitor -{ -public: - bool shouldTraverseTopToBottom() const - { - return false; - } - - void visitImpl(QueryTreeNodePtr & node) - { - while (stripAliasMarker(node, true)) - {} - } -}; - } void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context) @@ -1122,12 +1059,6 @@ void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, co visitor.visit(node); } -void stripMaterializedAliasMarkers(QueryTreeNodePtr & node) -{ - StripMaterializedAliasMarkersVisitor visitor; - visitor.visit(node); -} - std::pair getExpressionSource(const QueryTreeNodePtr & node) { if (const auto * column = node->as()) diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 0ace391dc488..2fd2fe85bbe8 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -157,15 +157,10 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin /// Arguments and parameters are taken from the node. void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); -/// Finalize __aliasMarker nodes before distributed SQL boundaries: -/// 1) collapse nested wrappers to keep only current-hop marker id; -/// 2) materialize marker id (arg2) to String ConstantNode. +/// Finalize __aliasMarker nodes before distributed SQL boundaries by materializing +/// marker ids in arg2 from ColumnNode to String ConstantNode when needed. void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context); -/// Remove incoming/materialized __aliasMarker wrappers (arg2 is String ConstantNode), -/// preserving wrapped expressions. -void stripMaterializedAliasMarkers(QueryTreeNodePtr & node); - /// Returns single source of expression node. /// First element of pair is source node, can be nullptr if there are no sources or multiple sources. 
/// Second element of pair is true if there is at most one source, false if there are multiple sources. diff --git a/src/Functions/identity.h b/src/Functions/identity.h index 980b58690028..77f90c6b6a31 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -149,7 +149,8 @@ struct AliasMarkerName * to a String alias identifier. * 3) Consumed on the receiver by adding a projection step where it appears, so that identity is enforced * in actions without changing the user-facing aliasing logic. - * 4) Removed before forwarding to the next hop, then re-injected there only if that hop still needs it. + * 4) Preserved while forwarding to the next hop. Nested marker chains are allowed and each marker may + * contribute an alias step during actions construction. * * This is a temporary bridge while distributed plan transport still relies on SQL text in these paths. * As query plan serialization fully replaces that boundary, this marker path should become unnecessary. diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index ea1f0ff9661b..265b2efc56eb 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -870,14 +870,6 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitoras(); - function_node && function_node->getFunctionName() == "__aliasMarker") - { - auto & arguments = function_node->getArguments().getNodes(); - if (!arguments.empty() && arguments[0]) - column_expression = arguments[0]; - } - QueryTreeNodes arguments; arguments.reserve(2); /// Preserve the original column reference in arg2 so normal analyzer passes diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index be8c48577779..dc27f983d43c 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -39,7 +39,6 @@ namespace Setting extern const SettingsDistributedProductMode distributed_product_mode; extern const 
SettingsUInt64 min_external_table_block_size_rows; extern const SettingsUInt64 min_external_table_block_size_bytes; - extern const SettingsNonZeroUInt64 max_parallel_replicas; extern const SettingsBool parallel_replicas_prefer_local_join; extern const SettingsBool prefer_global_in_and_join; extern const SettingsBool enable_add_distinct_to_in_subqueries; @@ -564,12 +563,6 @@ QueryTreeNodePtr getSubqueryFromTableExpression( QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify, bool allow_global_join_for_right_table) { - /// Incoming materialized markers are usually hop-local metadata. - /// Keep them when this shard is about to fan out again via parallel replicas, - /// because that path reuses the already materialized markers and does not re-inject them. - if (planner_context->getQueryContext()->getSettingsRef()[Setting::max_parallel_replicas] <= 1) - stripMaterializedAliasMarkers(query_tree_to_modify); - CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor; collect_column_source_to_columns_visitor.visit(query_tree_to_modify); From 0271d62ea6c763e70fc2db59636ebfc913608f3e Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 11 Mar 2026 20:38:57 +0100 Subject: [PATCH 07/32] Add distributed-over-distributed double alias marker test Signed-off-by: Mikhail Filimonov --- ..._over_distributed_double_aliases.reference | 56 ++++++++++++++ ...ibuted_over_distributed_double_aliases.sql | 74 +++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference create mode 100644 tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql diff --git a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference new file mode 100644 index 
000000000000..69d7ecdfa9a8 --- /dev/null +++ b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference @@ -0,0 +1,56 @@ +prefer_localhost_replica_0 +x a b c d inner_c inner_d +1 2 2 2 2 1 1 +1 2 2 2 2 1 1 +1 2 2 2 2 1 1 +1 2 2 2 2 1 1 +2 2 2 3 3 1 1 +2 2 2 3 3 1 1 +2 2 2 3 3 1 1 +2 2 2 3 3 1 1 +10 2 2 11 11 1 1 +10 2 2 11 11 1 1 +10 2 2 11 11 1 1 +10 2 2 11 11 1 1 +prefer_localhost_replica_1 +x a b c d inner_c inner_d +1 2 2 2 2 1 1 +1 2 2 2 2 1 1 +1 2 2 2 2 1 1 +1 2 2 2 2 1 1 +2 3 3 3 3 1 1 +2 3 3 3 3 1 1 +2 2 2 3 3 1 1 +2 2 2 3 3 1 1 +10 11 11 11 11 1 1 +10 11 11 11 11 1 1 +10 2 2 11 11 1 1 +10 2 2 11 11 1 1 +prefer_localhost_replica_0_serialize_query_plan_1 +x a b c d inner_c inner_d +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +prefer_localhost_replica_1_serialize_query_plan_1 +x a b c d inner_c inner_d +1 2 2 2 2 1 1 +1 2 2 2 2 1 1 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 3 3 3 3 1 1 +2 3 3 3 3 1 1 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +10 11 11 11 11 1 1 +10 11 11 11 11 1 1 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 diff --git a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql new file mode 100644 index 000000000000..e327f1e1448c --- /dev/null +++ b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql @@ -0,0 +1,74 @@ +DROP TABLE IF EXISTS test_dod_double_alias_outer; +DROP TABLE IF EXISTS test_dod_double_alias_inner; +DROP TABLE IF EXISTS test_dod_double_alias_local; + +CREATE TABLE test_dod_double_alias_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_dod_double_alias_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_double_alias_inner +( + x UInt64, + a UInt64 ALIAS 2, + b UInt64 ALIAS 2, + inner_c UInt64 ALIAS x + 1, + 
inner_d UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_double_alias_local); + +CREATE TABLE test_dod_double_alias_outer +( + x UInt64, + inner_c UInt64, + a UInt64 ALIAS 1, + b UInt64 ALIAS 1, + c UInt64 ALIAS inner_c, + d UInt64 ALIAS inner_c, + inner_d UInt64 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_double_alias_inner); + +SELECT 'prefer_localhost_replica_0'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_0_serialize_query_plan_1'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + serialize_query_plan = 1 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_serialize_query_plan_1'; +SELECT x, a, b, c, d, inner_c, inner_d +FROM test_dod_double_alias_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + serialize_query_plan = 1 +FORMAT TSVWithNames; + +DROP TABLE test_dod_double_alias_outer; +DROP TABLE test_dod_double_alias_inner; +DROP TABLE test_dod_double_alias_local; From 321674ebf061983eb421694d3295cb76e2da69bc Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 12 Mar 2026 16:36:41 +0100 Subject: [PATCH 08/32] Fix remote/local header remap by name when safe - switch PlannerJoinTree header conversion from position to name matching when column names are unique and sets match - add stateless regression 03922 for alias-marker column swap across distributed reads 
Signed-off-by: Mikhail Filimonov --- src/Planner/PlannerJoinTree.cpp | 31 ++++++- ...ributed_alias_marker_column_swap.reference | 32 ++++++++ ...2_distributed_alias_marker_column_swap.sql | 82 +++++++++++++++++++ 3 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference create mode 100644 tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index a602426619b7..bb8baa45c831 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -152,6 +152,25 @@ namespace ErrorCodes namespace { +bool canMatchByNameWithoutAmbiguity(const ColumnsWithTypeAndName & source, const ColumnsWithTypeAndName & result) +{ + if (source.size() != result.size()) + return false; + + NameSet source_names; + NameSet result_names; + + for (const auto & source_column : source) + if (!source_names.insert(source_column.name).second) + return false; + + for (const auto & result_column : result) + if (!result_names.insert(result_column.name).second) + return false; + + return source_names == result_names; +} + /// Check if current user has privileges to SELECT columns from table /// Throws an exception if access to any column from `column_names` is not granted /// If `column_names` is empty, check access to any columns and return names of accessible columns @@ -1573,10 +1592,16 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto expected_block = *expected_header; materializeBlockInplace(expected_block); + const auto & source_columns = query_plan.getCurrentHeader()->getColumnsWithTypeAndName(); + const auto & expected_columns = expected_block.getColumnsWithTypeAndName(); + auto match_columns_mode = canMatchByNameWithoutAmbiguity(source_columns, expected_columns) + ? 
ActionsDAG::MatchColumnsMode::Name + : ActionsDAG::MatchColumnsMode::Position; + auto rename_actions_dag = ActionsDAG::makeConvertingActions( - query_plan.getCurrentHeader()->getColumnsWithTypeAndName(), - expected_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position, + source_columns, + expected_columns, + match_columns_mode, planner_context->getQueryContext(), true /*ignore_constant_values*/, false /*add_cast_columns*/, diff --git a/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference b/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference new file mode 100644 index 000000000000..f3797cb0ce0e --- /dev/null +++ b/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference @@ -0,0 +1,32 @@ +prefer_localhost_replica_0_uint64 +a inner_c +1 2 +1 2 +1 3 +1 3 +1 11 +1 11 +prefer_localhost_replica_0_string +a inner_c +aaaa 2 +aaaa 2 +aaaa 3 +aaaa 3 +aaaa 11 +aaaa 11 +prefer_localhost_replica_1_uint64 +a inner_c +1 2 +1 2 +1 3 +1 3 +1 11 +1 11 +prefer_localhost_replica_1_string +a inner_c +aaaa 2 +aaaa 2 +aaaa 3 +aaaa 3 +aaaa 11 +aaaa 11 diff --git a/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql b/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql new file mode 100644 index 000000000000..ae81724146df --- /dev/null +++ b/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql @@ -0,0 +1,82 @@ +DROP TABLE IF EXISTS test_dod_alias_swap_local; +DROP TABLE IF EXISTS test_dod_alias_swap_inner; + +CREATE TABLE test_dod_alias_swap_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_dod_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_alias_swap_inner +( + x UInt64, + a UInt64 ALIAS 2, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_local); + +SELECT 'prefer_localhost_replica_0_uint64'; +SELECT + 
__aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_0_string'; +SELECT + __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_uint64'; +SELECT + __aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_string'; +SELECT + __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +DROP TABLE test_dod_alias_swap_inner; +DROP TABLE test_dod_alias_swap_local; From 1f62781ad6fea652c4f8cab7af3806a1748743ab Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 12 Mar 2026 16:36:58 +0100 Subject: [PATCH 09/32] Revert "Fix remote/local header remap by name when safe" This reverts commit 
b6cbb20e7f6b1e38557355f350102bc513b35146. Signed-off-by: Mikhail Filimonov --- src/Planner/PlannerJoinTree.cpp | 31 +------ ...ributed_alias_marker_column_swap.reference | 32 -------- ...2_distributed_alias_marker_column_swap.sql | 82 ------------------- 3 files changed, 3 insertions(+), 142 deletions(-) delete mode 100644 tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference delete mode 100644 tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index bb8baa45c831..a602426619b7 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -152,25 +152,6 @@ namespace ErrorCodes namespace { -bool canMatchByNameWithoutAmbiguity(const ColumnsWithTypeAndName & source, const ColumnsWithTypeAndName & result) -{ - if (source.size() != result.size()) - return false; - - NameSet source_names; - NameSet result_names; - - for (const auto & source_column : source) - if (!source_names.insert(source_column.name).second) - return false; - - for (const auto & result_column : result) - if (!result_names.insert(result_column.name).second) - return false; - - return source_names == result_names; -} - /// Check if current user has privileges to SELECT columns from table /// Throws an exception if access to any column from `column_names` is not granted /// If `column_names` is empty, check access to any columns and return names of accessible columns @@ -1592,16 +1573,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto expected_block = *expected_header; materializeBlockInplace(expected_block); - const auto & source_columns = query_plan.getCurrentHeader()->getColumnsWithTypeAndName(); - const auto & expected_columns = expected_block.getColumnsWithTypeAndName(); - auto match_columns_mode = canMatchByNameWithoutAmbiguity(source_columns, expected_columns) - ? 
ActionsDAG::MatchColumnsMode::Name - : ActionsDAG::MatchColumnsMode::Position; - auto rename_actions_dag = ActionsDAG::makeConvertingActions( - source_columns, - expected_columns, - match_columns_mode, + query_plan.getCurrentHeader()->getColumnsWithTypeAndName(), + expected_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position, planner_context->getQueryContext(), true /*ignore_constant_values*/, false /*add_cast_columns*/, diff --git a/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference b/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference deleted file mode 100644 index f3797cb0ce0e..000000000000 --- a/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.reference +++ /dev/null @@ -1,32 +0,0 @@ -prefer_localhost_replica_0_uint64 -a inner_c -1 2 -1 2 -1 3 -1 3 -1 11 -1 11 -prefer_localhost_replica_0_string -a inner_c -aaaa 2 -aaaa 2 -aaaa 3 -aaaa 3 -aaaa 11 -aaaa 11 -prefer_localhost_replica_1_uint64 -a inner_c -1 2 -1 2 -1 3 -1 3 -1 11 -1 11 -prefer_localhost_replica_1_string -a inner_c -aaaa 2 -aaaa 2 -aaaa 3 -aaaa 3 -aaaa 11 -aaaa 11 diff --git a/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql b/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql deleted file mode 100644 index ae81724146df..000000000000 --- a/tests/queries/0_stateless/03922_distributed_alias_marker_column_swap.sql +++ /dev/null @@ -1,82 +0,0 @@ -DROP TABLE IF EXISTS test_dod_alias_swap_local; -DROP TABLE IF EXISTS test_dod_alias_swap_inner; - -CREATE TABLE test_dod_alias_swap_local -( - x UInt64 -) -ENGINE = MergeTree() -ORDER BY x; - -INSERT INTO test_dod_alias_swap_local VALUES (1), (2), (10); - -CREATE TABLE test_dod_alias_swap_inner -( - x UInt64, - a UInt64 ALIAS 2, - inner_c UInt64 ALIAS x + 1 -) -ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_local); - -SELECT 'prefer_localhost_replica_0_uint64'; -SELECT - 
__aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, - __table1.inner_c AS inner_c -FROM test_dod_alias_swap_inner AS __table1 -ORDER BY __table1.x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 1, - prefer_localhost_replica = 0, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -SELECT 'prefer_localhost_replica_0_string'; -SELECT - __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, - __table1.inner_c AS inner_c -FROM test_dod_alias_swap_inner AS __table1 -ORDER BY __table1.x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 1, - prefer_localhost_replica = 0, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -SELECT 'prefer_localhost_replica_1_uint64'; -SELECT - __aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, - __table1.inner_c AS inner_c -FROM test_dod_alias_swap_inner AS __table1 -ORDER BY __table1.x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 1, - prefer_localhost_replica = 1, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -SELECT 'prefer_localhost_replica_1_string'; -SELECT - __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, - __table1.inner_c AS inner_c -FROM test_dod_alias_swap_inner AS __table1 -ORDER BY __table1.x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 1, - prefer_localhost_replica = 1, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -DROP TABLE test_dod_alias_swap_inner; -DROP TABLE test_dod_alias_swap_local; From 24797c4938f2a15ad1ca5aba09459e61dd0976a8 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 12 Mar 2026 17:40:04 +0100 Subject: [PATCH 10/32] Add DoD regression for alias column swap with enable_alias_marker=0 Signed-off-by: Mikhail 
Filimonov --- ...alias_column_swap_without_marker.reference | 56 +++++++++++ ...buted_alias_column_swap_without_marker.sql | 96 +++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference create mode 100644 tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql diff --git a/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference b/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference new file mode 100644 index 000000000000..1e2e9b11750a --- /dev/null +++ b/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference @@ -0,0 +1,56 @@ +prefer_localhost_replica_0_uint64 +x a_num inner_c +1 1 2 +1 1 2 +1 1 2 +1 1 2 +2 1 3 +2 1 3 +2 1 3 +2 1 3 +10 1 11 +10 1 11 +10 1 11 +10 1 11 +prefer_localhost_replica_0_string +x a_str inner_c +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +prefer_localhost_replica_1_uint64 +x a_num inner_c +1 1 2 +1 1 2 +1 1 2 +1 1 2 +2 1 3 +2 1 3 +2 1 3 +2 1 3 +10 1 11 +10 1 11 +10 1 11 +10 1 11 +prefer_localhost_replica_1_string +x a_str inner_c +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 diff --git a/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql b/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql new file mode 100644 index 000000000000..84ad3cf170d0 --- /dev/null +++ b/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql @@ -0,0 +1,96 @@ +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_outer; +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_inner; +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_local; + +CREATE TABLE test_dod_alias_swap_no_marker_local +( + x UInt64 +) 
+ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_dod_alias_swap_no_marker_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_alias_swap_no_marker_inner +( + x UInt64, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_local); + +CREATE TABLE test_dod_alias_swap_no_marker_outer +( + x UInt64, + inner_c UInt64, + a_num UInt64 ALIAS 1, + a_str String ALIAS 'aaaa' +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_inner); + +SELECT 'prefer_localhost_replica_0_uint64'; +SELECT + x, + a_num, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_0_string'; +SELECT + x, + a_str, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_uint64'; +SELECT + x, + a_num, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_string'; +SELECT + x, + a_str, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +DROP TABLE 
test_dod_alias_swap_no_marker_outer; +DROP TABLE test_dod_alias_swap_no_marker_inner; +DROP TABLE test_dod_alias_swap_no_marker_local; From b89d56e59a3362659f393bb02d9d2d5914f158df Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 12 Mar 2026 18:47:12 +0100 Subject: [PATCH 11/32] tests: add DoD+parallel replicas alias column-swap reproducer Signed-off-by: Mikhail Filimonov --- ...l_replicas_dod_alias_column_swap.reference | 20 ++++ ...arallel_replicas_dod_alias_column_swap.sql | 94 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference create mode 100644 tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql diff --git a/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference b/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference new file mode 100644 index 000000000000..228ac5f667f7 --- /dev/null +++ b/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference @@ -0,0 +1,20 @@ +no_pr_uint64 +x a_num inner_c +1 1 2 +2 1 3 +10 1 11 +no_pr_string +x a_str inner_c +1 aaaa 2 +2 aaaa 3 +10 aaaa 11 +pr_uint64 +x a_num inner_c +1 1 2 +2 1 3 +10 1 11 +pr_string +x a_str inner_c +1 aaaa 2 +2 aaaa 3 +10 aaaa 11 diff --git a/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql b/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql new file mode 100644 index 000000000000..070330e98826 --- /dev/null +++ b/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql @@ -0,0 +1,94 @@ +DROP TABLE IF EXISTS test_pr_dod_alias_swap_outer; +DROP TABLE IF EXISTS test_pr_dod_alias_swap_inner; +DROP TABLE IF EXISTS test_pr_dod_alias_swap_local; + +CREATE TABLE test_pr_dod_alias_swap_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_pr_dod_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE 
test_pr_dod_alias_swap_inner +( + x UInt64, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_pr_dod_alias_swap_local); + +CREATE TABLE test_pr_dod_alias_swap_outer +( + x UInt64, + inner_c UInt64, + a_num UInt64 ALIAS 1, + a_str String ALIAS 'aaaa' +) +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_pr_dod_alias_swap_inner); + +SELECT 'no_pr_uint64'; +SELECT x, a_num, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 0, + allow_experimental_parallel_reading_from_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + parallel_replicas_for_non_replicated_merge_tree = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'no_pr_string'; +SELECT x, a_str, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 0, + allow_experimental_parallel_reading_from_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + parallel_replicas_for_non_replicated_merge_tree = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'pr_uint64'; +SELECT x, a_num, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 2, + allow_experimental_parallel_reading_from_replicas = 2, + max_parallel_replicas = 3, + parallel_replicas_local_plan = 1, + parallel_replicas_for_non_replicated_merge_tree = 1, + parallel_replicas_min_number_of_rows_per_replica = 0, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'pr_string'; +SELECT x, a_str, inner_c +FROM 
test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 2, + allow_experimental_parallel_reading_from_replicas = 2, + max_parallel_replicas = 3, + parallel_replicas_local_plan = 1, + parallel_replicas_for_non_replicated_merge_tree = 1, + parallel_replicas_min_number_of_rows_per_replica = 0, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +DROP TABLE test_pr_dod_alias_swap_outer; +DROP TABLE test_pr_dod_alias_swap_inner; +DROP TABLE test_pr_dod_alias_swap_local; From 86c88ed280dc586cbc143201becda6ebeca6c064 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 12 Mar 2026 20:12:34 +0100 Subject: [PATCH 12/32] Planner: prefer name-based conversion with position fallback Add a planner utility that first tries header conversion by column name and falls back to position when name matching is not possible. Use it in PlannerJoinTree and parallel replicas query planning to handle duplicate alias-heavy schemas more robustly, while emitting trace diagnostics when position fallback is used. Update 03921 distributed-over-distributed alias tests and pin parallel-replica settings for deterministic results.
Signed-off-by: Mikhail Filimonov --- src/Planner/PlannerJoinTree.cpp | 10 +- src/Planner/Utils.cpp | 100 ++++++++++++++++++ src/Planner/Utils.h | 19 ++++ src/Planner/findParallelReplicasQuery.cpp | 10 +- ..._over_distributed_double_aliases.reference | 60 +++++------ ...ibuted_over_distributed_double_aliases.sql | 22 +++- 6 files changed, 181 insertions(+), 40 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index a602426619b7..53797bb055cd 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1573,11 +1573,13 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto expected_block = *expected_header; materializeBlockInplace(expected_block); - auto rename_actions_dag = ActionsDAG::makeConvertingActions( - query_plan.getCurrentHeader()->getColumnsWithTypeAndName(), - expected_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position, + const auto & source_columns = query_plan.getCurrentHeader()->getColumnsWithTypeAndName(); + const auto & result_columns = expected_block.getColumnsWithTypeAndName(); + auto rename_actions_dag = makeConvertingActionsPreferNameThenPosition( + source_columns, + result_columns, planner_context->getQueryContext(), + "PlannerJoinTree", true /*ignore_constant_values*/, false /*add_cast_columns*/, nullptr /*new_names*/); diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 660a33ed5d7e..200878b48181 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -51,6 +51,7 @@ #include #include #include +#include namespace DB { @@ -708,4 +709,103 @@ QueryPlanStepPtr projectOnlyUsedColumns( return step; } +void logPositionConversionMismatch( + const ColumnsWithTypeAndName & source_columns, + const ColumnsWithTypeAndName & result_columns, + const ContextPtr & context, + std::string_view location) +{ + static auto log = getLogger("PositionConversion"); + + if (source_columns.size() != result_columns.size()) + 
{ + LOG_TRACE( + log, + "Position conversion fallback at {}. query_id={} columns_count_mismatch source={} result={} source_header=[{}] result_header=[{}]", + location, + context ? context->getCurrentQueryId() : "", + source_columns.size(), + result_columns.size(), + Block(source_columns).dumpNames(), + Block(result_columns).dumpNames()); + return; + } + + std::vector mismatches; + mismatches.reserve(source_columns.size()); + + for (size_t i = 0; i < source_columns.size(); ++i) + { + const auto & source_column = source_columns[i]; + const auto & result_column = result_columns[i]; + + if (source_column.name == result_column.name && source_column.type->equals(*result_column.type)) + continue; + + mismatches.push_back(fmt::format( + "#{} {}:{} -> {}:{}", + i, + source_column.name, + source_column.type->getName(), + result_column.name, + result_column.type->getName())); + } + + if (mismatches.empty()) + return; + + LOG_TRACE( + log, + "Position conversion fallback at {}. query_id={} source_header=[{}] result_header=[{}] mismatches=[{}]", + location, + context ? context->getCurrentQueryId() : "", + Block(source_columns).dumpNames(), + Block(result_columns).dumpNames(), + fmt::join(mismatches, "; ")); +} + +ActionsDAG makeConvertingActionsPreferNameThenPosition( + const ColumnsWithTypeAndName & source_columns, + const ColumnsWithTypeAndName & result_columns, + const ContextPtr & context, + std::string_view location, + bool ignore_constant_values, + bool add_cast_columns, + NameToNameMap * new_names) +{ + static auto log = getLogger("PositionConversion"); + + try + { + return ActionsDAG::makeConvertingActions( + source_columns, + result_columns, + ActionsDAG::MatchColumnsMode::Name, + context, + ignore_constant_values, + add_cast_columns, + new_names); + } + catch (const Exception & e) + { + LOG_TRACE( + log, + "Name conversion is not possible at {}. query_id={} reason={}", + location, + context ? 
context->getCurrentQueryId() : "", + e.message()); + + logPositionConversionMismatch(source_columns, result_columns, context, location); + + return ActionsDAG::makeConvertingActions( + source_columns, + result_columns, + ActionsDAG::MatchColumnsMode::Position, + context, + ignore_constant_values, + add_cast_columns, + new_names); + } +} + } diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 97385e9f56fc..f7ac27ae63e5 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -23,6 +23,8 @@ #include +#include + namespace DB { @@ -124,4 +126,21 @@ QueryPlanStepPtr projectOnlyUsedColumns( const SharedHeader & stream_header, const ColumnIdentifiers & used_column_identifiers); +/// Trace-report mismatches before Position-based conversion. +void logPositionConversionMismatch( + const ColumnsWithTypeAndName & source_columns, + const ColumnsWithTypeAndName & result_columns, + const ContextPtr & context, + std::string_view location); + +/// Try Name-based conversion first, fallback to Position with detailed trace report. 
+ActionsDAG makeConvertingActionsPreferNameThenPosition( + const ColumnsWithTypeAndName & source_columns, + const ColumnsWithTypeAndName & result_columns, + const ContextPtr & context, + std::string_view location, + bool ignore_constant_values, + bool add_cast_columns, + NameToNameMap * new_names = nullptr); + } diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 93d006da59b5..d0b0c55ff58d 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -534,11 +534,13 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas( storage_limits, nullptr); - auto converting = ActionsDAG::makeConvertingActions( - header->getColumnsWithTypeAndName(), - initial_header->getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position, + const auto & source_columns = header->getColumnsWithTypeAndName(); + const auto & result_columns = initial_header->getColumnsWithTypeAndName(); + auto converting = makeConvertingActionsPreferNameThenPosition( + source_columns, + result_columns, context, + "findParallelReplicasQuery", false /*ignore_constant_values*/, false /*add_cast_columns*/, nullptr /*new_names*/); diff --git a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference index 69d7ecdfa9a8..750abc85a605 100644 --- a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference +++ b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.reference @@ -1,31 +1,31 @@ prefer_localhost_replica_0 x a b c d inner_c inner_d -1 2 2 2 2 1 1 -1 2 2 2 2 1 1 -1 2 2 2 2 1 1 -1 2 2 2 2 1 1 -2 2 2 3 3 1 1 -2 2 2 3 3 1 1 -2 2 2 3 3 1 1 -2 2 2 3 3 1 1 -10 2 2 11 11 1 1 -10 2 2 11 11 1 1 -10 2 2 11 11 1 1 -10 2 2 11 11 1 1 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 
+10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 prefer_localhost_replica_1 x a b c d inner_c inner_d -1 2 2 2 2 1 1 -1 2 2 2 2 1 1 -1 2 2 2 2 1 1 -1 2 2 2 2 1 1 -2 3 3 3 3 1 1 -2 3 3 3 3 1 1 -2 2 2 3 3 1 1 -2 2 2 3 3 1 1 -10 11 11 11 11 1 1 -10 11 11 11 11 1 1 -10 2 2 11 11 1 1 -10 2 2 11 11 1 1 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +2 1 1 3 3 3 3 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 prefer_localhost_replica_0_serialize_query_plan_1 x a b c d inner_c inner_d 1 1 1 2 2 2 2 @@ -42,15 +42,15 @@ x a b c d inner_c inner_d 10 1 1 11 11 11 11 prefer_localhost_replica_1_serialize_query_plan_1 x a b c d inner_c inner_d -1 2 2 2 2 1 1 -1 2 2 2 2 1 1 1 1 1 2 2 2 2 1 1 1 2 2 2 2 -2 3 3 3 3 1 1 -2 3 3 3 3 1 1 +1 1 1 2 2 2 2 +1 1 1 2 2 2 2 +2 1 1 3 3 3 3 2 1 1 3 3 3 3 2 1 1 3 3 3 3 -10 11 11 11 11 1 1 -10 11 11 11 11 1 1 +2 1 1 3 3 3 3 +10 1 1 11 11 11 11 +10 1 1 11 11 11 11 10 1 1 11 11 11 11 10 1 1 11 11 11 11 diff --git a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql index e327f1e1448c..e4bbceb7e6cf 100644 --- a/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql +++ b/tests/queries/0_stateless/03921_distributed_over_distributed_double_aliases.sql @@ -37,14 +37,26 @@ SELECT 'prefer_localhost_replica_0'; SELECT x, a, b, c, d, inner_c, inner_d FROM test_dod_double_alias_outer ORDER BY x -SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0 +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 FORMAT TSVWithNames; SELECT 'prefer_localhost_replica_1'; SELECT x, a, b, c, d, inner_c, inner_d FROM test_dod_double_alias_outer ORDER BY x 
-SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1 +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 FORMAT TSVWithNames; SELECT 'prefer_localhost_replica_0_serialize_query_plan_1'; @@ -55,6 +67,9 @@ SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, serialize_query_plan = 1 FORMAT TSVWithNames; @@ -66,6 +81,9 @@ SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, serialize_query_plan = 1 FORMAT TSVWithNames; From 2c1e90d3fbcf104f112e7848bbff6b73d8363367 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 26 May 2026 22:21:24 +0200 Subject: [PATCH 13/32] test: plain-Distributed alias column-swap reproducer (PlannerJoinTree) Nested ALIAS columns (a2 contains a1's subexpression) over a single-shard Distributed table, matrix over prefer_localhost_replica x serialize_query_plan, with the single-node result as oracle. Already green on this branch (the PlannerJoinTree name-first conversion from b0ce0c9374f covers it) - kept as a regression lock. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- ...0_distributed_alias_swap_planner.reference | 15 ++++++++ .../03930_distributed_alias_swap_planner.sql | 34 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 tests/queries/0_stateless/03930_distributed_alias_swap_planner.reference create mode 100644 tests/queries/0_stateless/03930_distributed_alias_swap_planner.sql diff --git a/tests/queries/0_stateless/03930_distributed_alias_swap_planner.reference b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.reference new file mode 100644 index 000000000000..402cc360bae5 --- /dev/null +++ b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.reference @@ -0,0 +1,15 @@ +local +11 12 +21 22 +dist_prefer0 +11 12 +21 22 +dist_prefer1 +11 12 +21 22 +dist_prefer0_plan +11 12 +21 22 +dist_prefer1_plan +11 12 +21 22 diff --git a/tests/queries/0_stateless/03930_distributed_alias_swap_planner.sql b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.sql new file mode 100644 index 000000000000..848f35b0be14 --- /dev/null +++ b/tests/queries/0_stateless/03930_distributed_alias_swap_planner.sql @@ -0,0 +1,34 @@ +-- Plain Distributed (no Hybrid). Two nested ALIAS columns: a2 contains a1's subexpression, +-- so planner CSE may reorder the remote header. Correct result must equal the single-node +-- ('local') result across every transport variant. 
+DROP TABLE IF EXISTS t_local_03930; +DROP TABLE IF EXISTS t_dist_03930; + +CREATE TABLE t_local_03930 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03930 VALUES (10), (20); + +CREATE TABLE t_dist_03930 AS t_local_03930 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03930); + +SELECT 'local'; +SELECT a1, a2 FROM t_local_03930 ORDER BY a1; + +SELECT 'dist_prefer0'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'dist_prefer1'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1; + +SELECT 'dist_prefer0_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +SELECT 'dist_prefer1_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; + +DROP TABLE t_dist_03930; +DROP TABLE t_local_03930; From ad35dc22292a1b4d66ad0a79c97448fab2d9eee8 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 26 May 2026 22:22:45 +0200 Subject: [PATCH 14/32] test: parallel-replicas alias column-swap reproducer (findParallelReplicasQuery) Nested ALIAS columns over a Distributed table read with parallel replicas, AST and serialized-plan transport, single-node result as oracle. Green on this branch (findParallelReplicasQuery name-first conversion covers it) - kept as a regression lock. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- ...931_parallel_replicas_alias_swap.reference | 9 ++++++ .../03931_parallel_replicas_alias_swap.sql | 32 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference create mode 100644 tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql diff --git a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference new file mode 100644 index 000000000000..c4f5e5a4c4c3 --- /dev/null +++ b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference @@ -0,0 +1,9 @@ +local +11 12 +21 22 +pr_ast +11 12 +21 22 +pr_plan +11 12 +21 22 diff --git a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql new file mode 100644 index 000000000000..a507c8501296 --- /dev/null +++ b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql @@ -0,0 +1,32 @@ +-- Plain Distributed + parallel replicas (no Hybrid). Exercises the findParallelReplicasQuery +-- header reconciliation path with nested ALIAS columns. Correct result equals the single-node +-- ('local') result for both AST and serialized-plan transport. 
+DROP TABLE IF EXISTS t_local_03931; +DROP TABLE IF EXISTS t_dist_03931; + +CREATE TABLE t_local_03931 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03931 VALUES (10), (20); + +CREATE TABLE t_dist_03931 AS t_local_03931 +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), t_local_03931); + +SELECT 'local'; +SELECT a1, a2 FROM t_local_03931 ORDER BY a1; + +SELECT 'pr_ast'; +SELECT a1, a2 FROM t_dist_03931 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, + allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + serialize_query_plan = 0; + +SELECT 'pr_plan'; +SELECT a1, a2 FROM t_dist_03931 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, + allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + serialize_query_plan = 1; + +DROP TABLE t_dist_03931; +DROP TABLE t_local_03931; From 82831da6e63f5f24a9f7157d0a74cf43ec09dd73 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 26 May 2026 22:24:21 +0200 Subject: [PATCH 15/32] test: natural Merge-over-Distributed alias reproducer (failing) Rewrites 03928 as a black-box test: nested ALIAS columns read through a Merge over a Distributed table, no explicit __aliasMarker, single-node result as oracle. Currently FAILS - alias columns come back as 0 through StorageMerge reconciliation. Probe .stderr/.stdout artifacts dropped. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- ...ributed_alias_marker_column_swap.reference | 32 +++++++++++ ...r_distributed_alias_marker_column_swap.sql | 54 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference create mode 100644 tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql diff --git a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference new file mode 100644 index 000000000000..6ff1dd7f287d --- /dev/null +++ b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference @@ -0,0 +1,32 @@ +local +2 3 +3 4 +11 12 +merge_prefer0 +2 3 +2 3 +3 4 +3 4 +11 12 +11 12 +merge_prefer1 +2 3 +2 3 +3 4 +3 4 +11 12 +11 12 +merge_prefer0_plan +2 3 +2 3 +3 4 +3 4 +11 12 +11 12 +merge_prefer1_plan +2 3 +2 3 +3 4 +3 4 +11 12 +11 12 diff --git a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql new file mode 100644 index 000000000000..7769b38607be --- /dev/null +++ b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql @@ -0,0 +1,54 @@ +-- Plain Merge over Distributed over MergeTree (no Hybrid, no explicit __aliasMarker). +-- Nested ALIAS columns (b contains a's subexpression). Reading the alias columns through the +-- Merge table must reconcile the child (Distributed) header by name; a positional reconciliation +-- in StorageMerge::convertAndFilterSourceStream would swap the columns. Correct result equals the +-- single-node ('local') result. test_cluster_two_shards has two shards, so distributed/merge +-- blocks return each row twice. 
+DROP TABLE IF EXISTS test_merge_alias_swap_merge; +DROP TABLE IF EXISTS test_merge_alias_swap_dist; +DROP TABLE IF EXISTS test_merge_alias_swap_local; + +CREATE TABLE test_merge_alias_swap_local +( + x UInt64, + a UInt64 ALIAS x + 1, + b UInt64 ALIAS a + 1 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_merge_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_merge_alias_swap_dist AS test_merge_alias_swap_local +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_merge_alias_swap_local); + +CREATE TABLE test_merge_alias_swap_merge +( + x UInt64, + a UInt64, + b UInt64 +) +ENGINE = Merge(currentDatabase(), '^test_merge_alias_swap_dist$'); + +SELECT 'local'; +SELECT a, b FROM test_merge_alias_swap_local ORDER BY x; + +SELECT 'merge_prefer0'; +SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'merge_prefer1'; +SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1; + +SELECT 'merge_prefer0_plan'; +SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +SELECT 'merge_prefer1_plan'; +SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; + +DROP TABLE test_merge_alias_swap_merge; +DROP TABLE test_merge_alias_swap_dist; +DROP TABLE test_merge_alias_swap_local; From 324d41da8c1300793f9b7324824f5de5f387eade Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 26 May 2026 22:55:13 +0200 Subject: [PATCH 16/32] Fix StorageMerge alias columns over Distributed by emitting identifier names When a Merge table reads an underlying Distributed table with ALIAS columns, the alias expressions in convertAndFilterSourceStream were 
emitted under their plain logical name (e.g. `a`), while the Merge reconciliation expects analyzer column identifiers (e.g. `__table1.a`). The identifier columns were therefore treated as missing and filled with defaults, yielding wrong results (zeros). Map each alias's plain logical name to the unambiguous target-header identifier and emit the alias output under that identifier, so the downstream header reconciliation matches by name with no positional fallback. Makes the header correct by construction; any extra expression columns from the child are simply dropped by name matching. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- src/Storages/StorageMerge.cpp | 99 +++++++++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 329882a13a1e..c25441db0e0e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1569,9 +1569,54 @@ void ReadFromMerge::convertAndFilterSourceStream( if (local_context->getSettingsRef()[Setting::allow_experimental_analyzer]) { + /// The Merge table expects its columns under analyzer identifiers (e.g. `__table1.a`), + /// while an alias expression is keyed by its plain logical name (e.g. `a`). Map each plain + /// logical name to the unambiguous target-header identifier so the alias output below is + /// emitted under the identifier the downstream reconciliation matches by name. + std::unordered_map logical_name_to_header_name; + std::unordered_set ambiguous_logical_names; + for (const auto & column : header) + { + auto last_dot_pos = column.name.rfind('.'); + String logical_name = (last_dot_pos == String::npos || last_dot_pos + 1 >= column.name.size()) + ? 
column.name + : column.name.substr(last_dot_pos + 1); + if (!logical_name_to_header_name.emplace(logical_name, column.name).second) + ambiguous_logical_names.insert(logical_name); + } + for (const auto & ambiguous : ambiguous_logical_names) + logical_name_to_header_name.erase(ambiguous); + for (const auto & alias : aliases) { ActionsDAG actions_dag(pipe_columns); + std::unordered_map short_name_to_node; + std::unordered_set ambiguous_short_names; + std::unordered_set existing_input_names; + for (const auto * input : actions_dag.getInputs()) + { + existing_input_names.insert(input->result_name); + + const auto & input_name = input->result_name; + auto last_dot_pos = input_name.rfind('.'); + if (last_dot_pos == String::npos || last_dot_pos + 1 >= input_name.size()) + continue; + + auto short_name = input_name.substr(last_dot_pos + 1); + if (!short_name_to_node.emplace(short_name, input).second) + ambiguous_short_names.insert(short_name); + } + + for (const auto & ambiguous_short_name : ambiguous_short_names) + short_name_to_node.erase(ambiguous_short_name); + + for (const auto & [short_name, input] : short_name_to_node) + { + if (existing_input_names.contains(short_name)) + continue; + + actions_dag.addAlias(*input, short_name); + } QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); query_tree->setAlias(alias.name); @@ -1580,13 +1625,15 @@ void ReadFromMerge::convertAndFilterSourceStream( query_analysis_pass.run(query_tree, local_context); ColumnNodePtrWithHashSet empty_correlated_columns_set; - PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, empty_correlated_columns_set, false /*use_column_identifier_as_action_node_name*/); + PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, empty_correlated_columns_set, true /*use_column_identifier_as_action_node_name*/); const auto & [nodes, _] = actions_visitor.visit(actions_dag, query_tree); if (nodes.size() != 1) throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); - actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), alias.name)); + auto output_name_it = logical_name_to_header_name.find(alias.name); + const String & output_name = output_name_it != logical_name_to_header_name.end() ? output_name_it->second : alias.name; + actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), output_name)); auto expression_step = std::make_unique(child.plan.getCurrentHeader(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); } @@ -1636,9 +1683,48 @@ void ReadFromMerge::convertAndFilterSourceStream( }; String smallest_column_name = ExpressionActions::getSmallestColumn(snapshot->metadata->getColumns().getAllPhysical()).name; + auto get_short_name = [](std::string_view full_name) -> std::string_view + { + auto pos = full_name.find_last_of('.'); + if (pos == std::string_view::npos || pos + 1 >= full_name.size()) + return {}; + return full_name.substr(pos + 1); + }; + + std::unordered_map short_name_count; + short_name_count.reserve(header.columns()); + for (const auto & column : header) + { + auto short_name = get_short_name(column.name); + if (!short_name.empty()) + ++short_name_count[short_name]; + } + + auto find_header_column = [&](const ColumnWithTypeAndName & source_elem) -> std::optional + { + if (header.has(source_elem.name)) + return header.getByName(source_elem.name); + + auto short_name = get_short_name(source_elem.name); + if (!short_name.empty() && short_name_count[short_name] == 1) + { + std::string short_name_str(short_name); + if (header.has(short_name_str)) + return header.getByName(short_name_str); + } + + return std::nullopt; + }; + for (size_t i = 0; i < size; ++i) { const auto & source_elem = current_step_columns[i]; + auto header_column_opt = find_header_column(source_elem); + if (header_column_opt) + { + 
converted_columns.push_back(materializeIfSourceIsNotConst(*header_column_opt, source_elem)); + continue; + } if (header.has(source_elem.name)) { converted_columns.push_back(materializeIfSourceIsNotConst(header.getByName(source_elem.name), source_elem)); @@ -1660,11 +1746,14 @@ void ReadFromMerge::convertAndFilterSourceStream( } } - auto convert_actions_dag = ActionsDAG::makeConvertingActions( + auto convert_actions_dag = makeConvertingActionsPreferNameThenPosition( current_step_columns, converted_columns, - ActionsDAG::MatchColumnsMode::Position, - local_context); + local_context, + "StorageMerge", + false /*ignore_constant_values*/, + false /*add_cast_columns*/, + nullptr /*new_names*/); auto expression_step = std::make_unique(child.plan.getCurrentHeader(), std::move(convert_actions_dag)); child.plan.addStep(std::move(expression_step)); From 67e461d07688482e33c62e3576042545c1fee5de Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 26 May 2026 23:03:14 +0200 Subject: [PATCH 17/32] test: distributed alias reorder + computed-expression correctness Reorders two nested ALIAS columns and adds a computed expression over them, read through a Distributed table (AST and serialized-plan transport), with the single-node result as oracle. Passes via name-first header reconciliation. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- ...32_distributed_alias_strict_name.reference | 9 +++++++ .../03932_distributed_alias_strict_name.sql | 27 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 tests/queries/0_stateless/03932_distributed_alias_strict_name.reference create mode 100644 tests/queries/0_stateless/03932_distributed_alias_strict_name.sql diff --git a/tests/queries/0_stateless/03932_distributed_alias_strict_name.reference b/tests/queries/0_stateless/03932_distributed_alias_strict_name.reference new file mode 100644 index 000000000000..cddf594d4e31 --- /dev/null +++ b/tests/queries/0_stateless/03932_distributed_alias_strict_name.reference @@ -0,0 +1,9 @@ +local +12 11 23 +22 21 43 +dist +12 11 23 +22 21 43 +dist_plan +12 11 23 +22 21 43 diff --git a/tests/queries/0_stateless/03932_distributed_alias_strict_name.sql b/tests/queries/0_stateless/03932_distributed_alias_strict_name.sql new file mode 100644 index 000000000000..c094d28f01e0 --- /dev/null +++ b/tests/queries/0_stateless/03932_distributed_alias_strict_name.sql @@ -0,0 +1,27 @@ +-- Plain Distributed (no Hybrid). Reorders alias columns and mixes a computed expression over +-- them. With strict name-based header reconciliation (positional fallback disabled), the result +-- must equal the single-node ('local') result for both AST and serialized-plan transport, and no +-- LOGICAL_ERROR must be raised. 
+DROP TABLE IF EXISTS t_local_03932; +DROP TABLE IF EXISTS t_dist_03932; + +CREATE TABLE t_local_03932 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03932 VALUES (10), (20); + +CREATE TABLE t_dist_03932 AS t_local_03932 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03932); + +SELECT 'local'; +SELECT a2, a1, a1 + a2 AS s FROM t_local_03932 ORDER BY x; + +SELECT 'dist'; +SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'dist_plan'; +SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +DROP TABLE t_dist_03932; +DROP TABLE t_local_03932; From ea516c3838566d74b435fa0b446a1893f36acfe7 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 26 May 2026 23:18:00 +0200 Subject: [PATCH 18/32] test: Hybrid unknown-table regression for issues #1208 #1209 #1422 Black-box regression trip-wires for the Hybrid unknown-table scenarios from Altinity/ClickHouse issues #1208, #1209, #1422 (verified fixed on this branch). 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- ...nown_table_issues_1208_1209_1422.reference | 8 ++ ...id_unknown_table_issues_1208_1209_1422.sql | 113 ++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.reference create mode 100644 tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql diff --git a/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.reference b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.reference new file mode 100644 index 000000000000..5155d27310c2 --- /dev/null +++ b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.reference @@ -0,0 +1,8 @@ +issue_1208_self_in_subquery +5 +issue_1209_join_mode_local +6 +issue_1209_join_mode_allow +6 +issue_1422_hybrid_in_merge_tree_subquery +5 diff --git a/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql new file mode 100644 index 000000000000..20d098b79927 --- /dev/null +++ b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql @@ -0,0 +1,113 @@ +SET allow_experimental_hybrid_table = 1, + enable_analyzer = 1, + prefer_localhost_replica = 0; + +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422; +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422_left; +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422_right; +DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422_mt; + +CREATE TABLE test_hybrid_issue_1208_1209_1422_left +( + string_col String, + long_col Int64, + date_col Date +) +ENGINE = MergeTree +ORDER BY string_col; + +CREATE TABLE test_hybrid_issue_1208_1209_1422_right +( + string_col String, + long_col Int64, + date_col Date +) +ENGINE = MergeTree +ORDER BY string_col; + +CREATE TABLE test_hybrid_issue_1208_1209_1422_mt +( + 
string_col String, + long_col Int64, + date_col Date +) +ENGINE = MergeTree +ORDER BY string_col; + +INSERT INTO test_hybrid_issue_1208_1209_1422_left VALUES + ('William', 9044, toDate('2024-01-01')), + ('Oliver', 1654, toDate('2024-01-01')), + ('Frank', 8751, toDate('2024-01-01')); + +INSERT INTO test_hybrid_issue_1208_1209_1422_right VALUES + ('Louis', 1519, toDate('2024-01-02')), + ('Isaac', 3611, toDate('2024-01-02')); + +INSERT INTO test_hybrid_issue_1208_1209_1422_mt +SELECT * FROM test_hybrid_issue_1208_1209_1422_left +UNION ALL +SELECT * FROM test_hybrid_issue_1208_1209_1422_right; + +CREATE TABLE test_hybrid_issue_1208_1209_1422 +( + string_col String, + long_col Int64, + date_col Date +) +ENGINE = Hybrid( + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1208_1209_1422_left'), date_col <= '2024-01-01', + remote('127.0.0.1:9000', currentDatabase(), 'test_hybrid_issue_1208_1209_1422_right'), date_col > '2024-01-01' +); + +SELECT 'issue_1208_self_in_subquery'; +SELECT count() +FROM +( + SELECT string_col + FROM test_hybrid_issue_1208_1209_1422 + WHERE string_col IN + ( + SELECT DISTINCT string_col + FROM test_hybrid_issue_1208_1209_1422 + WHERE long_col > 1500 + ) +); + +SELECT 'issue_1209_join_mode_local'; +SELECT uniqExact(coalesce(h_string_col, m_string_col)) +FROM +( + SELECT h.string_col AS h_string_col, m.string_col AS m_string_col, h.long_col AS hybrid_long, m.long_col AS mt_long + FROM test_hybrid_issue_1208_1209_1422 AS h + FULL OUTER JOIN test_hybrid_issue_1208_1209_1422_mt AS m ON h.string_col = m.string_col + SETTINGS object_storage_cluster_join_mode = 'local' +); + +SELECT 'issue_1209_join_mode_allow'; +SELECT uniqExact(coalesce(h_string_col, m_string_col)) +FROM +( + SELECT h.string_col AS h_string_col, m.string_col AS m_string_col, h.long_col AS hybrid_long, m.long_col AS mt_long + FROM test_hybrid_issue_1208_1209_1422 AS h + FULL OUTER JOIN test_hybrid_issue_1208_1209_1422_mt AS m ON h.string_col = m.string_col + SETTINGS 
object_storage_cluster_join_mode = 'allow' +); + +SELECT 'issue_1422_hybrid_in_merge_tree_subquery'; +SELECT count() +FROM +( + SELECT string_col + FROM test_hybrid_issue_1208_1209_1422 + WHERE string_col IN + ( + SELECT DISTINCT string_col + FROM test_hybrid_issue_1208_1209_1422_mt + WHERE long_col > 1500 + ) +); + +DROP TABLE test_hybrid_issue_1208_1209_1422; +DROP TABLE test_hybrid_issue_1208_1209_1422_left; +DROP TABLE test_hybrid_issue_1208_1209_1422_right; +DROP TABLE test_hybrid_issue_1208_1209_1422_mt; From 23e8ba66b5d850bc0b492a39650986eba6fd4bd2 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 26 May 2026 23:36:20 +0200 Subject: [PATCH 19/32] StorageMerge: drop redundant short-name reconciliation heuristics Now that alias outputs are emitted under their target identifier, the child header matches the Merge header by full name, so the short-name lookup helpers (get_short_name, short_name_count, the short-name branch in find_header_column) and the unreachable duplicate header.has() branch are dead. Revert convertAndFilterSourceStream to the upstream name -> smallest -> position -> unneeded shape. The position fallback in the converting step is kept (load-bearing for normal distributed aggregation). Verified: 03928/03930/03931/03932/03921/03842/03843/03923 + the Merge regression set (type cast, virtual columns, alias-merge, structure unification, sample factor) all pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- src/Storages/StorageMerge.cpp | 38 ----------------------------------- 1 file changed, 38 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c25441db0e0e..47fe0ab6497f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1683,48 +1683,10 @@ void ReadFromMerge::convertAndFilterSourceStream( }; String smallest_column_name = ExpressionActions::getSmallestColumn(snapshot->metadata->getColumns().getAllPhysical()).name; - auto get_short_name = [](std::string_view full_name) -> std::string_view - { - auto pos = full_name.find_last_of('.'); - if (pos == std::string_view::npos || pos + 1 >= full_name.size()) - return {}; - return full_name.substr(pos + 1); - }; - - std::unordered_map short_name_count; - short_name_count.reserve(header.columns()); - for (const auto & column : header) - { - auto short_name = get_short_name(column.name); - if (!short_name.empty()) - ++short_name_count[short_name]; - } - - auto find_header_column = [&](const ColumnWithTypeAndName & source_elem) -> std::optional - { - if (header.has(source_elem.name)) - return header.getByName(source_elem.name); - - auto short_name = get_short_name(source_elem.name); - if (!short_name.empty() && short_name_count[short_name] == 1) - { - std::string short_name_str(short_name); - if (header.has(short_name_str)) - return header.getByName(short_name_str); - } - - return std::nullopt; - }; for (size_t i = 0; i < size; ++i) { const auto & source_elem = current_step_columns[i]; - auto header_column_opt = find_header_column(source_elem); - if (header_column_opt) - { - converted_columns.push_back(materializeIfSourceIsNotConst(*header_column_opt, source_elem)); - continue; - } if (header.has(source_elem.name)) { converted_columns.push_back(materializeIfSourceIsNotConst(header.getByName(source_elem.name), source_elem)); From 3d20d988005bbb47d0acae4a1a682a1a85fa05b0 Mon 
Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 May 2026 07:08:10 +0200 Subject: [PATCH 20/32] tests: remove contradictory marker-off DoD alias-swap tests (03925, 03926) These tests pinned enable_alias_marker=0 yet asserted non-swapped output. The __aliasMarker is the mechanism that prevents the column swap; with it disabled the swap is expected by design (a String alias routed to a UInt64 column -> CANNOT_PARSE_TEXT), so the references were aspirational and the tests could never pass. Marker-on coverage (03921/03928/03930/03931/03932) already validates the supported configuration. Copies kept under _local_files_and_notes/dropped_tests/ for the record. Signed-off-by: Mikhail Filimonov Co-Authored-By: Claude Opus 4.7 (1M context) --- ...alias_column_swap_without_marker.reference | 56 ----------- ...buted_alias_column_swap_without_marker.sql | 96 ------------------- ...l_replicas_dod_alias_column_swap.reference | 20 ---- ...arallel_replicas_dod_alias_column_swap.sql | 94 ------------------ 4 files changed, 266 deletions(-) delete mode 100644 tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference delete mode 100644 tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql delete mode 100644 tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference delete mode 100644 tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql diff --git a/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference b/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference deleted file mode 100644 index 1e2e9b11750a..000000000000 --- a/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.reference +++ /dev/null @@ -1,56 +0,0 @@ -prefer_localhost_replica_0_uint64 -x a_num inner_c -1 1 2 -1 1 2 -1 1 2 -1 1 2 -2 1 3 -2 1 3 -2 1 3 -2 1 3 -10 1 11 -10 1 11 -10 1 11 -10 1 11 -prefer_localhost_replica_0_string 
-x a_str inner_c -1 aaaa 2 -1 aaaa 2 -1 aaaa 2 -1 aaaa 2 -2 aaaa 3 -2 aaaa 3 -2 aaaa 3 -2 aaaa 3 -10 aaaa 11 -10 aaaa 11 -10 aaaa 11 -10 aaaa 11 -prefer_localhost_replica_1_uint64 -x a_num inner_c -1 1 2 -1 1 2 -1 1 2 -1 1 2 -2 1 3 -2 1 3 -2 1 3 -2 1 3 -10 1 11 -10 1 11 -10 1 11 -10 1 11 -prefer_localhost_replica_1_string -x a_str inner_c -1 aaaa 2 -1 aaaa 2 -1 aaaa 2 -1 aaaa 2 -2 aaaa 3 -2 aaaa 3 -2 aaaa 3 -2 aaaa 3 -10 aaaa 11 -10 aaaa 11 -10 aaaa 11 -10 aaaa 11 diff --git a/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql b/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql deleted file mode 100644 index 84ad3cf170d0..000000000000 --- a/tests/queries/0_stateless/03925_distributed_alias_column_swap_without_marker.sql +++ /dev/null @@ -1,96 +0,0 @@ -DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_outer; -DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_inner; -DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_local; - -CREATE TABLE test_dod_alias_swap_no_marker_local -( - x UInt64 -) -ENGINE = MergeTree() -ORDER BY x; - -INSERT INTO test_dod_alias_swap_no_marker_local VALUES (1), (2), (10); - -CREATE TABLE test_dod_alias_swap_no_marker_inner -( - x UInt64, - inner_c UInt64 ALIAS x + 1 -) -ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_local); - -CREATE TABLE test_dod_alias_swap_no_marker_outer -( - x UInt64, - inner_c UInt64, - a_num UInt64 ALIAS 1, - a_str String ALIAS 'aaaa' -) -ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_inner); - -SELECT 'prefer_localhost_replica_0_uint64'; -SELECT - x, - a_num, - inner_c -FROM test_dod_alias_swap_no_marker_outer -ORDER BY x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 0, - prefer_localhost_replica = 0, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -SELECT 
'prefer_localhost_replica_0_string'; -SELECT - x, - a_str, - inner_c -FROM test_dod_alias_swap_no_marker_outer -ORDER BY x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 0, - prefer_localhost_replica = 0, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -SELECT 'prefer_localhost_replica_1_uint64'; -SELECT - x, - a_num, - inner_c -FROM test_dod_alias_swap_no_marker_outer -ORDER BY x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 0, - prefer_localhost_replica = 1, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -SELECT 'prefer_localhost_replica_1_string'; -SELECT - x, - a_str, - inner_c -FROM test_dod_alias_swap_no_marker_outer -ORDER BY x -SETTINGS - allow_experimental_analyzer = 1, - enable_alias_marker = 0, - prefer_localhost_replica = 1, - enable_parallel_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0 -FORMAT TSVWithNames; - -DROP TABLE test_dod_alias_swap_no_marker_outer; -DROP TABLE test_dod_alias_swap_no_marker_inner; -DROP TABLE test_dod_alias_swap_no_marker_local; diff --git a/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference b/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference deleted file mode 100644 index 228ac5f667f7..000000000000 --- a/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.reference +++ /dev/null @@ -1,20 +0,0 @@ -no_pr_uint64 -x a_num inner_c -1 1 2 -2 1 3 -10 1 11 -no_pr_string -x a_str inner_c -1 aaaa 2 -2 aaaa 3 -10 aaaa 11 -pr_uint64 -x a_num inner_c -1 1 2 -2 1 3 -10 1 11 -pr_string -x a_str inner_c -1 aaaa 2 -2 aaaa 3 -10 aaaa 11 diff --git a/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql b/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql deleted file mode 100644 index 
070330e98826..000000000000 --- a/tests/queries/0_stateless/03926_parallel_replicas_dod_alias_column_swap.sql +++ /dev/null @@ -1,94 +0,0 @@ -DROP TABLE IF EXISTS test_pr_dod_alias_swap_outer; -DROP TABLE IF EXISTS test_pr_dod_alias_swap_inner; -DROP TABLE IF EXISTS test_pr_dod_alias_swap_local; - -CREATE TABLE test_pr_dod_alias_swap_local -( - x UInt64 -) -ENGINE = MergeTree() -ORDER BY x; - -INSERT INTO test_pr_dod_alias_swap_local VALUES (1), (2), (10); - -CREATE TABLE test_pr_dod_alias_swap_inner -( - x UInt64, - inner_c UInt64 ALIAS x + 1 -) -ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_pr_dod_alias_swap_local); - -CREATE TABLE test_pr_dod_alias_swap_outer -( - x UInt64, - inner_c UInt64, - a_num UInt64 ALIAS 1, - a_str String ALIAS 'aaaa' -) -ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_pr_dod_alias_swap_inner); - -SELECT 'no_pr_uint64'; -SELECT x, a_num, inner_c -FROM test_pr_dod_alias_swap_outer -ORDER BY x -SETTINGS - enable_analyzer = 1, - enable_alias_marker = 0, - enable_parallel_replicas = 0, - allow_experimental_parallel_reading_from_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0, - parallel_replicas_for_non_replicated_merge_tree = 1, - cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' -FORMAT TSVWithNames; - -SELECT 'no_pr_string'; -SELECT x, a_str, inner_c -FROM test_pr_dod_alias_swap_outer -ORDER BY x -SETTINGS - enable_analyzer = 1, - enable_alias_marker = 0, - enable_parallel_replicas = 0, - allow_experimental_parallel_reading_from_replicas = 0, - max_parallel_replicas = 1, - parallel_replicas_local_plan = 0, - parallel_replicas_for_non_replicated_merge_tree = 1, - cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' -FORMAT TSVWithNames; - -SELECT 'pr_uint64'; -SELECT x, a_num, inner_c -FROM test_pr_dod_alias_swap_outer -ORDER BY x -SETTINGS - enable_analyzer 
= 1, - enable_alias_marker = 0, - enable_parallel_replicas = 2, - allow_experimental_parallel_reading_from_replicas = 2, - max_parallel_replicas = 3, - parallel_replicas_local_plan = 1, - parallel_replicas_for_non_replicated_merge_tree = 1, - parallel_replicas_min_number_of_rows_per_replica = 0, - cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' -FORMAT TSVWithNames; - -SELECT 'pr_string'; -SELECT x, a_str, inner_c -FROM test_pr_dod_alias_swap_outer -ORDER BY x -SETTINGS - enable_analyzer = 1, - enable_alias_marker = 0, - enable_parallel_replicas = 2, - allow_experimental_parallel_reading_from_replicas = 2, - max_parallel_replicas = 3, - parallel_replicas_local_plan = 1, - parallel_replicas_for_non_replicated_merge_tree = 1, - parallel_replicas_min_number_of_rows_per_replica = 0, - cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' -FORMAT TSVWithNames; - -DROP TABLE test_pr_dod_alias_swap_outer; -DROP TABLE test_pr_dod_alias_swap_inner; -DROP TABLE test_pr_dod_alias_swap_local; From e7ae5150b045f9d943c49f73431a759aec34558a Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 May 2026 07:23:45 +0200 Subject: [PATCH 21/32] Address review findings on the __aliasMarker / header-reconciliation changes - Planner::makeConvertingActionsPreferNameThenPosition: only fall back to positional matching for THERE_IS_NO_COLUMN / NUMBER_OF_COLUMNS_DOESNT_MATCH (the cases name matching legitimately cannot handle, e.g. a remote aggregate-state column matched by ordinal); rethrow any other name-mode error instead of silently masking a real schema/type problem. Fallback stays at TRACE (it is a common, expected path). - finalizeAliasMarkersForDistributedSerialization: do not descend into lambda bodies. A marker inside a lambda (e.g. 
arrayMap(x -> __aliasMarker(x, x), ...)) over a Distributed table resolved its column argument to the lambda parameter, which has no table source, and raised a user-triggerable LOGICAL_ERROR (server abort under abort_on_logical_error). Covered by 03933. - StorageDistributed: clone the alias expression before removeAlias() in the marker branch; getExpression() may return a node shared elsewhere in the tree. - Clarify enable_alias_marker setting description (it is a correctness toggle). - Remove dead/unused code: unused includes (base/hex.h, ClientInfo.h), unused serialize_query_plan extern, a stray blank line, and make logPositionConversionMismatch file-local with a TRACE-level guard. Refresh a stale comment in findParallelReplicasQuery. Tests: 03933 (direct/lambda __aliasMarker use must not crash) and 03934 (enable_alias_marker on=correct vs off=reintroduces the column swap, proving the setting's effect). Signed-off-by: Mikhail Filimonov Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Analyzer/Utils.cpp | 16 +++++++-- .../createUniqueAliasesIfNecessary.cpp | 1 - src/Core/Settings.cpp | 6 ++-- src/Planner/PlannerActionsVisitor.cpp | 1 - src/Planner/Utils.cpp | 20 +++++++++-- src/Planner/Utils.h | 7 ---- src/Planner/findParallelReplicasQuery.cpp | 3 +- src/Storages/StorageDistributed.cpp | 5 +-- ...rker_direct_use_no_logical_error.reference | 14 ++++++++ ...ias_marker_direct_use_no_logical_error.sql | 31 ++++++++++++++++ ...uted_alias_marker_setting_effect.reference | 15 ++++++++ ...istributed_alias_marker_setting_effect.sql | 35 +++++++++++++++++++ 12 files changed, 136 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.reference create mode 100644 tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.sql create mode 100644 tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.reference create mode 100644 
tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index eeb128f1ece0..5764702009e5 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -49,7 +49,6 @@ #include -#include #include namespace DB @@ -999,8 +998,16 @@ class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQue return false; } - static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr &) + static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr &) { + /// Do not descend into lambda bodies. A marker inside a lambda (e.g. a user-written + /// `arrayMap(x -> __aliasMarker(x, x), ...)`) is a per-row identity computation, not a + /// distributed-serialization-boundary column; its argument column resolves to the lambda + /// parameter which has no table source to materialize. Visiting it would otherwise hit the + /// "unnamed source" path below and raise a user-triggerable LOGICAL_ERROR (see 03933). + if (parent && parent->getNodeType() == QueryTreeNodeType::LAMBDA) + return false; + /// Keep traversing marker payload recursively so nested chains are preserved /// and each marker can materialize its own arg2 when needed. return true; @@ -1040,6 +1047,11 @@ class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQue return; } + /// arg2 was neither a column with a source alias nor an already-materialized String id + /// (e.g. a user-supplied marker with an arbitrary second argument). Leave it untouched - + /// the function is a pass-through identity, so no materialization is the safe, non-throwing + /// behavior. Our own injected markers always carry an aliased column source, so this path + /// is not reachable for them. 
if (alias_id.empty()) return; diff --git a/src/Analyzer/createUniqueAliasesIfNecessary.cpp b/src/Analyzer/createUniqueAliasesIfNecessary.cpp index 1235f865b170..2846eb28443a 100644 --- a/src/Analyzer/createUniqueAliasesIfNecessary.cpp +++ b/src/Analyzer/createUniqueAliasesIfNecessary.cpp @@ -229,7 +229,6 @@ void createUniqueAliasesIfNecessary(QueryTreeNodePtr & node, const ContextPtr & * It's required to create a valid AST for distributed query. */ CreateUniqueArrayJoinAliasesVisitor(context).visit(node); - } } diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 847ef382dcdf..4509362630e1 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -2354,8 +2354,10 @@ Maximum length of step description in EXPLAIN PLAN. )", 0) \ \ DECLARE(Bool, enable_alias_marker, true, R"( -Enable __aliasMarker injection for ALIAS column expressions when using the analyzer. -This stabilizes action node names across planner/analyzer stages without changing query semantics. +Enable __aliasMarker injection for ALIAS column expressions when reading a Distributed table with the analyzer. +The marker preserves the identity of an inlined ALIAS expression across the initiator/shard boundary so columns are +reconciled by name instead of by position. This is a correctness fix: with it disabled, distributed queries over +ALIAS columns (especially distributed-over-distributed) can return swapped columns or fail with a type-mismatch error. 
)", 0) \ \ DECLARE(UInt64, preferred_block_size_bytes, 1000000, R"( diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 4c7ee14cb082..55a8a18f57e4 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -31,7 +31,6 @@ #include #include -#include #include #include diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 200878b48181..42a29eea4de6 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -84,6 +84,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH; extern const int INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH; + extern const int THERE_IS_NO_COLUMN; + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; } String dumpQueryPlan(const QueryPlan & query_plan) @@ -709,7 +711,7 @@ QueryPlanStepPtr projectOnlyUsedColumns( return step; } -void logPositionConversionMismatch( +static void logPositionConversionMismatch( const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & result_columns, const ContextPtr & context, @@ -717,6 +719,10 @@ void logPositionConversionMismatch( { static auto log = getLogger("PositionConversion"); + /// Everything below is purely diagnostic; skip the work when TRACE is disabled. + if (!log->is(Poco::Message::PRIO_TRACE)) + return; + if (source_columns.size() != result_columns.size()) { LOG_TRACE( @@ -788,9 +794,19 @@ ActionsDAG makeConvertingActionsPreferNameThenPosition( } catch (const Exception & e) { + /// Only fall back to positional matching for the cases name-matching legitimately + /// cannot handle (a column absent by name, or a differing column count - e.g. a remote + /// shard emitting an aggregate state column matched by ordinal). Any other error from + /// name-mode conversion is a genuine schema/type problem and must propagate rather than + /// be silently masked into a wrong-column association. 
+ if (e.code() != ErrorCodes::THERE_IS_NO_COLUMN && e.code() != ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH) + throw; + + /// Positional fallback is a normal, expected path here (e.g. a remote shard emitting an + /// aggregate-state column matched by ordinal), so this stays at TRACE to avoid noise. LOG_TRACE( log, - "Name conversion is not possible at {}. query_id={} reason={}", + "Name conversion is not possible at {}, falling back to positional matching. query_id={} reason={}", location, context ? context->getCurrentQueryId() : "", e.message()); diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index f7ac27ae63e5..289704b0ff8a 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -126,13 +126,6 @@ QueryPlanStepPtr projectOnlyUsedColumns( const SharedHeader & stream_header, const ColumnIdentifiers & used_column_identifiers); -/// Trace-report mismatches before Position-based conversion. -void logPositionConversionMismatch( - const ColumnsWithTypeAndName & source_columns, - const ColumnsWithTypeAndName & result_columns, - const ContextPtr & context, - std::string_view location); - /// Try Name-based conversion first, fallback to Position with detailed trace report. ActionsDAG makeConvertingActionsPreferNameThenPosition( const ColumnsWithTypeAndName & source_columns, diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index d0b0c55ff58d..a116e38909eb 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -548,7 +548,8 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas( /// initial_header is a header expected by initial query. /// header is a header which is returned by the follower. /// They are different because tables will have different aliases (e.g. _table1 or _table5). - /// Here we just rename columns by position, with the hope the types would match. 
+ /// Reconcile by name first (matching the initiator's column identifiers) and fall back to + /// position only when name matching is not possible. auto step = std::make_unique(query_plan.getCurrentHeader(), std::move(converting)); step->setStepDescription("Convert distributed names"); query_plan.addStep(std::move(step)); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 265b2efc56eb..980b0bfb3995 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -211,7 +211,6 @@ namespace Setting extern const SettingsUInt64 allow_experimental_parallel_reading_from_replicas; extern const SettingsBool prefer_global_in_and_join; extern const SettingsBool skip_unavailable_shards; - extern const SettingsBool serialize_query_plan; extern const SettingsBool enable_global_with_statement; extern const SettingsBool allow_experimental_hybrid_table; extern const SettingsBool enable_alias_marker; @@ -875,7 +874,9 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitorclone()); arguments.emplace_back(std::make_shared(column_node->getColumn(), column_source)); auto alias_marker_node = std::make_shared("__aliasMarker"); diff --git a/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.reference b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.reference new file mode 100644 index 000000000000..f3f736b7dea2 --- /dev/null +++ b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.reference @@ -0,0 +1,14 @@ +2arg_identity +42 +lambda_local +[1] +[2] +[3] +lambda_over_distributed +[1] +[2] +[3] +lambda_over_distributed_plan +[1] +[2] +[3] diff --git a/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.sql b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.sql new file mode 100644 index 000000000000..e327c442397d --- /dev/null +++ 
b/tests/queries/0_stateless/03933_alias_marker_direct_use_no_logical_error.sql @@ -0,0 +1,31 @@ +-- __aliasMarker is an internal pass-through identity function. Direct use from SQL must not +-- raise a server-side LOGICAL_ERROR (which would abort under abort_on_logical_error / sanitizers), +-- in particular inside a lambda over a Distributed table where the marker's column argument +-- resolves to a lambda parameter with no table source. +DROP TABLE IF EXISTS t_local_03933; +DROP TABLE IF EXISTS t_dist_03933; + +CREATE TABLE t_local_03933 (x UInt64) ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03933 VALUES (1), (2), (3); + +CREATE TABLE t_dist_03933 AS t_local_03933 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03933); + +SELECT '2arg_identity'; +SELECT __aliasMarker(42, 'anything'); + +SELECT 'lambda_local'; +SELECT arrayMap(lx -> __aliasMarker(lx, lx), [x]) AS arr FROM t_local_03933 ORDER BY x; + +SELECT 'lambda_over_distributed'; +SELECT arrayMap(lx -> __aliasMarker(lx, lx), [x]) AS arr +FROM t_dist_03933 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'lambda_over_distributed_plan'; +SELECT arrayMap(lx -> __aliasMarker(lx, lx), [x]) AS arr +FROM t_dist_03933 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +DROP TABLE t_dist_03933; +DROP TABLE t_local_03933; diff --git a/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.reference b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.reference new file mode 100644 index 000000000000..cc0e49a8fdf9 --- /dev/null +++ b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.reference @@ -0,0 +1,15 @@ +marker_on +x a_str inner_c +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +marker_off_reintroduces_swap diff --git 
a/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql new file mode 100644 index 000000000000..152c225ef272 --- /dev/null +++ b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql @@ -0,0 +1,35 @@ +-- Demonstrates that enable_alias_marker is a correctness toggle for distributed ALIAS columns. +-- Distributed-over-distributed with a String ALIAS (`a_str`) and a UInt64 ALIAS (`inner_c`): +-- * marker ON -> columns reconciled by name, correct results. +-- * marker OFF -> the inlined ALIAS expansion swaps columns; the String 'aaaa' is routed into +-- the UInt64 `inner_c` slot and the query fails with CANNOT_PARSE_TEXT. +DROP TABLE IF EXISTS t_se_local; +DROP TABLE IF EXISTS t_se_inner; +DROP TABLE IF EXISTS t_se_outer; + +CREATE TABLE t_se_local (x UInt64) ENGINE = MergeTree() ORDER BY x; +INSERT INTO t_se_local VALUES (1), (2), (10); + +CREATE TABLE t_se_inner (x UInt64, inner_c UInt64 ALIAS x + 1) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), t_se_local); + +CREATE TABLE t_se_outer (x UInt64, inner_c UInt64, a_str String ALIAS 'aaaa') +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), t_se_inner); + +SELECT 'marker_on'; +SELECT x, a_str, inner_c +FROM t_se_outer +ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0 +FORMAT TSVWithNames; + +SELECT 'marker_off_reintroduces_swap'; +-- No output format header here: the query errors mid-execution, so it must not stream a header. 
+SELECT x, a_str, inner_c +FROM t_se_outer +ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 0, prefer_localhost_replica = 0; -- { serverError CANNOT_PARSE_TEXT } + +DROP TABLE t_se_outer; +DROP TABLE t_se_inner; +DROP TABLE t_se_local; From 15d85e54c7163b92dbe300fcadbde0c34e56081a Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 May 2026 09:57:09 +0200 Subject: [PATCH 22/32] StorageMerge: use Nested::splitName for analyzer-identifier suffix extraction Replace the two hand-rolled rfind('.')-and-substr blocks in convertAndFilterSourceStream with Nested::splitName(name, /*reverse=*/true), the canonical helper the analyzer itself uses to derive a column's logical name from its `__tableN.col` identifier. Behavior is unchanged (fall back to the full name when there is no dot). Note: the short-name alias-injection loop is retained - it is load-bearing for resolving alias-of-alias expressions (e.g. `b ALIAS a + 1`); removing it breaks 03928 with NOT_FOUND_COLUMN_IN_BLOCK, so the reviewers' "possibly redundant" hypothesis does not hold. Signed-off-by: Mikhail Filimonov Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Storages/StorageMerge.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 47fe0ab6497f..ba2fea5a9a8a 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1577,10 +1578,10 @@ void ReadFromMerge::convertAndFilterSourceStream( std::unordered_set ambiguous_logical_names; for (const auto & column : header) { - auto last_dot_pos = column.name.rfind('.'); - String logical_name = (last_dot_pos == String::npos || last_dot_pos + 1 >= column.name.size()) - ? column.name - : column.name.substr(last_dot_pos + 1); + /// Strip the `__tableN.` analyzer prefix to get the logical column name. 
+ auto logical_name = Nested::splitName(column.name, /*reverse=*/ true).second; + if (logical_name.empty()) + logical_name = column.name; if (!logical_name_to_header_name.emplace(logical_name, column.name).second) ambiguous_logical_names.insert(logical_name); } @@ -1590,6 +1591,11 @@ void ReadFromMerge::convertAndFilterSourceStream( for (const auto & alias : aliases) { ActionsDAG actions_dag(pipe_columns); + /// Alias expressions reference columns by their plain logical name (e.g. `a`), while the + /// child stream exposes analyzer identifiers (e.g. `__table1.a`). Add an unambiguous + /// short-name alias for each identifier input so buildQueryTree(alias.expression) can + /// resolve those references. (Required: removing this breaks alias-of-alias resolution, + /// e.g. `b ALIAS a + 1` in 03928.) std::unordered_map short_name_to_node; std::unordered_set ambiguous_short_names; std::unordered_set existing_input_names; @@ -1597,12 +1603,10 @@ void ReadFromMerge::convertAndFilterSourceStream( { existing_input_names.insert(input->result_name); - const auto & input_name = input->result_name; - auto last_dot_pos = input_name.rfind('.'); - if (last_dot_pos == String::npos || last_dot_pos + 1 >= input_name.size()) + auto short_name = Nested::splitName(input->result_name, /*reverse=*/ true).second; + if (short_name.empty()) continue; - auto short_name = input_name.substr(last_dot_pos + 1); if (!short_name_to_node.emplace(short_name, input).second) ambiguous_short_names.insert(short_name); } From 5967d5b05a3036c125d3df1c555e17d7b98a5acf Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 May 2026 11:10:38 +0200 Subject: [PATCH 23/32] test: make 03928 deterministic across architectures The previous form (SELECT a, b FROM merge ORDER BY x over a two-shard cluster) relied on the distributed merge ordering of duplicate rows, which is not guaranteed and differed on arm_binary CI (correct values, wrong row order). Rewrite as SELECT x, a, b ... 
GROUP BY x, a, b ORDER BY x: GROUP BY keeps x in the required columns (the ALIAS expansion needs it) and deduplicates the per-shard duplicates, and ORDER BY over the distinct x values is a total order independent of the merge. The test still fails if the alias columns are swapped or zeroed. Signed-off-by: Mikhail Filimonov Co-Authored-By: Claude Opus 4.7 (1M context) --- ...ributed_alias_marker_column_swap.reference | 42 +++++++------------ ...r_distributed_alias_marker_column_swap.sql | 20 +++++---- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference index 6ff1dd7f287d..f32381f38096 100644 --- a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference +++ b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.reference @@ -1,32 +1,20 @@ local -2 3 -3 4 -11 12 +1 2 3 +2 3 4 +10 11 12 merge_prefer0 -2 3 -2 3 -3 4 -3 4 -11 12 -11 12 +1 2 3 +2 3 4 +10 11 12 merge_prefer1 -2 3 -2 3 -3 4 -3 4 -11 12 -11 12 +1 2 3 +2 3 4 +10 11 12 merge_prefer0_plan -2 3 -2 3 -3 4 -3 4 -11 12 -11 12 +1 2 3 +2 3 4 +10 11 12 merge_prefer1_plan -2 3 -2 3 -3 4 -3 4 -11 12 -11 12 +1 2 3 +2 3 4 +10 11 12 diff --git a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql index 7769b38607be..c5817fd07f34 100644 --- a/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql +++ b/tests/queries/0_stateless/03928_merge_over_distributed_alias_marker_column_swap.sql @@ -1,9 +1,13 @@ -- Plain Merge over Distributed over MergeTree (no Hybrid, no explicit __aliasMarker). -- Nested ALIAS columns (b contains a's subexpression). 
Reading the alias columns through the -- Merge table must reconcile the child (Distributed) header by name; a positional reconciliation --- in StorageMerge::convertAndFilterSourceStream would swap the columns. Correct result equals the --- single-node ('local') result. test_cluster_two_shards has two shards, so distributed/merge --- blocks return each row twice. +-- in StorageMerge::convertAndFilterSourceStream would swap the columns (or fill them with 0). +-- The correct result equals the single-node ('local') result. +-- +-- Determinism notes: `x` is kept in GROUP BY so the ALIAS expansion can resolve it (the alias +-- expressions are defined in terms of x); GROUP BY also deduplicates the rows the two shards +-- produce, and ORDER BY x (distinct values) gives a total order independent of the distributed +-- merge order. So every block - local and the distributed variants - yields the same rows. DROP TABLE IF EXISTS test_merge_alias_swap_merge; DROP TABLE IF EXISTS test_merge_alias_swap_dist; DROP TABLE IF EXISTS test_merge_alias_swap_local; @@ -31,22 +35,22 @@ CREATE TABLE test_merge_alias_swap_merge ENGINE = Merge(currentDatabase(), '^test_merge_alias_swap_dist$'); SELECT 'local'; -SELECT a, b FROM test_merge_alias_swap_local ORDER BY x; +SELECT x, a, b FROM test_merge_alias_swap_local GROUP BY x, a, b ORDER BY x; SELECT 'merge_prefer0'; -SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; SELECT 'merge_prefer1'; -SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1; SELECT 'merge_prefer0_plan'; -SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x SETTINGS 
enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; SELECT 'merge_prefer1_plan'; -SELECT a, b FROM test_merge_alias_swap_merge ORDER BY x +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; DROP TABLE test_merge_alias_swap_merge; From ade20aa57c49d5c90320ca54860a9a59a220c65c Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 May 2026 13:19:10 +0200 Subject: [PATCH 24/32] test: pin serialize_query_plan=0 in 03923 and 03934 Both tests target AST-path behavior; the "distributed plan" CI flavor forces serialize_query_plan=1 globally, which exposed two plan-path differences unrelated to what these tests assert: - 03934: on the serialized-plan path the header is reconciled by name without the marker, so the marker_off query does NOT swap/error there - the serverError expectation only holds on the AST path. - 03923: hybrid + IN-subquery on the plan path hits a separate header-reconciliation gap (THERE_IS_NO_COLUMN __table1.string_col), orthogonal to the unknown-table issues this test covers. Pin serialize_query_plan=0 so both run on the intended AST path under any CI flavor. 
Signed-off-by: Mikhail Filimonov Co-Authored-By: Claude Opus 4.7 (1M context) --- .../03923_hybrid_unknown_table_issues_1208_1209_1422.sql | 7 ++++++- .../03934_distributed_alias_marker_setting_effect.sql | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql index 20d098b79927..c5cab11cee86 100644 --- a/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql +++ b/tests/queries/0_stateless/03923_hybrid_unknown_table_issues_1208_1209_1422.sql @@ -1,6 +1,11 @@ SET allow_experimental_hybrid_table = 1, enable_analyzer = 1, - prefer_localhost_replica = 0; + prefer_localhost_replica = 0, + -- AST-path regression test for unknown-table issues #1208/#1209/#1422. Pin + -- serialize_query_plan=0 so the "distributed plan" CI flavor (which forces it on) does not + -- route these hybrid + IN-subquery queries through the plan path, which has a separate, + -- unrelated header-reconciliation gap. + serialize_query_plan = 0; DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422; DROP TABLE IF EXISTS test_hybrid_issue_1208_1209_1422_left; diff --git a/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql index 152c225ef272..4100828c9eb5 100644 --- a/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql +++ b/tests/queries/0_stateless/03934_distributed_alias_marker_setting_effect.sql @@ -16,11 +16,15 @@ ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), t_se_local); CREATE TABLE t_se_outer (x UInt64, inner_c UInt64, a_str String ALIAS 'aaaa') ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), t_se_inner); +-- serialize_query_plan is pinned to 0 throughout: this test targets the AST-path alias marker. 
+-- On the serialized-plan path the header is reconciled by name regardless of the marker, so the +-- marker_off swap below does not occur there; the "distributed plan" CI flavor would otherwise +-- force the plan path on and the marker_off query would succeed instead of erroring. SELECT 'marker_on'; SELECT x, a_str, inner_c FROM t_se_outer ORDER BY x -SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 0 FORMAT TSVWithNames; SELECT 'marker_off_reintroduces_swap'; @@ -28,7 +32,7 @@ SELECT 'marker_off_reintroduces_swap'; SELECT x, a_str, inner_c FROM t_se_outer ORDER BY x -SETTINGS enable_analyzer = 1, enable_alias_marker = 0, prefer_localhost_replica = 0; -- { serverError CANNOT_PARSE_TEXT } +SETTINGS enable_analyzer = 1, enable_alias_marker = 0, prefer_localhost_replica = 0, serialize_query_plan = 0; -- { serverError CANNOT_PARSE_TEXT } DROP TABLE t_se_outer; DROP TABLE t_se_inner; From 9ea15973384ea614e252dd65e6365627cfb04869 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 May 2026 14:16:31 +0200 Subject: [PATCH 25/32] Document enable_alias_marker escape hatch for mixed-version clusters The marker travels to shards as the __aliasMarker function in distributed SQL. If a shard does not understand it (older/forked build), set enable_alias_marker=0 on the initiator to disable injection and fall back to previous behavior. No version negotiation is performed by design; document this in the setting description. 
Signed-off-by: Mikhail Filimonov Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Core/Settings.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 4509362630e1..2f83d2ab4ac2 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -2358,6 +2358,10 @@ Enable __aliasMarker injection for ALIAS column expressions when reading a Distr The marker preserves the identity of an inlined ALIAS expression across the initiator/shard boundary so columns are reconciled by name instead of by position. This is a correctness fix: with it disabled, distributed queries over ALIAS columns (especially distributed-over-distributed) can return swapped columns or fail with a type-mismatch error. + +The marker is sent to shards as the `__aliasMarker` function in the distributed SQL. On a mixed-version cluster whose +shards do not understand `__aliasMarker`, set this setting to `false` on the initiator: that disables marker injection +and falls back to the previous behavior (no negotiation/version handshake is performed). )", 0) \ \ DECLARE(UInt64, preferred_block_size_bytes, 1000000, R"( From a235560ba3212199c28db2de9f0186c535f40719 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 May 2026 19:18:45 +0200 Subject: [PATCH 26/32] test: make 03931 parallel-replicas alias test deterministic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parallel replicas over a small non-replicated table can read the same rows on several replicas under randomized settings, duplicating output (observed ×3 on arm CI). Switch to `SELECT x, a1, a2 ... GROUP BY x, a1, a2 ORDER BY x` to deduplicate while keeping `x` in the required columns for the ALIAS expansion. The test still fails if `a1`/`a2` are swapped or wrong. Mirrors the earlier 03928 fix. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- .../03931_parallel_replicas_alias_swap.reference | 12 ++++++------ .../03931_parallel_replicas_alias_swap.sql | 11 ++++++++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference index c4f5e5a4c4c3..14f9c770f714 100644 --- a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference +++ b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.reference @@ -1,9 +1,9 @@ local -11 12 -21 22 +10 11 12 +20 21 22 pr_ast -11 12 -21 22 +10 11 12 +20 21 22 pr_plan -11 12 -21 22 +10 11 12 +20 21 22 diff --git a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql index a507c8501296..f669631889c2 100644 --- a/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql +++ b/tests/queries/0_stateless/03931_parallel_replicas_alias_swap.sql @@ -1,6 +1,11 @@ -- Plain Distributed + parallel replicas (no Hybrid). Exercises the findParallelReplicasQuery -- header reconciliation path with nested ALIAS columns. Correct result equals the single-node -- ('local') result for both AST and serialized-plan transport. +-- +-- Determinism note: parallel replicas over a small non-replicated table can read the same rows on +-- several replicas under some (randomized) settings, duplicating output. GROUP BY x, a1, a2 +-- deduplicates that and keeps x in the required columns for the ALIAS expansion; ORDER BY x over +-- distinct values gives a total order. The test still fails if a1/a2 are swapped or wrong. 
DROP TABLE IF EXISTS t_local_03931; DROP TABLE IF EXISTS t_dist_03931; @@ -12,17 +17,17 @@ CREATE TABLE t_dist_03931 AS t_local_03931 ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), t_local_03931); SELECT 'local'; -SELECT a1, a2 FROM t_local_03931 ORDER BY a1; +SELECT x, a1, a2 FROM t_local_03931 GROUP BY x, a1, a2 ORDER BY x; SELECT 'pr_ast'; -SELECT a1, a2 FROM t_dist_03931 ORDER BY a1 +SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x SETTINGS enable_analyzer = 1, enable_alias_marker = 1, allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', serialize_query_plan = 0; SELECT 'pr_plan'; -SELECT a1, a2 FROM t_dist_03931 ORDER BY a1 +SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x SETTINGS enable_analyzer = 1, enable_alias_marker = 1, allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', From 907d00cd8ba03d894c802d5c10e80b3856b6699c Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 28 May 2026 16:26:52 +0200 Subject: [PATCH 27/32] Document bucket D drop rationale + preserve SQL Six alias-marker regression tests dropped from the upstream port branch (alias_marker3) had their column-order scenarios already covered by upstream PR #94644. Preserves the original SQL and reference files (under 26.3 slot numbers) plus a dated rationale Markdown so future contributors don't reinvent the same shapes. 
--- ..._distributed_nested_alias_marker.reference | 4 + .../03844_distributed_nested_alias_marker.sql | 34 ++ ...buted_global_in_join_alias_chain.reference | 8 + ...distributed_global_in_join_alias_chain.sql | 34 ++ ...global_in_alias_marker_collision.reference | 2 + ...buted_global_in_alias_marker_collision.sql | 56 +++ ...ybrid_unknown_table_exact_schema.reference | 30 ++ ...3924_hybrid_unknown_table_exact_schema.sql | 329 ++++++++++++++++++ ...alias_column_swap_without_marker.reference | 56 +++ ...buted_alias_column_swap_without_marker.sql | 96 +++++ ...l_replicas_dod_alias_column_swap.reference | 20 ++ ...arallel_replicas_dod_alias_column_swap.sql | 94 +++++ ...lias_marker_explicit_column_swap.reference | 32 ++ ...uted_alias_marker_explicit_column_swap.sql | 82 +++++ ...0_distributed_alias_swap_planner.reference | 15 + .../03930_distributed_alias_swap_planner.sql | 34 ++ ...931_parallel_replicas_alias_swap.reference | 9 + .../03931_parallel_replicas_alias_swap.sql | 37 ++ ...32_distributed_alias_strict_name.reference | 9 + .../03932_distributed_alias_strict_name.sql | 27 ++ ...-05-28-bucket-d-redundant-with-pr-94644.md | 67 ++++ .../dropped_tests/README.md | 39 +++ 22 files changed, 1114 insertions(+) create mode 100644 _local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.reference create mode 100644 _local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.sql create mode 100644 _local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.reference create mode 100644 _local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.sql create mode 100644 _local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.reference create mode 100644 _local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.sql create mode 100644 _local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.reference create mode 100644 
_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.sql create mode 100644 _local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.reference create mode 100644 _local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.sql create mode 100644 _local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.reference create mode 100644 _local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.sql create mode 100644 _local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.reference create mode 100644 _local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.sql create mode 100644 _local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.reference create mode 100644 _local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.sql create mode 100644 _local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.reference create mode 100644 _local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.sql create mode 100644 _local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.reference create mode 100644 _local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.sql create mode 100644 _local_files_and_notes/dropped_tests/2026-05-28-bucket-d-redundant-with-pr-94644.md create mode 100644 _local_files_and_notes/dropped_tests/README.md diff --git a/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.reference b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.reference new file mode 100644 index 000000000000..7b05cb1e81a0 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.reference @@ -0,0 +1,4 @@ +analyzer +x x +legacy +x x diff --git a/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.sql 
b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.sql new file mode 100644 index 000000000000..b725acf38949 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03844_distributed_nested_alias_marker.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_nested_alias_dist; +DROP TABLE IF EXISTS test_nested_alias_local; + +CREATE TABLE test_nested_alias_local +( + dt DateTime64(3), + base String, + a String ALIAS base, + b String ALIAS a +) +ENGINE = MergeTree() +ORDER BY dt; + +INSERT INTO test_nested_alias_local VALUES ('1999-03-29T01:15:33', 'x'); + +CREATE TABLE test_nested_alias_dist AS test_nested_alias_local +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_nested_alias_local, rand()); + +SELECT 'analyzer'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 1; + +SELECT 'legacy'; +SELECT a, b +FROM test_nested_alias_dist +ORDER BY dt +LIMIT 1 +SETTINGS enable_analyzer = 0; + +DROP TABLE test_nested_alias_dist; +DROP TABLE test_nested_alias_local; diff --git a/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.reference b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.reference new file mode 100644 index 000000000000..325078d71cc1 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.reference @@ -0,0 +1,8 @@ +rewrite_in +1 +1 +rewrite_join +1 +1 +1 +1 diff --git a/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.sql b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.sql new file mode 100644 index 000000000000..9bd95d72fd20 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03845_distributed_global_in_join_alias_chain.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_global_alias_chain_dist; +DROP TABLE IF EXISTS test_global_alias_chain_local; + +CREATE TABLE test_global_alias_chain_local +( + id 
UInt64, + base UInt64, + a UInt64 ALIAS base, + b UInt64 ALIAS a +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_global_alias_chain_local VALUES (1, 1); + +CREATE TABLE test_global_alias_chain_dist AS test_global_alias_chain_local +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), test_global_alias_chain_local, rand()); + +SELECT 'rewrite_in'; +SELECT id +FROM test_global_alias_chain_dist +WHERE id IN (SELECT b FROM test_global_alias_chain_dist) +ORDER BY id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +SELECT 'rewrite_join'; +SELECT l.id +FROM test_global_alias_chain_dist AS l +INNER JOIN (SELECT b FROM test_global_alias_chain_dist) AS r ON l.id = r.b +ORDER BY l.id +SETTINGS enable_analyzer = 1, distributed_product_mode = 'global'; + +DROP TABLE test_global_alias_chain_dist; +DROP TABLE test_global_alias_chain_local; diff --git a/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.reference b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.reference new file mode 100644 index 000000000000..9a3a29a69ce8 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.reference @@ -0,0 +1,2 @@ +global_in_collision_check +1 diff --git a/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.sql b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.sql new file mode 100644 index 000000000000..d47e6a304ba1 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03846_distributed_global_in_alias_marker_collision.sql @@ -0,0 +1,56 @@ +DROP TABLE IF EXISTS test_marker_collision_dist; +DROP TABLE IF EXISTS test_marker_collision_main; +DROP TABLE IF EXISTS test_marker_collision_left; +DROP TABLE IF EXISTS test_marker_collision_right; + +CREATE TABLE test_marker_collision_main +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + 
+INSERT INTO test_marker_collision_main VALUES (1); + +CREATE TABLE test_marker_collision_left +( + id UInt64, + x UInt64, + b UInt64 ALIAS x +) +ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE test_marker_collision_right +( + id UInt64, + y UInt64, + b UInt64 ALIAS y +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_marker_collision_left VALUES (1, 1); +INSERT INTO test_marker_collision_right VALUES (1, 20); + +CREATE TABLE test_marker_collision_dist AS test_marker_collision_main +ENGINE = Distributed('test_shard_localhost', currentDatabase(), test_marker_collision_main, rand()); + +SELECT 'global_in_collision_check'; +SELECT id +FROM test_marker_collision_dist +WHERE id GLOBAL IN +( + SELECT test_marker_collision_left.id + FROM test_marker_collision_left + INNER JOIN test_marker_collision_right + ON test_marker_collision_left.id = test_marker_collision_right.id + WHERE test_marker_collision_left.b + test_marker_collision_right.b = 21 +) +ORDER BY id +SETTINGS enable_analyzer = 1, enable_alias_marker = 1; + +DROP TABLE test_marker_collision_dist; +DROP TABLE test_marker_collision_main; +DROP TABLE test_marker_collision_left; +DROP TABLE test_marker_collision_right; diff --git a/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.reference b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.reference new file mode 100644 index 000000000000..dcebc31194b1 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.reference @@ -0,0 +1,30 @@ +merge_tree_row_count +5 +iceberg_row_count +5 +hybrid_row_count +5 +true 8751 7291.267979503492 Frank 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 5313 8428.52 456.78 +false 3611 4492.090462838536 Isaac 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 4552 1554.795 456.78 +true 1519 3799.273006373374 Louis 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 
12:00:00.000000 8785 1248.2616 456.78 +true 1654 3801.2622503916614 Oliver 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 3432 6701.752 456.78 +true 9044 2931.782814070929 William 2024-01-01 06:00:00.000000 2024-01-01 43200000000 2024-01-01 12:00:00.000000 3733 7730.6836 456.78 +issue_1208_join_hybrid_mt_local +Frank 8751 8751 +Isaac 3611 3611 +Louis 1519 1519 +Oliver 1654 1654 +William 9044 9044 +issue_1208_join_hybrid_mt_allow +Frank 8751 8751 +Isaac 3611 3611 +Louis 1519 1519 +Oliver 1654 1654 +William 9044 9044 +issue_1208_join_hybrid_mt_iceberg_local +issue_1208_join_hybrid_mt_iceberg_allow +Frank 8751 8751 8751 +Isaac 3611 3611 3611 +Louis 1519 1519 1519 +Oliver 1654 1654 1654 +William 9044 9044 9044 diff --git a/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.sql b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.sql new file mode 100644 index 000000000000..a37c991684b4 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03924_hybrid_unknown_table_exact_schema.sql @@ -0,0 +1,329 @@ +SET allow_experimental_hybrid_table = 1, + enable_analyzer = 1, + prefer_localhost_replica = 0, + iceberg_delete_data_on_drop = 1; + +DROP TABLE IF EXISTS hybrid_table; +DROP TABLE IF EXISTS merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; +DROP TABLE IF EXISTS iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172; +DROP TABLE IF EXISTS merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; +DROP TABLE IF EXISTS hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490; +DROP TABLE IF EXISTS merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490; +DROP TABLE IF EXISTS database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172`; +DROP DATABASE IF EXISTS database_39afd42b_d5d6_11f0_b919_e0c26496f172; + +CREATE TABLE merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172 +( + boolean_col Nullable(Bool), + long_col 
Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = MergeTree +PARTITION BY string_col +ORDER BY tuple() +SETTINGS index_granularity = 8192; + +INSERT INTO merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172 VALUES + (true, 9044, 2931.782814070929, 'William', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3733, 7730.6836, 456.78), + (true, 1654, 3801.2622503916614, 'Oliver', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3432, 6701.752, 456.78), + (true, 8751, 7291.267979503492, 'Frank', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 5313, 8428.52, 456.78), + (true, 1519, 3799.273006373374, 'Louis', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 8785, 1248.2616, 456.78), + (false, 3611, 4492.090462838536, 'Isaac', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 4552, 1554.795, 456.78); + +SELECT 'merge_tree_row_count'; +SELECT count() FROM merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; + +CREATE TABLE iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172 +( + boolean_col Nullable(Int32), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Float64) +) +ENGINE = 
IcebergS3( + s3_conn, + filename = concat('hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table') +); + +INSERT INTO iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172 SETTINGS allow_experimental_insert_into_iceberg = 1, write_full_path_in_iceberg_metadata = 1 VALUES + (1, 9044, 2931.782814070929, 'William', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3733, 7730.6836, 456.78), + (1, 1654, 3801.2622503916614, 'Oliver', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 3432, 6701.752, 456.78), + (1, 8751, 7291.267979503492, 'Frank', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 5313, 8428.52, 456.78), + (1, 1519, 3799.273006373374, 'Louis', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 8785, 1248.2616, 456.78), + (0, 3611, 4492.090462838536, 'Isaac', toDateTime64('2024-01-01 06:00:00', 6), toDate('2024-01-01'), 43200000000, toDateTime64('2024-01-01 12:00:00', 6, 'UTC'), 4552, 1554.795, 456.78); + +SELECT 'iceberg_row_count'; +SELECT count() FROM iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172; + +CREATE TABLE hybrid_table +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = Hybrid( + remote('localhost', currentDatabase(), 'merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172'), + date_col <= '2024-01-01', + icebergCluster( + 'test_cluster_one_shard_three_replicas_localhost', + 
concat('http://localhost:11111/test/hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table/'), + 'test', + 'testtest' + ), + date_col > '2024-01-01' +); + +SELECT 'hybrid_row_count'; +SELECT count() FROM hybrid_table; + +CREATE TABLE merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = MergeTree +PARTITION BY string_col +ORDER BY tuple() +SETTINGS index_granularity = 8192; + +INSERT INTO merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 +SELECT * FROM merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; + +CREATE DATABASE database_39afd42b_d5d6_11f0_b919_e0c26496f172; + +CREATE TABLE database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` +( + boolean_col Nullable(Int32), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Float64) +) +ENGINE = IcebergS3( + s3_conn, + filename = concat('hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table_39afe20a_d5d6_11f0_8208_e0c26496f172') +); + +INSERT INTO database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` +SETTINGS allow_experimental_insert_into_iceberg = 1, write_full_path_in_iceberg_metadata = 1 +SELECT + toInt32(boolean_col), + long_col, + double_col, + string_col, + 
timestamp_col, + date_col, + time_col, + timestamptz_col, + integer_col, + float_col, + toFloat64(decimal_col) +FROM merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; + +SELECT * +FROM hybrid_table +WHERE string_col IN +( + SELECT DISTINCT string_col + FROM hybrid_table + WHERE long_col > 1500 +) +ORDER BY string_col; + +SELECT 'issue_1208_join_hybrid_mt_local'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local'; + +SELECT 'issue_1208_join_hybrid_mt_allow'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow'; + +SELECT 'issue_1208_join_hybrid_mt_iceberg_local'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = i.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local'; -- { serverError UNKNOWN_IDENTIFIER } + +SELECT 'issue_1208_join_hybrid_mt_iceberg_allow'; +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN 
database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = i.string_col +ORDER BY h.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow'; + +-- Exact issue-shape queries (no ORDER BY), deterministic output via FORMAT Null. +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local' +FORMAT Null; + +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow' +FORMAT Null; + +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = i.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'local' +FORMAT Null; -- { serverError UNKNOWN_IDENTIFIER } + +SELECT + h.string_col, + h.long_col AS hybrid_long, + m.long_col AS mt_long, + i.long_col AS iceberg_long +FROM hybrid_table AS h +FULL OUTER JOIN merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172 AS m ON h.string_col = m.string_col +FULL OUTER JOIN database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172` AS i ON h.string_col = i.string_col +LIMIT 10 +SETTINGS object_storage_cluster_join_mode = 'allow' +FORMAT Null; + +CREATE TABLE 
merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490 +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = MergeTree +PARTITION BY string_col +ORDER BY tuple() +SETTINGS index_granularity = 8192; + +INSERT INTO merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490 +SELECT * FROM merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; + +CREATE TABLE hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490 +( + boolean_col Nullable(Bool), + long_col Nullable(Int64), + double_col Nullable(Float64), + string_col String, + timestamp_col Nullable(DateTime64(6)), + date_col Nullable(Date), + time_col Nullable(Int64), + timestamptz_col Nullable(DateTime64(6, 'UTC')), + integer_col Nullable(Int32), + float_col Nullable(Float32), + decimal_col Nullable(Decimal(10, 2)) +) +ENGINE = Hybrid( + remote('localhost', currentDatabase(), 'merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172'), + date_col <= '2024-01-01', + icebergCluster( + 'test_cluster_one_shard_three_replicas_localhost', + concat('http://localhost:11111/test/hybrid_unknown_table_exact_schema_03924/', currentDatabase(), '/iceberg_table/'), + 'test', + 'testtest' + ), + date_col > '2024-01-01' +); + +SELECT * +FROM hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490 +WHERE string_col IN +( + SELECT DISTINCT string_col + FROM merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490 + WHERE long_col > 1500 +) +FORMAT Null; + +DROP TABLE hybrid_table; +DROP TABLE merge_tree_table_b9faf88a_d5d3_11f0_b816_e0c26496f172; +DROP TABLE iceberg_table_b4bd039e_d5d3_11f0_8208_e0c26496f172; +DROP TABLE merge_tree_table_3ef2c546_d5d6_11f0_b816_e0c26496f172; +DROP TABLE hybrid_table_64293f1a_0cba_11f1_876b_de7b9eea3490; +DROP 
TABLE merge_tree_table_640a9b6e_0cba_11f1_876b_de7b9eea3490; +DROP TABLE database_39afd42b_d5d6_11f0_b919_e0c26496f172.`namespace_39afe1b3_d5d6_11f0_9b00_e0c26496f172.table_39afe20a_d5d6_11f0_8208_e0c26496f172`; +DROP DATABASE database_39afd42b_d5d6_11f0_b919_e0c26496f172; diff --git a/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.reference b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.reference new file mode 100644 index 000000000000..1e2e9b11750a --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.reference @@ -0,0 +1,56 @@ +prefer_localhost_replica_0_uint64 +x a_num inner_c +1 1 2 +1 1 2 +1 1 2 +1 1 2 +2 1 3 +2 1 3 +2 1 3 +2 1 3 +10 1 11 +10 1 11 +10 1 11 +10 1 11 +prefer_localhost_replica_0_string +x a_str inner_c +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +prefer_localhost_replica_1_uint64 +x a_num inner_c +1 1 2 +1 1 2 +1 1 2 +1 1 2 +2 1 3 +2 1 3 +2 1 3 +2 1 3 +10 1 11 +10 1 11 +10 1 11 +10 1 11 +prefer_localhost_replica_1_string +x a_str inner_c +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +1 aaaa 2 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +2 aaaa 3 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 +10 aaaa 11 diff --git a/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.sql b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.sql new file mode 100644 index 000000000000..84ad3cf170d0 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03925_distributed_alias_column_swap_without_marker.sql @@ -0,0 +1,96 @@ +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_outer; +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_inner; +DROP TABLE IF EXISTS test_dod_alias_swap_no_marker_local; + +CREATE TABLE test_dod_alias_swap_no_marker_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO 
test_dod_alias_swap_no_marker_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_alias_swap_no_marker_inner +( + x UInt64, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_local); + +CREATE TABLE test_dod_alias_swap_no_marker_outer +( + x UInt64, + inner_c UInt64, + a_num UInt64 ALIAS 1, + a_str String ALIAS 'aaaa' +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_no_marker_inner); + +SELECT 'prefer_localhost_replica_0_uint64'; +SELECT + x, + a_num, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_0_string'; +SELECT + x, + a_str, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_uint64'; +SELECT + x, + a_num, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_string'; +SELECT + x, + a_str, + inner_c +FROM test_dod_alias_swap_no_marker_outer +ORDER BY x +SETTINGS + allow_experimental_analyzer = 1, + enable_alias_marker = 0, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +DROP TABLE test_dod_alias_swap_no_marker_outer; +DROP TABLE 
test_dod_alias_swap_no_marker_inner; +DROP TABLE test_dod_alias_swap_no_marker_local; diff --git a/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.reference b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.reference new file mode 100644 index 000000000000..228ac5f667f7 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.reference @@ -0,0 +1,20 @@ +no_pr_uint64 +x a_num inner_c +1 1 2 +2 1 3 +10 1 11 +no_pr_string +x a_str inner_c +1 aaaa 2 +2 aaaa 3 +10 aaaa 11 +pr_uint64 +x a_num inner_c +1 1 2 +2 1 3 +10 1 11 +pr_string +x a_str inner_c +1 aaaa 2 +2 aaaa 3 +10 aaaa 11 diff --git a/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.sql b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.sql new file mode 100644 index 000000000000..070330e98826 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03926_parallel_replicas_dod_alias_column_swap.sql @@ -0,0 +1,94 @@ +DROP TABLE IF EXISTS test_pr_dod_alias_swap_outer; +DROP TABLE IF EXISTS test_pr_dod_alias_swap_inner; +DROP TABLE IF EXISTS test_pr_dod_alias_swap_local; + +CREATE TABLE test_pr_dod_alias_swap_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_pr_dod_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_pr_dod_alias_swap_inner +( + x UInt64, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_pr_dod_alias_swap_local); + +CREATE TABLE test_pr_dod_alias_swap_outer +( + x UInt64, + inner_c UInt64, + a_num UInt64 ALIAS 1, + a_str String ALIAS 'aaaa' +) +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_pr_dod_alias_swap_inner); + +SELECT 'no_pr_uint64'; +SELECT x, a_num, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + 
enable_alias_marker = 0, + enable_parallel_replicas = 0, + allow_experimental_parallel_reading_from_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + parallel_replicas_for_non_replicated_merge_tree = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'no_pr_string'; +SELECT x, a_str, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 0, + allow_experimental_parallel_reading_from_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0, + parallel_replicas_for_non_replicated_merge_tree = 1, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'pr_uint64'; +SELECT x, a_num, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 2, + allow_experimental_parallel_reading_from_replicas = 2, + max_parallel_replicas = 3, + parallel_replicas_local_plan = 1, + parallel_replicas_for_non_replicated_merge_tree = 1, + parallel_replicas_min_number_of_rows_per_replica = 0, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +SELECT 'pr_string'; +SELECT x, a_str, inner_c +FROM test_pr_dod_alias_swap_outer +ORDER BY x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 0, + enable_parallel_replicas = 2, + allow_experimental_parallel_reading_from_replicas = 2, + max_parallel_replicas = 3, + parallel_replicas_local_plan = 1, + parallel_replicas_for_non_replicated_merge_tree = 1, + parallel_replicas_min_number_of_rows_per_replica = 0, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost' +FORMAT TSVWithNames; + +DROP TABLE test_pr_dod_alias_swap_outer; +DROP TABLE test_pr_dod_alias_swap_inner; +DROP TABLE test_pr_dod_alias_swap_local; 
diff --git a/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.reference b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.reference new file mode 100644 index 000000000000..f3797cb0ce0e --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.reference @@ -0,0 +1,32 @@ +prefer_localhost_replica_0_uint64 +a inner_c +1 2 +1 2 +1 3 +1 3 +1 11 +1 11 +prefer_localhost_replica_0_string +a inner_c +aaaa 2 +aaaa 2 +aaaa 3 +aaaa 3 +aaaa 11 +aaaa 11 +prefer_localhost_replica_1_uint64 +a inner_c +1 2 +1 2 +1 3 +1 3 +1 11 +1 11 +prefer_localhost_replica_1_string +a inner_c +aaaa 2 +aaaa 2 +aaaa 3 +aaaa 3 +aaaa 11 +aaaa 11 diff --git a/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.sql b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.sql new file mode 100644 index 000000000000..50085531b6dd --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03927_distributed_alias_marker_explicit_column_swap.sql @@ -0,0 +1,82 @@ +DROP TABLE IF EXISTS test_dod_alias_swap_local; +DROP TABLE IF EXISTS test_dod_alias_swap_inner; + +CREATE TABLE test_dod_alias_swap_local +( + x UInt64 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_dod_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_dod_alias_swap_inner +( + x UInt64, + a UInt64 ALIAS 2, + inner_c UInt64 ALIAS x + 1 +) +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_dod_alias_swap_local); + +SELECT 'prefer_localhost_replica_0_uint64'; +SELECT + __aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT 
TSVWithNames; + +SELECT 'prefer_localhost_replica_0_string'; +SELECT + __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 0, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_uint64'; +SELECT + __aliasMarker(_CAST(1, 'UInt64'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +SELECT 'prefer_localhost_replica_1_string'; +SELECT + __aliasMarker(_CAST('aaaa', 'String'), '__table1.a') AS a, + __table1.inner_c AS inner_c +FROM test_dod_alias_swap_inner AS __table1 +ORDER BY __table1.x +SETTINGS + enable_analyzer = 1, + enable_alias_marker = 1, + prefer_localhost_replica = 1, + enable_parallel_replicas = 0, + max_parallel_replicas = 1, + parallel_replicas_local_plan = 0 +FORMAT TSVWithNames; + +DROP TABLE test_dod_alias_swap_inner; +DROP TABLE test_dod_alias_swap_local; diff --git a/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.reference b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.reference new file mode 100644 index 000000000000..402cc360bae5 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.reference @@ -0,0 +1,15 @@ +local +11 12 +21 22 +dist_prefer0 +11 12 +21 22 +dist_prefer1 +11 12 +21 22 +dist_prefer0_plan +11 12 +21 22 +dist_prefer1_plan +11 12 +21 22 diff --git a/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.sql 
b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.sql new file mode 100644 index 000000000000..848f35b0be14 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03930_distributed_alias_swap_planner.sql @@ -0,0 +1,34 @@ +-- Plain Distributed (no Hybrid). Two nested ALIAS columns: a2 contains a1's subexpression, +-- so planner CSE may reorder the remote header. Correct result must equal the single-node +-- ('local') result across every transport variant. +DROP TABLE IF EXISTS t_local_03930; +DROP TABLE IF EXISTS t_dist_03930; + +CREATE TABLE t_local_03930 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03930 VALUES (10), (20); + +CREATE TABLE t_dist_03930 AS t_local_03930 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03930); + +SELECT 'local'; +SELECT a1, a2 FROM t_local_03930 ORDER BY a1; + +SELECT 'dist_prefer0'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'dist_prefer1'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1; + +SELECT 'dist_prefer0_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +SELECT 'dist_prefer1_plan'; +SELECT a1, a2 FROM t_dist_03930 ORDER BY a1 +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; + +DROP TABLE t_dist_03930; +DROP TABLE t_local_03930; diff --git a/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.reference b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.reference new file mode 100644 index 000000000000..14f9c770f714 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.reference @@ 
-0,0 +1,9 @@ +local +10 11 12 +20 21 22 +pr_ast +10 11 12 +20 21 22 +pr_plan +10 11 12 +20 21 22 diff --git a/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.sql b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.sql new file mode 100644 index 000000000000..f669631889c2 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03931_parallel_replicas_alias_swap.sql @@ -0,0 +1,37 @@ +-- Plain Distributed + parallel replicas (no Hybrid). Exercises the findParallelReplicasQuery +-- header reconciliation path with nested ALIAS columns. Correct result equals the single-node +-- ('local') result for both AST and serialized-plan transport. +-- +-- Determinism note: parallel replicas over a small non-replicated table can read the same rows on +-- several replicas under some (randomized) settings, duplicating output. GROUP BY x, a1, a2 +-- deduplicates that and keeps x in the required columns for the ALIAS expansion; ORDER BY x over +-- distinct values gives a total order. The test still fails if a1/a2 are swapped or wrong. 
+DROP TABLE IF EXISTS t_local_03931; +DROP TABLE IF EXISTS t_dist_03931; + +CREATE TABLE t_local_03931 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03931 VALUES (10), (20); + +CREATE TABLE t_dist_03931 AS t_local_03931 +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), t_local_03931); + +SELECT 'local'; +SELECT x, a1, a2 FROM t_local_03931 GROUP BY x, a1, a2 ORDER BY x; + +SELECT 'pr_ast'; +SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, + allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + serialize_query_plan = 0; + +SELECT 'pr_plan'; +SELECT x, a1, a2 FROM t_dist_03931 GROUP BY x, a1, a2 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, + allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + serialize_query_plan = 1; + +DROP TABLE t_dist_03931; +DROP TABLE t_local_03931; diff --git a/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.reference b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.reference new file mode 100644 index 000000000000..cddf594d4e31 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.reference @@ -0,0 +1,9 @@ +local +12 11 23 +22 21 43 +dist +12 11 23 +22 21 43 +dist_plan +12 11 23 +22 21 43 diff --git a/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.sql b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.sql new file mode 100644 index 000000000000..c094d28f01e0 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/03932_distributed_alias_strict_name.sql @@ -0,0 +1,27 @@ +-- Plain 
Distributed (no Hybrid). Reorders alias columns and mixes a computed expression over +-- them. With strict name-based header reconciliation (positional fallback disabled), the result +-- must equal the single-node ('local') result for both AST and serialized-plan transport, and no +-- LOGICAL_ERROR must be raised. +DROP TABLE IF EXISTS t_local_03932; +DROP TABLE IF EXISTS t_dist_03932; + +CREATE TABLE t_local_03932 (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +INSERT INTO t_local_03932 VALUES (10), (20); + +CREATE TABLE t_dist_03932 AS t_local_03932 +ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_local_03932); + +SELECT 'local'; +SELECT a2, a1, a1 + a2 AS s FROM t_local_03932 ORDER BY x; + +SELECT 'dist'; +SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0; + +SELECT 'dist_plan'; +SELECT a2, a1, a1 + a2 AS s FROM t_dist_03932 ORDER BY x +SETTINGS enable_analyzer = 1, enable_alias_marker = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +DROP TABLE t_dist_03932; +DROP TABLE t_local_03932; diff --git a/_local_files_and_notes/dropped_tests/2026-05-28-bucket-d-redundant-with-pr-94644.md b/_local_files_and_notes/dropped_tests/2026-05-28-bucket-d-redundant-with-pr-94644.md new file mode 100644 index 000000000000..d4315c16c951 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/2026-05-28-bucket-d-redundant-with-pr-94644.md @@ -0,0 +1,67 @@ +# Bucket D — redundant with upstream PR #94644 + +**Context:** During the `__aliasMarker` upstream port (workspace branch +`alias_marker3`), a three-way cross-validation against `upstream/master` and PR +#105690 revealed that 6 of the 17 ported regression tests now pass on pure +master — they no longer reproduce any current regression. 
+ +**Cause:** [PR #94644](https://github.com/ClickHouse/ClickHouse/pull/94644) +"Preserve ALIAS column order for distributed reads" landed on upstream +2026-01-22, after the original `__aliasMarker` work began on the 26.3 +development branch. #94644 fixes the column-order regression at the +`PlannerJoinTree` / `TableExpressionData` insertion-order level — exactly the +shape these 6 tests were designed to catch — and ships its own test +`03726_distributed_alias_column_order.sql` covering it. + +## Shared DDL pattern + +The dropped tests share the same nested-alias DDL (`03846` is a two-table variant; see the file index below): + +```sql +CREATE TABLE t (x UInt32, a1 UInt32 ALIAS x + 1, a2 UInt32 ALIAS a1 + 1) +ENGINE = MergeTree ORDER BY x; +``` + +In this pattern both `a1` and `a2` need `x` as input. After #94644 preserves +insertion order, CSE doesn't reorder these expressions on the remote side, so +the initiator/shard header matches as expected by the time +`addConvertingActions` runs. The bug we were trying to reproduce no longer +manifests. + +## What's actually load-bearing on `alias_marker3` after the refactor + +The remaining real bugs (not fixed by #94644 and still failing on master) need +different patterns: + +- **Shared sub-expression across siblings:** `flag_zero ALIAS toBool(bitTest(f, 0))`, + `flag_one ALIAS toBool(bitTest(f, 1))` — CSE collapses to a single + `bitTest(f, ...)` output on the remote, returning fewer columns than the + initiator expects (`NUMBER_OF_COLUMNS_DOESNT_MATCH`). Covered by the kept + tests `04279_distributed_alias_planner_column_count` (single-hop and + multi-hop) and `04280_distributed_alias_column_order` (silent column swap + with `ORDER BY ... LIMIT`). +- **Multi-hop `Distributed`-over-`Distributed`, `Merge`-over-`Distributed`, + parallel-replicas follower, and `distributed_product_mode='local'` rewriting + of `GLOBAL IN`:** covered by the other 4 kept tests (`04281`, `04282`, `04283`).
+ +## Dropped tests — file index in this directory + +All preserved with their original SQL (copied from +`feature/antalya-26.3/alias_marker_fixes` tip), using original 26.3 slot +numbers: + +| File | Mapped to (now-deleted) slot in alias_marker3 | Status note | +|---|---|---| +| `03930_distributed_alias_swap_planner.sql` | was `04282` on alias_marker3 | Doesn't reproduce on master; #94644 already fixes column order. | +| `03844_distributed_nested_alias_marker.sql` | was `04285` | Doesn't reproduce; #94644 handles the chain. | +| `03845_distributed_global_in_join_alias_chain.sql` | was `04286` | Doesn't reproduce; subquery has one column so column-count divergence dodged. | +| `03846_distributed_global_in_alias_marker_collision.sql` | was `04287` | **Interesting variant.** Two source tables with alias `b` (one is `b ALIAS x`, other `b ALIAS y`). The JOIN resolves on the shard side, so the marker collision is absorbed before the initiator sees it. The *real* collision scenario lives at `04283_distributed_alias_global_in_product_mode_local` (uses `distributed_product_mode='local'` which causes the analyzer to bind both `__table*.x` identifiers to the same alias `foo` on the initiator → `MULTIPLE_EXPRESSIONS_FOR_ALIAS`). | +| `03931_parallel_replicas_alias_swap.sql` | was `04293` | **Interesting variant.** Uses the canonical parallel-replicas-determinism workaround: `GROUP BY x, a1, a2 ORDER BY x` to dedupe non-deterministically distributed rows. Pattern worth recording in case future parallel-replicas tests need it. Underlying scenario doesn't reproduce after #94644. | +| `03932_distributed_alias_strict_name.sql` | was `04294` | Doesn't reproduce; #94644's insertion-order fix handles the reorder + computed column case. | + +## How to revisit + +If a future change ever undoes #94644's insertion-order guarantee, these tests +will start reproducing again. 
Resurrect them by copying back from this +directory into `tests/queries/0_stateless/` with fresh slot numbers via +`./tests/queries/0_stateless/add-test`. diff --git a/_local_files_and_notes/dropped_tests/README.md b/_local_files_and_notes/dropped_tests/README.md new file mode 100644 index 000000000000..e956b2ca1681 --- /dev/null +++ b/_local_files_and_notes/dropped_tests/README.md @@ -0,0 +1,39 @@ +# Dropped tests register + +This directory preserves the rationale and SQL of tests that were dropped from +upstream PRs but might be informative for future contributors. Each entry +documents: + +1. **What was dropped** — file names, commit references. +2. **Why it was dropped** — and why the scenario it claimed to test isn't + load-bearing on the current upstream. +3. **The one interesting non-redundant variant** (if any) — so a future + contributor doesn't reinvent the same shape. +4. **What's actually load-bearing** — the test (if any) that does cover the + subsystem. + +## Convention + +- Dropped SQL files are copied here verbatim (preserving the original 26.3 + slot numbers so they can be located in the source branch). +- A dated rationale Markdown sits next to them with the same prefix. + +## Entries + +### 2026-05-28: bucket D — alias-marker regression tests redundant with PR #94644 + +See [`2026-05-28-bucket-d-redundant-with-pr-94644.md`](2026-05-28-bucket-d-redundant-with-pr-94644.md). + +Six alias-marker regression tests (slots 03844, 03845, 03846, 03930, 03931, +03932 on `feature/antalya-26.3/alias_marker_fixes`; renumbered to 0428x range +on the upstream port branch `alias_marker3` before being dropped) preserved +in this directory along with their `.reference` files. 
+ +### Older entries (pre-2026-05-28) + +Files in this directory that don't have a dated Markdown next to them +(`03924_hybrid_unknown_table_exact_schema`, `03925_distributed_alias_column_swap_without_marker`, +`03926_parallel_replicas_dod_alias_column_swap`, `03927_distributed_alias_marker_explicit_column_swap`) +were dropped during earlier iterations of the same `__aliasMarker` work, +without an accompanying note. Rationale for those lives in commit messages on +the source branch. From e562e7d0cbfe244a00ed60cc558b733b30067e41 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 28 May 2026 21:52:52 +0200 Subject: [PATCH 28/32] Planner: switch name-vs-position helper to a predicate selector makeConvertingActionsPreferNameThenPosition previously tried `MatchColumnsMode::Name` inside a try/catch and fell back to `MatchColumnsMode::Position` on `THERE_IS_NO_COLUMN` / `NUMBER_OF_COLUMNS_DOESNT_MATCH`. That works functionally but throws a C++ exception on every call site where Name cannot match by construction -- most notably on every `SELECT count() FROM mt_table` because `Optimized trivial count` emits its output column under the source-table column name (e.g. `x`) while the analyzer-expected header uses the aggregate function name (`count()`). The exception travelled the full stack-unwind path before Position rescued the conversion. Restore the predicate-based selector originally introduced in `b6cbb20e7f6` (reverted by `39a235d405c` for unclear reasons -- not a defect). `canMatchByNameWithoutAmbiguity` checks set equality of source and result column names with no duplicates in O(n) hash work and routes to `Name` only when the rename can be done by name; otherwise routes straight to `Position` and emits a `LOG_TEST` diagnostic. Behaviour is identical to the prior form on every distributed-ALIAS scenario the suite covers (Name path), and identical to upstream Position-blind behaviour on the non-alias paths the prior form was paying try/catch for. 
Verified locally on: - alias suite: 03921, 03928, 03930, 03931, 03932, 03933, 03228 - non-alias load-bearing: 00059_shard_global_in_mergetree (count over remote), 00754_distributed_optimize_skip_select_on_unused_shards, 00028, 00111, 00124, 01099, 02184, 02402 Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- src/Planner/Utils.cpp | 124 +++++++++++------------------------------- 1 file changed, 32 insertions(+), 92 deletions(-) diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 42a29eea4de6..6f7340e5b414 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -84,8 +84,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH; extern const int INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH; - extern const int THERE_IS_NO_COLUMN; - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; } String dumpQueryPlan(const QueryPlan & query_plan) @@ -711,63 +709,25 @@ QueryPlanStepPtr projectOnlyUsedColumns( return step; } -static void logPositionConversionMismatch( - const ColumnsWithTypeAndName & source_columns, - const ColumnsWithTypeAndName & result_columns, - const ContextPtr & context, - std::string_view location) +static bool canMatchByNameWithoutAmbiguity( + const ColumnsWithTypeAndName & source, + const ColumnsWithTypeAndName & result) { - static auto log = getLogger("PositionConversion"); - - /// Everything below is purely diagnostic; skip the work when TRACE is disabled. - if (!log->is(Poco::Message::PRIO_TRACE)) - return; - - if (source_columns.size() != result_columns.size()) - { - LOG_TRACE( - log, - "Position conversion fallback at {}. query_id={} columns_count_mismatch source={} result={} source_header=[{}] result_header=[{}]", - location, - context ? 
context->getCurrentQueryId() : "", - source_columns.size(), - result_columns.size(), - Block(source_columns).dumpNames(), - Block(result_columns).dumpNames()); - return; - } - - std::vector mismatches; - mismatches.reserve(source_columns.size()); + if (source.size() != result.size()) + return false; - for (size_t i = 0; i < source_columns.size(); ++i) - { - const auto & source_column = source_columns[i]; - const auto & result_column = result_columns[i]; + NameSet source_names; + NameSet result_names; - if (source_column.name == result_column.name && source_column.type->equals(*result_column.type)) - continue; + for (const auto & source_column : source) + if (!source_names.insert(source_column.name).second) + return false; - mismatches.push_back(fmt::format( - "#{} {}:{} -> {}:{}", - i, - source_column.name, - source_column.type->getName(), - result_column.name, - result_column.type->getName())); - } + for (const auto & result_column : result) + if (!result_names.insert(result_column.name).second) + return false; - if (mismatches.empty()) - return; - - LOG_TRACE( - log, - "Position conversion fallback at {}. query_id={} source_header=[{}] result_header=[{}] mismatches=[{}]", - location, - context ? context->getCurrentQueryId() : "", - Block(source_columns).dumpNames(), - Block(result_columns).dumpNames(), - fmt::join(mismatches, "; ")); + return source_names == result_names; } ActionsDAG makeConvertingActionsPreferNameThenPosition( @@ -779,49 +739,29 @@ ActionsDAG makeConvertingActionsPreferNameThenPosition( bool add_cast_columns, NameToNameMap * new_names) { - static auto log = getLogger("PositionConversion"); + const auto mode = canMatchByNameWithoutAmbiguity(source_columns, result_columns) + ? 
ActionsDAG::MatchColumnsMode::Name + : ActionsDAG::MatchColumnsMode::Position; - try - { - return ActionsDAG::makeConvertingActions( - source_columns, - result_columns, - ActionsDAG::MatchColumnsMode::Name, - context, - ignore_constant_values, - add_cast_columns, - new_names); - } - catch (const Exception & e) + if (mode == ActionsDAG::MatchColumnsMode::Position) { - /// Only fall back to positional matching for the cases name-matching legitimately - /// cannot handle (a column absent by name, or a differing column count - e.g. a remote - /// shard emitting an aggregate state column matched by ordinal). Any other error from - /// name-mode conversion is a genuine schema/type problem and must propagate rather than - /// be silently masked into a wrong-column association. - if (e.code() != ErrorCodes::THERE_IS_NO_COLUMN && e.code() != ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH) - throw; - - /// Positional fallback is a normal, expected path here (e.g. a remote shard emitting an - /// aggregate-state column matched by ordinal), so this stays at TRACE to avoid noise. - LOG_TRACE( + static auto log = getLogger("ConversionDiag"); + LOG_TEST( log, - "Name conversion is not possible at {}, falling back to positional matching. query_id={} reason={}", + "Position match at {} (names not matchable as a set): source_count={} result_count={}", location, - context ? 
context->getCurrentQueryId() : "", - e.message()); - - logPositionConversionMismatch(source_columns, result_columns, context, location); - - return ActionsDAG::makeConvertingActions( - source_columns, - result_columns, - ActionsDAG::MatchColumnsMode::Position, - context, - ignore_constant_values, - add_cast_columns, - new_names); + source_columns.size(), + result_columns.size()); } + + return ActionsDAG::makeConvertingActions( + source_columns, + result_columns, + mode, + context, + ignore_constant_values, + add_cast_columns, + new_names); } } From 956089b30736889b639e7fa34a0b1d1b8608fb40 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 29 May 2026 07:09:40 +0200 Subject: [PATCH 29/32] Nits: clarity refactor + Nested-subcolumn TODO - StorageDistributed::ReplaseAliasColumnsVisitor: capture function_node.getArguments().getNodes() in a local reference before the move-assign + index-access pair. Two separate accessor calls are safe but the original code read as indexing a moved-from container. - StorageMerge::convertAndFilterSourceStream: document the suffix-vs-prefix concern in Nested::splitName(reverse=true) as a latent-only issue. Investigated 2026-05-29: the bug does not reproduce today because Nested subcolumns appear in the child stream with their dotted name verbatim, so the alias expression resolves directly without going through the suffix-stripping bridge. Comment in code points to the proper fix (stripAnalyzerTablePrefix) if reachability changes. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov (cherry picked from commit f323e680c2eea59e8a288b47662ca15ddfd39de9) --- src/Storages/StorageDistributed.cpp | 5 +++-- src/Storages/StorageMerge.cpp | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 980b0bfb3995..991c2814ab7a 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -880,8 +880,9 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitor(column_node->getColumn(), column_source)); auto alias_marker_node = std::make_shared("__aliasMarker"); - alias_marker_node->getArguments().getNodes() = std::move(arguments); - alias_marker_node->getArguments().getNodes()[0]->removeAlias(); + auto & nodes = alias_marker_node->getArguments().getNodes(); + nodes = std::move(arguments); + nodes[0]->removeAlias(); if (!output_alias.empty()) alias_marker_node->setAlias(output_alias); resolveOrdinaryFunctionNodeByName(*alias_marker_node, "__aliasMarker", context); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index ba2fea5a9a8a..9dcbc2d67fae 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1579,6 +1579,15 @@ void ReadFromMerge::convertAndFilterSourceStream( for (const auto & column : header) { /// Strip the `__tableN.` analyzer prefix to get the logical column name. + /// NOTE(alias-marker): `splitName(..., reverse=true)` splits on the LAST dot, + /// so `__table1.Nested.sub` yields `sub`, not `Nested.sub`. The mismatch is + /// structurally latent: investigated 2026-05-29, the bug does not reproduce + /// because Nested subcolumns appear in the child stream with their dotted + /// name verbatim (`n.a`), so the alias expression resolves against the input + /// directly without going through the suffix-stripping bridge. 
If a future + /// analyzer change emits Nested subcolumns under split names, route through a + /// dedicated stripAnalyzerTablePrefix helper that strips only the + /// `__tableN.` segment. auto logical_name = Nested::splitName(column.name, /*reverse=*/ true).second; if (logical_name.empty()) logical_name = column.name; From b4fb40c956ad9cedae7ea83ed811567eccb21c65 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 29 May 2026 15:15:17 +0200 Subject: [PATCH 30/32] StorageMerge: emit alias outputs under analyzer identifiers (schema-lookup) `ReadFromMerge::convertAndFilterSourceStream` mapped logical alias names to analyzer identifiers via `Nested::splitName(name, /*reverse=*/true)`, which splits on the LAST dot. For backtick-quoted dotted column names (e.g. `\`n.a\`` -> analyzer identifier `__table1.\`n.a\``) this yields the wrong logical name (`a` instead of `n.a`); the alias output was then emitted under the wrong name, the downstream `addMissingDefaults` saw the expected `__tableN.\`n.a\`` column as missing and filled it with type defaults -- silent wrong data. Replace the analyzer-naming-convention regex with a schema lookup: take the candidate set from the actual declared Merge schema (via `merge_storage_snapshot->metadata->getColumns()`) and suffix-match each declared column name against the `header` / `pipe_columns` identifier names. A header column named `.` or `.\`\`` is known to correspond to Merge column `C` because `C` is in the candidate set. This is robust to dotted column names (Nested-style and backtick-quoted) and does not hardcode the `__tableN` prefix shape. The same lookup also drives the input-side bridge that exposes child inputs under their plain names so `buildQueryTree(alias.expression)` resolves references like `a` or `n.a` against `__table1.a` / `__table1.\`n.a\``-named inputs -- required for alias-of-alias resolution such as `b ALIAS a + 1`. 
Pass `merge_storage_snapshot->metadata->getColumns()` to the (static) helper so it has access to the Merge schema. Add a long TODO documenting the structural awkwardness of the two-step design (rewrite the child query in `getModifiedQueryInfo`, recompute alias values here) and a potential future unification via `__aliasMarker` -- noting that the marker function call disappears at plan-build time so MergeTree index analysis on predicates over ALIAS columns is unaffected. Tests: - 04281_storage_merge_over_distributed_alias: Merge over Distributed with nested ALIAS columns (`b ALIAS a + 1` where `a ALIAS x + 1`). - 04286_dotted_alias_merge_over_distributed: Merge with explicit backtick-quoted dotted column names declared as ALIAS in the underlying storage, over a two-shard Distributed cluster -- the shape that triggers `__tableN.\`n.a\`` identifiers and uncovers the suffix-vs-prefix splitName bug. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- src/Storages/StorageMerge.cpp | 166 ++++++++++++------ src/Storages/StorageMerge.h | 1 + ...age_merge_over_distributed_alias.reference | 20 +++ ...1_storage_merge_over_distributed_alias.sql | 58 ++++++ ...ted_alias_merge_over_distributed.reference | 9 + ...86_dotted_alias_merge_over_distributed.sql | 64 +++++++ 6 files changed, 265 insertions(+), 53 deletions(-) create mode 100644 tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.reference create mode 100644 tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.sql create mode 100644 tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.reference create mode 100644 tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.sql diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 9dcbc2d67fae..b00b5bc74109 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -817,7 +817,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ /// 
Source tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. - convertAndFilterSourceStream(*common_header, modified_query_info, nested_storage_snapshot, aliases, row_policy_data_opt, context, child, is_smallest_column_requested); + convertAndFilterSourceStream(*common_header, modified_query_info, nested_storage_snapshot, merge_storage_snapshot->metadata->getColumns(), aliases, row_policy_data_opt, context, child, is_smallest_column_requested); for (const auto & filter_info : pushed_down_filters) { @@ -1558,6 +1558,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const Block & header, SelectQueryInfo & modified_query_info, const StorageSnapshotPtr & snapshot, + const ColumnsDescription & merge_columns, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr local_context, @@ -1568,67 +1569,123 @@ void ReadFromMerge::convertAndFilterSourceStream( auto pipe_columns = before_block_header->getNamesAndTypesList(); + /// TODO(storage-merge-alias): the analyzer branch below is a manual reproduction of what the + /// analyzer's standard column-alias resolution would do if it ran end-to-end on the child + /// plan. It exists because of a two-step design in `getModifiedQueryInfo` + here: + /// + /// Step 1 (in `getModifiedQueryInfo`): rewrite the query going to the child storage. + /// References to ALIAS columns at the Merge level are replaced by their resolved + /// expressions via `replaceColumns(query_tree, column_name_to_node)`. The child storage + /// receives a request for the PHYSICAL columns the expressions need; it does not see the + /// alias names at all. 
+ /// + /// Step 2 (here, `convertAndFilterSourceStream`): re-compute the alias VALUES at the + /// Merge level from those physical columns by building a fresh ActionsDAG, running + /// `QueryAnalysisPass` on each alias expression, and visiting with `PlannerActionsVisitor`. + /// Emit each alias output under the alias's analyzer identifier so the Merge target + /// header (also using analyzer identifiers) can pick it up by name. + /// + /// The structural awkwardness: alias values are computed AFTER the child's ReadFromMergeTree, + /// not before / inside it. This means predicates on ALIAS columns can only use the underlying + /// physical column for index analysis IF Step 1's `replaceColumns` happens to inline the + /// alias expression into the predicate too (which it does today), making KeyCondition see + /// `col*2 > 10` instead of `alias > 10`. Output-side aliases on the other hand are recomputed + /// here from scratch even when the child has already produced the same value (e.g. + /// Distributed children inline-evaluate alias expressions on the shard and return them as + /// expression-named output columns). The recompute is redundant for those cases. + /// + /// A natural unification would be to use `__aliasMarker(expr, identifier)` (the function + /// introduced elsewhere for distributed ALIAS-column header reconciliation) in Step 1: + /// replace each ALIAS reference with `__aliasMarker(, '')` + /// instead of the bare resolved expression. The child planner's `PlannerActionsVisitor` + /// resolves the marker at plan-build time -- the marker function call disappears from the + /// resulting ActionsDAG, leaving a normal action node that computes `` named + /// ``. So predicate / KeyCondition analysis is unaffected (it sees the underlying + /// computation graph, the marker is a planner-time naming device, not a runtime expression). 
+ /// With this unification Step 2 here disappears entirely: pipe_columns would already carry + /// alias values under correct names, and the entire `if (allow_experimental_analyzer) { ... }` + /// block below could be deleted. + /// + /// Left as future work. The current design is correct (Step 1 + Step 2 together produce the + /// right values), just not minimal. if (local_context->getSettingsRef()[Setting::allow_experimental_analyzer]) { - /// The Merge table expects its columns under analyzer identifiers (e.g. `__table1.a`), - /// while an alias expression is keyed by its plain logical name (e.g. `a`). Map each plain - /// logical name to the unambiguous target-header identifier so the alias output below is - /// emitted under the identifier the downstream reconciliation matches by name. - std::unordered_map logical_name_to_header_name; - std::unordered_set ambiguous_logical_names; - for (const auto & column : header) - { - /// Strip the `__tableN.` analyzer prefix to get the logical column name. - /// NOTE(alias-marker): `splitName(..., reverse=true)` splits on the LAST dot, - /// so `__table1.Nested.sub` yields `sub`, not `Nested.sub`. The mismatch is - /// structurally latent: investigated 2026-05-29, the bug does not reproduce - /// because Nested subcolumns appear in the child stream with their dotted - /// name verbatim (`n.a`), so the alias expression resolves against the input - /// directly without going through the suffix-stripping bridge. If a future - /// analyzer change emits Nested subcolumns under split names, route through a - /// dedicated stripAnalyzerTablePrefix helper that strips only the - /// `__tableN.` segment. 
- auto logical_name = Nested::splitName(column.name, /*reverse=*/ true).second; - if (logical_name.empty()) - logical_name = column.name; - if (!logical_name_to_header_name.emplace(logical_name, column.name).second) - ambiguous_logical_names.insert(logical_name); - } - for (const auto & ambiguous : ambiguous_logical_names) - logical_name_to_header_name.erase(ambiguous); - - for (const auto & alias : aliases) + /// The Merge table expects its columns under analyzer identifiers (e.g. `__table1.a`, + /// `__table1.\`n.a\``) while alias expressions and `alias.name` reference plain logical + /// names (e.g. `a`, `n.a`). + /// + /// At this point in the pipeline the planner's TableExpressionData for the Merge node + /// is NOT yet populated (column collection happens later in CollectTableExpressionData), + /// so we cannot look up the mapping via PlannerContext. Instead, build it ourselves by + /// matching each Merge-declared column name against the suffixes of the `header` / + /// `pipe_columns` identifier names: a header column named like `.` or + /// `.\`\`` corresponds to the Merge column `C`. + /// + /// This is robust to dotted column names (Nested, backtick-quoted) because the candidate + /// set is the actual declared Merge schema rather than a regex over the analyzer's + /// naming convention. + auto build_plain_to_identifier = [&](const auto & candidate_names) { - ActionsDAG actions_dag(pipe_columns); - /// Alias expressions reference columns by their plain logical name (e.g. `a`), while the - /// child stream exposes analyzer identifiers (e.g. `__table1.a`). Add an unambiguous - /// short-name alias for each identifier input so buildQueryTree(alias.expression) can - /// resolve those references. (Required: removing this breaks alias-of-alias resolution, - /// e.g. `b ALIAS a + 1` in 03928.) 
- std::unordered_map short_name_to_node; - std::unordered_set ambiguous_short_names; - std::unordered_set existing_input_names; - for (const auto * input : actions_dag.getInputs()) + std::unordered_map plain_to_identifier; + std::unordered_set ambiguous; + for (const auto & column : candidate_names) { - existing_input_names.insert(input->result_name); - - auto short_name = Nested::splitName(input->result_name, /*reverse=*/ true).second; - if (short_name.empty()) + /// First try exact match (no analyzer prefix at all). + if (merge_columns.has(column.name)) + { + if (!plain_to_identifier.emplace(column.name, column.name).second) + ambiguous.insert(column.name); continue; + } - if (!short_name_to_node.emplace(short_name, input).second) - ambiguous_short_names.insert(short_name); + /// Otherwise look for the `.` or `.\`\`` shape where C is + /// a declared Merge column name. Skip any column that doesn't match a known + /// Merge column (e.g. intermediate expression outputs of the child plan). + for (const auto & merge_column : merge_columns.getAll()) + { + bool dotted = merge_column.name.find('.') != String::npos; + String want = dotted ? ("." + backQuote(merge_column.name)) : ("." 
+ merge_column.name); + if (column.name.ends_with(want)) + { + if (!plain_to_identifier.emplace(merge_column.name, column.name).second) + ambiguous.insert(merge_column.name); + break; + } + } } + for (const auto & a : ambiguous) + plain_to_identifier.erase(a); + return plain_to_identifier; + }; - for (const auto & ambiguous_short_name : ambiguous_short_names) - short_name_to_node.erase(ambiguous_short_name); + const auto header_plain_to_identifier = build_plain_to_identifier(header); + const auto pipe_plain_to_identifier = build_plain_to_identifier(pipe_columns); - for (const auto & [short_name, input] : short_name_to_node) + for (const auto & alias : aliases) + { + ActionsDAG actions_dag(pipe_columns); + + /// Bridge: alias expressions reference columns by plain logical name, but the child + /// stream's inputs are named with analyzer identifiers. For each Merge column with a + /// known identifier in `pipe_columns`, expose it under its plain name as well so + /// `buildQueryTree(alias.expression)` resolves references like `a` or `n.a` against + /// inputs named `__table1.a` / `__table1.\`n.a\``. Required for alias-of-alias + /// resolution (e.g. `b ALIAS a + 1`, see 04281).
+ for (const auto & [plain, identifier] : pipe_plain_to_identifier) { - if (existing_input_names.contains(short_name)) + if (plain == identifier) continue; - - actions_dag.addAlias(*input, short_name); + const ActionsDAG::Node * input_node = nullptr; + for (const auto * candidate : actions_dag.getInputs()) + { + if (candidate->result_name == identifier) + { + input_node = candidate; + break; + } + } + if (input_node) + actions_dag.addAlias(*input_node, plain); } QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); @@ -1644,8 +1701,11 @@ void ReadFromMerge::convertAndFilterSourceStream( if (nodes.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); - auto output_name_it = logical_name_to_header_name.find(alias.name); - const String & output_name = output_name_it != logical_name_to_header_name.end() ? output_name_it->second : alias.name; + /// Emit the alias output under its analyzer identifier so the downstream + /// `addMissingDefaults` matches it by name (otherwise the expected + /// `__tableN.\`alias.name\`` column would be filled with type defaults). + auto it = header_plain_to_identifier.find(alias.name); + const String & output_name = it != header_plain_to_identifier.end() ? 
it->second : alias.name; actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), output_name)); auto expression_step = std::make_unique(child.plan.getCurrentHeader(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index f23c7a750a78..5e57960d4862 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -298,6 +298,7 @@ class ReadFromMerge final : public SourceStepWithFilter const Block & header, SelectQueryInfo & modified_query_info, const StorageSnapshotPtr & snapshot, + const ColumnsDescription & merge_columns, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr context, diff --git a/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.reference b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.reference new file mode 100644 index 000000000000..f32381f38096 --- /dev/null +++ b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.reference @@ -0,0 +1,20 @@ +local +1 2 3 +2 3 4 +10 11 12 +merge_prefer0 +1 2 3 +2 3 4 +10 11 12 +merge_prefer1 +1 2 3 +2 3 4 +10 11 12 +merge_prefer0_plan +1 2 3 +2 3 4 +10 11 12 +merge_prefer1_plan +1 2 3 +2 3 4 +10 11 12 diff --git a/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.sql b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.sql new file mode 100644 index 000000000000..e1ac59428d5e --- /dev/null +++ b/tests/queries/0_stateless/04281_storage_merge_over_distributed_alias.sql @@ -0,0 +1,58 @@ +-- Plain Merge over Distributed over MergeTree without an explicit __aliasMarker call. +-- Nested ALIAS columns (b contains a's subexpression). Reading the alias columns through the +-- Merge table must reconcile the child (Distributed) header by name; a positional reconciliation +-- in StorageMerge::convertAndFilterSourceStream would swap the columns (or fill them with 0). 
+-- The correct result equals the single-node ('local') result. +-- +-- Determinism notes: `x` is kept in GROUP BY so the ALIAS expansion can resolve it (the alias +-- expressions are defined in terms of x); GROUP BY also deduplicates the rows the two shards +-- produce, and ORDER BY x (distinct values) gives a total order independent of the distributed +-- merge order. So every block - local and the distributed variants - yields the same rows. +DROP TABLE IF EXISTS test_merge_alias_swap_merge; +DROP TABLE IF EXISTS test_merge_alias_swap_dist; +DROP TABLE IF EXISTS test_merge_alias_swap_local; + +CREATE TABLE test_merge_alias_swap_local +( + x UInt64, + a UInt64 ALIAS x + 1, + b UInt64 ALIAS a + 1 +) +ENGINE = MergeTree() +ORDER BY x; + +INSERT INTO test_merge_alias_swap_local VALUES (1), (2), (10); + +CREATE TABLE test_merge_alias_swap_dist AS test_merge_alias_swap_local +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_merge_alias_swap_local); + +CREATE TABLE test_merge_alias_swap_merge +( + x UInt64, + a UInt64, + b UInt64 +) +ENGINE = Merge(currentDatabase(), '^test_merge_alias_swap_dist$'); + +SELECT 'local'; +SELECT x, a, b FROM test_merge_alias_swap_local GROUP BY x, a, b ORDER BY x; + +SELECT 'merge_prefer0'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, prefer_localhost_replica = 0; + +SELECT 'merge_prefer1'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, prefer_localhost_replica = 1; + +SELECT 'merge_prefer0_plan'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, prefer_localhost_replica = 0, serialize_query_plan = 1; + +SELECT 'merge_prefer1_plan'; +SELECT x, a, b FROM test_merge_alias_swap_merge GROUP BY x, a, b ORDER BY x +SETTINGS enable_analyzer = 1, prefer_localhost_replica = 1, serialize_query_plan = 1; + +DROP TABLE test_merge_alias_swap_merge; 
+DROP TABLE test_merge_alias_swap_dist; +DROP TABLE test_merge_alias_swap_local; diff --git a/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.reference b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.reference new file mode 100644 index 000000000000..97dd73615024 --- /dev/null +++ b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.reference @@ -0,0 +1,9 @@ +local +1 10 100 +2 20 200 +merge_prefer0 +1 10 100 +2 20 200 +merge_prefer1 +1 10 100 +2 20 200 diff --git a/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.sql b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.sql new file mode 100644 index 000000000000..c42ad63adab4 --- /dev/null +++ b/tests/queries/0_stateless/04286_dotted_alias_merge_over_distributed.sql @@ -0,0 +1,64 @@ +-- Regression for the StorageMerge alias-output-naming fix. +-- The bug: `Nested::splitName(name, reverse=true)` (used before this fix to strip the +-- analyzer's `__tableN.` prefix from header column names) splits on the LAST dot, so for +-- an analyzer identifier like `__table1.\`n.a\`` (a dotted column name wrapped in backticks +-- by the analyzer) it returns the suffix `a\`` instead of `n.a`, leaving the +-- `logical_name_to_header_name` map with broken keys. The lookup for `alias.name == "n.a"` +-- then misses, the alias output falls back to the bare name `n.a`, and the downstream +-- header-reconciliation step fills the expected `__table1.\`n.a\`` column with type +-- defaults (zeros). Silent wrong data. +-- +-- Repro shape: Merge declares dotted column names explicitly (typical when matching a +-- schema with Nested-style names), underlying storage has those columns as ALIAS, and +-- the Distributed routing forces analyzer-prefixed names in the Merge level. Using a +-- two-shard cluster with prefer_localhost_replica=0 reliably reproduces. 
+ +DROP TABLE IF EXISTS test_04286_dotted_alias_local; +DROP TABLE IF EXISTS test_04286_dotted_alias_dist; +DROP TABLE IF EXISTS test_04286_dotted_alias_merge; + +CREATE TABLE test_04286_dotted_alias_local +( + id UInt32, + `n.a` UInt32 ALIAS id * 10, + `m.b` UInt32 ALIAS id * 100 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO test_04286_dotted_alias_local VALUES (1), (2); + +CREATE TABLE test_04286_dotted_alias_dist AS test_04286_dotted_alias_local +ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), test_04286_dotted_alias_local); + +CREATE TABLE test_04286_dotted_alias_merge +( + id UInt32, + `n.a` UInt32, + `m.b` UInt32 +) +ENGINE = Merge(currentDatabase(), '^test_04286_dotted_alias_dist$'); + +SELECT 'local'; +SELECT id, `n.a`, `m.b` +FROM test_04286_dotted_alias_local +GROUP BY id, `n.a`, `m.b` +ORDER BY id; + +SELECT 'merge_prefer0'; +SELECT id, `n.a`, `m.b` +FROM test_04286_dotted_alias_merge +GROUP BY id, `n.a`, `m.b` +ORDER BY id +SETTINGS prefer_localhost_replica = 0; + +SELECT 'merge_prefer1'; +SELECT id, `n.a`, `m.b` +FROM test_04286_dotted_alias_merge +GROUP BY id, `n.a`, `m.b` +ORDER BY id +SETTINGS prefer_localhost_replica = 1; + +DROP TABLE test_04286_dotted_alias_merge; +DROP TABLE test_04286_dotted_alias_dist; +DROP TABLE test_04286_dotted_alias_local; From b17801431930a39df77fa511e7a1cbe2bf9b548c Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 29 May 2026 13:53:41 +0200 Subject: [PATCH 31/32] Planner: include column-name lists in Position-fallback LOG_TEST diagnostic (cherry picked from commit bfbf06fa04a07daafc115801ef81ae7deb66ff57) --- src/Planner/Utils.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 6f7340e5b414..36f8cafa312d 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -745,13 +747,13 @@ ActionsDAG makeConvertingActionsPreferNameThenPosition( 
if (mode == ActionsDAG::MatchColumnsMode::Position) { - static auto log = getLogger("ConversionDiag"); + static auto log = getLogger("Planner"); LOG_TEST( log, - "Position match at {} (names not matchable as a set): source_count={} result_count={}", + "Position match at {} (names not matchable as a set): source=[{}] result=[{}]", location, - source_columns.size(), - result_columns.size()); + Block(source_columns).dumpNames(), + Block(result_columns).dumpNames()); } return ActionsDAG::makeConvertingActions( From 2cb9bd962a712702b4c34f21b8bcb969da897b8d Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 29 May 2026 15:25:59 +0200 Subject: [PATCH 32/32] StorageMerge: accept raw-dot dotted ALIAS identifiers in schema lookup The previous commit assumed analyzer identifiers for dotted Merge columns are backtick-quoted (e.g. `__tableN.\`n.a\``). The 26.3 analyzer emits them raw (`__tableN.n.a`). 04286_dotted_alias_merge_over_distributed exposed this: header matching that accepted only the backtick-quoted form missed the raw-dot identifier, so the alias output was emitted under the wrong name and `addMissingDefaults` filled the expected column with type defaults (silent wrong data: `n.a` came back as 0). Try both forms in the suffix match: `.C` and (only for dotted column names) `.\`C\``. Either match accepts the analyzer identifier as corresponding to the Merge column `C`. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Mikhail Filimonov --- src/Storages/StorageMerge.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b00b5bc74109..be02782ce62c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1641,11 +1641,15 @@ void ReadFromMerge::convertAndFilterSourceStream( /// Otherwise look for the `.` or `.\`\`` shape where C is /// a declared Merge column name. Skip any column that doesn't match a known /// Merge column (e.g.
intermediate expression outputs of the child plan). + /// The analyzer's quoting of dotted column names varies between branches: some + /// produce `__tableN.\`n.a\`` (backtick-quoted), others `__tableN.n.a` (raw). + /// Try both. for (const auto & merge_column : merge_columns.getAll()) { bool dotted = merge_column.name.find('.') != String::npos; - String want = dotted ? ("." + backQuote(merge_column.name)) : ("." + merge_column.name); - if (column.name.ends_with(want)) + String want_raw = "." + merge_column.name; + String want_quoted = dotted ? ("." + backQuote(merge_column.name)) : want_raw; + if (column.name.ends_with(want_quoted) || (dotted && column.name.ends_with(want_raw))) { if (!plain_to_identifier.emplace(merge_column.name, column.name).second) ambiguous.insert(merge_column.name);