From cbb4163c5c2d77d419e390051f3187bb643e18c3 Mon Sep 17 00:00:00 2001 From: Adriano dos Santos Fernandes Date: Tue, 2 Jun 2026 08:22:03 -0300 Subject: [PATCH 1/2] Feature #9047 - Add GROUPS unit for window frames --- doc/sql.extensions/README.window_functions.md | 12 +- src/common/ParserTokens.h | 1 + src/dsql/ExprNodes.h | 4 +- src/dsql/parse.y | 5 + src/include/firebird/impl/msg/jrd.h | 2 +- src/jrd/RecordSourceNodes.cpp | 1 + src/jrd/recsrc/RecordSource.h | 3 + src/jrd/recsrc/WindowedStream.cpp | 114 ++++++++++++++++-- 8 files changed, 127 insertions(+), 15 deletions(-) diff --git a/doc/sql.extensions/README.window_functions.md b/doc/sql.extensions/README.window_functions.md index 600c505c678..6c4ab628b33 100644 --- a/doc/sql.extensions/README.window_functions.md +++ b/doc/sql.extensions/README.window_functions.md @@ -23,7 +23,7 @@ Syntax: ORDER BY <expr> [<direction>] [<nulls placement>] [, <expr> [<direction>] [<nulls placement>]] ... <window frame> ::= - {RANGE | ROWS} <window frame extent> [<window frame exclusion>] + {RANGE | ROWS | GROUPS} <window frame extent> [<window frame exclusion>] <window frame extent> ::= {<window frame start> | <window frame between>} @@ -272,17 +272,19 @@ And the result set: It's possible to specify the frame that some window functions work. The frame is divided in three piecies: unit, start bound and end bound. -The unit `RANGE` or `ROWS` defines how the bounds `<expr> PRECEDING`, `<expr> FOLLOWING` and `CURRENT ROW` works. +The unit `RANGE`, `ROWS` or `GROUPS` defines how the bounds `<expr> PRECEDING`, `<expr> FOLLOWING` and `CURRENT ROW` work. With `RANGE`, the `ORDER BY` should specify only one expression, and that expression should be of a numeric, date, time or timestamp type. For `<expr> PRECEDING` and `<expr> FOLLOWING` bounds, `<expr>` is respectively subtracted or added to the order expression, and for `CURRENT ROW` only the order expression is used. Then, all rows (inside the partition) between the bounds are considered part of the resulting window frame. With `ROWS`, order expressions is not limited by number or types. In this case, `<expr> PRECEDING`, `<expr> FOLLOWING` and `CURRENT ROW` relates to the row position under the partition, and not to the order keys values. 
-`UNBOUNDED PRECEDING` and `UNBOUNDED FOLLOWING` work identically with `RANGE` and `ROWS`. `UNBOUNDED PRECEDING` looks for the first row and `UNBOUNDED FOLLOWING` the last one, always inside the partition. +With `GROUPS`, order expressions are not limited by number or types. In this case, `<expr> PRECEDING`, `<expr> FOLLOWING` and `CURRENT ROW` relate to peer groups under the partition. Peers are rows with the same `ORDER BY` values. If there is no `ORDER BY`, all rows in the partition are peers and the partition has one group. + +`UNBOUNDED PRECEDING` and `UNBOUNDED FOLLOWING` work identically with `RANGE`, `ROWS` and `GROUPS`. `UNBOUNDED PRECEDING` looks for the first row and `UNBOUNDED FOLLOWING` the last one, always inside the partition. The frame syntax with `<window frame start>` specifies the start frame, with the end frame being `CURRENT ROW`. -The optional frame exclusion clause (FB 6.0) is part of the frame clause and removes rows from the frame after its bounds have been evaluated. It can only be specified together with an explicit `ROWS` or `RANGE` frame. `EXCLUDE NO OTHERS` is the default and keeps the frame unchanged. +The optional frame exclusion clause (FB 6.0) is part of the frame clause and removes rows from the frame after its bounds have been evaluated. It can only be specified together with an explicit `ROWS`, `RANGE` or `GROUPS` frame. `EXCLUDE NO OTHERS` is the default and keeps the frame unchanged. `EXCLUDE CURRENT ROW` removes only the current row from the frame. @@ -377,7 +379,7 @@ Some window functions discard frames and frame exclusions. `ROW_NUMBER`, `LAG` a ## 6. Named windows (FB 4.0) -To avoid write repetitive or confusing expressions, windows can be named in a query with the `WINDOW` clause. A named window can be used in `OVER` to reference a window definition and can also be used as a base window of another named or inline (`OVER`) window. A window with frame (`ROWS` or `RANGE` clauses) can't be used as base window (but can be used with `OVER `). 
And a window with a base window can't have `PARTITION BY` nor can override `ORDER BY` of a base window. +To avoid writing repetitive or confusing expressions, windows can be named in a query with the `WINDOW` clause. A named window can be used in `OVER` to reference a window definition and can also be used as a base window of another named or inline (`OVER`) window. A window with frame (`ROWS`, `RANGE` or `GROUPS` clauses) can't be used as base window (but can be used with `OVER `). And a window with a base window can't have `PARTITION BY` nor can override `ORDER BY` of a base window. In a query with multiple `SELECT` and `WINDOW` clauses (for example, with subqueries), the window name scope is bound only to its query context, that is, a window name from an inner or outer context could not be used in another context. As such, the same window name definition could be used at different contexts. diff --git a/src/common/ParserTokens.h b/src/common/ParserTokens.h index 8dfd004bc2e..3b2d30246da 100644 --- a/src/common/ParserTokens.h +++ b/src/common/ParserTokens.h @@ -247,6 +247,7 @@ PARSER_TOKEN(TOK_GRANT, "GRANT", false) PARSER_TOKEN(TOK_GRANTED, "GRANTED", true) PARSER_TOKEN(TOK_GREATEST, "GREATEST", false) PARSER_TOKEN(TOK_GROUP, "GROUP", false) +PARSER_TOKEN(TOK_GROUPS, "GROUPS", false) PARSER_TOKEN(TOK_HASH, "HASH", true) PARSER_TOKEN(TOK_HAVING, "HAVING", false) PARSER_TOKEN(TOK_HEX_DECODE, "HEX_DECODE", true) diff --git a/src/dsql/ExprNodes.h b/src/dsql/ExprNodes.h index 2f167e27fec..1929ea36e05 100644 --- a/src/dsql/ExprNodes.h +++ b/src/dsql/ExprNodes.h @@ -1492,8 +1492,8 @@ class WindowClause final : public DsqlNode FORMAT %token GENERATE_SERIES %token GREATEST +%token GROUPS %token LEAST %token LISTAGG %token LTRIM @@ -9003,6 +9004,10 @@ window_frame_extent { $$ = newNode(WindowClause::FrameExtent::Unit::RANGE); } window_frame($2) { $$ = $2; } + | GROUPS + { $$ = newNode(WindowClause::FrameExtent::Unit::GROUPS); } + window_frame($2) + { $$ = $2; } | ROWS { $$ 
= newNode(WindowClause::FrameExtent::Unit::ROWS); } window_frame($2) diff --git a/src/include/firebird/impl/msg/jrd.h b/src/include/firebird/impl/msg/jrd.h index 340fd1fafdb..a7bca8444db 100644 --- a/src/include/firebird/impl/msg/jrd.h +++ b/src/include/firebird/impl/msg/jrd.h @@ -797,7 +797,7 @@ FB_IMPL_MSG(JRD, 795, no_cursor, -901, "07", "005", "Cannot open cursor for non- FB_IMPL_MSG(JRD, 796, dsql_window_incompat_frames, -104, "42", "000", "If specifies @1, then shall not specify @2") FB_IMPL_MSG(JRD, 797, dsql_window_range_multi_key, -104, "42", "000", "RANGE based window with {PRECEDING | FOLLOWING} cannot have ORDER BY with more than one value") FB_IMPL_MSG(JRD, 798, dsql_window_range_inv_key_type, -104, "42", "000", "RANGE based window with PRECEDING/FOLLOWING must have a single ORDER BY key of numerical, date, time or timestamp types") -FB_IMPL_MSG(JRD, 799, dsql_window_frame_value_inv_type, -104, "42", "000", "Window RANGE/ROWS PRECEDING/FOLLOWING value must be of a numerical type") +FB_IMPL_MSG(JRD, 799, dsql_window_frame_value_inv_type, -104, "42", "000", "Window RANGE/ROWS/GROUPS PRECEDING/FOLLOWING value must be of a numerical type") FB_IMPL_MSG(JRD, 800, window_frame_value_invalid, -833, "42", "000", "Invalid PRECEDING or FOLLOWING offset in window function: cannot be negative") FB_IMPL_MSG(JRD, 801, dsql_window_not_found, -833, "42", "000", "Window @1 not found") FB_IMPL_MSG(JRD, 802, dsql_window_cant_overr_part, -833, "42", "000", "Cannot use PARTITION BY clause while overriding the window @1") diff --git a/src/jrd/RecordSourceNodes.cpp b/src/jrd/RecordSourceNodes.cpp index d1648297484..e89bb17506c 100644 --- a/src/jrd/RecordSourceNodes.cpp +++ b/src/jrd/RecordSourceNodes.cpp @@ -2647,6 +2647,7 @@ void WindowSourceNode::parseWindow(thread_db* tdbb, CompilerScratch* csb) { case WindowClause::FrameExtent::Unit::RANGE: case WindowClause::FrameExtent::Unit::ROWS: + case WindowClause::FrameExtent::Unit::GROUPS: break; default: diff --git 
a/src/jrd/recsrc/RecordSource.h b/src/jrd/recsrc/RecordSource.h index 712505e9b0d..e85064bc5d8 100644 --- a/src/jrd/recsrc/RecordSource.h +++ b/src/jrd/recsrc/RecordSource.h @@ -1064,6 +1064,9 @@ namespace Jrd SINT64 locateFrameRange(thread_db* tdbb, Request* request, Impure* impure, const Frame* frame, const dsc* offsetDesc, SINT64 position) const; + SINT64 locateFrameGroups(thread_db* tdbb, Request* request, Impure* impure, + const Frame* frame, const impure_value_ex* offsetValue, SINT64 position, + bool startFrame) const; private: NestConst m_order; diff --git a/src/jrd/recsrc/WindowedStream.cpp b/src/jrd/recsrc/WindowedStream.cpp index c5990ecaee0..1094976a070 100644 --- a/src/jrd/recsrc/WindowedStream.cpp +++ b/src/jrd/recsrc/WindowedStream.cpp @@ -225,7 +225,9 @@ WindowedStream::WindowedStream(thread_db* tdbb, Optimizer* opt, { // While here, verify not supported functions/clauses. - if (window.order || window.frameExtent->unit == FrameExtent::Unit::ROWS) + if (window.order || + window.frameExtent->unit == FrameExtent::Unit::ROWS || + window.frameExtent->unit == FrameExtent::Unit::GROUPS) { for (const auto& source : window.map->sourceList) { @@ -241,7 +243,9 @@ WindowedStream::WindowedStream(thread_db* tdbb, Optimizer* opt, if (arg) { string msg; - msg.printf("%s is not supported in windows with ORDER BY or frame by ROWS clauses", arg); + msg.printf( + "%s is not supported in windows with ORDER BY or frame by ROWS/GROUPS clauses", + arg); status_exception::raise( Arg::Gds(isc_wish_list) << @@ -660,7 +664,7 @@ bool WindowedStream::WindowStream::internalGetRecord(thread_db* tdbb) const if (m_frameExtent->frame1->value && !(m_invariantOffsets & 0x1)) getFrameValue(tdbb, request, m_frameExtent->frame1, &impure->startOffset); - // {range | rows} between unbounded preceding and ... + // {range | rows | groups} between unbounded preceding and ... 
// (no order by) range if ((m_frameExtent->frame1->bound == Frame::Bound::PRECEDING && !m_frameExtent->frame1->value) || (!m_order && m_frameExtent->unit == FrameExtent::Unit::RANGE)) @@ -673,12 +677,26 @@ bool WindowedStream::WindowStream::internalGetRecord(thread_db* tdbb) const { impure->windowBlock.startPosition = position; } + // groups between current row and ... + else if (m_frameExtent->unit == FrameExtent::Unit::GROUPS && + m_frameExtent->frame1->bound == Frame::Bound::CURRENT_ROW) + { + impure->windowBlock.startPosition = locateFrameGroups(tdbb, request, impure, + m_frameExtent->frame1, nullptr, position, true); + } // rows between {preceding | following} and ... else if (m_frameExtent->unit == FrameExtent::Unit::ROWS && m_frameExtent->frame1->value) { impure->windowBlock.startPosition = position + impure->startOffset.vlux_count; } + // groups between {preceding | following} and ... + else if (m_frameExtent->unit == FrameExtent::Unit::GROUPS && + m_frameExtent->frame1->value) + { + impure->windowBlock.startPosition = locateFrameGroups(tdbb, request, impure, + m_frameExtent->frame1, &impure->startOffset, position, true); + } // range between current row and ... else if (m_frameExtent->unit == FrameExtent::Unit::RANGE && m_frameExtent->frame1->bound == Frame::Bound::CURRENT_ROW) @@ -703,7 +721,7 @@ bool WindowedStream::WindowStream::internalGetRecord(thread_db* tdbb) const if (m_frameExtent->frame2->value && !(m_invariantOffsets & 0x2)) getFrameValue(tdbb, request, m_frameExtent->frame2, &impure->endOffset); - // {range | rows} between ... and unbounded following + // {range | rows | groups} between ... 
and unbounded following // (no order by) range if ((m_frameExtent->frame2->bound == Frame::Bound::FOLLOWING && !m_frameExtent->frame2->value) || (!m_order && m_frameExtent->unit == FrameExtent::Unit::RANGE)) @@ -716,12 +734,26 @@ bool WindowedStream::WindowStream::internalGetRecord(thread_db* tdbb) const { impure->windowBlock.endPosition = position; } + // groups between ... and current row + else if (m_frameExtent->unit == FrameExtent::Unit::GROUPS && + m_frameExtent->frame2->bound == Frame::Bound::CURRENT_ROW) + { + impure->windowBlock.endPosition = locateFrameGroups(tdbb, request, impure, + m_frameExtent->frame2, nullptr, position, false); + } // rows between ... and {preceding | following} else if (m_frameExtent->unit == FrameExtent::Unit::ROWS && m_frameExtent->frame2->value) { impure->windowBlock.endPosition = position + impure->endOffset.vlux_count; } + // groups between ... and {preceding | following} + else if (m_frameExtent->unit == FrameExtent::Unit::GROUPS && + m_frameExtent->frame2->value) + { + impure->windowBlock.endPosition = locateFrameGroups(tdbb, request, impure, + m_frameExtent->frame2, &impure->endOffset, position, false); + } // range between ... 
and current row else if (m_frameExtent->unit == FrameExtent::Unit::RANGE && m_frameExtent->frame2->bound == Frame::Bound::CURRENT_ROW) @@ -765,11 +797,14 @@ bool WindowedStream::WindowStream::internalGetRecord(thread_db* tdbb) const if (m_exclusion == Exclusion::NO_OTHERS && ((m_frameExtent->frame1->bound == Frame::Bound::PRECEDING && !m_frameExtent->frame1->value && m_frameExtent->frame2->bound == Frame::Bound::FOLLOWING && !m_frameExtent->frame2->value) || - (m_frameExtent->unit == FrameExtent::Unit::RANGE && !m_order))) + ((m_frameExtent->unit == FrameExtent::Unit::RANGE || + m_frameExtent->unit == FrameExtent::Unit::GROUPS) && !m_order))) { impure->rangePending = MAX(0, impure->windowBlock.endPosition - position); } - else if (m_exclusion == Exclusion::NO_OTHERS && m_frameExtent->unit == FrameExtent::Unit::RANGE) + else if (m_exclusion == Exclusion::NO_OTHERS && + (m_frameExtent->unit == FrameExtent::Unit::RANGE || + m_frameExtent->unit == FrameExtent::Unit::GROUPS)) { SINT64 rangePos = position; cacheValues(tdbb, request, &m_order->expressions, impure->orderValues, @@ -1006,7 +1041,8 @@ void WindowedStream::WindowStream::getFrameValue(thread_db* tdbb, Request* reque error = true; else { - if (m_frameExtent->unit == FrameExtent::Unit::ROWS) + if (m_frameExtent->unit == FrameExtent::Unit::ROWS || + m_frameExtent->unit == FrameExtent::Unit::GROUPS) { // Purposedly used 32-bit here. So long distance will complicate things for no gain. 
impureValue->vlux_count = MOV_get_long(tdbb, desc, 0); @@ -1044,6 +1080,14 @@ WindowedStream::WindowStream::Block WindowedStream::WindowStream::getPeerBlock( return peerBlock; } + if ((SINT64) m_next->getPosition(request) != position + 1) + { + m_next->locate(tdbb, position); + + if (!m_next->getRecord(tdbb)) + fb_assert(false); + } + cacheValues(tdbb, request, &m_order->expressions, impure->orderValues, DummyAdjustFunctor()); while (peerBlock.startPosition > impure->partitionBlock.startPosition) @@ -1145,6 +1189,62 @@ bool WindowedStream::WindowStream::isExcluded(SINT64 position, const Block& excl position >= exclusion2.startPosition && position <= exclusion2.endPosition); } +SINT64 WindowedStream::WindowStream::locateFrameGroups(thread_db* tdbb, Request* request, + Impure* impure, const Frame* frame, const impure_value_ex* offsetValue, SINT64 position, + bool startFrame) const +{ + SINT64 offset = 0; + + if (offsetValue) + { + offset = MOV_get_long(tdbb, &offsetValue->vlu_desc, 0); + + if (frame->bound == Frame::Bound::PRECEDING) + offset = -offset; + } + + Block groupBlock = getPeerBlock(tdbb, request, impure, position); + + auto restoreAndReturn = [&] (SINT64 result) + { + m_next->locate(tdbb, position); + + if (!m_next->getRecord(tdbb)) + fb_assert(false); + + return result; + }; + + if (offset < 0) + { + for (SINT64 pending = -offset; pending > 0; --pending) + { + if (groupBlock.startPosition <= impure->partitionBlock.startPosition) + { + return restoreAndReturn(startFrame ? + impure->partitionBlock.startPosition : impure->partitionBlock.startPosition - 1); + } + + groupBlock = getPeerBlock(tdbb, request, impure, groupBlock.startPosition - 1); + } + } + else if (offset > 0) + { + for (SINT64 pending = offset; pending > 0; --pending) + { + if (groupBlock.endPosition >= impure->partitionBlock.endPosition) + { + return restoreAndReturn(startFrame ? 
+ impure->partitionBlock.endPosition + 1 : impure->partitionBlock.endPosition); + } + + groupBlock = getPeerBlock(tdbb, request, impure, groupBlock.endPosition + 1); + } + } + + return restoreAndReturn(startFrame ? groupBlock.startPosition : groupBlock.endPosition); +} + SINT64 WindowedStream::WindowStream::locateFrameRange(thread_db* tdbb, Request* request, Impure* impure, const Frame* frame, const dsc* offsetDesc, SINT64 position) const { From adb717e2461f84666286572e8ed9a7f9bd415e55 Mon Sep 17 00:00:00 2001 From: Adriano dos Santos Fernandes Date: Tue, 2 Jun 2026 22:16:31 -0300 Subject: [PATCH 2/2] Add GROUPS to keyword_or_column --- src/dsql/parse.y | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dsql/parse.y b/src/dsql/parse.y index a2d2331f987..ca070f43707 100644 --- a/src/dsql/parse.y +++ b/src/dsql/parse.y @@ -4887,6 +4887,7 @@ keyword_or_column | CALL | CURRENT_SCHEMA | GREATEST + | GROUPS | LEAST | LISTAGG | LTRIM