From d39fb5e8168510742e6df54a79dc6eb3cc7be56b Mon Sep 17 00:00:00 2001 From: Anton Date: Sat, 13 Jun 2026 16:07:45 +0200 Subject: [PATCH 1/3] fix(iter): remove broken auto-swap in map-left/map-right The auto-swap heuristic in map-left/map-right fired when the iterated slot was scalar and the fixed slot was a vector, and in that branch each function performed the OTHER variant's semantics (the author's own comment flagged it: "but we want fn(fixed=scalar, elem)"). The plain paths already match the documented convention (map-left fixes the left arg and iterates the right; map-right fixes the right and iterates the left), and map_iterate handles scalar broadcast via its scalar early-return. Removing the auto-swap blocks makes the two ops clean mirror images with no scalar/vector special-casing. --- src/ops/collection.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/ops/collection.c b/src/ops/collection.c index fa99b530..f9f5410a 100644 --- a/src/ops/collection.c +++ b/src/ops/collection.c @@ -2200,38 +2200,26 @@ static ray_t* map_iterate(ray_t* fn, ray_t* fixed, ray_t* vec, int fixed_is_left return out; } -/* (map-left fn fixed vec) → apply fn(fixed, elem) for each elem in vec. - * If vec is scalar but fixed is a vector, auto-swap (iterate over fixed). */ +/* (map-left fn left right) → fix the LEFT arg, iterate over the right: + * apply fn(left, right_i) for each element of right. If right is scalar this + * collapses to a single fn(left, right) (handled by map_iterate). */ ray_t* ray_map_left_fn(ray_t** args, int64_t n) { if (n != 3) return ray_error("domain", NULL); ray_t* fn = args[0]; - ray_t* fixed = args[1]; - ray_t* vec = args[2]; - - /* Auto-detect: if vec is scalar but fixed is a vector, swap roles */ - if (!ray_is_vec(vec) && vec->type != RAY_LIST && - (ray_is_vec(fixed) || fixed->type == RAY_LIST)) { - return map_iterate(fn, vec, fixed, 0); /* fn(elem_of_fixed, vec) — but we want fn(fixed=scalar, elem) */ - } - - return map_iterate(fn, fixed, vec, 1); /* fn(fixed, elem) */ + ray_t* left = args[1]; + ray_t* right = args[2]; + return map_iterate(fn, left, right, 1); /* fn(left, right_i) */ } -/* (map-right fn vec fixed) → apply fn(elem, fixed) for each elem in vec. - * If vec is scalar but fixed is a vector, auto-swap (iterate over fixed). */ +/* (map-right fn left right) → fix the RIGHT arg, iterate over the left: + * apply fn(left_i, right) for each element of left. If left is scalar this + * collapses to a single fn(left, right) (handled by map_iterate). */ ray_t* ray_map_right_fn(ray_t** args, int64_t n) { if (n != 3) return ray_error("domain", NULL); ray_t* fn = args[0]; - ray_t* vec = args[1]; - ray_t* fixed = args[2]; - - /* Auto-detect: if vec is scalar but fixed is a vector, swap roles */ - if (!ray_is_vec(vec) && vec->type != RAY_LIST && - (ray_is_vec(fixed) || fixed->type == RAY_LIST)) { - return map_iterate(fn, vec, fixed, 1); /* fn(vec_scalar, elem_of_fixed) */ - } - - return map_iterate(fn, fixed, vec, 0); /* fn(elem, fixed) */ + ray_t* left = args[1]; + ray_t* right = args[2]; + return map_iterate(fn, right, left, 0); /* fn(left_i, right) */ } /* ══════════════════════════════════════════ From 5db077d5b052ff592c7db1564ee9b78865f69860 Mon Sep 17 00:00:00 2001 From: Anton Date: Sat, 13 Jun 2026 16:30:42 +0200 Subject: [PATCH 2/3] fix(table): typeless empty () column adopts type on first insert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An empty generic list () used as a column was coerced to I64 in ray_table_fn, so the column rejected any non-integer atom on insert (float/sym/str/date → error: type). q/kdb treats () as a typeless column that adopts the type of the first inserted value. - ray_table_fn: an empty () list column is now stored as an empty RAY_LIST (typeless) instead of an empty I64 vector. - insert: when the target column is a typeless empty RAY_LIST, derive the storage type from the inserted value (atom → its type; typed vec or generic atom-list → element type with I64→F64 promotion; nested payload → stays LIST). The table has 0 rows in this case, so there is nothing to copy and the derived type drives the new column. Also corrects test/rfl/table/query.rfl:11, whose expected empty-select result wrote (list) for the STR Tape column; that only matched before because () was coerced to an empty I64 vec and empty vectors compared equal regardless of element type. The column is STR, so its empty form is (as 'STR []). Adds regression tests in table/update.rfl for first-insert type adoption across F64/SYM/STR/DATE/TIME and repeated typed inserts. --- src/lang/eval.c | 11 +++++++++++ src/ops/query.c | 28 ++++++++++++++++++++++++++++ test/rfl/table/query.rfl | 2 +- test/rfl/table/update.rfl | 11 +++++++++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/lang/eval.c b/src/lang/eval.c index 19c31c8c..a0ec8d0c 100644 --- a/src/lang/eval.c +++ b/src/lang/eval.c @@ -1330,6 +1330,17 @@ ray_t* ray_table_fn(ray_t* names, ray_t* cols) { else if (nrows != expected_rows) { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("domain", NULL); } + /* Empty generic list → typeless empty column: keep it as a RAY_LIST + * so its storage type is adopted from the first inserted value + * (q-style () column), rather than defaulting to I64. */ + if (nrows == 0) { + ray_retain(col_src); + tbl = ray_table_add_col(tbl, name_id, col_src); + ray_release(col_src); + if (RAY_IS_ERR(tbl)) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return tbl; } + continue; + } + ray_t** row_elems = (ray_t**)ray_data(col_src); /* If the LIST contains non-atom values (e.g. nested vectors for an diff --git a/src/ops/query.c b/src/ops/query.c index 092d9300..b5d67bd2 100644 --- a/src/ops/query.c +++ b/src/ops/query.c @@ -9197,6 +9197,28 @@ ray_t* ray_xbar_fn(ray_t* col, ray_t* bucket) { * Update, Insert, Upsert * ══════════════════════════════════════════ */ +/* Derive the storage type for a typeless (empty RAY_LIST) column from the + * first value inserted into it — q-style () columns adopt their type on the + * first insert. Returns the RAY_* column type, or RAY_LIST when the payload + * is itself nested (non-atom elements → a genuine list column). */ +static int8_t typeless_col_type(ray_t* payload) { + if (!payload) return RAY_I64; /* null row → default I64 */ + if (ray_is_atom(payload)) return -payload->type; + if (ray_is_vec(payload)) return payload->type; /* typed vec → splice */ + if (payload->type == RAY_LIST) { + int64_t m = ray_len(payload); + if (m == 0) return RAY_LIST; /* empty payload → stay typeless */ + ray_t** e = (ray_t**)ray_data(payload); + if (e[0] && !ray_is_atom(e[0])) return RAY_LIST; /* nested cells */ + int8_t t = e[0] ? (int8_t)(-e[0]->type) : RAY_I64; + if (t == RAY_I64) /* promote to F64 if any element is float */ + for (int64_t k = 0; k < m; k++) + if (e[k] && e[k]->type == -RAY_F64) { t = RAY_F64; break; } + return t; + } + return RAY_I64; +} + /* Helper: convert a Rayfall list of atoms into a typed column vector by * appending to an existing column (for insert/upsert). */ static ray_t* append_atom_to_col(ray_t* col_vec, ray_t* atom) { @@ -10412,6 +10434,12 @@ ray_t* ray_insert(ray_t** args, int64_t n) { ray_t* orig_col = ray_table_get_col_idx(tbl, c); int8_t ct = orig_col->type; + /* Typeless empty column (an empty () list, adopt-on-first-insert). + * The table has 0 rows here, so there is nothing to copy and the + * derived type drives the new column's storage. */ + if (ct == RAY_LIST && ray_len(orig_col) == 0) + ct = typeless_col_type(row_elems[c]); + ray_t* new_col = ray_vec_new(ct, nrows + 1); if (RAY_IS_ERR(new_col)) { ray_release(result); return new_col; } diff --git a/test/rfl/table/query.rfl b/test/rfl/table/query.rfl index 40784fc0..236f7381 100644 --- a/test/rfl/table/query.rfl +++ b/test/rfl/table/query.rfl @@ -8,7 +8,7 @@ (set n 10)(set gds (take (guid 3) n))(set t (table [OrderId Symbol Price Size Tape Timestamp](list gds(take [apll good msfk ibmd amznt fbad baba] n)(as 'F64 (til n))(take (+ 1 (til 3)) n)(map (fn [x] (as 'STR x)) (take (til 10) n))(as 'TIMESTAMP (til n)))))null -- null (select {from: t by: Symbol}) -- (table [Symbol OrderId Price Size Tape Timestamp](list [apll good msfk ibmd amznt fbad baba](at gds (til 7)) [0 1 2 3 4 5 6.0] [1 2 3 1 2 3 1](list "0""1""2""3""4""5""6")(at (at t 'Timestamp) (til 7)))) (select {from: t by: Symbol where: (== Price 3)}) -- (table [Symbol OrderId Price Size Tape Timestamp](list [ibmd] (at gds 3) [3.00] [1] (list "3") [2000.01.01D00:00:00.000000003])) -(select {from: t by: Symbol where: (== Price 99)}) -- (table [Symbol OrderId Price Size Tape Timestamp](list (as 'SYMBOL []) (as 'GUID []) (as 'F64 []) (as 'I64 []) (list) (as 'TIMESTAMP []))) +(select {from: t by: Symbol where: (== Price 99)}) -- (table [Symbol OrderId Price Size Tape Timestamp](list (as 'SYMBOL []) (as 'GUID []) (as 'F64 []) (as 'I64 []) (as 'STR []) (as 'TIMESTAMP []))) (select {s: (sum Price) from: t by: Symbol}) -- (table [Symbol s](list [apll good msfk ibmd amznt fbad baba][7.00 9.00 11.00 3.00 4.00 5.00 6.00])) ;; by: (xbar Ts 10000) groups rows into 10-second buckets; the Ts ;; key column surfaces the bucket start (09:00:00, 09:00:10), not diff --git a/test/rfl/table/update.rfl b/test/rfl/table/update.rfl index adc240fe..88dfff66 100644 --- a/test/rfl/table/update.rfl +++ b/test/rfl/table/update.rfl @@ -172,6 +172,17 @@ t -- (table [ID Name Value] (list [1 2] [alice bob] [10.0 20.0])) (insert (table [a] (list (as 'Timestamp (list)))) (list 'bad)) !- type (insert (table [a] (list (as 'Date (list)))) (list 1)) !- type +;; Typeless empty () column: adopts the type of the first inserted value +;; (q-style () column) instead of defaulting to I64 and rejecting non-ints. +(insert (table [a] (list (list))) (list 3.14)) -- (table [a] (list [3.14])) +(insert (table [a] (list (list))) (list 'x)) -- (table [a] (list ['x])) +(insert (table [a] (list (list))) (list "s")) -- (table [a] (list (list "s"))) +(insert (table [a] (list (list))) (list 2020.01.01)) -- (table [a] (list [2020.01.01])) +(insert (table [a] (list (list))) (list 12:30:00.0)) -- (table [a] (list [12:30:00.000])) +;; Type is adopted once, then enforced on subsequent inserts. +(set tl (table [a] (list (list))))(insert 'tl (list 3.14))(insert 'tl (list 2.71))tl -- (table [a] (list [3.14 2.71])) +(set ts (table [a] (list (list))))(insert 'ts (list 'a))(insert 'ts (list 'b))ts -- (table [a] (list ['a 'b])) + ;; ══════════════════════════════════════════════════════════════════ ;; UPDATE scalar-broadcast into typed columns — regression for the ;; broadcast sites that copied the scalar through an 8-byte `elem` From 306d46f260c13b83c6516eeea136db4c553f4210 Mon Sep 17 00:00:00 2001 From: Anton Date: Sat, 13 Jun 2026 16:49:31 +0200 Subject: [PATCH 3/3] fix(filter): preserve LIST columns in empty (none-pass) filter results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sel_compact's none-pass branch (a WHERE that matches 0 rows) built each empty result column with ray_vec_new(ct, 0). But RAY_LIST == 0 and ray_vec_new rejects type <= 0, returning an error; the 'if (nc && !RAY_IS_ERR(nc))' guard then silently skipped the column. So an empty select/where over a table with a nested LIST column dropped that column from the 0-row result, while a non-empty result (which goes through the gather path that handles RAY_LIST explicitly) kept it — the result schema depended on how many rows matched. Use ray_list_new(0) for LIST columns, mirroring the gather path. Adds regression tests in ops/filter.rfl asserting the result schema is identical for empty and non-empty filters, including through a by-group. Pre-existing bug, surfaced while auditing empty-column handling. --- src/ops/filter.c | 5 ++++- test/rfl/ops/filter.rfl | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/ops/filter.c b/src/ops/filter.c index 9014e206..b1b6dfe9 100644 --- a/src/ops/filter.c +++ b/src/ops/filter.c @@ -554,7 +554,10 @@ ray_t* sel_compact(ray_graph_t* g, ray_t* tbl, ray_t* sel) { if (!col) continue; int8_t ct = RAY_IS_PARTED(col->type) ? (int8_t)RAY_PARTED_BASETYPE(col->type) : col->type; - ray_t* nc = ray_vec_new(ct, 0); + /* RAY_LIST == 0; ray_vec_new rejects type <= 0, so a LIST + * column needs ray_list_new — otherwise the empty result + * silently drops it and the 0-row schema loses a column. */ + ray_t* nc = (ct == RAY_LIST) ? ray_list_new(0) : ray_vec_new(ct, 0); if (nc && !RAY_IS_ERR(nc)) { nc->len = 0; empty = ray_table_add_col(empty, ray_table_col_name(tbl, c), nc); diff --git a/test/rfl/ops/filter.rfl b/test/rfl/ops/filter.rfl index 54fcd34c..1649a148 100644 --- a/test/rfl/ops/filter.rfl +++ b/test/rfl/ops/filter.rfl @@ -74,6 +74,19 @@ (count (select {from: Tstr where: (> k 999)})) -- 0 (count (select {from: Tstr where: (> k 0)})) -- 5 +;; ────────────── LIST column survives an empty filter (sel_compact none-pass) ────────────── +;; The none-pass branch built empty columns with ray_vec_new(ct,0), but +;; RAY_LIST == 0 and ray_vec_new rejects type <= 0 — so the column was +;; silently dropped and the 0-row result lost a column relative to the +;; non-empty result. Schema must be identical regardless of match count. +(set Tlist (table [k nested] (list [1 2 3] (list (list 1 2) (list 3 4) (list 5 6))))) +(key (select {from: Tlist where: (> k 999)})) -- [k nested] +(key (select {from: Tlist where: (> k 0)})) -- [k nested] +(at (select {from: Tlist where: (> k 999)}) 'nested) -- (list) +(count (select {from: Tlist where: (> k 999)})) -- 0 +;; same invariant through a by-group with an empty WHERE +(key (select {from: Tlist by: k where: (> k 999)})) -- [k nested] + ;; ────────────── 3. morsel-boundary sizes: 1023 / 1024 / 1025 ────────────── ;; sel_compact walks segments of RAY_MORSEL_ELEMS=1024 rows. These three ;; sizes hit the partial-last-segment / exact-boundary / one-row-overflow