diff --git a/src/lang/eval.c b/src/lang/eval.c index 19c31c8c..a0ec8d0c 100644 --- a/src/lang/eval.c +++ b/src/lang/eval.c @@ -1330,6 +1330,17 @@ ray_t* ray_table_fn(ray_t* names, ray_t* cols) { else if (nrows != expected_rows) { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("domain", NULL); } + /* Empty generic list → typeless empty column: keep it as a RAY_LIST + * so its storage type is adopted from the first inserted value + * (q-style () column), rather than defaulting to I64. */ + if (nrows == 0) { + ray_retain(col_src); + tbl = ray_table_add_col(tbl, name_id, col_src); + ray_release(col_src); + if (RAY_IS_ERR(tbl)) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return tbl; } + continue; + } + ray_t** row_elems = (ray_t**)ray_data(col_src); /* If the LIST contains non-atom values (e.g. nested vectors for an diff --git a/src/ops/collection.c b/src/ops/collection.c index fa99b530..f9f5410a 100644 --- a/src/ops/collection.c +++ b/src/ops/collection.c @@ -2200,38 +2200,26 @@ static ray_t* map_iterate(ray_t* fn, ray_t* fixed, ray_t* vec, int fixed_is_left return out; } -/* (map-left fn fixed vec) → apply fn(fixed, elem) for each elem in vec. - * If vec is scalar but fixed is a vector, auto-swap (iterate over fixed). */ +/* (map-left fn left right) → fix the LEFT arg, iterate over the right: + * apply fn(left, right_i) for each element of right. If right is scalar this + * collapses to a single fn(left, right) (handled by map_iterate). */ ray_t* ray_map_left_fn(ray_t** args, int64_t n) { if (n != 3) return ray_error("domain", NULL); ray_t* fn = args[0]; - ray_t* fixed = args[1]; - ray_t* vec = args[2]; - - /* Auto-detect: if vec is scalar but fixed is a vector, swap roles */ - if (!ray_is_vec(vec) && vec->type != RAY_LIST && - (ray_is_vec(fixed) || fixed->type == RAY_LIST)) { - return map_iterate(fn, vec, fixed, 0); /* fn(elem_of_fixed, vec) — but we want fn(fixed=scalar, elem) */ - } - - return map_iterate(fn, fixed, vec, 1); /* fn(fixed, elem) */ + ray_t* left = args[1]; + ray_t* right = args[2]; + return map_iterate(fn, left, right, 1); /* fn(left, right_i) */ } -/* (map-right fn vec fixed) → apply fn(elem, fixed) for each elem in vec. - * If vec is scalar but fixed is a vector, auto-swap (iterate over fixed). */ +/* (map-right fn left right) → fix the RIGHT arg, iterate over the left: + * apply fn(left_i, right) for each element of left. If left is scalar this + * collapses to a single fn(left, right) (handled by map_iterate). */ ray_t* ray_map_right_fn(ray_t** args, int64_t n) { if (n != 3) return ray_error("domain", NULL); ray_t* fn = args[0]; - ray_t* vec = args[1]; - ray_t* fixed = args[2]; - - /* Auto-detect: if vec is scalar but fixed is a vector, swap roles */ - if (!ray_is_vec(vec) && vec->type != RAY_LIST && - (ray_is_vec(fixed) || fixed->type == RAY_LIST)) { - return map_iterate(fn, vec, fixed, 1); /* fn(vec_scalar, elem_of_fixed) */ - } - - return map_iterate(fn, fixed, vec, 0); /* fn(elem, fixed) */ + ray_t* left = args[1]; + ray_t* right = args[2]; + return map_iterate(fn, right, left, 0); /* fn(left_i, right) */ } /* ══════════════════════════════════════════ diff --git a/src/ops/filter.c b/src/ops/filter.c index 9014e206..b1b6dfe9 100644 --- a/src/ops/filter.c +++ b/src/ops/filter.c @@ -554,7 +554,10 @@ ray_t* sel_compact(ray_graph_t* g, ray_t* tbl, ray_t* sel) { if (!col) continue; int8_t ct = RAY_IS_PARTED(col->type) ? (int8_t)RAY_PARTED_BASETYPE(col->type) : col->type; - ray_t* nc = ray_vec_new(ct, 0); + /* RAY_LIST == 0; ray_vec_new rejects type <= 0, so a LIST + * column needs ray_list_new — otherwise the empty result + * silently drops it and the 0-row schema loses a column. */ + ray_t* nc = (ct == RAY_LIST) ? ray_list_new(0) : ray_vec_new(ct, 0); if (nc && !RAY_IS_ERR(nc)) { nc->len = 0; empty = ray_table_add_col(empty, ray_table_col_name(tbl, c), nc); diff --git a/src/ops/query.c b/src/ops/query.c index 092d9300..b5d67bd2 100644 --- a/src/ops/query.c +++ b/src/ops/query.c @@ -9197,6 +9197,28 @@ ray_t* ray_xbar_fn(ray_t* col, ray_t* bucket) { * Update, Insert, Upsert * ══════════════════════════════════════════ */ +/* Derive the storage type for a typeless (empty RAY_LIST) column from the + * first value inserted into it — q-style () columns adopt their type on the + * first insert. Returns the RAY_* column type, or RAY_LIST when the payload + * is itself nested (non-atom elements → a genuine list column). */ +static int8_t typeless_col_type(ray_t* payload) { + if (!payload) return RAY_I64; /* null row → default I64 */ + if (ray_is_atom(payload)) return -payload->type; + if (ray_is_vec(payload)) return payload->type; /* typed vec → splice */ + if (payload->type == RAY_LIST) { + int64_t m = ray_len(payload); + if (m == 0) return RAY_LIST; /* empty payload → stay typeless */ + ray_t** e = (ray_t**)ray_data(payload); + if (e[0] && !ray_is_atom(e[0])) return RAY_LIST; /* nested cells */ + int8_t t = e[0] ? (int8_t)(-e[0]->type) : RAY_I64; + if (t == RAY_I64) /* promote to F64 if any element is float */ + for (int64_t k = 0; k < m; k++) + if (e[k] && e[k]->type == -RAY_F64) { t = RAY_F64; break; } + return t; + } + return RAY_I64; +} + /* Helper: convert a Rayfall list of atoms into a typed column vector by * appending to an existing column (for insert/upsert). */ static ray_t* append_atom_to_col(ray_t* col_vec, ray_t* atom) { @@ -10412,6 +10434,12 @@ ray_t* ray_insert(ray_t** args, int64_t n) { ray_t* orig_col = ray_table_get_col_idx(tbl, c); int8_t ct = orig_col->type; + /* Typeless empty column (an empty () list, adopt-on-first-insert). + * The table has 0 rows here, so there is nothing to copy and the + * derived type drives the new column's storage. */ + if (ct == RAY_LIST && ray_len(orig_col) == 0) + ct = typeless_col_type(row_elems[c]); + ray_t* new_col = ray_vec_new(ct, nrows + 1); if (RAY_IS_ERR(new_col)) { ray_release(result); return new_col; } diff --git a/test/rfl/ops/filter.rfl b/test/rfl/ops/filter.rfl index 54fcd34c..1649a148 100644 --- a/test/rfl/ops/filter.rfl +++ b/test/rfl/ops/filter.rfl @@ -74,6 +74,19 @@ (count (select {from: Tstr where: (> k 999)})) -- 0 (count (select {from: Tstr where: (> k 0)})) -- 5 +;; ────────────── LIST column survives an empty filter (sel_compact none-pass) ────────────── +;; The none-pass branch built empty columns with ray_vec_new(ct,0), but +;; RAY_LIST == 0 and ray_vec_new rejects type <= 0 — so the column was +;; silently dropped and the 0-row result lost a column relative to the +;; non-empty result. Schema must be identical regardless of match count. +(set Tlist (table [k nested] (list [1 2 3] (list (list 1 2) (list 3 4) (list 5 6))))) +(key (select {from: Tlist where: (> k 999)})) -- [k nested] +(key (select {from: Tlist where: (> k 0)})) -- [k nested] +(at (select {from: Tlist where: (> k 999)}) 'nested) -- (list) +(count (select {from: Tlist where: (> k 999)})) -- 0 +;; same invariant through a by-group with an empty WHERE +(key (select {from: Tlist by: k where: (> k 999)})) -- [k nested] + ;; ────────────── 3. morsel-boundary sizes: 1023 / 1024 / 1025 ────────────── ;; sel_compact walks segments of RAY_MORSEL_ELEMS=1024 rows. These three ;; sizes hit the partial-last-segment / exact-boundary / one-row-overflow diff --git a/test/rfl/table/query.rfl b/test/rfl/table/query.rfl index 40784fc0..236f7381 100644 --- a/test/rfl/table/query.rfl +++ b/test/rfl/table/query.rfl @@ -8,7 +8,7 @@ (set n 10)(set gds (take (guid 3) n))(set t (table [OrderId Symbol Price Size Tape Timestamp](list gds(take [apll good msfk ibmd amznt fbad baba] n)(as 'F64 (til n))(take (+ 1 (til 3)) n)(map (fn [x] (as 'STR x)) (take (til 10) n))(as 'TIMESTAMP (til n)))))null -- null (select {from: t by: Symbol}) -- (table [Symbol OrderId Price Size Tape Timestamp](list [apll good msfk ibmd amznt fbad baba](at gds (til 7)) [0 1 2 3 4 5 6.0] [1 2 3 1 2 3 1](list "0""1""2""3""4""5""6")(at (at t 'Timestamp) (til 7)))) (select {from: t by: Symbol where: (== Price 3)}) -- (table [Symbol OrderId Price Size Tape Timestamp](list [ibmd] (at gds 3) [3.00] [1] (list "3") [2000.01.01D00:00:00.000000003])) -(select {from: t by: Symbol where: (== Price 99)}) -- (table [Symbol OrderId Price Size Tape Timestamp](list (as 'SYMBOL []) (as 'GUID []) (as 'F64 []) (as 'I64 []) (list) (as 'TIMESTAMP []))) +(select {from: t by: Symbol where: (== Price 99)}) -- (table [Symbol OrderId Price Size Tape Timestamp](list (as 'SYMBOL []) (as 'GUID []) (as 'F64 []) (as 'I64 []) (as 'STR []) (as 'TIMESTAMP []))) (select {s: (sum Price) from: t by: Symbol}) -- (table [Symbol s](list [apll good msfk ibmd amznt fbad baba][7.00 9.00 11.00 3.00 4.00 5.00 6.00])) ;; by: (xbar Ts 10000) groups rows into 10-second buckets; the Ts ;; key column surfaces the bucket start (09:00:00, 09:00:10), not diff --git a/test/rfl/table/update.rfl b/test/rfl/table/update.rfl index adc240fe..88dfff66 100644 --- a/test/rfl/table/update.rfl +++ b/test/rfl/table/update.rfl @@ -172,6 +172,17 @@ t -- (table [ID Name Value] (list [1 2] [alice bob] [10.0 20.0])) (insert (table [a] (list (as 'Timestamp (list)))) (list 'bad)) !- type (insert (table [a] (list (as 'Date (list)))) (list 1)) !- type +;; Typeless empty () column: adopts the type of the first inserted value +;; (q-style () column) instead of defaulting to I64 and rejecting non-ints. +(insert (table [a] (list (list))) (list 3.14)) -- (table [a] (list [3.14])) +(insert (table [a] (list (list))) (list 'x)) -- (table [a] (list ['x])) +(insert (table [a] (list (list))) (list "s")) -- (table [a] (list (list "s"))) +(insert (table [a] (list (list))) (list 2020.01.01)) -- (table [a] (list [2020.01.01])) +(insert (table [a] (list (list))) (list 12:30:00.0)) -- (table [a] (list [12:30:00.000])) +;; Type is adopted once, then enforced on subsequent inserts. +(set tl (table [a] (list (list))))(insert 'tl (list 3.14))(insert 'tl (list 2.71))tl -- (table [a] (list [3.14 2.71])) +(set ts (table [a] (list (list))))(insert 'ts (list 'a))(insert 'ts (list 'b))ts -- (table [a] (list ['a 'b])) + ;; ══════════════════════════════════════════════════════════════════ ;; UPDATE scalar-broadcast into typed columns — regression for the ;; broadcast sites that copied the scalar through an 8-byte `elem`