From 8a47fe813ed522519abbb5c1c3670a1d1f9ca0d9 Mon Sep 17 00:00:00 2001
From: Anton <singaraiona@gmail.com>
Date: Sat, 13 Jun 2026 15:38:00 +0200
Subject: [PATCH] fix(update): correct scalar-broadcast for GUID/STR and all
 column widths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ray_update used to broadcast a scalar atom into a full column through a
fixed 8-byte `elem` buffer, filled by copying (ct==BOOL?1:8) bytes from
->i64 (the BY new-column site copied ray_elem_size(ct) clamped to 8).
For a GUID column (16-byte payload, stored in ->obj) ray_vec_append then
read 16 bytes from the 8-byte stack buffer — an ASan
stack-buffer-overflow / crash — and the buffer had also been filled from
the wrong source field (->i64 instead of ->obj). The narrow-int and
temporal types only worked by relying on union aliasing on
little-endian hosts.

All three broadcast sites (WHERE, all-rows, and the BY new-column path)
now use a 16-byte buffer, copy ray_elem_size(ct) bytes, and source GUID
payloads from ->obj. The BY new-column site also gained the STR handling
the other two already had, so a string-valued new column no longer copies
garbage.

alter (store_typed_elem) and upsert (append_atom_to_col + ray_elem_size
copy loop, fixed earlier in this branch) already handle every type.

Added update-broadcast regression tests covering I32/I16/Date/Timestamp
type preservation and the GUID overflow (all-rows and WHERE).
---
 src/ops/query.c           | 55 +++++++++++++++++++++++++++------------
 test/rfl/table/update.rfl | 21 +++++++++++++++
 2 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/src/ops/query.c b/src/ops/query.c
index 3713b4af..092d9300 100644
--- a/src/ops/query.c
+++ b/src/ops/query.c
@@ -9659,13 +9659,18 @@ ray_t* ray_update(ray_t** args, int64_t n) {
                             if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return bcast; }
                         }
                     } else {
-                        size_t esz = (ct == RAY_BOOL) ? 1 : 8;
-                        uint8_t elem[8] = {0};
-                        if (ct == RAY_F64 && expr_vec->type == -RAY_I64) {
+                        /* elem is wide enough for every fixed-width type incl.
+                         * GUID (16 B). A GUID payload lives in ->obj, not ->i64,
+                         * so it is copied from there; an 8-byte elem would be
+                         * over-read when 16 bytes are appended from it. */
+                        uint8_t elem[16] = {0};
+                        if (ct == RAY_GUID) {
+                            if (expr_vec->obj) memcpy(elem, ray_data(expr_vec->obj), 16);
+                        } else if (ct == RAY_F64 && expr_vec->type == -RAY_I64) {
                             double promoted = (double)expr_vec->i64;
-                            memcpy(elem, &promoted, 8);
+                            memcpy(elem, &promoted, sizeof promoted);
                         } else {
-                            memcpy(elem, &expr_vec->i64, esz);
+                            memcpy(elem, &expr_vec->i64, ray_elem_size(ct));
                         }
                         for (int64_t r = 0; r < nrows; r++) {
                             bcast = ray_vec_append(bcast, elem);
@@ -9897,13 +9902,17 @@ ray_t* ray_update(ray_t** args, int64_t n) {
                         if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; }
                     }
                 } else {
-                    size_t esz = (ct == RAY_BOOL) ? 1 : 8;
-                    uint8_t elem[8] = {0};
-                    if (ct == RAY_F64 && expr_vec->type == -RAY_I64) {
+                    /* Wide enough for every fixed-width type incl. GUID (16 B).
+                     * GUID's payload lives in ->obj, so it is copied from there;
+                     * an 8-byte elem would be over-read on append. */
+                    uint8_t elem[16] = {0};
+                    if (ct == RAY_GUID) {
+                        if (expr_vec->obj) memcpy(elem, ray_data(expr_vec->obj), 16);
+                    } else if (ct == RAY_F64 && expr_vec->type == -RAY_I64) {
                         double promoted = (double)expr_vec->i64;
-                        memcpy(elem, &promoted, 8);
+                        memcpy(elem, &promoted, sizeof promoted);
                     } else {
-                        memcpy(elem, &expr_vec->i64, esz);
+                        memcpy(elem, &expr_vec->i64, ray_elem_size(ct));
                     }
                     for (int64_t r = 0; r < nrows; r++) {
                         bcast = ray_vec_append(bcast, elem);
@@ -10022,12 +10031,26 @@ ray_t* ray_update(ray_t** args, int64_t n) {
             int8_t ct = -expr_vec->type;
             ray_t* bcast = ray_vec_new(ct, nrows);
             if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; }
-            size_t esz = ray_elem_size(ct);
-            uint8_t elem[8] = {0};
-            memcpy(elem, &expr_vec->i64, esz > 8 ? 8 : esz);
-            for (int64_t r = 0; r < nrows; r++) {
-                bcast = ray_vec_append(bcast, elem);
-                if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; }
+            if (ct == RAY_STR) {
+                const char* sp = (expr_vec->type == -RAY_STR) ? ray_str_ptr(expr_vec) : "";
+                size_t sl = (expr_vec->type == -RAY_STR) ? ray_str_len(expr_vec) : 0;
+                for (int64_t r = 0; r < nrows; r++) {
+                    bcast = ray_str_vec_append(bcast, sp, sl);
+                    if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; }
+                }
+            } else {
+                /* elem holds any fixed-width payload incl. GUID's 16 B, which
+                 * live in ->obj: ->i64 is the wrong source and only 8 B wide. */
+                uint8_t elem[16] = {0};
+                if (ct == RAY_GUID) {
+                    if (expr_vec->obj) memcpy(elem, ray_data(expr_vec->obj), 16);
+                } else {
+                    memcpy(elem, &expr_vec->i64, ray_elem_size(ct));
+                }
+                for (int64_t r = 0; r < nrows; r++) {
+                    bcast = ray_vec_append(bcast, elem);
+                    if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; }
+                }
             }
             /* Preserve typed-null markers across broadcast (mirrors the
              * existing-column branches above).  Without this,
diff --git a/test/rfl/table/update.rfl b/test/rfl/table/update.rfl
index b3e38d2c..adc240fe 100644
--- a/test/rfl/table/update.rfl
+++ b/test/rfl/table/update.rfl
@@ -171,3 +171,24 @@ t -- (table [ID Name Value] (list [1 2] [alice bob] [10.0 20.0]))
 ;; Wrong-type atoms into typed columns still rejected.
 (insert (table [a] (list (as 'Timestamp (list)))) (list 'bad)) !- type
 (insert (table [a] (list (as 'Date (list)))) (list 1)) !- type
+
+;; ══════════════════════════════════════════════════════════════════
+;; UPDATE scalar-broadcast into typed columns — regression for the
+;; broadcast sites that copied the scalar through an 8-byte `elem`
+;; buffer with a (ct==BOOL?1:8) stride. That over-read the buffer for
+;; GUID columns (16-byte payload, stored in ->obj) — an ASan stack-
+;; buffer-overflow / crash — and relied on union aliasing for the
+;; narrow-int / temporal types.
+;; ══════════════════════════════════════════════════════════════════
+
+;; Narrow-int and temporal columns keep their type after a scalar update.
+(set t (table [k a] (list [1 2 3] (as 'I32 [10 20 30]))))(at (update {a: 99i from: 't}) 'a) -- [99i 99i 99i]
+(set t (table [k a] (list [1 2 3] (as 'I16 [1 2 3]))))(at (update {a: 7h from: 't}) 'a) -- [7h 7h 7h]
+(set t (table [k a] (list [1 2 3] (as 'Date [2020.01.01 2020.01.02 2020.01.03]))))(at (update {a: 2030.06.15 from: 't}) 'a) -- [2030.06.15 2030.06.15 2030.06.15]
+(set t (table [k a] (list [1 2 3] (as 'Timestamp [2024.01.01D00:00:00.0 2024.01.02D00:00:00.0 2024.01.03D00:00:00.0]))))(at (update {a: 2030.01.01D00:00:00.0 from: 't}) 'a) -- [2030.01.01D00:00:00.000000000 2030.01.01D00:00:00.000000000 2030.01.01D00:00:00.000000000]
+
+;; GUID column scalar update (all-rows): every row becomes the scalar guid.
+;; (guid is random, so compare cells to the scalar rather than a literal.)
+(set g (first (guid 1)))(set t (table [k a] (list [1 2 3] (guid 3))))(== (at (at (update {a: g from: 't}) 'a) 2) g) -- true
+;; GUID column scalar update (WHERE): the matching row (k == 2, index 1) becomes the scalar guid, no overflow.
+(set g (first (guid 1)))(set t (table [k a] (list [1 2 3] (guid 3))))(== (at (at (update {a: g from: 't where: (== k 2)}) 'a) 1) g) -- true