From 8a47fe813ed522519abbb5c1c3670a1d1f9ca0d9 Mon Sep 17 00:00:00 2001 From: Anton Date: Sat, 13 Jun 2026 15:38:00 +0200 Subject: [PATCH] fix(update): correct scalar-broadcast for GUID/STR and all column widths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ray_update broadcast a scalar atom into a full column through a fixed 8-byte `elem` buffer with a (ct==BOOL?1:8) stride. For a GUID column (16-byte payload, stored in ->obj) ray_vec_append then read 16 bytes from the 8-byte stack buffer — an ASan stack-buffer-overflow / crash — and it also copied from the wrong source field (->i64 instead of ->obj). The narrow-int and temporal types only worked by relying on union aliasing on little-endian. All three broadcast sites (WHERE, all-rows, and the BY new-column path) now use a 16-byte buffer, copy ray_elem_size(ct) bytes, and source GUID payloads from ->obj. The BY new-column site also gained the STR handling the other two already had, so a string-valued new column no longer copies garbage. alter (store_typed_elem) and upsert (append_atom_to_col + ray_elem_size copy loop, fixed earlier in this branch) already handle every type. Added update-broadcast regression tests covering I32/I16/Date/Timestamp type preservation and the GUID overflow (all-rows and WHERE). --- src/ops/query.c | 55 +++++++++++++++++++++++++++------------ test/rfl/table/update.rfl | 21 +++++++++++++++ 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/src/ops/query.c b/src/ops/query.c index 3713b4af..092d9300 100644 --- a/src/ops/query.c +++ b/src/ops/query.c @@ -9659,13 +9659,18 @@ ray_t* ray_update(ray_t** args, int64_t n) { if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return bcast; } } } else { - size_t esz = (ct == RAY_BOOL) ? 
1 : 8; - uint8_t elem[8] = {0}; - if (ct == RAY_F64 && expr_vec->type == -RAY_I64) { + /* elem is wide enough for every fixed-width type incl. + * GUID (16 B), whose payload lives in ->obj — copying + * ray_elem_size(ct) bytes from ->i64 would over-read an + * 8-byte buffer and read the wrong source for GUID. */ + uint8_t elem[16] = {0}; + if (ct == RAY_GUID) { + if (expr_vec->obj) memcpy(elem, ray_data(expr_vec->obj), 16); + } else if (ct == RAY_F64 && expr_vec->type == -RAY_I64) { double promoted = (double)expr_vec->i64; - memcpy(elem, &promoted, 8); + memcpy(elem, &promoted, sizeof promoted); } else { - memcpy(elem, &expr_vec->i64, esz); + memcpy(elem, &expr_vec->i64, ray_elem_size(ct)); } for (int64_t r = 0; r < nrows; r++) { bcast = ray_vec_append(bcast, elem); @@ -9897,13 +9902,17 @@ ray_t* ray_update(ray_t** args, int64_t n) { if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } } } else { - size_t esz = (ct == RAY_BOOL) ? 1 : 8; - uint8_t elem[8] = {0}; - if (ct == RAY_F64 && expr_vec->type == -RAY_I64) { + /* Wide enough for every fixed-width type incl. GUID (16 B, + * payload in ->obj); ray_elem_size(ct) bytes from ->i64 + * would over-read an 8-byte buffer for GUID. 
*/ + uint8_t elem[16] = {0}; + if (ct == RAY_GUID) { + if (expr_vec->obj) memcpy(elem, ray_data(expr_vec->obj), 16); + } else if (ct == RAY_F64 && expr_vec->type == -RAY_I64) { double promoted = (double)expr_vec->i64; - memcpy(elem, &promoted, 8); + memcpy(elem, &promoted, sizeof promoted); } else { - memcpy(elem, &expr_vec->i64, esz); + memcpy(elem, &expr_vec->i64, ray_elem_size(ct)); } for (int64_t r = 0; r < nrows; r++) { bcast = ray_vec_append(bcast, elem); @@ -10022,12 +10031,26 @@ ray_t* ray_update(ray_t** args, int64_t n) { int8_t ct = -expr_vec->type; ray_t* bcast = ray_vec_new(ct, nrows); if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } - size_t esz = ray_elem_size(ct); - uint8_t elem[8] = {0}; - memcpy(elem, &expr_vec->i64, esz > 8 ? 8 : esz); - for (int64_t r = 0; r < nrows; r++) { - bcast = ray_vec_append(bcast, elem); - if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + if (ct == RAY_STR) { + const char* sp = (expr_vec->type == -RAY_STR) ? ray_str_ptr(expr_vec) : ""; + size_t sl = (expr_vec->type == -RAY_STR) ? ray_str_len(expr_vec) : 0; + for (int64_t r = 0; r < nrows; r++) { + bcast = ray_str_vec_append(bcast, sp, sl); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + } + } else { + /* elem holds any fixed-width payload incl. GUID's 16 B (in + * ->obj); copying from ->i64 would be wrong/over-read for GUID. */ + uint8_t elem[16] = {0}; + if (ct == RAY_GUID) { + if (expr_vec->obj) memcpy(elem, ray_data(expr_vec->obj), 16); + } else { + memcpy(elem, &expr_vec->i64, ray_elem_size(ct)); + } + for (int64_t r = 0; r < nrows; r++) { + bcast = ray_vec_append(bcast, elem); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + } } /* Preserve typed-null markers across broadcast (mirrors the * existing-column branches above). 
Without this, diff --git a/test/rfl/table/update.rfl b/test/rfl/table/update.rfl index b3e38d2c..adc240fe 100644 --- a/test/rfl/table/update.rfl +++ b/test/rfl/table/update.rfl @@ -171,3 +171,24 @@ t -- (table [ID Name Value] (list [1 2] [alice bob] [10.0 20.0])) ;; Wrong-type atoms into typed columns still rejected. (insert (table [a] (list (as 'Timestamp (list)))) (list 'bad)) !- type (insert (table [a] (list (as 'Date (list)))) (list 1)) !- type + +;; ══════════════════════════════════════════════════════════════════ +;; UPDATE scalar-broadcast into typed columns — regression for the +;; broadcast sites that copied the scalar through an 8-byte `elem` +;; buffer with a (ct==BOOL?1:8) stride. That over-read the buffer for +;; GUID columns (16-byte payload, stored in ->obj) — an ASan stack- +;; buffer-overflow / crash — and relied on union aliasing for the +;; narrow-int / temporal types. +;; ══════════════════════════════════════════════════════════════════ + +;; Narrow-int and temporal columns keep their type after a scalar update. +(set t (table [k a] (list [1 2 3] (as 'I32 [10 20 30]))))(at (update {a: 99i from: 't}) 'a) -- [99i 99i 99i] +(set t (table [k a] (list [1 2 3] (as 'I16 [1 2 3]))))(at (update {a: 7h from: 't}) 'a) -- [7h 7h 7h] +(set t (table [k a] (list [1 2 3] (as 'Date [2020.01.01 2020.01.02 2020.01.03]))))(at (update {a: 2030.06.15 from: 't}) 'a) -- [2030.06.15 2030.06.15 2030.06.15] +(set t (table [k a] (list [1 2 3] (as 'Timestamp [2024.01.01D00:00:00.0 2024.01.02D00:00:00.0 2024.01.03D00:00:00.0]))))(at (update {a: 2030.01.01D00:00:00.0 from: 't}) 'a) -- [2030.01.01D00:00:00.000000000 2030.01.01D00:00:00.000000000 2030.01.01D00:00:00.000000000] + +;; GUID column scalar update (all-rows): every row becomes the scalar guid. +;; (guid is random, so compare cells to the scalar rather than a literal.) 
+(set g (first (guid 1)))(set t (table [k a] (list [1 2 3] (guid 3))))(== (at (at (update {a: g from: 't}) 'a) 2) g) -- true +;; GUID column scalar update (WHERE): the matching row (k == 2) becomes the scalar guid, no overflow. +(set g (first (guid 1)))(set t (table [k a] (list [1 2 3] (guid 3))))(== (at (at (update {a: g from: 't where: (== k 2)}) 'a) 1) g) -- true