From cdd28da5a6480e028e82ff14027fd9f374ba137f Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 12 Jun 2026 15:27:49 +0530 Subject: [PATCH] Fix multi-row CAST when bind names contain escape-map characters. PR #68 wraps mixed-type multi-row INSERT binds with CAST(... AS STRING) by post-editing the rendered SQL. Marker reconstruction used the raw bind name from ``self.binds``, but SQLAlchemy renders the marker after applying ``bindname_escape_characters`` (space/./[/]/(/)/%/: get translated). For any column name containing one of those chars the two strings disagree, ``str.replace`` is a no-op, and the mixed-type insert fails again with ``INCOMPATIBLE_TYPES_IN_INLINE_TABLE`` (PECOBLR-2746 follow-up, Comment 21). Look up the escaped form via ``self.escaped_bind_names`` so the rebuilt marker matches what the compiler actually wrote. Backtick-containing names take a different path in ``bindparam_string`` that bypasses the escape map; the ``.replace("\`", "\`\`")`` doubling in marker rebuild matches that path too, so backticks alone, and backticks combined with escape-map chars, are handled by the same code. Tests cover every char in ``bindname_escape_characters`` plus literal and combined backtick cases, all at compile-only level (no warehouse). Co-authored-by: Isaac --- src/databricks/sqlalchemy/_ddl.py | 19 +++++- tests/test_local/test_ddl.py | 105 ++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index cf78731..d8d0dff 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -285,14 +285,29 @@ def _build_multi_value_cast_plan(self, insert_stmt): return cast_plan def _apply_multi_value_casts(self, sql_text, insert_stmt): - """Wrap selected ``:`name``` markers with ``CAST(... AS )``.""" + """Wrap selected ``:`name``` markers with ``CAST(... AS )``. + + ``self.binds`` is keyed by the *raw* bind name (e.g. + ``'col with space_m0'``) but SQLAlchemy renders the marker using the + *escaped* form after applying ``bindname_escape_characters`` + (space/./[/]/(/)/%/: → ``_`` etc., see ``compiler.py:bindparam_string``). + The mapping is recorded in ``self.escaped_bind_names`` as + ``{original: escaped}``. We must look up the escaped form when + reconstructing the marker — otherwise ``str.replace`` is a no-op for any + column name containing an escaped character and no cast is applied, + re-triggering the inline-table type incompatibility that this method + exists to prevent (PECOBLR-2746 follow-up). + """ cast_plan = self._build_multi_value_cast_plan(insert_stmt) if not cast_plan: return sql_text rendered = sql_text for bind_name, target_type in cast_plan.items(): - marker = self._BIND_TEMPLATE % {"name": bind_name.replace("`", "``")} + rendered_name = self.escaped_bind_names.get(bind_name, bind_name) + marker = self._BIND_TEMPLATE % { + "name": rendered_name.replace("`", "``") + } rendered = rendered.replace(marker, f"CAST({marker} AS {target_type})") return rendered diff --git a/tests/test_local/test_ddl.py b/tests/test_local/test_ddl.py index e8b64e9..3cd36c5 100644 --- a/tests/test_local/test_ddl.py +++ b/tests/test_local/test_ddl.py @@ -545,3 +545,108 @@ def test_single_row_insert_does_not_render_casts(self): sql = str(stmt.compile(bind=self.engine)) assert "CAST(:`value` AS STRING)" not in sql + + +class TestMultiRowInsertCastsEscapedBindNames(DDLTestBase): + """Regression tests for PECOBLR-2746 follow-up. + + SQLAlchemy's ``bindname_escape_characters`` translates the chars space, + ``.``, ``[``, ``]``, ``(``, ``)``, ``%``, ``:`` in bind names to ``_`` etc. + The cast pass keys off ``self.binds`` (raw names) but the rendered SQL uses + the escaped form. If the cast pass doesn't look up the escaped name, the + str.replace becomes a no-op and the mixed-type insert fails again. These + tests pin the fix. + """ + + # Each pair is (column_name, expected_bind_token_inside_backticks). + # The expected token mirrors SQLAlchemy's default bindname_escape_characters + # map: space/./[/] → _, ( → A, ) → Z, % → P, : → C. + _ESCAPED_NAMES = [ + ("col with space", "col_with_space"), + ("col.dot", "col_dot"), + ("col[bracket]", "col_bracket_"), + ("col(paren)", "colAparenZ"), + ("col%pct", "colPpct"), + ("col:colon", "colCcolon"), + ] + + @pytest.mark.parametrize("column_name,escaped_token", _ESCAPED_NAMES) + def test_cast_renders_for_escape_char_column( + self, column_name, escaped_token + ): + metadata = MetaData() + table = Table("t", metadata, Column(column_name, String())) + stmt = insert(table).values( + [{column_name: 1}, {column_name: 0}, {column_name: "NE"}] + ) + + sql = str(stmt.compile(bind=self.engine)) + + for idx in range(3): + marker = f":`{escaped_token}_m{idx}`" + cast = f"CAST({marker} AS STRING)" + assert cast in sql, ( + f"expected {cast!r} in compiled SQL for column " + f"{column_name!r}, got:\n{sql}" + ) + # And the bare marker must not appear standalone — every + # occurrence of it must be inside the CAST(...). + assert sql.count(marker) == sql.count(cast), ( + f"bare {marker!r} appears outside CAST(...) for column " + f"{column_name!r}:\n{sql}" + ) + + def test_cast_renders_for_backtick_column_name(self): + """Literal-backtick column: bindparam_string short-circuits super and + doubles the backtick directly, so escaped_bind_names stays empty. Our + `.get(bind_name, bind_name)` falls back to the raw name and the + `.replace("`", "``")` in the marker rebuild reproduces the same + doubling, so the marker matches and CAST wraps correctly. + """ + metadata = MetaData() + table = Table("t", metadata, Column("col`tick", String())) + stmt = insert(table).values( + [{"col`tick": 1}, {"col`tick": 0}, {"col`tick": "NE"}] + ) + + sql = str(stmt.compile(bind=self.engine)) + for idx in range(3): + assert f"CAST(:`col``tick_m{idx}` AS STRING)" in sql, sql + + def test_cast_renders_for_backtick_plus_escape_char(self): + """Both backtick and a default-escape-map char in the same column name. + The backtick path bypasses super entirely (so the escape map never + runs), and `.replace("`", "``")` doubles the backtick — the dot stays + verbatim inside the backtick-quoted marker. + """ + metadata = MetaData() + table = Table("t", metadata, Column("col`x.y", String())) + stmt = insert(table).values( + [{"col`x.y": 1}, {"col`x.y": 0}, {"col`x.y": "NE"}] + ) + + sql = str(stmt.compile(bind=self.engine)) + for idx in range(3): + assert f"CAST(:`col``x.y_m{idx}` AS STRING)" in sql, sql + + def test_cast_renders_for_mixed_escape_chars_in_same_table(self): + metadata = MetaData() + table = Table( + "t", + metadata, + Column("a b", String()), + Column("c.d", String()), + Column("e", String()), + ) + stmt = insert(table).values( + [ + {"a b": 1, "c.d": 1, "e": 1}, + {"a b": 0, "c.d": 0, "e": 0}, + {"a b": "NE", "c.d": "NE", "e": "NE"}, + ] + ) + + sql = str(stmt.compile(bind=self.engine)) + for token in ("a_b", "c_d", "e"): + for idx in range(3): + assert f"CAST(:`{token}_m{idx}` AS STRING)" in sql, sql