Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 6 additions & 31 deletions mssql_python/pybind/ddbc_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4780,9 +4780,8 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
ColumnBuffers buffers(numCols, fetchSize);

if (!hasLobColumns && fetchSize > 0) {
// Bind columns — Arrow always uses SQL_C_CHAR for VARCHAR because
// it processes raw byte buffers directly, not via Python codecs.
ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, SQL_C_CHAR);
// Always request WCHARs so we don't have to deal with CHAR encodings
ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, SQL_C_WCHAR);
if (!SQL_SUCCEEDED(ret)) {
LOG("Error when binding columns");
return ret;
Expand Down Expand Up @@ -4841,20 +4840,12 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
}
case SQL_CHAR:
case SQL_VARCHAR:
case SQL_LONGVARCHAR: {
ret = GetDataVar(hStmt, idxCol + 1, SQL_C_CHAR,
buffers.charBuffers[idxCol],
buffers.indicators[idxCol].data());
if (!SQL_SUCCEEDED(ret)) {
LOG("Error fetching CHAR LOB for column %d", idxCol + 1);
return ret;
}
break;
}
case SQL_LONGVARCHAR:
Comment thread
ffelixg marked this conversation as resolved.
case SQL_SS_XML:
case SQL_WCHAR:
case SQL_WVARCHAR:
case SQL_WLONGVARCHAR: {
// Always request WCHARs so we don't have to deal with CHAR encodings.
ret = GetDataVar(hStmt, idxCol + 1, SQL_C_WCHAR,
buffers.wcharBuffers[idxCol],
buffers.indicators[idxCol].data());
Expand Down Expand Up @@ -5093,28 +5084,12 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
}
case SQL_CHAR:
case SQL_VARCHAR:
case SQL_LONGVARCHAR: {
#if defined(__APPLE__) || defined(__linux__)
uint64_t fetchBufferSize = columnSize * 4 + 1 /*null-terminator*/;
#else
uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/;
#endif
auto target_vec = &arrowColumnProducer->varData;
auto start = arrowColumnProducer->varVal[idxRowArrow];
while (target_vec->size() < start + dataLen) {
target_vec->resize(target_vec->size() * 2);
}

std::memcpy(&(*target_vec)[start],
&buffers.charBuffers[idxCol][idxRowSql * fetchBufferSize],
dataLen);
arrowColumnProducer->varVal[idxRowArrow + 1] = start + dataLen;
break;
}
case SQL_LONGVARCHAR:
Comment thread
ffelixg marked this conversation as resolved.
case SQL_SS_XML:
case SQL_WCHAR:
case SQL_WVARCHAR:
case SQL_WLONGVARCHAR: {
// We have previously fetched these as WCHARs, even for SQL_CHAR types.
assert(dataLen % sizeof(SQLWCHAR) == 0);
auto dataLenW = dataLen / sizeof(SQLWCHAR);
auto wcharSource =
Expand Down
78 changes: 78 additions & 0 deletions tests/test_004_cursor_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,84 @@ def test_arrow_long_string(cursor: mssql_python.Cursor):
assert batch.column(0).to_pylist() == [long_string]


@pytest.mark.parametrize(
    "sql_type",
    ["char(32)", "varchar(32)"],
    ids=["char", "varchar"],
)
def test_arrow_char_utf8_collation_unicode(cursor: mssql_python.Cursor, sql_type: str):
    """Round-trip Unicode text through a UTF-8 collated CHAR/VARCHAR column via ``.arrow()``.

    A temp table with a single ``sql_type`` column using a UTF-8 collation is
    created, filled with multi-script strings (plus an empty string and NULL),
    and read back as an Arrow table. The column must be typed ``large_string``
    and every value must match what was inserted once surrounding whitespace is
    removed. The test is skipped when the CREATE TABLE statement fails.
    """
    table = "#t_arrow_char_decode"
    collation = "Latin1_General_100_CI_AS_SC_UTF8"
    expected = [
        "Grüße",
        "你好😀",
        "こんにちは",
        "Привет",
        "Hello 世界",
        "😀😃😄😁",
        "",
        None,
    ]

    create_sql = f"create table {table} (id int primary key, v {sql_type} collate {collation})"
    try:
        cursor.execute(create_sql)
    except Exception as exc:
        # Any failure to create the table is treated as "collation unavailable".
        pytest.skip(f"UTF-8 collation '{collation}' not supported: {exc}")

    try:
        row_id = 0
        for text in expected:
            row_id += 1
            cursor.execute(f"insert into {table} (id, v) values (?, ?)", row_id, text)

        result = cursor.execute(f"select v from {table} order by id").arrow()
        assert result.column(0).type.equals(pa.large_string())

        fetched = result.column(0).to_pylist()
        for want, got in zip(expected, fetched, strict=True):
            # Whitespace is stripped before comparing — presumably because
            # fixed-width char(32) values come back space-padded.
            normalized = got.strip() if got is not None else got
            assert want == normalized
    finally:
        cursor.execute(f"drop table if exists {table}")


@pytest.mark.parametrize(
    "sql_type",
    ["char(32)", "varchar(32)", "text"],
    ids=["char", "varchar", "text"],
)
def test_arrow_char_cp1252_collation_unicode(cursor: mssql_python.Cursor, sql_type: str):
    """Round-trip accented Latin text through a CP1-collated CHAR/VARCHAR/TEXT column via ``.arrow()``.

    A temp table with a single ``sql_type`` column collated as
    ``SQL_Latin1_General_CP1_CI_AS`` is created, filled with strings containing
    non-ASCII Latin characters (plus an empty string and NULL), and read back as
    an Arrow table. The column must be typed ``large_string`` and every value
    must match what was inserted once surrounding whitespace is removed.
    """
    table = "#t_arrow_char_decode"
    collation = "SQL_Latin1_General_CP1_CI_AS"
    expected = [
        "Grüße",
        "café René!",
        "naïve café",
        "Español",
        "Müller-Öztürk",
        "Françoise",
        "",
        None,
    ]

    # Table creation is outside the try/finally: a failure here propagates
    # without attempting the drop.
    cursor.execute(f"create table {table} (id int primary key, v {sql_type} collate {collation})")

    try:
        row_id = 0
        for text in expected:
            row_id += 1
            cursor.execute(f"insert into {table} (id, v) values (?, ?)", row_id, text)

        result = cursor.execute(f"select v from {table} order by id").arrow()
        assert result.column(0).type.equals(pa.large_string())

        fetched = result.column(0).to_pylist()
        for want, got in zip(expected, fetched, strict=True):
            # Whitespace is stripped before comparing — presumably because
            # fixed-width char(32) values come back space-padded.
            normalized = got.strip() if got is not None else got
            assert want == normalized
    finally:
        cursor.execute(f"drop table if exists {table}")

Comment thread
ffelixg marked this conversation as resolved.

def test_rownumber_arrow_batch_interleaved_fetchmany(cursor: mssql_python.Cursor):
"""Verify that arrow_batch and fetchmany can be interleaved
on the same result set with correct rownumber tracking and values."""
Expand Down
Loading