Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 4 additions & 31 deletions mssql_python/pybind/ddbc_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4780,9 +4780,8 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
ColumnBuffers buffers(numCols, fetchSize);

if (!hasLobColumns && fetchSize > 0) {
// Bind columns — Arrow always uses SQL_C_CHAR for VARCHAR because
// it processes raw byte buffers directly, not via Python codecs.
ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, SQL_C_CHAR);
// Always request WCHARs so we don't have to deal with CHAR encodings
ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, SQL_C_WCHAR);
if (!SQL_SUCCEEDED(ret)) {
LOG("Error when binding columns");
return ret;
Expand Down Expand Up @@ -4841,16 +4840,7 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
}
case SQL_CHAR:
case SQL_VARCHAR:
case SQL_LONGVARCHAR: {
ret = GetDataVar(hStmt, idxCol + 1, SQL_C_CHAR,
buffers.charBuffers[idxCol],
buffers.indicators[idxCol].data());
if (!SQL_SUCCEEDED(ret)) {
LOG("Error fetching CHAR LOB for column %d", idxCol + 1);
return ret;
}
break;
}
case SQL_LONGVARCHAR:
case SQL_SS_XML:
case SQL_WCHAR:
case SQL_WVARCHAR:
Expand Down Expand Up @@ -5093,24 +5083,7 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
}
case SQL_CHAR:
case SQL_VARCHAR:
case SQL_LONGVARCHAR: {
#if defined(__APPLE__) || defined(__linux__)
uint64_t fetchBufferSize = columnSize * 4 + 1 /*null-terminator*/;
#else
uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/;
#endif
auto target_vec = &arrowColumnProducer->varData;
auto start = arrowColumnProducer->varVal[idxRowArrow];
while (target_vec->size() < start + dataLen) {
target_vec->resize(target_vec->size() * 2);
}

std::memcpy(&(*target_vec)[start],
&buffers.charBuffers[idxCol][idxRowSql * fetchBufferSize],
dataLen);
arrowColumnProducer->varVal[idxRowArrow + 1] = start + dataLen;
break;
}
case SQL_LONGVARCHAR:
case SQL_SS_XML:
case SQL_WCHAR:
case SQL_WVARCHAR:
Expand Down
57 changes: 57 additions & 0 deletions tests/test_004_cursor_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,63 @@ def test_arrow_long_string(cursor: mssql_python.Cursor):
assert batch.column(0).to_pylist() == [long_string]


def test_arrow_varchar_utf8_collation_unicode(cursor: mssql_python.Cursor):
table = "#t_arrow_utf8_varchar"
collation = "Latin1_General_100_CI_AS_SC_UTF8"
expected = [
"Grüße",
"你好😀",
"こんにちは",
"Привет",
"Hello 世界",
"😀😃😄😁",
"",
None,
]

try:
cursor.execute(
f"create table {table} (id int primary key, v varchar(32) collate {collation})"
)
except Exception as exc:
pytest.skip(f"UTF-8 collation '{collation}' not supported: {exc}")

try:
for index, value in enumerate(expected, start=1):
cursor.execute(f"insert into {table} (id, v) values (?, ?)", index, value)
tbl = cursor.execute(f"select v from {table} order by id").arrow()
assert tbl.column(0).type.equals(pa.large_string())
assert tbl.column(0).to_pylist() == expected
finally:
cursor.execute(f"drop table if exists {table}")


def test_arrow_varchar_utf8_collation_cp1252(cursor: mssql_python.Cursor):
table = "#t_arrow_cp1252_varchar"
collation = "SQL_Latin1_General_CP1_CI_AS"
expected = [
"Grüße",
"café René!",
"naïve café",
"Español",
"Müller-Öztürk",
"Françoise",
"",
None,
]

cursor.execute(f"create table {table} (id int primary key, v varchar(32) collate {collation})")

try:
for index, value in enumerate(expected, start=1):
cursor.execute(f"insert into {table} (id, v) values (?, ?)", index, value)
tbl = cursor.execute(f"select v from {table} order by id").arrow()
assert tbl.column(0).type.equals(pa.large_string())
assert tbl.column(0).to_pylist() == expected
finally:
cursor.execute(f"drop table if exists {table}")


def test_rownumber_arrow_batch_interleaved_fetchmany(cursor: mssql_python.Cursor):
"""Verify that arrow_batch and fetchmany can be interleaved
on the same result set with correct rownumber tracking and values."""
Expand Down