@@ -2924,9 +2924,7 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
29242924 row.append (
29252925 FetchLobColumnData (hStmt, i, SQL_C_CHAR, false , false , charEncoding));
29262926 } else {
2927- // Multiply by 4 because utf8 conversion by the driver might
2928- // turn varchar(x) into up to 3*x (maybe 4*x?) bytes.
2929- uint64_t fetchBufferSize = 4 * columnSize + 1 /* null-termination */ ;
2927+ uint64_t fetchBufferSize = columnSize + 1 /* null-termination */ ;
29302928 std::vector<SQLCHAR> dataBuffer (fetchBufferSize);
29312929 SQLLEN dataLen;
29322930 ret = SQLGetData_ptr (hStmt, i, SQL_C_CHAR, dataBuffer.data (), dataBuffer.size (),
@@ -2955,15 +2953,12 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
29552953 row.append (raw_bytes);
29562954 }
29572955 } else {
2958- // Reaching this case indicates an error in mssql_python.
2959- // Theoretically, we could still compensate by calling SQLGetData or
2960- // FetchLobColumnData more often, but then we would still have to process
2961- // the data we already got from the above call to SQLGetData.
2962- // Better to throw an exception and fix the code than to risk returning corrupted data.
2963- ThrowStdException (
2964- " Internal error: SQLGetData returned data "
2965- " larger than expected for CHAR column"
2966- );
2956+ // Buffer too small, fallback to streaming
2957+ LOG (" SQLGetData: CHAR column %d data truncated "
2958+ " (buffer_size=%zu), using streaming LOB" ,
2959+ i, dataBuffer.size ());
2960+ row.append (FetchLobColumnData (hStmt, i, SQL_C_CHAR, false , false ,
2961+ charEncoding));
29672962 }
29682963 } else if (dataLen == SQL_NULL_DATA) {
29692964 LOG (" SQLGetData: Column %d is NULL (CHAR)" , i);
@@ -3000,7 +2995,7 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
30002995 case SQL_WCHAR:
30012996 case SQL_WVARCHAR:
30022997 case SQL_WLONGVARCHAR: {
3003- if (columnSize == SQL_NO_TOTAL || columnSize == 0 || columnSize > 4000 ) {
2998+ if (columnSize == SQL_NO_TOTAL || columnSize > 4000 ) {
30042999 LOG (" SQLGetData: Streaming LOB for column %d (SQL_C_WCHAR) "
30053000 " - columnSize=%lu" ,
30063001 i, (unsigned long )columnSize);
@@ -3029,15 +3024,12 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
30293024 " length=%lu for column %d" ,
30303025 (unsigned long )numCharsInData, i);
30313026 } else {
3032- // Reaching this case indicates an error in mssql_python.
3033- // Theoretically, we could still compensate by calling SQLGetData or
3034- // FetchLobColumnData more often, but then we would still have to process
3035- // the data we already got from the above call to SQLGetData.
3036- // Better to throw an exception and fix the code than to risk returning corrupted data.
3037- ThrowStdException (
3038- " Internal error: SQLGetData returned data "
3039- " larger than expected for WCHAR column"
3040- );
3027+ // Buffer too small, fallback to streaming
3028+ LOG (" SQLGetData: NVARCHAR column %d data "
3029+ " truncated, using streaming LOB" ,
3030+ i);
3031+ row.append (FetchLobColumnData (hStmt, i, SQL_C_WCHAR, true , false ,
3032+ " utf-16le" ));
30413033 }
30423034 } else if (dataLen == SQL_NULL_DATA) {
30433035 LOG (" SQLGetData: Column %d is NULL (NVARCHAR)" , i);
@@ -3299,15 +3291,8 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
32993291 row.append (py::bytes (
33003292 reinterpret_cast <const char *>(dataBuffer.data ()), dataLen));
33013293 } else {
3302- // Reaching this case indicates an error in mssql_python.
3303- // Theoretically, we could still compensate by calling SQLGetData or
3304- // FetchLobColumnData more often, but then we would still have to process
3305- // the data we already got from the above call to SQLGetData.
3306- // Better to throw an exception and fix the code than to risk returning corrupted data.
3307- ThrowStdException (
3308- " Internal error: SQLGetData returned data "
3309- " larger than expected for BINARY column"
3310- );
3294+ row.append (
3295+ FetchLobColumnData (hStmt, i, SQL_C_BINARY, false , true , " " ));
33113296 }
33123297 } else if (dataLen == SQL_NULL_DATA) {
33133298 row.append (py::none ());
@@ -3449,9 +3434,7 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column
34493434 // TODO: handle variable length data correctly. This logic wont
34503435 // suffice
34513436 HandleZeroColumnSizeAtFetch (columnSize);
3452- // Multiply by 4 because utf8 conversion by the driver might
3453- // turn varchar(x) into up to 3*x (maybe 4*x?) bytes.
3454- uint64_t fetchBufferSize = 4 * columnSize + 1 /* null-terminator*/ ;
3437+ uint64_t fetchBufferSize = columnSize + 1 /* null-terminator*/ ;
34553438 // TODO: For LONGVARCHAR/BINARY types, columnSize is returned as
34563439 // 2GB-1 by SQLDescribeCol. So fetchBufferSize = 2GB.
34573440 // fetchSize=1 if columnSize>1GB. So we'll allocate a vector of
@@ -3597,7 +3580,8 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column
35973580// Fetch rows in batches
35983581// TODO: Move to anonymous namespace, since it is not used outside this file
35993582SQLRETURN FetchBatchData (SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& columnNames,
3600- py::list& rows, SQLUSMALLINT numCols, SQLULEN& numRowsFetched) {
3583+ py::list& rows, SQLUSMALLINT numCols, SQLULEN& numRowsFetched,
3584+ const std::vector<SQLUSMALLINT>& lobColumns) {
36013585 LOG (" FetchBatchData: Fetching data in batches" );
36023586 SQLRETURN ret = SQLFetchScroll_ptr (hStmt, SQL_FETCH_NEXT, 0 );
36033587 if (ret == SQL_NO_DATA) {
@@ -3616,28 +3600,19 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
36163600 SQLULEN columnSize;
36173601 SQLULEN processedColumnSize;
36183602 uint64_t fetchBufferSize;
3603+ bool isLob;
36193604 };
36203605 std::vector<ColumnInfo> columnInfos (numCols);
36213606 for (SQLUSMALLINT col = 0 ; col < numCols; col++) {
36223607 const auto & columnMeta = columnNames[col].cast <py::dict>();
36233608 columnInfos[col].dataType = columnMeta[" DataType" ].cast <SQLSMALLINT>();
36243609 columnInfos[col].columnSize = columnMeta[" ColumnSize" ].cast <SQLULEN>();
3610+ columnInfos[col].isLob =
3611+ std::find (lobColumns.begin (), lobColumns.end (), col + 1 ) != lobColumns.end ();
36253612 columnInfos[col].processedColumnSize = columnInfos[col].columnSize ;
36263613 HandleZeroColumnSizeAtFetch (columnInfos[col].processedColumnSize );
3627- switch (columnInfos[col].dataType ) {
3628- case SQL_CHAR:
3629- case SQL_VARCHAR:
3630- case SQL_LONGVARCHAR:
3631- // Multiply by 4 because utf8 conversion by the driver might
3632- // turn varchar(x) into up to 3*x (maybe 4*x?) bytes.
3633- columnInfos[col].fetchBufferSize =
3634- 4 * columnInfos[col].processedColumnSize + 1 ; // +1 for null terminator
3635- break ;
3636- default :
3637- columnInfos[col].fetchBufferSize =
3638- columnInfos[col].processedColumnSize + 1 ; // +1 for null terminator
3639- break ;
3640- }
3614+ columnInfos[col].fetchBufferSize =
3615+ columnInfos[col].processedColumnSize + 1 ; // +1 for null terminator
36413616 }
36423617
36433618 std::string decimalSeparator = GetDecimalSeparator (); // Cache decimal separator
@@ -3655,6 +3630,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
36553630 columnInfosExt[col].columnSize = columnInfos[col].columnSize ;
36563631 columnInfosExt[col].processedColumnSize = columnInfos[col].processedColumnSize ;
36573632 columnInfosExt[col].fetchBufferSize = columnInfos[col].fetchBufferSize ;
3633+ columnInfosExt[col].isLob = columnInfos[col].isLob ;
36583634
36593635 // Map data type to processor function (switch executed once per column,
36603636 // not per cell)
@@ -3763,7 +3739,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
37633739 // types) to just 10 (setup only) Note: Processor functions no
37643740 // longer need to check for NULL since we do it above
37653741 if (columnProcessors[col - 1 ] != nullptr ) {
3766- columnProcessors[col - 1 ](row, buffers, &columnInfosExt[col - 1 ], col, i);
3742+ columnProcessors[col - 1 ](row, buffers, &columnInfosExt[col - 1 ], col, i, hStmt );
37673743 continue ;
37683744 }
37693745
@@ -3940,9 +3916,7 @@ size_t calculateRowSize(py::list& columnNames, SQLUSMALLINT numCols) {
39403916 case SQL_CHAR:
39413917 case SQL_VARCHAR:
39423918 case SQL_LONGVARCHAR:
3943- // Multiply by 4 because utf8 conversion by the driver might
3944- // turn varchar(x) into up to 3*x (maybe 4*x?) bytes.
3945- rowSize += 4 * columnSize;
3919+ rowSize += columnSize;
39463920 break ;
39473921 case SQL_SS_XML:
39483922 case SQL_WCHAR:
@@ -4096,7 +4070,7 @@ SQLRETURN FetchMany_wrap(SqlHandlePtr StatementHandle, py::list& rows, int fetch
40964070 SQLSetStmtAttr_ptr (hStmt, SQL_ATTR_ROW_ARRAY_SIZE, (SQLPOINTER)(intptr_t )fetchSize, 0 );
40974071 SQLSetStmtAttr_ptr (hStmt, SQL_ATTR_ROWS_FETCHED_PTR, &numRowsFetched, 0 );
40984072
4099- ret = FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched);
4073+ ret = FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched, lobColumns );
41004074 if (!SQL_SUCCEEDED (ret) && ret != SQL_NO_DATA) {
41014075 LOG (" FetchMany_wrap: Error when fetching data - SQLRETURN=%d" , ret);
41024076 return ret;
@@ -4229,7 +4203,7 @@ SQLRETURN FetchAll_wrap(SqlHandlePtr StatementHandle, py::list& rows,
42294203
42304204 while (ret != SQL_NO_DATA) {
42314205 ret =
4232- FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched);
4206+ FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched, lobColumns );
42334207 if (!SQL_SUCCEEDED (ret) && ret != SQL_NO_DATA) {
42344208 LOG (" FetchAll_wrap: Error when fetching data - SQLRETURN=%d" , ret);
42354209 return ret;
0 commit comments