Skip to content

Commit 5166365

Browse files
NiallEgan authored and susodapop committed
Revert "Improve deserialisation in client"
This reverts commit a4e784914fa1c9951f7ac2d44cbce738106a2ac2.
1 parent caa5028 commit 5166365

File tree

2 files changed

+25
-18
lines changed

2 files changed

+25
-18
lines changed

cmdexec/clients/python/src/databricks/sql/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ def __eq__(self, other):
99
return other in self.values
1010

1111
def __repr__(self):
12-
return "DBAPITypeObject({values})".format(values=self.values)
12+
return "DBAPITypeObject(%s)" % self.values
1313

1414

1515
STRING = _DBAPITypeObject('string')

cmdexec/clients/python/src/databricks/sql/client.py

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
from typing import Dict, Tuple, List, Optional, Any
1111

1212
import grpc
13-
import pandas
1413
import pyarrow
1514

1615
from databricks.sql.errors import OperationalError, InterfaceError, DatabaseError, Error, DataError
@@ -27,9 +26,8 @@
2726

2827

2928
def _parse_timestamp(value):
30-
if type(value) is datetime.datetime or type(value) is pandas.Timestamp:
31-
# The cmd exec server will return a native datetime / timestamp, so no further parsing is
32-
# needed
29+
if type(value) is datetime.datetime:
30+
# The cmd exec server will return a datetime.datetime, so no further parsing is needed
3331
return value
3432
elif value:
3533
match = _TIMESTAMP_PATTERN.match(value)
@@ -40,13 +38,20 @@ def _parse_timestamp(value):
4038
value = match.group()
4139
else:
4240
format = '%Y-%m-%d %H:%M:%S'
43-
return pandas.to_datetime(datetime.datetime.strptime(value, format))
41+
value = datetime.datetime.strptime(value, format)
42+
return value
4443
else:
4544
raise Exception('Cannot convert "{}" into a datetime'.format(value))
4645
else:
4746
return None
4847

4948

49+
TYPES_CONVERTER = {
50+
"decimal": Decimal,
51+
"timestamp": _parse_timestamp,
52+
}
53+
54+
5055
class Connection:
5156
def __init__(self,
5257
server_hostname: str,
@@ -574,19 +579,21 @@ def _fill_results_buffer(self):
574579
self.has_more_rows = has_more_rows
575580
self.description = description
576581

582+
@staticmethod
583+
def parse_type(type_, value):
584+
converter = TYPES_CONVERTER.get(type_)
585+
if converter:
586+
return converter(value)
587+
else:
588+
return value
589+
577590
def _convert_arrow_table(self, table):
578-
df = table.to_pandas()
579-
for (i, col) in enumerate(df.columns):
580-
# Check for 0 because .dt doesn't work on empty series
581-
if self.description[i][1] == 'timestamp' and len(df) > 0:
582-
# We store the dtype as object so we don't use the pandas datetime dtype but
583-
# a native datetime.datetime
584-
timestamp_col = df[col].apply(_parse_timestamp)
585-
df[col] = pandas.Series(timestamp_col.dt.to_pydatetime(), dtype='object')
586-
elif self.description[i][1] == 'decimal':
587-
df[col] = pandas.Series(df[col].apply(Decimal), dtype='object')
588-
# Replace NaNs with None to maintain backwards compatibility
589-
return df.where(pandas.notnull(df), None).values.tolist()
591+
n_rows, _ = table.shape
592+
list_repr = [[
593+
self.parse_type(self.description[col_index][1], col[row_index].as_py())
594+
for col_index, col in enumerate(table.itercolumns())
595+
] for row_index in range(n_rows)]
596+
return list_repr
590597

591598
def fetchmany_arrow(self, n_rows: int) -> pyarrow.Table:
592599
"""

0 commit comments

Comments
 (0)