databricks
diff --git a/‎cmdexec/clients/python/setup.py‎
Lines changed: 1 addition & 1 deletion b/‎cmdexec/clients/python/setup.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cmdexec/clients/python/src/databricks/sql/client.py‎
Lines changed: 53 additions & 14 deletions b/‎cmdexec/clients/python/src/databricks/sql/client.py‎
Lines changed: 53 additions & 14 deletions
diff --git a/‎cmdexec/clients/python/src/databricks/sql/types.py‎
Lines changed: 195 additions & 0 deletions b/‎cmdexec/clients/python/src/databricks/sql/types.py‎
Lines changed: 195 additions & 0 deletions
@@ -5,6 +5,6 @@
     version="2.0.0rc2",
     package_dir={"": "src"},
     packages=setuptools.find_packages(where="src"),
-    install_requires=["pyarrow", 'thrift>=0.13.0', "pandas"],
+    install_requires=["pyarrow", 'thrift>=0.13.0', "pandas>=1.0.0"],
     author="Databricks",
 )
@@ -5,12 +5,14 @@
 import re
 from typing import Dict, Tuple, List, Optional, Any
 
+import pandas
 import pyarrow
 
 from databricks.sql import USER_AGENT_NAME, __version__
 from databricks.sql import *
 from databricks.sql.thrift_backend import ThriftBackend
 from databricks.sql.utils import ExecuteResponse, ParamEscaper
+from databricks.sql.types import Row
 
 logger = logging.getLogger(__name__)
 
@@ -67,9 +69,13 @@ def __init__(self,
         # _socket_timeout
         #  The timeout in seconds for socket send, recv and connect operations. Defaults to None for
         #  no timeout. Should be a positive float or integer.
+        # _disable_pandas
+        #  In case the deserialisation through pandas causes any issues, it can be disabled with
+        #  this flag.
 
         self.host = server_hostname
         self.port = kwargs.get("_port", 443)
+        self.disable_pandas = kwargs.get("_disable_pandas", False)
 
         authorization_header = []
         if kwargs.get("_username") and kwargs.get("_password"):
@@ -324,7 +330,7 @@ def columns(self,
                                            self.buffer_size_bytes, self.arraysize)
         return self
 
-    def fetchall(self) -> List[Tuple]:
+    def fetchall(self) -> List[Row]:
         """
         Fetch all (remaining) rows of a query result, returning them as a sequence of sequences.
 
@@ -337,7 +343,7 @@ def fetchall(self) -> List[Tuple]:
         else:
             raise Error("There is no active result set")
 
-    def fetchone(self) -> Tuple:
+    def fetchone(self) -> Optional[Row]:
         """
         Fetch the next row of a query result set, returning a single sequence, or ``None`` when
         no more data is available.
@@ -351,7 +357,7 @@ def fetchone(self) -> Tuple:
         else:
             raise Error("There is no active result set")
 
-    def fetchmany(self, size: int) -> List[Tuple]:
+    def fetchmany(self, size: int) -> List[Row]:
         """
         Fetch the next set of rows of a query result, returning a sequence of sequences (e.g. a
         list of tuples).
@@ -373,14 +379,14 @@ def fetchmany(self, size: int) -> List[Tuple]:
         else:
             raise Error("There is no active result set")
 
-    def fetchall_arrow(self):
+    def fetchall_arrow(self) -> pyarrow.Table:
         self._check_not_closed()
         if self.active_result_set:
             return self.active_result_set.fetchall_arrow()
         else:
             raise Error("There is no active result set")
 
-    def fetchmany_arrow(self, size):
+    def fetchmany_arrow(self, size) -> pyarrow.Table:
         self._check_not_closed()
         if self.active_result_set:
             return self.active_result_set.fetchmany_arrow(size)
@@ -505,10 +511,43 @@ def _fill_results_buffer(self):
         self.has_more_rows = has_more_rows
 
     def _convert_arrow_table(self, table):
-        n_rows, _ = table.shape
-        list_repr = [[col[row_index].as_py() for col in table.itercolumns()]
-                     for row_index in range(n_rows)]
-        return list_repr
+        column_names = [c[0] for c in self.description]
+        ResultRow = Row(*column_names)
+
+        if self.connection.disable_pandas is True:
+            return [ResultRow(*[v.as_py() for v in r]) for r in zip(*table.itercolumns())]
+
+        # Need to use nullable types, as otherwise type can change when there are missing values.
+        # See https://arrow.apache.org/docs/python/pandas.html#nullable-types
+        # NOTE: This api is epxerimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
+        dtype_mapping = {
+            pyarrow.int8(): pandas.Int8Dtype(),
+            pyarrow.int16(): pandas.Int16Dtype(),
+            pyarrow.int32(): pandas.Int32Dtype(),
+            pyarrow.int64(): pandas.Int64Dtype(),
+            pyarrow.uint8(): pandas.UInt8Dtype(),
+            pyarrow.uint16(): pandas.UInt16Dtype(),
+            pyarrow.uint32(): pandas.UInt32Dtype(),
+            pyarrow.uint64(): pandas.UInt64Dtype(),
+            pyarrow.bool_(): pandas.BooleanDtype(),
+            pyarrow.float32(): pandas.Float32Dtype(),
+            pyarrow.float64(): pandas.Float64Dtype(),
+            pyarrow.string(): pandas.StringDtype(),
+        }
+
+        # Need to rename columns, as the to_pandas function cannot handle duplicate column names
+        table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
+        df = table_renamed.to_pandas(types_mapper=dtype_mapping.get)
+
+        for (i, col) in enumerate(df.columns):
+            # Check for 0 because .dt doesn't work on empty series
+            if self.description[i][1] == 'timestamp' and len(df) > 0:
+                # We store the dtype as object so we don't use the pandas datetime dtype but
+                # a native datetime.datetime
+                df[col] = pandas.Series(df[col].dt.to_pydatetime(), dtype='object')
+
+        res = df.to_numpy(na_value=None)
+        return [ResultRow(*v) for v in res]
 
     @property
     def rownumber(self):
@@ -548,7 +587,7 @@ def fetchall_arrow(self) -> pyarrow.Table:
 
         return results
 
-    def fetchone(self) -> Optional[Tuple]:
+    def fetchone(self) -> Optional[Row]:
         """
         Fetch the next row of a query result set, returning a single sequence,
         or None when no more data is available.
@@ -559,15 +598,15 @@ def fetchone(self) -> Optional[Tuple]:
         else:
             return None
 
-    def fetchall(self) -> List[Tuple]:
+    def fetchall(self) -> List[Row]:
         """
-        Fetch all (remaining) rows of a query result, returning them as a list of lists.
+        Fetch all (remaining) rows of a query result, returning them as a list of rows.
         """
         return self._convert_arrow_table(self.fetchall_arrow())
 
-    def fetchmany(self, size: int) -> List[Tuple]:
+    def fetchmany(self, size: int) -> List[Row]:
         """
-        Fetch the next set of rows of a query result, returning a list of lists.
+        Fetch the next set of rows of a query result, returning a list of rows.
 
         An empty sequence is returned when no more rows are available.
         """
 
@@ -0,0 +1,195 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Row class was taken from Apache Spark pyspark.
+
+from typing import (Any, Dict, List, Optional, Tuple, Union)
+
+
+class Row(tuple):
+    """
+    A row in a query result.
+    The fields in it can be accessed:
+
+    * like attributes (``row.key``)
+    * like dictionary values (``row[key]``)
+
+    ``key in row`` will search through row keys.
+
+    Row can be used to create a row object by using named arguments.
+    It is not allowed to omit a named argument to represent that the value is
+    None or missing. This should be explicitly set to None in this case.
+
+    Examples
+    --------
+    >>> row = Row(name="Alice", age=11)
+    >>> row
+    Row(name='Alice', age=11)
+    >>> row['name'], row['age']
+    ('Alice', 11)
+    >>> row.name, row.age
+    ('Alice', 11)
+    >>> 'name' in row
+    True
+    >>> 'wrong_key' in row
+    False
+
+    Row also can be used to create another Row like class, then it
+    could be used to create Row objects, such as
+
+    >>> Person = Row("name", "age")
+    >>> Person
+    <Row('name', 'age')>
+    >>> 'name' in Person
+    True
+    >>> 'wrong_key' in Person
+    False
+    >>> Person("Alice", 11)
+    Row(name='Alice', age=11)
+
+    This form can also be used to create rows as tuple values, i.e. with unnamed
+    fields.
+
+    >>> row1 = Row("Alice", 11)
+    >>> row2 = Row(name="Alice", age=11)
+    >>> row1 == row2
+    True
+    """
+
+    def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":
+        if args and kwargs:
+            raise ValueError("Can not use both args " "and kwargs to create Row")
+        if kwargs:
+            # create row objects
+            row = tuple.__new__(cls, list(kwargs.values()))
+            row.__fields__ = list(kwargs.keys())
+            return row
+        else:
+            # create row class or objects
+            return tuple.__new__(cls, args)
+
+    def asDict(self, recursive: bool = False) -> Dict[str, Any]:
+        """
+        Return as a dict
+
+        Parameters
+        ----------
+        recursive : bool, optional
+            turns the nested Rows to dict (default: False).
+
+        Notes
+        -----
+        If a row contains duplicate field names, e.g., the rows of a join
+        between two dataframes that both have the fields of same names,
+        one of the duplicate fields will be selected by ``asDict``. ``__getitem__``
+        will also return one of the duplicate fields, however returned value might
+        be different to ``asDict``.
+
+        Examples
+        --------
+        >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11}
+        True
+        >>> row = Row(key=1, value=Row(name='a', age=2))
+        >>> row.asDict() == {'key': 1, 'value': Row(name='a', age=2)}
+        True
+        >>> row.asDict(True) == {'key': 1, 'value': {'name': 'a', 'age': 2}}
+        True
+        """
+        if not hasattr(self, "__fields__"):
+            raise TypeError("Cannot convert a Row class into dict")
+
+        if recursive:
+
+            def conv(obj: Any) -> Any:
+                if isinstance(obj, Row):
+                    return obj.asDict(True)
+                elif isinstance(obj, list):
+                    return [conv(o) for o in obj]
+                elif isinstance(obj, dict):
+                    return dict((k, conv(v)) for k, v in obj.items())
+                else:
+                    return obj
+
+            return dict(zip(self.__fields__, (conv(o) for o in self)))
+        else:
+            return dict(zip(self.__fields__, self))
+
+    def __contains__(self, item: Any) -> bool:
+        if hasattr(self, "__fields__"):
+            return item in self.__fields__
+        else:
+            return super(Row, self).__contains__(item)
+
+    # let object acts like class
+    def __call__(self, *args: Any) -> "Row":
+        """create new Row object"""
+        if len(args) > len(self):
+            raise ValueError("Can not create Row with fields %s, expected %d values "
+                             "but got %s" % (self, len(self), args))
+        return _create_row(self, args)
+
+    def __getitem__(self, item: Any) -> Any:
+        if isinstance(item, (int, slice)):
+            return super(Row, self).__getitem__(item)
+        try:
+            # it will be slow when it has many fields,
+            # but this will not be used in normal cases
+            idx = self.__fields__.index(item)
+            return super(Row, self).__getitem__(idx)
+        except IndexError:
+            raise KeyError(item)
+        except ValueError:
+            raise ValueError(item)
+
+    def __getattr__(self, item: str) -> Any:
+        if item.startswith("__"):
+            raise AttributeError(item)
+        try:
+            # it will be slow when it has many fields,
+            # but this will not be used in normal cases
+            idx = self.__fields__.index(item)
+            return self[idx]
+        except IndexError:
+            raise AttributeError(item)
+        except ValueError:
+            raise AttributeError(item)
+
+    def __setattr__(self, key: Any, value: Any) -> None:
+        if key != "__fields__":
+            raise RuntimeError("Row is read-only")
+        self.__dict__[key] = value
+
+    def __reduce__(self, ) -> Union[str, Tuple[Any, ...]]:
+        """Returns a tuple so Python knows how to pickle Row."""
+        if hasattr(self, "__fields__"):
+            return (_create_row, (self.__fields__, tuple(self)))
+        else:
+            return tuple.__reduce__(self)
+
+    def __repr__(self) -> str:
+        """Printable representation of Row used in Python REPL."""
+        if hasattr(self, "__fields__"):
+            return "Row(%s)" % ", ".join("%s=%r" % (k, v)
+                                         for k, v in zip(self.__fields__, tuple(self)))
+        else:
+            return "<Row(%s)>" % ", ".join("%r" % field for field in self)
+
+
+def _create_row(fields: Union["Row", List[str]],
+                values: Union[Tuple[Any, ...], List[Any]]) -> "Row":
+    row = Row(*values)
+    row.__fields__ = fields
+    return row
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,6 @@`
`5`	`5`	`version="2.0.0rc2",`
`6`	`6`	`package_dir={"": "src"},`
`7`	`7`	`packages=setuptools.find_packages(where="src"),`
`8`		`- install_requires=["pyarrow", 'thrift>=0.13.0', "pandas"],`
	`8`	`+ install_requires=["pyarrow", 'thrift>=0.13.0', "pandas>=1.0.0"],`
`9`	`9`	`author="Databricks",`
`10`	`10`	`)`