Skip to content

Commit 460cdd2

Browse files
Merge pull request #28 from bandaangosta/dataset_read_py3
Adds support for dataset reading (iter_rows) on Python 3
2 parents: db2fc7e + 512416c; commit: 460cdd2

File tree

1 file changed

+21
-8
lines changed

1 file changed

+21
-8
lines changed

dataikuapi/utils.py

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
import itertools
66

77
if sys.version_info > (3,0):
8+
import codecs
9+
810
dku_basestring_type = str
911
dku_zip_longest = itertools.zip_longest
1012
else:
@@ -31,7 +33,7 @@ def next(self):
3133

3234
def __iter__(self):
3335
return self
34-
36+
3537
def none_if_throws(f):
3638
def aux(*args, **kargs):
3739
try:
@@ -51,7 +53,10 @@ def __init__(self, schema, csv_stream):
5153

5254
def iter_rows(self):
5355
def decode(x):
54-
return unicode(x, "utf8")
56+
if sys.version_info > (3,0):
57+
return x
58+
else:
59+
return unicode(x, "utf8")
5560

5661
def parse_iso_date(s):
5762
if s == "":
@@ -79,9 +84,17 @@ def str_to_bool(s):
7984
CASTERS.get(col["type"], decode) for col in schema
8085
]
8186
with closing(self.csv_stream) as r:
82-
for uncasted_tuple in csv.reader(r.raw,
83-
delimiter='\t',
84-
quotechar='"',
85-
doublequote=True):
86-
yield [none_if_throws(caster)(val)
87-
for (caster, val) in dku_zip_longest(casters, uncasted_tuple)]
87+
if sys.version_info > (3,0):
88+
for uncasted_tuple in csv.reader(codecs.iterdecode(r.raw, 'utf-8'),
89+
delimiter='\t',
90+
quotechar='"',
91+
doublequote=True):
92+
yield [none_if_throws(caster)(val)
93+
for (caster, val) in dku_zip_longest(casters, uncasted_tuple)]
94+
else:
95+
for uncasted_tuple in csv.reader(r.raw,
96+
delimiter='\t',
97+
quotechar='"',
98+
doublequote=True):
99+
yield [none_if_throws(caster)(val)
100+
for (caster, val) in dku_zip_longest(casters, uncasted_tuple)]

0 commit comments

Comments
 (0)