Skip to content

Commit 4aa0ff1

Browse files
committed
hackish solution
1 parent 2686655 commit 4aa0ff1

File tree

1 file changed

+40
-3
lines changed

1 file changed

+40
-3
lines changed

pandas/_libs/parsers.pyx

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,10 @@ cdef class TextReader:
10691069
else:
10701070
col_res = None
10711071
for dt in self.dtype_cast_order:
1072+
if (dt.kind in "iu" and
1073+
self._column_has_float(i, start, end, na_filter, na_hashset)):
1074+
continue
1075+
10721076
try:
10731077
col_res, na_count = self._convert_with_dtype(
10741078
dt, i, start, end, na_filter, 0, na_hashset, na_fset)
@@ -1081,9 +1085,9 @@ cdef class TextReader:
10811085
np.dtype("object"), i, start, end, 0,
10821086
0, na_hashset, na_fset)
10831087
except OverflowError:
1084-
# Try other dtypes that can accommodate large numbers.
1085-
# (e.g. float and string)
1086-
pass
1088+
col_res, na_count = self._convert_with_dtype(
1089+
np.dtype("object"), i, start, end, na_filter,
1090+
0, na_hashset, na_fset)
10871091
if col_res is not None:
10881092
break
10891093

@@ -1341,6 +1345,39 @@ cdef class TextReader:
13411345
else:
13421346
return None
13431347

1348+
cdef bint _column_has_float(self, int64_t col,
                            int64_t start, int64_t end,
                            bint na_filter, kh_str_starts_t *na_hashset):
    """
    Report whether any token of column ``col`` looks like a float.

    Walks ``end - start`` tokens of the column, beginning at ``start``.
    A token is treated as float-like as soon as one of its bytes equals
    the parser's decimal character, ``e`` or ``E``. When ``na_filter`` is
    set, tokens for which ``kh_get_str_starts_item(na_hashset, word)`` is
    truthy are skipped. The scan stops at the first float-like token.

    Returns True if such a token was found, False otherwise.
    """
    cdef:
        Py_ssize_t row, nrows = end - start
        coliter_t it
        const char *word = NULL
        const char *cursor
        char c

    coliter_setup(&it, self.parser, col, start)

    for row in range(nrows):
        # Advance the column iterator; ``word`` now points at this row's token.
        COLITER_NEXT(it, word)

        # Tokens recognised as NA are not inspected.
        if na_filter and kh_get_str_starts_item(na_hashset, word):
            continue

        # Byte-wise walk over the NUL-terminated token.
        cursor = word
        while True:
            c = cursor[0]
            if c == b"\0":
                break
            if c == self.parser.decimal or c == b"e" or c == b"E":
                # First float marker settles the answer for the whole column.
                return True
            cursor += 1

    return False
13441381

13451382
# Factor out code common to TextReader.__dealloc__ and TextReader.close
13461383
# It cannot be a class method, since calling self.close() in __dealloc__

0 commit comments

Comments
 (0)