Skip to content

Commit 4e7dd18

Browse files
committed
Add test
1 parent 10a5305 commit 4e7dd18

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

pandas/tests/io/parser/common/test_chunksize.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,24 @@ def test_chunksize_second_block_shorter(all_parsers):
381381

382382
for i, result in enumerate(result_chunks):
383383
tm.assert_frame_equal(result, expected_frames[i])
384+
385+
386+
def test_chunksize_skip_bad_line_with_bad_line_first_in_the_chunk(all_parsers):
    """
    Read a CSV in chunks of two rows with ``on_bad_lines="skip"``.

    The input has a short row (``3``) and an over-long row (``4,5,extra``);
    the over-long row follows the first two data rows, i.e. it sits where the
    second chunk would begin. For the pyarrow engine the test instead expects
    a ``ValueError`` stating that ``chunksize`` is not supported.
    """
    csv_text = "a,b\n1,2\n3\n4,5,extra\n6,7"
    read_kwargs = {"chunksize": 2, "on_bad_lines": "skip"}

    if all_parsers.engine == "pyarrow":
        # Chunked reading is expected to be rejected for this engine.
        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
        with pytest.raises(ValueError, match=msg):
            all_parsers.read_csv(StringIO(csv_text), **read_kwargs)
        return

    reader = all_parsers.read_csv(StringIO(csv_text), **read_kwargs)

    # Expected: the short row is padded with NaN in "b"; the over-long row is
    # dropped, and the remaining row "6,7" carries index label 2.
    first_chunk = DataFrame({"a": [1, 3], "b": [2, np.nan]})
    second_chunk = DataFrame({"a": [6], "b": [7]}, index=[2])
    expected_frames = [first_chunk, second_chunk]

    for position, chunk in enumerate(reader):
        tm.assert_frame_equal(chunk, expected_frames[position])

0 commit comments

Comments
 (0)