Skip to content

Commit b3f7932

Browse files
committed
fix: accept \r in unquoted fields when row_sep excludes \r
Fixes #60
1 parent 9417e55 commit b3f7932

File tree

4 files changed

+92
-25
lines changed

4 files changed

+92
-25
lines changed

lib/csv/parser.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,10 @@ def prepare_quoted
675675
def prepare_unquoted
676676
return if @quote_character.nil?
677677

678-
no_unquoted_values = "\r\n".encode(@encoding)
678+
# Only exclude characters that are actually part of the row separator
679+
# instead of hardcoding "\r\n"
680+
row_separator_chars = @row_separator.chars.map { |c| Regexp.escape(c) }.join
681+
no_unquoted_values = row_separator_chars.encode(@encoding)
679682
no_unquoted_values << @escaped_first_column_separator
680683
unless @liberal_parsing
681684
no_unquoted_values << @escaped_quote_character

test/csv/parse/test_general.rb

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -139,27 +139,24 @@ def test_non_regex_edge_cases
139139
end
140140

141141
def test_malformed_csv_cr_first_line
142-
error = assert_raise(CSV::MalformedCSVError) do
143-
CSV.parse_line("1,2\r,3", row_sep: "\n")
144-
end
145-
assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.",
146-
error.message)
142+
# With the fix for accepting \r without quote when row separator doesn't include \r,
143+
# this should now parse successfully when row_sep is "\n"
144+
result = CSV.parse_line("1,2\r,3", row_sep: "\n")
145+
assert_equal(["1", "2\r", "3"], result)
147146
end
148147

149148
def test_malformed_csv_cr_middle_line
150-
csv = <<-CSV
151-
line,1,abc
152-
line,2,"def\nghi"
153-
154-
line,4,some\rjunk
155-
line,5,jkl
156-
CSV
157-
158-
error = assert_raise(CSV::MalformedCSVError) do
159-
CSV.parse(csv)
160-
end
161-
assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.",
162-
error.message)
149+
# With the fix for accepting \r without quote when row separator doesn't include \r,
150+
# this should now parse successfully (row_sep defaults to :auto, which detects "\n" for this data)
151+
csv = "line,1,abc\nline,2,\"def\nghi\"\nline,4,some\rjunk\nline,5,jkl\n"
152+
result = CSV.parse(csv)
153+
expected = [
154+
["line", "1", "abc"],
155+
["line", "2", "def\nghi"],
156+
["line", "4", "some\rjunk"],
157+
["line", "5", "jkl"]
158+
]
159+
assert_equal(expected, result)
163160
end
164161

165162
def test_malformed_csv_unclosed_quote

test/csv/parse/test_invalid.rb

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@
55

66
class TestCSVParseInvalid < Test::Unit::TestCase
77
def test_no_column_mixed_new_lines
8-
error = assert_raise(CSV::MalformedCSVError) do
9-
CSV.parse("\n" +
10-
"\r")
11-
end
12-
assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.",
13-
error.message)
8+
# With the fix for accepting \r without quote when row separator doesn't include \r,
9+
# this should now parse successfully (row_sep defaults to :auto, which detects "\n" for this data)
10+
result = CSV.parse("\n" + "\r")
11+
# This should parse as an empty first row and a second row with just "\r"
12+
assert_equal([[], ["\r"]], result)
1413
end
1514

1615
def test_ignore_invalid_line

test/csv/parse/test_unquoted_cr.rb

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# -*- coding: utf-8 -*-
2+
# frozen_string_literal: false
3+
4+
require_relative "../helper"
5+
6+
class TestCSVParseUnquotedCR < Test::Unit::TestCase
7+
extend DifferentOFS
8+
9+
def test_accept_cr_in_unquoted_field_when_row_separator_is_lf_only
10+
# When row separator is just \n, \r should be allowed in unquoted fields
11+
data = "field1,field\rwith\rcr,field3\nrow2,data,here\n"
12+
expected = [
13+
["field1", "field\rwith\rcr", "field3"],
14+
["row2", "data", "here"]
15+
]
16+
assert_equal(expected, CSV.parse(data, row_sep: "\n"))
17+
end
18+
19+
def test_accept_cr_in_unquoted_field_when_row_separator_is_custom
20+
# When row separator is custom (like "|"), \r should be allowed in unquoted fields
21+
data = "field1,field\rwith\rcr,field3|row2,data,here|"
22+
expected = [
23+
["field1", "field\rwith\rcr", "field3"],
24+
["row2", "data", "here"]
25+
]
26+
assert_equal(expected, CSV.parse(data, row_sep: "|"))
27+
end
28+
29+
def test_reject_cr_when_row_separator_includes_cr
30+
# When row separator includes \r (like \r\n), \r stays excluded from unquoted fields; here \r only appears inside the row separator, so this checks that parsing still succeeds
31+
data = "field1,field2,field3\r\nrow2,data,here\r\n"
32+
expected = [
33+
["field1", "field2", "field3"],
34+
["row2", "data", "here"]
35+
]
36+
assert_equal(expected, CSV.parse(data, row_sep: "\r\n"))
37+
end
38+
39+
def test_reject_cr_when_row_separator_is_cr_only
40+
# When row separator is just \r, \r stays excluded from unquoted fields; here every \r is a row separator, so this checks that parsing still succeeds
41+
data = "field1,field2,field3\rrow2,data,here\r"
42+
expected = [
43+
["field1", "field2", "field3"],
44+
["row2", "data", "here"]
45+
]
46+
assert_equal(expected, CSV.parse(data, row_sep: "\r"))
47+
end
48+
49+
def test_liberal_parsing_with_custom_row_separator
50+
# Test liberal parsing mode with custom row separator
51+
data = "field1,field\rwith\rcr,field3|row2,data,here|"
52+
expected = [
53+
["field1", "field\rwith\rcr", "field3"],
54+
["row2", "data", "here"]
55+
]
56+
assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true))
57+
end
58+
59+
def test_quoted_fields_with_cr_and_custom_row_separator
60+
# Quoted fields should always allow \r regardless of row separator
61+
data = "field1,\"field\rwith\rcr\",field3|row2,data,here|"
62+
expected = [
63+
["field1", "field\rwith\rcr", "field3"],
64+
["row2", "data", "here"]
65+
]
66+
assert_equal(expected, CSV.parse(data, row_sep: "|"))
67+
end
68+
end

0 commit comments

Comments
 (0)