From b3f7932d0b13bcb37d44f7f5dae2fc33b9d50f59 Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 14:30:01 +0900 Subject: [PATCH 01/12] fix: accept \r in unquoted fields when row_sep excludes \r Fixes #60 --- lib/csv/parser.rb | 5 ++- test/csv/parse/test_general.rb | 33 +++++++-------- test/csv/parse/test_invalid.rb | 11 +++-- test/csv/parse/test_unquoted_cr.rb | 68 ++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 25 deletions(-) create mode 100644 test/csv/parse/test_unquoted_cr.rb diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 4a74e40d..a7dd9632 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -675,7 +675,10 @@ def prepare_quoted def prepare_unquoted return if @quote_character.nil? - no_unquoted_values = "\r\n".encode(@encoding) + # Only exclude characters that are actually part of the row separator + # instead of hardcoding "\r\n" + row_separator_chars = @row_separator.chars.map { |c| Regexp.escape(c) }.join + no_unquoted_values = row_separator_chars.encode(@encoding) no_unquoted_values << @escaped_first_column_separator unless @liberal_parsing no_unquoted_values << @escaped_quote_character diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index 7ec0994e..3dec620e 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -139,27 +139,24 @@ def test_non_regex_edge_cases end def test_malformed_csv_cr_first_line - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse_line("1,2\r,3", row_sep: "\n") - end - assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.", - error.message) + # With the fix for accepting \r without quote when row separator doesn't include \r, + # this should now parse successfully when row_sep is "\n" + result = CSV.parse_line("1,2\r,3", row_sep: "\n") + assert_equal(["1", "2\r", "3"], result) end def test_malformed_csv_cr_middle_line - csv = <<-CSV -line,1,abc -line,2,"def\nghi" - -line,4,some\rjunk -line,5,jkl - CSV - - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse(csv) - end - assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.", - error.message) + # With the fix for accepting \r without quote when row separator doesn't include \r, + # this should now parse successfully (default row_sep is "\n") + csv = "line,1,abc\nline,2,\"def\nghi\"\nline,4,some\rjunk\nline,5,jkl\n" + result = CSV.parse(csv) + expected = [ + ["line", "1", "abc"], + ["line", "2", "def\nghi"], + ["line", "4", "some\rjunk"], + ["line", "5", "jkl"] + ] + assert_equal(expected, result) end def test_malformed_csv_unclosed_quote diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb index ddb59e2b..8e5d1b08 100644 --- a/test/csv/parse/test_invalid.rb +++ b/test/csv/parse/test_invalid.rb @@ -5,12 +5,11 @@ class TestCSVParseInvalid < Test::Unit::TestCase def test_no_column_mixed_new_lines - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse("\n" + - "\r") - end - assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.", - error.message) + # With the fix for accepting \r without quote when row separator doesn't include \r, + # this should now parse successfully (default row_sep is "\n") + result = CSV.parse("\n" + "\r") + # This should parse as an empty first row and a second row with just "\r" + assert_equal([[], ["\r"]], result) end def test_ignore_invalid_line diff --git a/test/csv/parse/test_unquoted_cr.rb b/test/csv/parse/test_unquoted_cr.rb new file mode 100644 index 00000000..4381de0f --- /dev/null +++ b/test/csv/parse/test_unquoted_cr.rb @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# frozen_string_literal: false + +require_relative "../helper" + +class TestCSVParseUnquotedCR < Test::Unit::TestCase + extend DifferentOFS + + def test_accept_cr_in_unquoted_field_when_row_separator_is_lf_only + # When row separator is just \n, \r should be allowed in unquoted fields + data = "field1,field\rwith\rcr,field3\nrow2,data,here\n" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "\n")) + end + + def test_accept_cr_in_unquoted_field_when_row_separator_is_custom + # When row separator is custom (like "|"), \r should be allowed in unquoted fields + data = "field1,field\rwith\rcr,field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|")) + end + + def test_reject_cr_when_row_separator_includes_cr + # When row separator includes \r (like \r\n), \r should still be rejected in unquoted fields + data = "field1,field2,field3\r\nrow2,data,here\r\n" + expected = [ + ["field1", "field2", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "\r\n")) + end + + def test_reject_cr_when_row_separator_is_cr_only + # When row separator is just \r, \r should be rejected in unquoted fields + data = "field1,field2,field3\rrow2,data,here\r" + expected = [ + ["field1", "field2", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "\r")) + end + + def test_liberal_parsing_with_custom_row_separator + # Test liberal parsing mode with custom row separator + data = "field1,field\rwith\rcr,field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true)) + end + + def test_quoted_fields_with_cr_and_custom_row_separator + # Quoted fields should always allow \r regardless of row separator + data = "field1,\"field\rwith\rcr\",field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|")) + end +end From 440c545fada134c6a9a2d8f35ca3125fe616ea3b Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 14:30:01 +0900 Subject: [PATCH 02/12] fix: accept \r in unquoted fields when row_sep excludes \r - simplified implementation based on review feedback --- .gitignore | 1 + lib/csv/parser.rb | 4 +- test/csv/parse/test_general.rb | 33 +++++++-------- test/csv/parse/test_invalid.rb | 11 +++-- test/csv/parse/test_unquoted_cr.rb | 68 ++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 25 deletions(-) create mode 100644 test/csv/parse/test_unquoted_cr.rb diff --git a/.gitignore b/.gitignore index a57ca09e..45a01580 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ /pkg/ /spec/reports/ /tmp/ +/vendor diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 4a74e40d..f3d896eb 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -675,7 +675,9 @@ def prepare_quoted def prepare_unquoted return if @quote_character.nil? - no_unquoted_values = "\r\n".encode(@encoding) + # Only exclude characters that are actually part of the row separator + # instead of hardcoding "\r\n" + no_unquoted_values = Regexp.escape(@row_separator).encode(@encoding) no_unquoted_values << @escaped_first_column_separator unless @liberal_parsing no_unquoted_values << @escaped_quote_character diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index 7ec0994e..3dec620e 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -139,27 +139,24 @@ def test_non_regex_edge_cases end def test_malformed_csv_cr_first_line - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse_line("1,2\r,3", row_sep: "\n") - end - assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.", - error.message) + # With the fix for accepting \r without quote when row separator doesn't include \r, + # this should now parse successfully when row_sep is "\n" + result = CSV.parse_line("1,2\r,3", row_sep: "\n") + assert_equal(["1", "2\r", "3"], result) end def test_malformed_csv_cr_middle_line - csv = <<-CSV -line,1,abc -line,2,"def\nghi" - -line,4,some\rjunk -line,5,jkl - CSV - - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse(csv) - end - assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.", - error.message) + # With the fix for accepting \r without quote when row separator doesn't include \r, + # this should now parse successfully (default row_sep is "\n") + csv = "line,1,abc\nline,2,\"def\nghi\"\nline,4,some\rjunk\nline,5,jkl\n" + result = CSV.parse(csv) + expected = [ + ["line", "1", "abc"], + ["line", "2", "def\nghi"], + ["line", "4", "some\rjunk"], + ["line", "5", "jkl"] + ] + assert_equal(expected, result) end def test_malformed_csv_unclosed_quote diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb index ddb59e2b..8e5d1b08 100644 --- a/test/csv/parse/test_invalid.rb +++ b/test/csv/parse/test_invalid.rb @@ -5,12 +5,11 @@ class TestCSVParseInvalid < Test::Unit::TestCase def test_no_column_mixed_new_lines - error = assert_raise(CSV::MalformedCSVError) do - CSV.parse("\n" + - "\r") - end - assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.", - error.message) + # With the fix for accepting \r without quote when row separator doesn't include \r, + # this should now parse successfully (default row_sep is "\n") + result = CSV.parse("\n" + "\r") + # This should parse as an empty first row and a second row with just "\r" + assert_equal([[], ["\r"]], result) end def test_ignore_invalid_line diff --git a/test/csv/parse/test_unquoted_cr.rb b/test/csv/parse/test_unquoted_cr.rb new file mode 100644 index 00000000..4381de0f --- /dev/null +++ b/test/csv/parse/test_unquoted_cr.rb @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# frozen_string_literal: false + +require_relative "../helper" + +class TestCSVParseUnquotedCR < Test::Unit::TestCase + extend DifferentOFS + + def test_accept_cr_in_unquoted_field_when_row_separator_is_lf_only + # When row separator is just \n, \r should be allowed in unquoted fields + data = "field1,field\rwith\rcr,field3\nrow2,data,here\n" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "\n")) + end + + def test_accept_cr_in_unquoted_field_when_row_separator_is_custom + # When row separator is custom (like "|"), \r should be allowed in unquoted fields + data = "field1,field\rwith\rcr,field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|")) + end + + def test_reject_cr_when_row_separator_includes_cr + # When row separator includes \r (like \r\n), \r should still be rejected in unquoted fields + data = "field1,field2,field3\r\nrow2,data,here\r\n" + expected = [ + ["field1", "field2", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "\r\n")) + end + + def test_reject_cr_when_row_separator_is_cr_only + # When row separator is just \r, \r should be rejected in unquoted fields + data = "field1,field2,field3\rrow2,data,here\r" + expected = [ + ["field1", "field2", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "\r")) + end + + def test_liberal_parsing_with_custom_row_separator + # Test liberal parsing mode with custom row separator + data = "field1,field\rwith\rcr,field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true)) + end + + def test_quoted_fields_with_cr_and_custom_row_separator + # Quoted fields should always allow \r regardless of row separator + data = "field1,\"field\rwith\rcr\",field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|")) + end +end From 5b8f693eeaf9c7889a4ef18bf2428b4f200121b1 Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 16:08:32 +0900 Subject: [PATCH 03/12] Update .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 45a01580..a57ca09e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,3 @@ /pkg/ /spec/reports/ /tmp/ -/vendor From c237450f70ef1644a8ae99c6f0605c6a3830585e Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 16:34:38 +0900 Subject: [PATCH 04/12] style: fix indentation in prepare_unquoted method --- lib/csv/parser.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index f3d896eb..7dbdcb59 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -675,9 +675,9 @@ def prepare_quoted def prepare_unquoted return if @quote_character.nil? - # Only exclude characters that are actually part of the row separator - # instead of hardcoding "\r\n" - no_unquoted_values = Regexp.escape(@row_separator).encode(@encoding) + # Only exclude characters that are actually part of the row separator + # instead of hardcoding "\r\n" + no_unquoted_values = Regexp.escape(@row_separator).encode(@encoding) no_unquoted_values << @escaped_first_column_separator unless @liberal_parsing no_unquoted_values << @escaped_quote_character From dd88061410ef28f0fbe9a94fa4559a4687e30b01 Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 17:22:51 +0900 Subject: [PATCH 05/12] reviewers' feedback --- lib/csv/parser.rb | 3 ++- test/csv/parse/test_general.rb | 13 +++++++------ test/csv/parse/test_invalid.rb | 9 +-------- test/csv/parse/test_unquoted_cr.rb | 24 ++++++++++++------------ 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 7dbdcb59..a7dd9632 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -677,7 +677,8 @@ def prepare_unquoted # Only exclude characters that are actually part of the row separator # instead of hardcoding "\r\n" - no_unquoted_values = Regexp.escape(@row_separator).encode(@encoding) + row_separator_chars = @row_separator.chars.map { |c| Regexp.escape(c) }.join + no_unquoted_values = row_separator_chars.encode(@encoding) no_unquoted_values << @escaped_first_column_separator unless @liberal_parsing no_unquoted_values << @escaped_quote_character diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index 3dec620e..e6f43e6c 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -138,16 +138,12 @@ def test_non_regex_edge_cases end end - def test_malformed_csv_cr_first_line - # With the fix for accepting \r without quote when row separator doesn't include \r, - # this should now parse successfully when row_sep is "\n" + def test_unquoted_cr_with_lf_row_separator result = CSV.parse_line("1,2\r,3", row_sep: "\n") assert_equal(["1", "2\r", "3"], result) end - def test_malformed_csv_cr_middle_line - # With the fix for accepting \r without quote when row separator doesn't include \r, - # this should now parse successfully (default row_sep is "\n") + def test_unquoted_cr_in_middle_line csv = "line,1,abc\nline,2,\"def\nghi\"\nline,4,some\rjunk\nline,5,jkl\n" result = CSV.parse(csv) expected = [ @@ -159,6 +155,11 @@ def test_malformed_csv_cr_middle_line assert_equal(expected, result) end + def test_empty_rows_with_cr + result = CSV.parse("\n" + "\r") + assert_equal([[], ["\r"]], result) + end + def test_malformed_csv_unclosed_quote error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line('1,2,"3...') diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb index 8e5d1b08..11e7a977 100644 --- a/test/csv/parse/test_invalid.rb +++ b/test/csv/parse/test_invalid.rb @@ -1,16 +1,9 @@ -# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseInvalid < Test::Unit::TestCase - def test_no_column_mixed_new_lines - # With the fix for accepting \r without quote when row separator doesn't include \r, - # this should now parse successfully (default row_sep is "\n") - result = CSV.parse("\n" + "\r") - # This should parse as an empty first row and a second row with just "\r" - assert_equal([[], ["\r"]], result) - end + def test_ignore_invalid_line csv = CSV.new(<<-CSV, headers: true, return_headers: true) diff --git a/test/csv/parse/test_unquoted_cr.rb b/test/csv/parse/test_unquoted_cr.rb index 4381de0f..fb353eb5 100644 --- a/test/csv/parse/test_unquoted_cr.rb +++ b/test/csv/parse/test_unquoted_cr.rb @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" @@ -6,8 +5,7 @@ class TestCSVParseUnquotedCR < Test::Unit::TestCase extend DifferentOFS - def test_accept_cr_in_unquoted_field_when_row_separator_is_lf_only - # When row separator is just \n, \r should be allowed in unquoted fields + def test_unquoted_cr_with_lf_row_separator data = "field1,field\rwith\rcr,field3\nrow2,data,here\n" expected = [ ["field1", "field\rwith\rcr", "field3"], @@ -16,8 +14,7 @@ def test_accept_cr_in_unquoted_field_when_row_separator_is_lf_only assert_equal(expected, CSV.parse(data, row_sep: "\n")) end - def test_accept_cr_in_unquoted_field_when_row_separator_is_custom - # When row separator is custom (like "|"), \r should be allowed in unquoted fields + def test_unquoted_cr_with_custom_row_separator data = "field1,field\rwith\rcr,field3|row2,data,here|" expected = [ ["field1", "field\rwith\rcr", "field3"], @@ -26,8 +23,7 @@ def test_accept_cr_in_unquoted_field_when_row_separator_is_custom assert_equal(expected, CSV.parse(data, row_sep: "|")) end - def test_reject_cr_when_row_separator_includes_cr - # When row separator includes \r (like \r\n), \r should still be rejected in unquoted fields + def test_unquoted_cr_with_crlf_row_separator data = "field1,field2,field3\r\nrow2,data,here\r\n" expected = [ ["field1", "field2", "field3"], @@ -36,8 +32,14 @@ def test_reject_cr_when_row_separator_includes_cr assert_equal(expected, CSV.parse(data, row_sep: "\r\n")) end - def test_reject_cr_when_row_separator_is_cr_only - # When row separator is just \r, \r should be rejected in unquoted fields + def test_unquoted_cr_rejected_when_included_in_row_separator + data = "field1,field\r2,field3\r\nrow2,data,here\r\n" + assert_raise(CSV::MalformedCSVError) do + CSV.parse(data, row_sep: "\r\n") + end + end + + def test_unquoted_cr_with_cr_row_separator data = "field1,field2,field3\rrow2,data,here\r" expected = [ ["field1", "field2", "field3"], @@ -47,7 +49,6 @@ def test_reject_cr_when_row_separator_is_cr_only end def test_liberal_parsing_with_custom_row_separator - # Test liberal parsing mode with custom row separator data = "field1,field\rwith\rcr,field3|row2,data,here|" expected = [ ["field1", "field\rwith\rcr", "field3"], @@ -56,8 +57,7 @@ def test_liberal_parsing_with_custom_row_separator assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true)) end - def test_quoted_fields_with_cr_and_custom_row_separator - # Quoted fields should always allow \r regardless of row separator + def test_quoted_cr_with_custom_row_separator data = "field1,\"field\rwith\rcr\",field3|row2,data,here|" expected = [ ["field1", "field\rwith\rcr", "field3"], From 750531a2ae9142d0e2b18670a29fbe098ee09026 Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 17:26:14 +0900 Subject: [PATCH 06/12] refactor(parser): simplify row separator escaping per code review --- lib/csv/parser.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index a7dd9632..7dbdcb59 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -677,8 +677,7 @@ def prepare_unquoted # Only exclude characters that are actually part of the row separator # instead of hardcoding "\r\n" - row_separator_chars = @row_separator.chars.map { |c| Regexp.escape(c) }.join - no_unquoted_values = row_separator_chars.encode(@encoding) + no_unquoted_values = Regexp.escape(@row_separator).encode(@encoding) no_unquoted_values << @escaped_first_column_separator unless @liberal_parsing no_unquoted_values << @escaped_quote_character From f32387300eb8cf2fb3a4863796bf2fb71ed6bcb2 Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 17:31:27 +0900 Subject: [PATCH 07/12] Updated test_unquoted_cr_with_crlf_row_separator --- test/csv/parse/test_unquoted_cr.rb | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/csv/parse/test_unquoted_cr.rb b/test/csv/parse/test_unquoted_cr.rb index fb353eb5..dde114e3 100644 --- a/test/csv/parse/test_unquoted_cr.rb +++ b/test/csv/parse/test_unquoted_cr.rb @@ -24,12 +24,10 @@ def test_unquoted_cr_with_custom_row_separator end def test_unquoted_cr_with_crlf_row_separator - data = "field1,field2,field3\r\nrow2,data,here\r\n" - expected = [ - ["field1", "field2", "field3"], - ["row2", "data", "here"] - ] - assert_equal(expected, CSV.parse(data, row_sep: "\r\n")) + data = "field1\r,field2,field3\r\nrow2,data,here\r\n" + assert_raise(CSV::MalformedCSVError) do + CSV.parse(data, row_sep: "\r\n") + end end def test_unquoted_cr_rejected_when_included_in_row_separator From cb1084d8be8149013f6001079b10d6789c1b621a Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 17:36:18 +0900 Subject: [PATCH 08/12] test_unquoted_cr_with_cr_row_separator test: logically problematic --- test/csv/parse/test_unquoted_cr.rb | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/csv/parse/test_unquoted_cr.rb b/test/csv/parse/test_unquoted_cr.rb index dde114e3..295f75c1 100644 --- a/test/csv/parse/test_unquoted_cr.rb +++ b/test/csv/parse/test_unquoted_cr.rb @@ -37,15 +37,6 @@ def test_unquoted_cr_rejected_when_included_in_row_separator end end - def test_unquoted_cr_with_cr_row_separator - data = "field1,field2,field3\rrow2,data,here\r" - expected = [ - ["field1", "field2", "field3"], - ["row2", "data", "here"] - ] - assert_equal(expected, CSV.parse(data, row_sep: "\r")) - end - def test_liberal_parsing_with_custom_row_separator data = "field1,field\rwith\rcr,field3|row2,data,here|" expected = [ From f2a2f8f0e7903bee0c3ac15251b805f622168756 Mon Sep 17 00:00:00 2001 From: Jas Date: Thu, 5 Jun 2025 17:56:00 +0900 Subject: [PATCH 09/12] Update test/csv/parse/test_invalid.rb Co-authored-by: Sutou Kouhei --- test/csv/parse/test_invalid.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb index 11e7a977..ae7c72cc 100644 --- a/test/csv/parse/test_invalid.rb +++ b/test/csv/parse/test_invalid.rb @@ -3,8 +3,6 @@ require_relative "../helper" class TestCSVParseInvalid < Test::Unit::TestCase - - def test_ignore_invalid_line csv = CSV.new(<<-CSV, headers: true, return_headers: true) head1,head2,head3 From 313f849af03645970d646e5cf291501e6430a12c Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 18:05:11 +0900 Subject: [PATCH 10/12] Following better organization principles --- test/csv/parse/test_general.rb | 49 +++++++++++++++++++++++-- test/csv/parse/test_unquoted_cr.rb | 57 ------------------------------ 2 files changed, 47 insertions(+), 59 deletions(-) delete mode 100644 test/csv/parse/test_unquoted_cr.rb diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index e6f43e6c..5524a1c5 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -139,8 +139,53 @@ def test_non_regex_edge_cases end def test_unquoted_cr_with_lf_row_separator - result = CSV.parse_line("1,2\r,3", row_sep: "\n") - assert_equal(["1", "2\r", "3"], result) + data = "field1,field\rwith\rcr,field3\nrow2,data,here\n" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "\n")) + end + + def test_unquoted_cr_with_custom_row_separator + data = "field1,field\rwith\rcr,field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|")) + end + + def test_unquoted_cr_with_crlf_row_separator + data = "field1\r,field2,field3\r\nrow2,data,here\r\n" + assert_raise(CSV::MalformedCSVError) do + CSV.parse(data, row_sep: "\r\n") + end + end + + def test_unquoted_cr_rejected_when_included_in_row_separator + data = "field1,field\r2,field3\r\nrow2,data,here\r\n" + assert_raise(CSV::MalformedCSVError) do + CSV.parse(data, row_sep: "\r\n") + end + end + + def test_liberal_parsing_with_unquoted_cr_and_custom_row_separator + data = "field1,field\rwith\rcr,field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true)) + end + + def test_quoted_cr_with_custom_row_separator + data = "field1,\"field\rwith\rcr\",field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|")) end def test_unquoted_cr_in_middle_line diff --git a/test/csv/parse/test_unquoted_cr.rb b/test/csv/parse/test_unquoted_cr.rb deleted file mode 100644 index 295f75c1..00000000 --- a/test/csv/parse/test_unquoted_cr.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: false - -require_relative "../helper" - -class TestCSVParseUnquotedCR < Test::Unit::TestCase - extend DifferentOFS - - def test_unquoted_cr_with_lf_row_separator - data = "field1,field\rwith\rcr,field3\nrow2,data,here\n" - expected = [ - ["field1", "field\rwith\rcr", "field3"], - ["row2", "data", "here"] - ] - assert_equal(expected, CSV.parse(data, row_sep: "\n")) - end - - def test_unquoted_cr_with_custom_row_separator - data = "field1,field\rwith\rcr,field3|row2,data,here|" - expected = [ - ["field1", "field\rwith\rcr", "field3"], - ["row2", "data", "here"] - ] - assert_equal(expected, CSV.parse(data, row_sep: "|")) - end - - def test_unquoted_cr_with_crlf_row_separator - data = "field1\r,field2,field3\r\nrow2,data,here\r\n" - assert_raise(CSV::MalformedCSVError) do - CSV.parse(data, row_sep: "\r\n") - end - end - - def test_unquoted_cr_rejected_when_included_in_row_separator - data = "field1,field\r2,field3\r\nrow2,data,here\r\n" - assert_raise(CSV::MalformedCSVError) do - CSV.parse(data, row_sep: "\r\n") - end - end - - def test_liberal_parsing_with_custom_row_separator - data = "field1,field\rwith\rcr,field3|row2,data,here|" - expected = [ - ["field1", "field\rwith\rcr", "field3"], - ["row2", "data", "here"] - ] - assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true)) - end - - def test_quoted_cr_with_custom_row_separator - data = "field1,\"field\rwith\rcr\",field3|row2,data,here|" - expected = [ - ["field1", "field\rwith\rcr", "field3"], - ["row2", "data", "here"] - ] - assert_equal(expected, CSV.parse(data, row_sep: "|")) - end -end From b455a09aa5f5438d3e2ad85b427c93cae8f78424 Mon Sep 17 00:00:00 2001 From: jsxs Date: Thu, 5 Jun 2025 19:06:59 +0900 Subject: [PATCH 11/12] test: consolidate unquoted CR tests, remove duplication --- test/csv/parse/test_general.rb | 16 ---------------- test/csv/parse/test_liberal_parsing.rb | 9 +++++++++ 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index 5524a1c5..03d97df0 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -156,13 +156,6 @@ def test_unquoted_cr_with_custom_row_separator assert_equal(expected, CSV.parse(data, row_sep: "|")) end - def test_unquoted_cr_with_crlf_row_separator - data = "field1\r,field2,field3\r\nrow2,data,here\r\n" - assert_raise(CSV::MalformedCSVError) do - CSV.parse(data, row_sep: "\r\n") - end - end - def test_unquoted_cr_rejected_when_included_in_row_separator data = "field1,field\r2,field3\r\nrow2,data,here\r\n" assert_raise(CSV::MalformedCSVError) do @@ -170,15 +163,6 @@ def test_unquoted_cr_rejected_when_included_in_row_separator end end - def test_liberal_parsing_with_unquoted_cr_and_custom_row_separator - data = "field1,field\rwith\rcr,field3|row2,data,here|" - expected = [ - ["field1", "field\rwith\rcr", "field3"], - ["row2", "data", "here"] - ] - assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true)) - end - def test_quoted_cr_with_custom_row_separator data = "field1,\"field\rwith\rcr\",field3|row2,data,here|" expected = [ diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb index 5796d108..4300ab40 100644 --- a/test/csv/parse/test_liberal_parsing.rb +++ b/test/csv/parse/test_liberal_parsing.rb @@ -80,6 +80,15 @@ def test_space_quote CSV.parse(input, liberal_parsing: true)) end + def test_unquoted_cr_with_custom_row_separator + data = "field1,field\rwith\rcr,field3|row2,data,here|" + expected = [ + ["field1", "field\rwith\rcr", "field3"], + ["row2", "data", "here"] + ] + assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true)) + end + def test_double_quote_outside_quote data = %Q{a,""b""} error = assert_raise(CSV::MalformedCSVError) do From 9be946f2798d1936765fc22dc2fc06202d4c6f38 Mon Sep 17 00:00:00 2001 From: jsxs Date: Fri, 6 Jun 2025 13:28:30 +0900 Subject: [PATCH 12/12] Apply maintainer feedback: improve tests and clean up code --- lib/csv/parser.rb | 2 -- test/csv/parse/test_general.rb | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 7dbdcb59..c508c8b2 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -675,8 +675,6 @@ def prepare_quoted def prepare_unquoted return if @quote_character.nil? - # Only exclude characters that are actually part of the row separator - # instead of hardcoding "\r\n" no_unquoted_values = Regexp.escape(@row_separator).encode(@encoding) no_unquoted_values << @escaped_first_column_separator unless @liberal_parsing diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index 03d97df0..520eb8f0 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -156,11 +156,13 @@ def test_unquoted_cr_with_custom_row_separator assert_equal(expected, CSV.parse(data, row_sep: "|")) end - def test_unquoted_cr_rejected_when_included_in_row_separator + def test_unquoted_cr_with_crlf_row_separator data = "field1,field\r2,field3\r\nrow2,data,here\r\n" - assert_raise(CSV::MalformedCSVError) do + error = assert_raise(CSV::MalformedCSVError) do CSV.parse(data, row_sep: "\r\n") end + assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.", + error.message) end def test_quoted_cr_with_custom_row_separator