From c551d7020bba3cf452e696d55c451ae951d0f24b Mon Sep 17 00:00:00 2001 From: Daisuke Aritomo Date: Tue, 4 Nov 2025 14:34:31 +0900 Subject: [PATCH] Re-allow consecutive, leading and trailing dots in EMAIL_REGEXP Effectively reverts commit 788274b180d6cd41af71ee6f2bb7117d8b43c936 and 0abac721d8fe0e5ad7e10f3945a9b8c6c7144176. EMAIL_REGEXP was mostly drawn from WHATWG HTML LS. This spec states that it intentionally violates RFC 5322 to provide a practical regex for validation. > This requirement is a willful violation of RFC 5322, which defines a > syntax for email addresses that is simultaneously too strict (before the > "@" character), too vague (after the "@" character), and too lax > (allowing comments, whitespace characters, and quoted strings in manners > unfamiliar to most users) to be of practical use here. Allowing consecutive dots (`a..a@`) and leading/trailing dots (`.a@`, `a.@`) is not the only deviation from RFC 5322. If a truly RFC 5322-compliant regexp is needed, it should be provided under a different name, since making EMAIL_REGEXP compliant would require departing too far from the original. 
--- lib/uri/mailto.rb | 6 +----- test/uri/test_mailto.rb | 26 ++++++++++---------------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/lib/uri/mailto.rb b/lib/uri/mailto.rb index f747b79..cb8024f 100644 --- a/lib/uri/mailto.rb +++ b/lib/uri/mailto.rb @@ -52,11 +52,7 @@ class MailTo < Generic HEADER_REGEXP = /\A(?(?:%\h\h|[!$'-.0-;@-Z_a-z~])*=(?:%\h\h|[!$'-.0-;@-Z_a-z~])*)(?:&\g)*\z/ # practical regexp for email address # https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address - EMAIL_REGEXP = %r[\A#{ - atext = %q[(?:[a-zA-Z0-9!\#$%&'*+\/=?^_`{|}~-]+)] - }(?:\.#{atext})*@#{ - label = %q[(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)] - }(?:\.#{label})*\z] + EMAIL_REGEXP = /\A[a-zA-Z0-9.!\#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\z/ # :startdoc: # diff --git a/test/uri/test_mailto.rb b/test/uri/test_mailto.rb index 59bb5de..6cd3352 100644 --- a/test/uri/test_mailto.rb +++ b/test/uri/test_mailto.rb @@ -145,29 +145,23 @@ def test_check_to u.to = 'a@valid.com' assert_equal(u.to, 'a@valid.com') - # Invalid emails - assert_raise(URI::InvalidComponentError) do - u.to = '#1@mail.com' - end + # Intentionally allowed violations of RFC 5322 + u.to = 'a..a@valid.com' + assert_equal(u.to, 'a..a@valid.com') - assert_raise(URI::InvalidComponentError) do - u.to = '@invalid.email' - end + u.to = 'hello.@valid.com' + assert_equal(u.to, 'hello.@valid.com') - assert_raise(URI::InvalidComponentError) do - u.to = '.hello@invalid.email' - end - - assert_raise(URI::InvalidComponentError) do - u.to = 'hello.@invalid.email' - end + u.to = '.hello@valid.com' + assert_equal(u.to, '.hello@valid.com') + # Invalid emails assert_raise(URI::InvalidComponentError) do - u.to = 'n.@invalid.email' + u.to = '#1@mail.com' end assert_raise(URI::InvalidComponentError) do - u.to = 'n..t@invalid.email' + u.to = '@invalid.email' end # Invalid host emails