From d78b163ddb69ec1b5cfb8805a3484769598fbe8e Mon Sep 17 00:00:00 2001 From: Abe Tomoaki Date: Wed, 22 Jan 2025 17:36:57 +0900 Subject: [PATCH 1/5] msginit: Replace charset in Content-type with charset in locale `gettext` output is generated with `charset=CHARSET`. Translating it as is will result in an error because `CHARSET` is an invalid value. Changed so that `charset=CHARSET` is replaced by the locale's charset. --- lib/gettext/po_parser.rb | 6 ++-- lib/gettext/tools/msginit.rb | 15 ++++++++++ test/po/ja/_.po | 5 ++-- test/tools/test_msginit.rb | 54 +++++++++++++++++++++++++++++++++--- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/lib/gettext/po_parser.rb b/lib/gettext/po_parser.rb index c294cfb9..23020694 100644 --- a/lib/gettext/po_parser.rb +++ b/lib/gettext/po_parser.rb @@ -8,7 +8,7 @@ # # DO NOT MODIFY!!!! -# This file is automatically generated by Racc 1.7.1 +# This file is automatically generated by Racc 1.5.1 # from Racc grammar file "". # @@ -164,7 +164,7 @@ def on_comment(comment) if comment.start_with?(POFormat::FLAG_MARK) content = comment[POFormat::FLAG_MARK.size..-1] flags = parse_flags_line(content) - @fuzzy = true if flags.include?("fuzzy") + @fuzzy = flags.include?("fuzzy") end if @data.instance_of?(PO) if comment == "#" @@ -350,7 +350,6 @@ def clear racc_shift_n, racc_reduce_n, racc_use_result_var ] -Ractor.make_shareable(Racc_arg) if defined?(Ractor) Racc_token_to_s_table = [ "$end", @@ -372,7 +371,6 @@ def clear "plural_message", "msgstr_plural", "msgstr_plural_line" ] -Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor) Racc_debug_parser = true diff --git a/lib/gettext/tools/msginit.rb b/lib/gettext/tools/msginit.rb index 710d013b..39fb4c1f 100644 --- a/lib/gettext/tools/msginit.rb +++ b/lib/gettext/tools/msginit.rb @@ -62,6 +62,7 @@ def initialize @output_file = nil @locale = nil @language = nil + @charset = nil @entry = nil @comment = nil @translator = nil @@ -180,6 +181,11 @@ def validate end @locale = 
language_tag.to_simple.to_s @language = language_tag.language + if language_tag.respond_to?(:charset) + @charset = language_tag.charset + else + @charset = Locale.charset + end @output_file ||= "#{@locale}.po" if File.exist?(@output_file) @@ -280,6 +286,7 @@ def replace_entry replace_pot_revision_date replace_language replace_plural_forms + replace_charset end def replace_comment @@ -325,6 +332,14 @@ def replace_plural_forms end end + CONTENT_TYPE_CHARSET = /(Content-Type: .+ charset=)CHARSET/ + + def replace_charset + if CONTENT_TYPE_CHARSET =~ @entry + @entry = @entry.gsub(CONTENT_TYPE_CHARSET, "\\1#{@charset}") + end + end + def plural_forms(language) converter = CLDRPluralsConverter.new(language) converter.convert diff --git a/test/po/ja/_.po b/test/po/ja/_.po index 0069587d..f858de06 100644 --- a/test/po/ja/_.po +++ b/test/po/ja/_.po @@ -69,8 +69,9 @@ msgstr "" msgid "#" msgstr "" -msgid "\taaa'bbb\\ccc" -msgstr "" +#, fuzzy +msgid "\\taaa'bbb\\ccc" +msgstr "AAA" msgid "" "Here document1\n" diff --git a/test/tools/test_msginit.rb b/test/tools/test_msginit.rb index 27e6e8ac..0a28c5cd 100644 --- a/test/tools/test_msginit.rb +++ b/test/tools/test_msginit.rb @@ -77,6 +77,7 @@ def pot_header(options) options = default_po_header_options.merge(options) package_name = options[:package_name] || default_package_name have_plural_forms = options[:have_plural_forms] || true + charset = options[:charset] || "UTF-8" header = <\\n" "MIME-Version: 1.0\\n" -"Content-Type: text/plain; charset=UTF-8\\n" +"Content-Type: text/plain; charset=#{charset}\\n" "Content-Transfer-Encoding: 8bit\\n" EOF if have_plural_forms @@ -109,6 +110,7 @@ def po_header(locale, language, options={}) name = options[:translator_name] || "FULL NAME" email = options[:translator_email] || "EMAIL@ADDRESS" language_name = Locale::Info.get_language(language).name + charset = options[:charset] || "UTF-8" plural_forms = @msginit.send(:plural_forms, language) < charset) po_file_path = "output.po" 
@msginit.run("--output", po_file_path, "--locale", locale) @@ -224,6 +226,50 @@ def test_language_region_charset assert_equal(po_header(locale, language), run_msginit("#{locale}.#{charset}")) end + + def test_language_charset_with_replace_content_type + locale = "en" + assert_equal(po_header(locale, locale), + run_msginit(locale, "CHARSET")) + end + + def test_language_region_with_replace_content_type + locale = "en_US" + language = "en" + assert_equal(po_header(locale, language), + run_msginit(locale, "CHARSET")) + end + + def test_language_region_charset_with_replace_content_type + locale = "en_US" + language = "en" + charset = "UTF-8" + assert_equal(po_header(locale, language), + run_msginit("#{locale}.#{charset}", "CHARSET")) + end + end + + class TestCurrentCharset < self + def run_msginit(charset) + create_pot_file("test.pot", :charset => charset) + po_file_path = "output.po" + @msginit.run("--output", po_file_path) + File.read(po_file_path) + end + + def po_header(options) + super(current_locale, current_language, options) + end + + def test_change + assert_equal(po_header(:charset => "UTF-8"), + run_msginit("CHARSET")) + end + + def test_not_change + assert_equal(po_header(:charset => "ASCII"), + run_msginit("ASCII")) + end end class TestTranslator < self From a16e9bc1c6a516614243bc14cdbefc4b93285607 Mon Sep 17 00:00:00 2001 From: Abe Tomoaki Date: Thu, 23 Jan 2025 08:31:41 +0900 Subject: [PATCH 2/5] Revert --- lib/gettext/po_parser.rb | 6 ++++-- test/po/ja/_.po | 5 ++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/gettext/po_parser.rb b/lib/gettext/po_parser.rb index 23020694..c294cfb9 100644 --- a/lib/gettext/po_parser.rb +++ b/lib/gettext/po_parser.rb @@ -8,7 +8,7 @@ # # DO NOT MODIFY!!!! -# This file is automatically generated by Racc 1.5.1 +# This file is automatically generated by Racc 1.7.1 # from Racc grammar file "". 
# @@ -164,7 +164,7 @@ def on_comment(comment) if comment.start_with?(POFormat::FLAG_MARK) content = comment[POFormat::FLAG_MARK.size..-1] flags = parse_flags_line(content) - @fuzzy = flags.include?("fuzzy") + @fuzzy = true if flags.include?("fuzzy") end if @data.instance_of?(PO) if comment == "#" @@ -350,6 +350,7 @@ def clear racc_shift_n, racc_reduce_n, racc_use_result_var ] +Ractor.make_shareable(Racc_arg) if defined?(Ractor) Racc_token_to_s_table = [ "$end", @@ -371,6 +372,7 @@ def clear "plural_message", "msgstr_plural", "msgstr_plural_line" ] +Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor) Racc_debug_parser = true diff --git a/test/po/ja/_.po b/test/po/ja/_.po index f858de06..0069587d 100644 --- a/test/po/ja/_.po +++ b/test/po/ja/_.po @@ -69,9 +69,8 @@ msgstr "" msgid "#" msgstr "" -#, fuzzy -msgid "\\taaa'bbb\\ccc" -msgstr "AAA" +msgid "\taaa'bbb\\ccc" +msgstr "" msgid "" "Here document1\n" From 9f70ff108fe9ef7fca0d00eb4f865288a34495e5 Mon Sep 17 00:00:00 2001 From: Abe Tomoaki Date: Thu, 23 Jan 2025 08:33:59 +0900 Subject: [PATCH 3/5] Improve regular expression --- lib/gettext/tools/msginit.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/gettext/tools/msginit.rb b/lib/gettext/tools/msginit.rb index 39fb4c1f..3e8a06e3 100644 --- a/lib/gettext/tools/msginit.rb +++ b/lib/gettext/tools/msginit.rb @@ -332,12 +332,10 @@ def replace_plural_forms end end - CONTENT_TYPE_CHARSET = /(Content-Type: .+ charset=)CHARSET/ + CONTENT_TYPE_CHARSET = /^(Content-Type:.+ charset=)CHARSET/ def replace_charset - if CONTENT_TYPE_CHARSET =~ @entry - @entry = @entry.gsub(CONTENT_TYPE_CHARSET, "\\1#{@charset}") - end + @entry = @entry.gsub(CONTENT_TYPE_CHARSET, "\\1#{@charset}") end def plural_forms(language) From 29ad41d36e6f8d9a8db4878b3dd9be71f022a8b4 Mon Sep 17 00:00:00 2001 From: Abe Tomoaki Date: Thu, 23 Jan 2025 08:40:39 +0900 Subject: [PATCH 4/5] test: Improve argument passing --- test/tools/test_msginit.rb | 16 ++++++++-------- 
1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/tools/test_msginit.rb b/test/tools/test_msginit.rb index 0a28c5cd..2cd42c36 100644 --- a/test/tools/test_msginit.rb +++ b/test/tools/test_msginit.rb @@ -198,8 +198,8 @@ def test_specify_option end class TestLocale < self - def run_msginit(locale, charset = "UTF-8") - create_pot_file("test.pot", :charset => charset) + def run_msginit(locale, pot_charset=nil) + create_pot_file("test.pot", charset: pot_charset) po_file_path = "output.po" @msginit.run("--output", po_file_path, "--locale", locale) @@ -250,8 +250,8 @@ def test_language_region_charset_with_replace_content_type end class TestCurrentCharset < self - def run_msginit(charset) - create_pot_file("test.pot", :charset => charset) + def run_msginit(pot_charset) + create_pot_file("test.pot", charset: pot_charset) po_file_path = "output.po" @msginit.run("--output", po_file_path) File.read(po_file_path) @@ -262,13 +262,13 @@ def po_header(options) end def test_change - assert_equal(po_header(:charset => "UTF-8"), - run_msginit("CHARSET")) + assert_equal(po_header(charset: "UTF-8"), + run_msginit("CHARSET")) end def test_not_change - assert_equal(po_header(:charset => "ASCII"), - run_msginit("ASCII")) + assert_equal(po_header(charset: "ASCII"), + run_msginit("ASCII")) end end From fb300b5942953747d2480e2272525d1dd0da9cc7 Mon Sep 17 00:00:00 2001 From: Abe Tomoaki Date: Fri, 24 Jan 2025 08:57:28 +0900 Subject: [PATCH 5/5] Improve test name --- test/tools/test_msginit.rb | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/test/tools/test_msginit.rb b/test/tools/test_msginit.rb index 2cd42c36..69aa5f3c 100644 --- a/test/tools/test_msginit.rb +++ b/test/tools/test_msginit.rb @@ -49,6 +49,10 @@ def current_language Locale.current.language end + def current_charset + Locale.current.charset + end + def translator_name "me" end @@ -57,6 +61,10 @@ def translator_email "me@example.com" end + def template_charset + 
"CHARSET" + end + def create_pot_file(path, options=nil) options ||= {} File.open(path, "w") do |pot_file| @@ -227,25 +235,25 @@ def test_language_region_charset run_msginit("#{locale}.#{charset}")) end - def test_language_charset_with_replace_content_type + def test_language_charset_with_template_charset locale = "en" assert_equal(po_header(locale, locale), - run_msginit(locale, "CHARSET")) + run_msginit(locale, template_charset)) end - def test_language_region_with_replace_content_type + def test_language_region_with_template_charset locale = "en_US" language = "en" assert_equal(po_header(locale, language), - run_msginit(locale, "CHARSET")) + run_msginit(locale, template_charset)) end - def test_language_region_charset_with_replace_content_type + def test_language_region_charset_with_template_charset locale = "en_US" language = "en" charset = "UTF-8" assert_equal(po_header(locale, language), - run_msginit("#{locale}.#{charset}", "CHARSET")) + run_msginit("#{locale}.#{charset}", template_charset)) end end @@ -261,12 +269,12 @@ def po_header(options) super(current_locale, current_language, options) end - def test_change - assert_equal(po_header(charset: "UTF-8"), - run_msginit("CHARSET")) + def test_template_charset + assert_equal(po_header(charset: current_charset), + run_msginit(template_charset)) end - def test_not_change + def test_no_template_charset assert_equal(po_header(charset: "ASCII"), run_msginit("ASCII")) end