From bae42a825fc2009975ddc3c79e2da2e49807e645 Mon Sep 17 00:00:00 2001 From: Herwin Date: Wed, 23 Apr 2025 10:45:32 +0200 Subject: [PATCH 1/3] Add Nokogiri XML parser This one is similar to the LibXML parser, but uses the Nokogiri SAX parser under the hood. This library is already used in a lot of web projects and is easier to install on Windows than LibXML. There is no direct dependency on the nokogiri gem; if the gem is not available, the code will only raise an error when this parser is actually used. --- Gemfile | 1 + lib/xmlrpc.rb | 3 +++ lib/xmlrpc/config.rb | 1 + lib/xmlrpc/parser.rb | 46 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index e882397..a4f7a3f 100644 --- a/Gemfile +++ b/Gemfile @@ -5,6 +5,7 @@ gemspec group :development do gem "bundler" gem "libxml-ruby", platforms: [:ruby, :jruby] + gem "nokogiri" gem "rake" gem "test-unit" end diff --git a/lib/xmlrpc.rb b/lib/xmlrpc.rb index 367b79a..ff28e46 100644 --- a/lib/xmlrpc.rb +++ b/lib/xmlrpc.rb @@ -59,6 +59,9 @@ # * libxml (LibXMLStreamParser) # * Compiled # * See https://rubygems.org/gems/libxml-ruby/ +# * nokogiri (NokogiriStreamParser) +# * Compiled +# * See https://nokogiri.org # # * General # * possible to choose between XMLParser module (Expat wrapper) and REXML (pure Ruby) parsers diff --git a/lib/xmlrpc/config.rb b/lib/xmlrpc/config.rb index bd13932..559f16a 100644 --- a/lib/xmlrpc/config.rb +++ b/lib/xmlrpc/config.rb @@ -15,6 +15,7 @@ module Config # # * XMLParser::REXMLStreamParser # * XMLParser::LibXMLStreamParser + # * XMLParser::NokogiriStreamParser DEFAULT_PARSER = XMLParser::REXMLStreamParser # enable tag diff --git a/lib/xmlrpc/parser.rb b/lib/xmlrpc/parser.rb index 582046a..c6d398b 100644 --- a/lib/xmlrpc/parser.rb +++ b/lib/xmlrpc/parser.rb @@ -624,7 +624,51 @@ def parse(str) end end - Classes = [REXMLStreamParser, LibXMLStreamParser] + class NokogiriStreamParser < AbstractStreamParser + def initialize + require 'nokogiri' + 
@parser_class = NokogiriStreamListener + end + + class NokogiriStreamListener + include StreamParserMixin + + def self.handler + # We need to construct this on first use, since we cannot be sure Nokogiri is available + @handler ||= begin + Class.new(Nokogiri::XML::SAX::Document) do + def initialize(parent) + super() + @parent = parent + end + + def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) + @parent.startElement(name, attrs) + end + + def end_element_namespace(name, prefix = nil, uri = nil) + @parent.endElement(name) + end + + def characters(string) + @parent.character(string) + end + + def cdata_block(string) + @parent.character(string) + end + end + end + end + + def parse(str) + parser = Nokogiri::XML::SAX::Parser.new(self.class.handler.new(self)) + parser.parse(str) + end + end + end + + Classes = [REXMLStreamParser, LibXMLStreamParser, NokogiriStreamParser] # yields an instance of each installed parser def self.each_installed_parser From 1979db928580695b983d4a6c248f056733c48289 Mon Sep 17 00:00:00 2001 From: Herwin Date: Wed, 23 Apr 2025 10:51:51 +0200 Subject: [PATCH 2/3] Update description of parsers in main file The old text was outdated: the Expat parser has not been available for years. 
--- lib/xmlrpc.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/xmlrpc.rb b/lib/xmlrpc.rb index ff28e46..a961ebf 100644 --- a/lib/xmlrpc.rb +++ b/lib/xmlrpc.rb @@ -64,7 +64,7 @@ # * See https://nokogiri.org # # * General -# * possible to choose between XMLParser module (Expat wrapper) and REXML (pure Ruby) parsers +# * possible to choose between REXML (pure Ruby) and LibXML/Nokogiri (compiled) parsers # * Marshalling Ruby objects to Hashes and reconstruct them later from a Hash # * SandStorm component architecture XMLRPC::Client interface # From 6f41970b8c473709431e9f45fc8ea2f91d3e201e Mon Sep 17 00:00:00 2001 From: Herwin Date: Wed, 23 Apr 2025 11:20:12 +0200 Subject: [PATCH 3/3] Limit nokogiri in tests to ruby and jruby It does pass the tests on the regular windows images, but mingw and mswin fail in the compilation step of the nokogiri gem. This can probably be fixed by installing the right development libraries, but I could not find out how. --- Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index a4f7a3f..96b856d 100644 --- a/Gemfile +++ b/Gemfile @@ -5,7 +5,7 @@ gemspec group :development do gem "bundler" gem "libxml-ruby", platforms: [:ruby, :jruby] - gem "nokogiri" + gem "nokogiri", platforms: [:ruby, :jruby] gem "rake" gem "test-unit" end