diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index c5673249..b31beb74 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -180,7 +180,7 @@ def element=( element ) # # This method is usually not called directly. def remove - @element.attributes.delete self.name unless @element.nil? + @element.attributes.delete self unless @element.nil? end # Writes this attribute (EG, puts 'key="value"' to the output) @@ -205,6 +205,15 @@ def xpath def document @element&.document end + + # Returns true if this attribute is a namespace declaration, false otherwise. + # "foo" => false + # "xmlns" => true + # "xmlns:foo" => true + # "foo:xmlns" => false + def namespace_declaration? + prefix == 'xmlns' || (prefix.empty? && name == 'xmlns') + end end end #vim:ts=2 sw=2 noexpandtab: diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index d4f0bbe4..c8789d94 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -113,23 +113,26 @@ def node_type end def attributes_of element - rv = [] - each do |child| - child.each do |key,val| - rv << Attribute.new(key,val) - end if child.kind_of? AttlistDecl and child.element_name == element + raw_attributes = _attlist_mappings[element] || {} + raw_attributes.map do |key, value| + Attribute.new(key, value) end - rv end def attribute_of element, attribute - att_decl = find do |child| - child.kind_of? AttlistDecl and - child.element_name == element and - child.include? attribute + _attlist_mappings[element]&.[](attribute) + end + + def _attlist_mappings + mapping = {} + grep(AttlistDecl).each do |child| + raw_attributes = mapping[child.element_name] ||= {} + child.each do |key, val| + # First declaration wins + raw_attributes[key] = val unless raw_attributes.key? key + end end - return nil unless att_decl - att_decl[attribute] + mapping end def clone diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index abdb28a7..95b9aff9 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -588,12 +588,7 @@ def prefixes # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"} # def namespaces - namespaces_cache = document&.__send__(:namespaces_cache) - if namespaces_cache - namespaces_cache[self] ||= calculate_namespaces - else - calculate_namespaces - end + _calculate_namespaces end # :call-seq: @@ -618,13 +613,10 @@ def namespaces # def namespace(prefix=nil) if prefix.nil? - prefix = prefix() + _namespace_internal + else + _namespace_lookup_internal(prefix) end - prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:") - ns = namespaces[prefix] - - ns = '' if ns.nil? and prefix == 'xmlns' - ns end # :call-seq: @@ -1507,15 +1499,32 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false) formatter.write( self, output ) end - private - def calculate_namespaces - if parent - parent.namespaces.merge(attributes.namespaces) - else - attributes.namespaces - end + # Returns namespace of the element + def _namespace_internal(namespaces = _calculate_namespaces) # :nodoc: + _namespace_lookup_internal(prefix, namespaces) + end + + # Lookup namespace for the given prefix in the context of the element + def _namespace_lookup_internal(prefix, namespaces = _calculate_namespaces) # :nodoc: + prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:") + ns = namespaces[prefix] + ns = '' if ns.nil? and prefix == 'xmlns' + ns end + def _calculate_namespaces(cache_hash = nil, attlist_mappings = nil) # :nodoc: + return cache_hash[self] if cache_hash && cache_hash.key?(self) + + attlist_mappings ||= document&.doctype&._attlist_mappings || {} + inherited_namespaces = parent ? parent._calculate_namespaces(cache_hash, attlist_mappings) : {} + attr_namespaces = attributes._calculate_namespaces(attlist_mappings) + namespaces = attr_namespaces.any? ? inherited_namespaces.merge(attr_namespaces) : inherited_namespaces + cache_hash[self] = namespaces if cache_hash + namespaces + end + + private + def __to_xpath_helper node rv = node.expanded_name.clone if node.parent @@ -2240,15 +2249,10 @@ def length # [REXML::Attribute, bar:att='2'] # [REXML::Attribute, att='<'] # - def each_attribute # :yields: attribute - return to_enum(__method__) unless block_given? - each_value do |val| - if val.kind_of? Attribute - yield val - else - val.each_value { |atr| yield atr } - end - end + # This method doesn't iterate attributes in the DTD. This may be a bug. + # + def each_attribute(&block) # :yields: attribute + each_own_attribute(&block) end # :call-seq: @@ -2273,6 +2277,8 @@ def each_attribute # :yields: attribute # ["bar:att", "2"] # ["att", "<"] # + # This method doesn't iterate attributes in the DTD. This may be a bug. + # def each return to_enum(__method__) unless block_given? each_attribute do |attr| @@ -2300,36 +2306,7 @@ def each # attrs.get_attribute('nosuch') # => nil # def get_attribute( name ) - attr = fetch( name, nil ) - if attr.nil? - return nil if name.nil? - # Look for prefix - name =~ Namespace::NAMESPLIT - prefix, n = $1, $2 - if prefix - attr = fetch( n, nil ) - # check prefix - if attr == nil - elsif attr.kind_of? Attribute - return attr if prefix == attr.prefix - else - attr = attr[ prefix ] - return attr - end - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - attr_val = doctype.attribute_of(expn, name) - return Attribute.new( name, attr_val ) if attr_val - end - return nil - end - if attr.kind_of? Hash - attr = attr[ @element.prefix ] - end - attr + fetch(name, nil) || attlist_attributes[name] end # :call-seq: @@ -2357,8 +2334,7 @@ def get_attribute( name ) # def []=( name, value ) if value.nil? # Delete the named attribute - attr = get_attribute(name) - delete attr + delete name return end @@ -2372,17 +2348,7 @@ def []=( name, value ) value = Attribute.new(name, value) end value.element = @element - old_attr = fetch(value.name, nil) - if old_attr.nil? - store(value.name, value) - elsif old_attr.kind_of? Hash - old_attr[value.prefix] = value - elsif old_attr.prefix != value.prefix - store value.name, {old_attr.prefix => old_attr, - value.prefix => value} - else - store value.name, value - end + store name, value @element end @@ -2398,20 +2364,7 @@ def []=( name, value ) # d.root.attributes.prefixes # => ["x", "y"] # def prefixes - ns = [] - each_attribute do |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - doctype.attributes_of(expn).each { - |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - } - end - ns + namespaces.keys - ['xmlns'] end # :call-seq: @@ -2424,20 +2377,7 @@ def prefixes # d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"} # def namespaces - namespaces = {} - each_attribute do |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - doctype.attributes_of(expn).each { - |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - } - end - namespaces + _calculate_namespaces end # :call-seq: @@ -2468,28 +2408,11 @@ def namespaces # attrs.delete(attr) # => # => # attrs.delete(attr) # => # => # - def delete( attribute ) - name = nil - prefix = nil - if attribute.kind_of? Attribute - name = attribute.name - prefix = attribute.prefix - else - attribute =~ Namespace::NAMESPLIT - prefix, name = $1, $2 - prefix = '' unless prefix - end - old = fetch(name, nil) - if old.kind_of? Hash # the supplied attribute is one of many - old.delete(prefix) - if old.size == 1 - repl = nil - old.each_value{|v| repl = v} - store name, repl - end - elsif old # the supplied attribute is a top-level one - super(name) - end + def delete(attribute_or_name) + key = attribute_or_name + key = attribute_or_name.expanded_name if attribute_or_name.kind_of? Attribute + super(key) + @element end @@ -2514,7 +2437,7 @@ def delete( attribute ) # attrs.include?('baz') # => true # def add( attribute ) - self[attribute.name] = attribute + self[attribute.expanded_name] = attribute end alias :<< :add @@ -2527,21 +2450,26 @@ def add( attribute ) # # xml_string = <<-EOT # - # + # # # EOT # d = REXML::Document.new(xml_string) - # ele = d.root.elements['//ele'] # => + # ele = d.root.elements['//ele'] # => # attrs = ele.attributes - # attrs.delete_all('att') # => [att='<'] + # attrs.delete_all('att') # => [foo:att='1', att='<'] + # attrs.each_attribute.map(&:expanded_name) #=> ['foo:other', 'other'] # def delete_all( name ) - rv = [] - each_attribute { |attribute| - rv << attribute if attribute.expanded_name == name - } - rv.each{ |attr| attr.remove } - rv + attributes = each_attribute.select do |attribute| + # For + # delete_all('foo') should not delete xmlns:foo="url" + # because it is a namespace declaration, not a normal attribute. + (!attribute.namespace_declaration? && attribute.name == name) || attribute.expanded_name == name + end + attributes.each do |attribute| + delete attribute.expanded_name + end + attributes end # :call-seq: @@ -2562,17 +2490,58 @@ def delete_all( name ) # attrs.get_attribute_ns('http://foo', 'nosuch') # => nil # def get_attribute_ns(namespace, name) - result = nil - each_attribute() { |attribute| - if name == attribute.name && - namespace == attribute.namespace() && - ( !namespace.empty? || !attribute.fully_expanded_name.index(':') ) - # foo will match xmlns:foo, but only if foo isn't also an attribute - result = attribute if !result or !namespace.empty? or - !attribute.fully_expanded_name.index(':') + each_effective_attribute.find do |attribute| + if attribute.namespace_declaration? + # namespace declarations are not considered as attributes in this method. + # For example: + # Both elem1.get_attribute_ns('', 'foo') and elem2.get_attribute_ns('bar', 'foo') + # should not match. + false + elsif namespace.empty? && !attribute.prefix.empty? + # If prefix is present, namespace url shouldn't be empty, so it never matches. + # For example, , even if attribute.namespace returns '', + # it just means that namespace lookup failed, it shouldn't match. + false + else + name == attribute.name && namespace == attribute.namespace() end - } - result + end + end + + def _calculate_namespaces(attlist_mappings = nil) # :nodoc: + namespaces = {} + each_effective_attribute(attlist_mappings) do |attribute| + namespaces[attribute.name] = attribute.value if attribute.namespace_declaration? + end + namespaces + end + + private + + # Iterates over the attributes of the element and those in the DTD. + # If the element has an attribute with the same name as one in the DTD, + # the element's attribute takes precedence. + def each_effective_attribute(attlist_mappings = nil) + return to_enum(__method__, attlist_mappings) unless block_given? + attributes = attlist_attributes(attlist_mappings).merge(self) + attributes.each_value do |attribute| + yield attribute + end + end + + alias each_own_attribute each_value + private :each_own_attribute + + def attlist_attributes(attlist_mappings = nil) + attlist_mappings ||= @element.document&.doctype&._attlist_mappings + return {} unless attlist_mappings + + expn = @element.is_a?(Document) ? @element.doctype&.name : @element.expanded_name + attlist_mappings[expn]&.map do |key, value| + attr = Attribute.new(key, value) + attr.element = @element + [key, attr] + end.to_h end end end diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 821cca95..c114982d 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -63,6 +63,9 @@ def initialize(strict: false) @namespaces = nil @variables = {} @functions = FunctionsClass.new + @attlist_mappings = nil + @document = nil + @element_namespaces_cache = {} @nest = 0 @strict = strict end @@ -85,14 +88,8 @@ def parse path, node node = node.first end - document = node.document - if document - document.__send__(:enable_cache) do - match( path_stack, node ) - end - else - match( path_stack, node ) - end + @document = node.document + match(path_stack, node) end def get_first path, node @@ -167,20 +164,45 @@ def strict? @strict end - # Returns a String namespace for a node, given a prefix + # Returns a String namespace for a prefix used in xpath # The rules are: # # 1. Use the supplied namespace mapping first. - # 2. If no mapping was supplied, use the context node to look up the namespace - def get_namespace( node, prefix ) + # 2. If no mapping was supplied, use the context node to look up the namespace as a fallback. + def get_xpath_namespace(node, prefix) if @namespaces @namespaces[prefix] || '' + elsif node.node_type == :element + element_namespace_lookup(node, prefix) else - return node.namespace( prefix ) if node.node_type == :element '' end end + # Returns attribute's namespace URI while caching the + # intermediate result to speed up retrieval of namespaces + def attribute_namespace(attribute) + attribute.prefix == '' ? '' : element_namespace_lookup(attribute.element, attribute.prefix) + end + + # Return element's namespace URI while caching the + # intermediate result to speed up retrieval of namespaces + def element_namespace(element) + element._namespace_internal(element_namespaces(element)) + end + + # Returns a hash of namespaces for the given element while caching the + # intermediate result to speed up retrieval of namespaces + def element_namespaces(element) + @attlist_mappings ||= @document&.doctype&._attlist_mappings || {} + element._calculate_namespaces(@element_namespaces_cache, @attlist_mappings) + end + + # Returns namespace of the prefix in the context of the element, + # while caching the intermediate result to speed up retrieval of namespaces + def element_namespace_lookup(element, prefix) + element._namespace_lookup_internal(prefix, element_namespaces(element)) + end # Expr takes a stack of path elements and a set of nodes (either a Parent # or an Array and returns an Array of matching nodes @@ -641,20 +663,20 @@ def node_test(path_stack, any_type: :element) node.name == name elsif prefix.empty? if strict? - node.name == name and node.namespace == "" + node.name == name and element_namespace(node) == "" else - node.name == name and node.namespace == get_namespace(node, prefix) + node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix) end else - node.name == name and node.namespace == get_namespace(node, prefix) + node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix) end when :attribute if prefix.nil? node.name == name elsif prefix.empty? - node.name == name and node.namespace == "" + node.name == name and attribute_namespace(node) == "" else - node.name == name and node.namespace == get_namespace(node.element, prefix) + node.name == name and attribute_namespace(node) == get_xpath_namespace(node.element, prefix) end else false @@ -665,11 +687,11 @@ def node_test(path_stack, any_type: :element) ->(node) do case node.node_type when :element - namespaces = @namespaces || node.namespaces - node.namespace == namespaces[prefix] + namespaces = @namespaces || element_namespaces(node) + element_namespace(node) == namespaces[prefix] when :attribute - namespaces = @namespaces || node.element.namespaces - node.namespace == namespaces[prefix] + namespaces = @namespaces || element_namespaces(node.element) + attribute_namespace(node) == namespaces[prefix] else false end diff --git a/test/test_attributes.rb b/test/test_attributes.rb index 09fde442..af42f06f 100644 --- a/test/test_attributes.rb +++ b/test/test_attributes.rb @@ -74,6 +74,16 @@ def test_delete assert_equal '4', doc.root.attributes['z:foo'] end + def test_delete_all + doc = Document.new("") + doc.root.attributes.delete_all 'x' + assert_equal ['xmlns:x', 'xmlns:y', 'y', 'x:y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + doc.root.attributes.delete_all 'x:y' + assert_equal ['xmlns:x', 'xmlns:y', 'y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + doc.root.attributes.delete_all 'xmlns:y' + assert_equal ['xmlns:x', 'y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + end + def test_prefixes doc = Document.new("") prefixes = doc.root.attributes.prefixes diff --git a/test/test_attributes_mixin.rb b/test/test_attributes_mixin.rb index 2b9108cb..e4d5841c 100644 --- a/test/test_attributes_mixin.rb +++ b/test/test_attributes_mixin.rb @@ -5,8 +5,10 @@ class TestAttributes < Test::Unit::TestCase def setup @ns_a = "urn:x-test:a" @ns_b = "urn:x-test:b" + @ns_default = "urn:x-test:default" element_string = <<-"XMLEND" - Dummy title' anElement = anElement = aDoc.elements[1] - elementAttrPrefix = anElement.attributes.get_attribute('content').prefix + elementAttrPrefix = anElement.attributes.get_attribute('tpl:content').prefix aClone = anElement.clone - cloneAttrPrefix = aClone.attributes.get_attribute('content').prefix + cloneAttrPrefix = aClone.attributes.get_attribute('tpl:content').prefix assert_equal( elementAttrPrefix , cloneAttrPrefix ) end diff --git a/test/test_core.rb b/test/test_core.rb index beaaeed7..d4962507 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -889,16 +889,25 @@ def test_attlist_decl ]> EOL assert_equal '', doc.doctype.children[0].to_s.gsub(/\s+/, " ") assert_equal 'gobble', doc.root.attributes['bar'] + assert_equal 'gobble', doc.root.attributes.get_attribute('bar').value + assert_equal 'three', doc.root.attributes.get_attribute('one:baz').value + assert_equal 'three', doc.root.attributes.get_attribute_ns('two', 'baz').value assert_equal 'xxx', doc.root.elements[2].namespace assert_equal 'two', doc.root.elements[1].namespace assert_equal 'foo', doc.root.namespace + # Not used in REXML internal any more, keep for backward compatibility + assert_equal 'gobble', doc.doctype.attribute_of('a', 'bar') + expected_attributes = [Attribute.new('bar', 'gobble'), Attribute.new('xmlns:one', 'two'), Attribute.new('one:baz', 'three')] + assert_equal expected_attributes, doc.doctype.attributes_of('a') + doc = Document.new <<~EOL ' ) expected = { - "inkscape" => attribute("xmlns:inkscape", + "xmlns:inkscape" => attribute("xmlns:inkscape", "http://www.inkscape.org/namespaces/inkscape"), - "version" => { - "inkscape" => attribute("inkscape:version", "0.44"), - "" => attribute("version", "1.0"), - }, + "inkscape:version" => attribute("inkscape:version", "0.44"), + "version" => attribute("version", "1.0"), } assert_equal(expected, doc.root.attributes.to_h) diff --git a/test/test_namespace.rb b/test/test_namespace.rb index a41e5058..6d405c9a 100644 --- a/test/test_namespace.rb +++ b/test/test_namespace.rb @@ -1,7 +1,9 @@ # frozen_string_literal: false +require "core_assertions" module REXMLTests class TestNamespace < Test::Unit::TestCase + include Test::Unit::CoreAssertions include Helper::Fixture include REXML @@ -34,5 +36,25 @@ def test_xml_namespace assert_equal("http://www.w3.org/XML/1998/namespace", document.root.namespace("xml")) end + + def test_deep_element_namespace_linear + omit('Recursion too deep on JRuby') if RUBY_ENGINE == "jruby" + + max_depth = 3000 + xml = <<~XML + #{'' * max_depth + '' * max_depth} + XML + doc = Document.new(xml) + prepare_element = ->(depth) do + node = doc.root + depth.times { node = node.first } + node || raise + end + + assert_linear_performance([30, 100, 300, 1000, 3000], rehearsal: 10) do |depth| + elem = prepare_element.call(depth) + elem.namespace + end + end end end diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 43fe3e99..0e7635a5 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -1312,16 +1312,6 @@ def test_namespaces_0 assert_equal( 1, XPath.match( d, "//x:*" ).size ) end - def test_namespaces_cache - doc = Document.new("") - assert_equal("", XPath.first(doc, "//b[namespace-uri()='1']").to_s) - assert_nil(XPath.first(doc, "//b[namespace-uri()='']")) - - doc.root.delete_namespace - assert_nil(XPath.first(doc, "//b[namespace-uri()='1']")) - assert_equal("", XPath.first(doc, "//b[namespace-uri()='']").to_s) - end - def test_ticket_71 doc = Document.new(%Q{}) el = doc.root.elements[1]