From f29ede3926ee6a7b838465d76082a624052392e9 Mon Sep 17 00:00:00 2001 From: Ronald Brill Date: Fri, 22 Nov 2024 11:19:55 +0100 Subject: [PATCH] try to optimize the id/name index by replacing the tree set --- src/main/java/org/htmlunit/html/HtmlPage.java | 89 ++++++++++++++----- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/htmlunit/html/HtmlPage.java b/src/main/java/org/htmlunit/html/HtmlPage.java index 34c4c52f9a..f1874ccff7 100644 --- a/src/main/java/org/htmlunit/html/HtmlPage.java +++ b/src/main/java/org/htmlunit/html/HtmlPage.java @@ -40,8 +40,6 @@ import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; import java.util.WeakHashMap; import java.util.concurrent.ConcurrentHashMap; @@ -153,8 +151,8 @@ public class HtmlPage extends SgmlPage { private transient Charset originalCharset_; private final Object lock_ = new SerializableLock(); // used for synchronization - private Map> idMap_ = new ConcurrentHashMap<>(); - private Map> nameMap_ = new ConcurrentHashMap<>(); + private Map idMap_ = new ConcurrentHashMap<>(); + private Map nameMap_ = new ConcurrentHashMap<>(); private List frameElements_ = new ArrayList<>(); private int parserCount_; @@ -629,7 +627,7 @@ public ProcessingInstruction createProcessingInstruction(final String namespaceU @Override public DomElement getElementById(final String elementId) { if (elementId != null) { - final SortedSet elements = idMap_.get(elementId); + final MappedElementIndexEntry elements = idMap_.get(elementId); if (elements != null) { return elements.first(); } @@ -1665,9 +1663,9 @@ public E getHtmlElementById(final String elementId) thro */ public List getElementsById(final String elementId) { if (elementId != null) { - final SortedSet elements = idMap_.get(elementId); + final MappedElementIndexEntry elements = idMap_.get(elementId); if (elements != null) { - return new ArrayList<>(elements); + return new 
ArrayList<>(elements.elements()); } } return Collections.emptyList(); @@ -1685,7 +1683,7 @@ public List getElementsById(final String elementId) { @SuppressWarnings("unchecked") public E getElementByName(final String name) throws ElementNotFoundException { if (name != null) { - final SortedSet elements = nameMap_.get(name); + final MappedElementIndexEntry elements = nameMap_.get(name); if (elements != null) { return (E) elements.first(); } @@ -1703,9 +1701,9 @@ public E getElementByName(final String name) throws Eleme */ public List getElementsByName(final String name) { if (name != null) { - final SortedSet elements = nameMap_.get(name); + final MappedElementIndexEntry elements = nameMap_.get(name); if (elements != null) { - return new ArrayList<>(elements); + return new ArrayList<>(elements.elements()); } } return Collections.emptyList(); @@ -1722,14 +1720,14 @@ public List getElementsByIdAndOrName(final String idAndOrName) { if (idAndOrName == null) { return Collections.emptyList(); } - final Collection list1 = idMap_.get(idAndOrName); - final Collection list2 = nameMap_.get(idAndOrName); + final MappedElementIndexEntry list1 = idMap_.get(idAndOrName); + final MappedElementIndexEntry list2 = nameMap_.get(idAndOrName); final List list = new ArrayList<>(); if (list1 != null) { - list.addAll(list1); + list.addAll(list1.elements()); } if (list2 != null) { - for (final DomElement elt : list2) { + for (final DomElement elt : list2.elements()) { if (!list.contains(elt)) { list.add(elt); } @@ -1804,14 +1802,14 @@ void addMappedElement(final DomElement element, final boolean recurse) { } } - private void addElement(final Map> map, final DomElement element, + private void addElement(final Map map, final DomElement element, final String attribute, final boolean recurse) { final String value = element.getAttribute(attribute); if (ATTRIBUTE_NOT_DEFINED != value) { - SortedSet elements = map.get(value); + MappedElementIndexEntry elements = map.get(value); if (elements == null) 
{ - elements = new TreeSet<>(DOCUMENT_POSITION_COMPERATOR); + elements = new MappedElementIndexEntry(); elements.add(element); map.put(value, elements); } @@ -1845,14 +1843,13 @@ void removeMappedElement(final DomElement element, final boolean recurse, final } } - private void removeElement(final Map> map, final DomElement element, + private void removeElement(final Map map, final DomElement element, final String attribute, final boolean recurse) { final String value = element.getAttribute(attribute); if (ATTRIBUTE_NOT_DEFINED != value) { - final SortedSet elements = map.remove(value); - if (elements != null && (elements.size() != 1 || !elements.contains(element))) { - elements.remove(element); + final MappedElementIndexEntry elements = map.remove(value); + if (elements != null && elements.remove(element)) { map.put(value, elements); } } @@ -2879,4 +2876,54 @@ private void readObject(final ObjectInputStream in) throws IOException, ClassNot computedStyles_ = new WeakHashMap<>(); } } + + private static final class MappedElementIndexEntry { + private ArrayList elements_; + private boolean sorted_; + + MappedElementIndexEntry() { + // we do not expect too many elements having the same id/name + elements_ = new ArrayList<>(2); + sorted_ = false; + } + + void add(final DomElement element) { + elements_.add(element); + sorted_ = false; + } + + DomElement first() { + if (elements_.size() == 0) { + return null; + } + + if (sorted_) { + return elements_.get(0); + } + + Collections.sort(elements_, DOCUMENT_POSITION_COMPERATOR); + sorted_ = true; + + return elements_.get(0); + } + + List elements() { + if (sorted_ || elements_.size() == 0) { + return elements_; + } + + Collections.sort(elements_, DOCUMENT_POSITION_COMPERATOR); + sorted_ = true; + + return elements_; + } + + boolean remove(final DomElement element) { + if (elements_.size() == 0) { + return false; + } + + return elements_.remove(element); + } + } }