Skip to content

Commit 0679bef

Browse files
committed
Perf: removed redundant lowercase normalization
1 parent d80275e commit 0679bef

File tree

6 files changed

+22
-19
lines changed

6 files changed

+22
-19
lines changed

src/main/java/org/jsoup/parser/HtmlTreeBuilder.java

+3 -3
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ protected void initialiseParse(Reader input, String baseUri, Parser parser) {
111111

112112
if (context != null) {
113113
final String contextName = context.normalName();
114-
contextElement = new Element(tagFor(contextName, settings), baseUri);
114+
contextElement = new Element(tagFor(contextName, contextName, defaultNamespace(), settings), baseUri);
115115
if (context.ownerDocument() != null) // quirks setup:
116116
doc.quirksMode(context.ownerDocument().quirksMode());
117117

@@ -318,7 +318,7 @@ Element createElementFor(Token.StartTag startTag, String namespace, boolean forc
318318
}
319319
}
320320

321-
Tag tag = tagFor(startTag.tagName, namespace,
321+
Tag tag = tagFor(startTag.tagName, startTag.normalName, namespace,
322322
forcePreserveCase ? ParseSettings.preserveCase : settings);
323323

324324
return (tag.normalName().equals("form")) ?
@@ -948,7 +948,7 @@ void reconstructFormattingElements() {
948948

949949
// 8. create new element from element, 9 insert into current node, onto stack
950950
skip = false; // can only skip increment from 4.
951-
Element newEl = new Element(tagFor(entry.normalName(), settings), null, entry.attributes().clone());
951+
Element newEl = new Element(tagFor(entry.nodeName(), entry.normalName(), defaultNamespace(), settings), null, entry.attributes().clone());
952952
doInsertElement(newEl, null);
953953

954954
// 10. replace entry with new entry

src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java

+1 -1
Original file line number | Diff line number | Diff line change
@@ -950,7 +950,7 @@ private boolean inBodyEndTagAdoption(Token t, HtmlTreeBuilder tb) {
950950
}
951951

952952
// 6. [Create an element for the token] for which the element node was created, in the [HTML namespace], with commonAncestor as the intended parent; replace the entry for node in the [list of active formatting elements] with an entry for the new element, replace the entry for node in the [stack of open elements] with an entry for the new element, and let node be the new element.
953-
Element replacement = new Element(tb.tagFor(node.nodeName(), ParseSettings.preserveCase), tb.getBaseUri());
953+
Element replacement = new Element(tb.tagFor(node.nodeName(), node.normalName(), tb.defaultNamespace(), ParseSettings.preserveCase), tb.getBaseUri());
954954
tb.replaceActiveFormattingElement(node, replacement);
955955
tb.replaceOnStack(node, replacement);
956956
node = replacement;

src/main/java/org/jsoup/parser/ParseSettings.java

+2 -1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import org.jspecify.annotations.Nullable;
55

66
import static org.jsoup.internal.Normalizer.lowerCase;
7+
import static org.jsoup.internal.Normalizer.normalize;
78

89
/**
910
* Controls parser case settings, to optionally preserve tag and/or attribute name case.
@@ -83,6 +84,6 @@ public String normalizeAttribute(String name) {
8384

8485
/** Returns the normal name that a Tag will have (trimmed and lower-cased) */
8586
static String normalName(String name) {
86-
return lowerCase(name.trim());
87+
return normalize(name);
8788
}
8889
}

src/main/java/org/jsoup/parser/Tag.java

+13 -7
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,9 @@ public class Tag implements Cloneable {
2828
private boolean formList = false; // a control that appears in forms: input, textarea, output etc
2929
private boolean formSubmit = false; // a control that can be submitted in a form: input etc
3030

31-
private Tag(String tagName, String namespace) {
31+
private Tag(String tagName, String normalName, String namespace) {
3232
this.tagName = tagName;
33-
normalName = Normalizer.lowerCase(tagName);
33+
this.normalName = normalName;
3434
this.namespace = namespace;
3535
}
3636

@@ -67,15 +67,20 @@ public String namespace() {
6767
* @return The tag, either defined or new generic.
6868
*/
6969
public static Tag valueOf(String tagName, String namespace, ParseSettings settings) {
70+
return valueOf(tagName, ParseSettings.normalName(tagName), namespace, settings);
71+
}
72+
73+
/** Tag.valueOf with the normalName via the token.normalName, to save redundant lower-casing passes. */
74+
static Tag valueOf(String tagName, String normalName, String namespace, ParseSettings settings) {
75+
Validate.notNull(tagName);
76+
tagName = tagName.trim();
7077
Validate.notEmpty(tagName);
7178
Validate.notNull(namespace);
7279
Tag tag = Tags.get(tagName);
7380
if (tag != null && tag.namespace.equals(namespace))
7481
return tag;
7582

76-
tagName = settings.normalizeTag(tagName); // the name we'll use
77-
Validate.notEmpty(tagName);
78-
String normalName = Normalizer.lowerCase(tagName); // the lower-case name to get tag settings off
83+
tagName = settings.preserveTagCase() ? tagName : normalName;
7984
tag = Tags.get(normalName);
8085
if (tag != null && tag.namespace.equals(namespace)) {
8186
if (settings.preserveTagCase() && !tagName.equals(normalName)) {
@@ -86,12 +91,13 @@ public static Tag valueOf(String tagName, String namespace, ParseSettings settin
8691
}
8792

8893
// not defined: create default; go anywhere, do anything! (incl be inside a <p>)
89-
tag = new Tag(tagName, namespace);
94+
tag = new Tag(tagName, normalName, namespace);
9095
tag.isBlock = false;
9196

9297
return tag;
9398
}
9499

100+
95101
/**
96102
* Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
97103
* <p>
@@ -304,7 +310,7 @@ private static void setupTags(String[] tagNames, Consumer<Tag> tagModifier) {
304310
for (String tagName : tagNames) {
305311
Tag tag = Tags.get(tagName);
306312
if (tag == null) {
307-
tag = new Tag(tagName, Parser.NamespaceHtml);
313+
tag = new Tag(tagName, tagName, Parser.NamespaceHtml);
308314
Tags.put(tag.tagName, tag);
309315
}
310316
tagModifier.accept(tag);

src/main/java/org/jsoup/parser/TreeBuilder.java

+2 -6
Original file line number | Diff line number | Diff line change
@@ -238,21 +238,17 @@ boolean isContentForTagData(String normalName) {
238238
return false;
239239
}
240240

241-
Tag tagFor(String tagName, String namespace, ParseSettings settings) {
241+
Tag tagFor(String tagName, String normalName, String namespace, ParseSettings settings) {
242242
Tag cached = seenTags.get(tagName); // note that we don't normalize the cache key. But tag via valueOf may be normalized.
243243
if (cached == null || !cached.namespace().equals(namespace)) {
244244
// only return from cache if the namespace is the same. not running nested cache to save double hit on the common flow
245-
Tag tag = Tag.valueOf(tagName, namespace, settings);
245+
Tag tag = Tag.valueOf(tagName, normalName, namespace, settings);
246246
seenTags.put(tagName, tag);
247247
return tag;
248248
}
249249
return cached;
250250
}
251251

252-
Tag tagFor(String tagName, ParseSettings settings) {
253-
return tagFor(tagName, defaultNamespace(), settings);
254-
}
255-
256252
/**
257253
Gets the default namespace for this TreeBuilder
258254
* @return the default namespace

src/main/java/org/jsoup/parser/XmlTreeBuilder.java

+1 -1
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ protected boolean process(Token token) {
9393
}
9494

9595
void insertElementFor(Token.StartTag startTag) {
96-
Tag tag = tagFor(startTag.name(), settings);
96+
Tag tag = tagFor(startTag.name(), startTag.normalName(), defaultNamespace(), settings);
9797
if (startTag.attributes != null)
9898
startTag.attributes.deduplicate(settings);
9999

0 commit comments

Comments
 (0)