Skip to content

Commit d55469a

Browse files
committed
Clone the Parser when cloning a Document
This ensures that concurrent calls to `doc.append(html)`, which uses the document's parser, are supported. Fixes #2281
1 parent 11a0334 commit d55469a

File tree

5 files changed

+84
-1
lines changed

5 files changed

+84
-1
lines changed

src/main/java/org/jsoup/nodes/Document.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ public boolean updateMetaCharsetElement() {
294294
public Document clone() {
295295
Document clone = (Document) super.clone();
296296
clone.outputSettings = this.outputSettings.clone();
297+
clone.parser = this.parser.clone();
297298
return clone;
298299
}
299300

src/main/java/org/jsoup/parser/Parser.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
{@link org.jsoup.Jsoup}.
1515
<p>Note that a Parser instance object is not threadsafe. To reuse a Parser configuration in a multi-threaded
1616
environment, use {@link #newInstance()} to make copies. */
17-
public class Parser {
17+
public class Parser implements Cloneable {
1818
public static final String NamespaceHtml = "http://www.w3.org/1999/xhtml";
1919
public static final String NamespaceXml = "http://www.w3.org/XML/1998/namespace";
2020
public static final String NamespaceMathml = "http://www.w3.org/1998/Math/MathML";
@@ -43,6 +43,12 @@ public Parser newInstance() {
4343
return new Parser(this);
4444
}
4545

46+
@SuppressWarnings("MethodDoesntCallSuperMethod") // because we use the copy constructor instead
47+
@Override
48+
public Parser clone() {
49+
return new Parser(this);
50+
}
51+
4652
private Parser(Parser copy) {
4753
treeBuilder = copy.treeBuilder.newInstance(); // because extended
4854
errors = new ParseErrorList(copy.errors); // only copies size, not contents

src/test/java/org/jsoup/nodes/DocumentTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,14 @@ public class DocumentTest {
109109
@Test public void testClone() {
110110
Document doc = Jsoup.parse("<title>Hello</title> <p>One<p>Two");
111111
Document clone = doc.clone();
112+
assertNotSame(doc, clone);
113+
assertTrue(doc.hasSameValue(clone));
114+
assertNotSame(doc.parser(), clone.parser());
115+
assertNotSame(doc.outputSettings(), clone.outputSettings());
112116

113117
assertEquals("<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>", TextUtil.stripNewlines(clone.html()));
114118
clone.title("Hello there");
119+
assertFalse(doc.hasSameValue(clone));
115120
clone.expectFirst("p").text("One more").attr("id", "1");
116121
assertEquals("<html><head><title>Hello there</title></head><body><p id=\"1\">One more</p><p>Two</p></body></html>", TextUtil.stripNewlines(clone.html()));
117122
assertEquals("<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>", TextUtil.stripNewlines(doc.html()));

src/test/java/org/jsoup/nodes/ElementIT.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,4 +134,53 @@ public void testFastReparentExistingContent() {
134134
assertTrue(html.startsWith("<div>"));
135135
assertEquals(num + 3, el.parents().size());
136136
}
137+
138+
@Test
139+
public void testConcurrentMerge() throws Exception {
140+
// https://github.com/jhy/jsoup/discussions/2280 / https://github.com/jhy/jsoup/issues/2281
141+
// This was failing because the template.clone().append(html) was reusing the same underlying parser object.
142+
// Document#clone now correctly clones its parser.
143+
144+
class TemplateMerger {
145+
private final Document templateDoc;
146+
147+
public TemplateMerger(Document template) {
148+
this.templateDoc = template;
149+
}
150+
151+
public Document mergeWith(Document inputDoc) {
152+
Document merged = templateDoc.clone();
153+
Element content = merged.getElementById("content");
154+
content.append(inputDoc.html());
155+
return merged;
156+
}
157+
}
158+
159+
String templateHtml = "<html><body><div id='content'></div></body></html>";
160+
Document templateDoc = Jsoup.parse(templateHtml);
161+
TemplateMerger merger = new TemplateMerger(templateDoc);
162+
163+
Runnable mergeTask = () -> {
164+
try {
165+
for (int i = 0; i < 1000; i++) {
166+
String inputHtml = "<html><body><p>Some content</p></body></html>";
167+
Document inputDoc = Jsoup.parse(inputHtml);
168+
Document merged = merger.mergeWith(inputDoc);
169+
assertNotNull(merged);
170+
}
171+
} catch (Exception e) {
172+
fail(e);
173+
}
174+
};
175+
176+
int threadCount = 10;
177+
Thread[] threads = new Thread[threadCount];
178+
for (int i = 0; i < threadCount; i++) {
179+
threads[i] = new Thread(mergeTask);
180+
threads[i].start();
181+
}
182+
for (Thread t : threads) {
183+
t.join();
184+
}
185+
}
137186
}

src/test/java/org/jsoup/parser/ParserTest.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import java.nio.charset.StandardCharsets;
1010

1111
import static org.junit.jupiter.api.Assertions.assertEquals;
12+
import static org.junit.jupiter.api.Assertions.assertNotSame;
1213

1314
public class ParserTest {
1415

@@ -36,4 +37,25 @@ public void testUtf8() throws IOException {
3637
String text = parsed.selectFirst("p").wholeText();
3738
assertEquals(text, "H\u00E9llo, w\u00F6rld!");
3839
}
40+
41+
@Test
42+
public void testClone() {
43+
// Test HTML parser cloning
44+
Parser htmlParser = Parser.htmlParser();
45+
Parser htmlClone = htmlParser.clone();
46+
assertNotSame(htmlParser, htmlClone);
47+
// Ensure the tree builder instances are different
48+
assertNotSame(htmlParser.getTreeBuilder(), htmlClone.getTreeBuilder());
49+
// Check that settings are cloned properly (for example, tag case settings)
50+
assertEquals(htmlParser.settings().preserveTagCase(), htmlClone.settings().preserveTagCase());
51+
assertEquals(htmlParser.settings().preserveAttributeCase(), htmlClone.settings().preserveAttributeCase());
52+
53+
// Test XML parser cloning
54+
Parser xmlParser = Parser.xmlParser();
55+
Parser xmlClone = xmlParser.clone();
56+
assertNotSame(xmlParser, xmlClone);
57+
assertNotSame(xmlParser.getTreeBuilder(), xmlClone.getTreeBuilder());
58+
assertEquals(xmlParser.settings().preserveTagCase(), xmlClone.settings().preserveTagCase());
59+
assertEquals(xmlParser.settings().preserveAttributeCase(), xmlClone.settings().preserveAttributeCase());
60+
}
3961
}

0 commit comments

Comments
 (0)