Skip to content

Commit 0a4b830

Browse files
committed
Fix XML attribute validation
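When converting XML attribute name validation from the `[a-zA-Z_:][-a-zA-Z0-9_:.]*` regex to a character-by-character scan, the check for characters after the first missed `-`, `.`, and digits. Also clarified the HTML attribute scan. Fixes #2235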
1 parent f6e82f2 commit 0a4b830

File tree

4 files changed

+22
-6
lines changed

4 files changed

+22
-6
lines changed

CHANGES.md

+7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# jsoup Changelog
22

3+
## 1.18.3 (PENDING)
4+
5+
### Bug Fixes
6+
7+
* When serializing to XML, attribute names containing `-`, `.`, or digits were incorrectly marked as invalid and
8+
removed. [2235](https://github.com/jhy/jsoup/issues/2235)
9+
310
## 1.18.2 (2024-Nov-27)
411

512
### Improvements

pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
<groupId>org.jsoup</groupId>
77
<artifactId>jsoup</artifactId>
8-
<version>1.19.1-SNAPSHOT</version><!-- remember to update previous version below for japicmp -->
8+
<version>1.18.3-SNAPSHOT</version><!-- remember to update previous version below for japicmp -->
99
<url>https://jsoup.org/</url>
1010
<description>jsoup is a Java library that simplifies working with real-world HTML and XML. It offers an easy-to-use API for URL fetching, data parsing, extraction, and manipulation using DOM API methods, CSS, and xpath selectors. jsoup implements the WHATWG HTML5 specification, and parses HTML to the same DOM as modern browsers.</description>
1111
<inceptionYear>2009</inceptionYear>
@@ -237,7 +237,7 @@
237237
<dependency>
238238
<groupId>org.jsoup</groupId>
239239
<artifactId>jsoup</artifactId>
240-
<version>1.18.1</version>
240+
<version>1.18.2</version>
241241
<type>jar</type>
242242
</dependency>
243243
</oldVersion>

src/main/java/org/jsoup/nodes/Attribute.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -199,13 +199,13 @@ else if (syntax == Syntax.html && !isValidHtmlKey(key)) {
199199
private static boolean isValidXmlKey(String key) {
200200
// =~ [a-zA-Z_:][-a-zA-Z0-9_:.]*
201201
final int length = key.length();
202-
if (length ==0) return false;
202+
if (length == 0) return false;
203203
char c = key.charAt(0);
204204
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':'))
205205
return false;
206206
for (int i = 1; i < length; i++) {
207207
c = key.charAt(i);
208-
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':'))
208+
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == ':' || c == '.'))
209209
return false;
210210
}
211211
return true;
@@ -214,10 +214,10 @@ private static boolean isValidXmlKey(String key) {
214214
private static boolean isValidHtmlKey(String key) {
215215
// valid keys =~ [^\x00-\x1f\x7f-\x9f "'/=]+ (i.e. no control chars, space, quotes, slash, or equals)
216216
final int length = key.length();
217-
if (length ==0) return false;
217+
if (length == 0) return false;
218218
for (int i = 0; i < length; i++) {
219219
char c = key.charAt(i);
220-
if (c <= 0x1f || c >= 0x7f && c <= 0x9f || c == ' ' || c == '"' || c == '\'' || c == '/' || c == '=')
220+
if ((c <= 0x1f) || (c >= 0x7f && c <= 0x9f) || c == ' ' || c == '"' || c == '\'' || c == '/' || c == '=')
221221
return false;
222222
}
223223
return true;

src/test/java/org/jsoup/parser/XmlTreeBuilderTest.java

+9
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,15 @@ public void handlesLTinScript() {
299299
assertEquals("<body style=\"color: red\" _=\"\" name_=\"\"><div _=\"\"></div></body>", out);
300300
}
301301

302+
@Test void xmlValidAttributes() {
303+
String xml = "<a bB1-_:.=foo _9!=bar>One</a>";
304+
Document doc = Jsoup.parse(xml, Parser.xmlParser());
305+
assertEquals(Syntax.xml, doc.outputSettings().syntax());
306+
307+
String out = doc.html();
308+
assertEquals("<a bB1-_:.=\"foo\" _9_=\"bar\">One</a>", out); // first is same, second coerced
309+
}
310+
302311
@Test void customTagsAreFlyweights() {
303312
String xml = "<foo>Foo</foo><foo>Foo</foo><FOO>FOO</FOO><FOO>FOO</FOO>";
304313
Document doc = Jsoup.parse(xml, Parser.xmlParser());

0 commit comments

Comments
 (0)