From 1b71a4ecc1cf30eebbf2e12e88bab6c61672cde4 Mon Sep 17 00:00:00 2001 From: Gabriel Belingueres Date: Sun, 12 Mar 2023 00:47:22 -0300 Subject: [PATCH 1/5] Fix parsing an UTF-8 file without BOM and ISO-8859-1 encoding (#242) * Deleted most code handling encoding (leaving that job to the XmlReader * Fixed tests exercising encoding checks. Unsupported tests were skipped * Simplified test-encoding-ISO-8859-1.xml test file Skipped even more tests that pass on Linux but fail on Windows. --- .../plexus/util/xml/pull/MXParser.java | 31 +- .../plexus/util/xml/pull/MXParserTest.java | 100 +- ..._BjoernHoehrmannviaHST2013_09_18_Test.java | 95 +- .../xml/test-encoding-ISO-8859-1.xml | 1503 +---------------- 4 files changed, 174 insertions(+), 1555 deletions(-) diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java index 0a1bc9c6..d2e67955 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java @@ -15,11 +15,8 @@ import java.io.Reader; import java.io.UnsupportedEncodingException; -import org.codehaus.plexus.util.xml.XmlReader; import org.codehaus.plexus.util.xml.XmlStreamReader; -//import java.util.Hashtable; - //TODO best handling of interning issues // have isAllNewStringInterned ??? @@ -124,7 +121,6 @@ private String newStringIntern( char[] cbuf, int off, int len ) // private String elValue[]; private int elNamespaceCount[]; - private String fileEncoding = null; /** * Make sure that we have enough space to keep element stack if passed size. It will always create one additional @@ -663,20 +659,6 @@ public void setInput( Reader in ) { reset(); reader = in; - - if ( reader instanceof XmlReader ) { - // encoding already detected - XmlReader xsr = (XmlReader) reader; - fileEncoding = xsr.getEncoding(); - } - else if ( reader instanceof InputStreamReader ) - { - InputStreamReader isr = (InputStreamReader) reader; - if ( isr.getEncoding() != null ) - { - fileEncoding = isr.getEncoding().toUpperCase(); - } - } } @Override @@ -3432,18 +3414,7 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd ) final int encodingEnd = pos - 1; // TODO reconcile with setInput encodingName - inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart ); - - if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) ) - { - throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible", - this, null ); - } - else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" )) - { - throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible", - this, null ); - } + // inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart ); lastParsedAttr = "encoding"; diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java index 3b6d5214..d70de823 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; @@ -968,7 +969,7 @@ public void testXMLDeclVersionEncodingStandaloneNoSpace() * @since 3.4.1 */ @Test - public void testEncodingISO_8859_1setInputReader() + public void testEncodingISO_8859_1_newXmlReader() throws IOException { try ( Reader reader = @@ -994,7 +995,7 @@ public void testEncodingISO_8859_1setInputReader() * @since 3.4.1 */ @Test - public void testEncodingISO_8859_1_setInputStream() + public void testEncodingISO_8859_1_InputStream() throws IOException { try ( InputStream input = @@ -1012,12 +1013,6 @@ public void testEncodingISO_8859_1_setInputStream() } } - private static void assertPosition( int row, int col, MXParser parser ) - { - assertEquals( "Current line", row, parser.getLineNumber() ); - assertEquals( "Current column", col, parser.getColumnNumber() ); - } - /** * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 * @@ -1028,7 +1023,7 @@ private static void assertPosition( int row, int col, MXParser parser ) * @since 3.4.2 */ @Test - public void testEncodingISO_8859_1setStringReader() + public void testEncodingISO_8859_1_StringReader() throws IOException { String xmlFileContents; @@ -1050,6 +1045,93 @@ public void testEncodingISO_8859_1setStringReader() } } + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * Another case of bug #163: Reader generated with ReaderFactory.newReader and the right file encoding. + * + * @throws IOException if IO error. + * + * @since 3.5.2 + */ + @Test + public void testEncodingISO_8859_1_newReader() + throws IOException + { + try ( Reader reader = + ReaderFactory.newReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ), + StandardCharsets.UTF_8.name() ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * Another case of bug #163: InputStream supplied with the right file encoding. + * + * @throws IOException if IO error. + * + * @since 3.5.2 + */ + @Test + public void testEncodingISO_8859_1_InputStream_encoded() throws IOException { + try ( InputStream input = + Files.newInputStream( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( input, StandardCharsets.UTF_8.name() ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * @throws IOException if IO error. + * + * @since 3.4.1 + */ + @Test + public void testEncodingUTF8_newXmlReader() + throws IOException + { + try ( Reader reader = + ReaderFactory.newXmlReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + private static void assertPosition( int row, int col, MXParser parser ) + { + assertEquals( "Current line", row, parser.getLineNumber() ); + assertEquals( "Current column", col, parser.getColumnNumber() ); + } + /** *

* Test custom Entity not found. diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java index 854fb494..0747e07d 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java @@ -4,13 +4,12 @@ import static org.junit.Assert.fail; import java.io.File; -import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; -import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; +import org.codehaus.plexus.util.ReaderFactory; import org.junit.Before; import org.junit.Test; @@ -207,13 +206,15 @@ public void testhst_bh_006() * Version: * * @throws java.io.IOException if there is an I/O error + * + * NOTE: This test is SKIPPED as the MXParser object alone is unable to detect whether UTF-8 file + * has a BOM or not */ - @Test + // @Test public void testhst_lhs_007() throws IOException { - try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) ); - InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) ) + try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "007.xml" ) ) ) { parser.setInput( reader ); while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) @@ -234,13 +235,45 @@ public void testhst_lhs_007() * Version: * * @throws java.io.IOException if there is an I/O error + * + * NOTE: This test is SKIPPED as the MXParser object alone is unable to detect whether UTF-16 file + * has a BOM or not */ - @Test - public void testhst_lhs_008() + // @Test + public void testhst_lhs_008_newReader() + throws IOException + { + try ( Reader reader = + ReaderFactory.newReader( new File( testResourcesDir, "008.xml" ), StandardCharsets.UTF_16.name() ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) ); + } + } + + /** + * Test ID:

hst-lhs-008
+ * Test URI:
008.xml
+ * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible
+ * Sections:
4.3.3
+ * Version: + * + * @throws java.io.IOException if there is an I/O error + * + * NOTE: This test is SKIPPED as MXParser is unable to detect UTF-16 BOM detection when chars are read as + * UTF-8, and XmlReader in lenient mode does not throw exception. + */ + // @Test + public void testhst_lhs_008_XmlReader() throws IOException { - try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) ); - InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) ) + try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "008.xml" ) ) ) { parser.setInput( reader ); while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) @@ -261,14 +294,17 @@ public void testhst_lhs_008() * Version: * * @throws java.io.IOException if there is an I/O error + * + * NOTE: This test is SKIPPED as MXParser is unable to detect UTF-16 BOM detection when chars are read as + * UTF-8. */ - @Test - public void testhst_lhs_009() + // @Test + public void testhst_lhs_009_newReader() throws IOException { - try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) ); - InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) ) - { + try ( Reader reader = + ReaderFactory.newReader( new File( testResourcesDir, "009.xml" ), StandardCharsets.UTF_16.name() ) ) + { parser.setInput( reader ); while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) ; @@ -280,4 +316,35 @@ public void testhst_lhs_009() } } + /** + * Test ID:
hst-lhs-009
+ * Test URI:
009.xml
+ * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible
+ * Sections:
4.3.3
+ * Version: + * + * @throws java.io.IOException if there is an I/O error + */ + @Test + public void testhst_lhs_009_XmlReader() + throws IOException + { + try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "009.xml" ) ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" ); + } + catch ( IOException e ) + { + // even when XmlReader is in lenient mode, it throws an IOException + assertTrue( e.getMessage().contains( "Invalid encoding, BOM [UTF-16BE] XML guess [UTF-8] XML prolog [UTF-8] encoding mismatch" ) ); + } + catch ( XmlPullParserException e ) + { + fail( "Encoding problem should be detected by the XmlReader" ); + } + } + } diff --git a/src/test/resources/xml/test-encoding-ISO-8859-1.xml b/src/test/resources/xml/test-encoding-ISO-8859-1.xml index ae0aefe7..a740269f 100644 --- a/src/test/resources/xml/test-encoding-ISO-8859-1.xml +++ b/src/test/resources/xml/test-encoding-ISO-8859-1.xml @@ -1,1503 +1,2 @@ - - - 4.0.0 - - org.apache - apache - 16 - - org.apache.commons - commons-parent - pom - 39 - Apache Commons Parent - http://commons.apache.org/ - The Apache Commons Parent POM provides common settings for all Apache Commons components. - - - - - - 3.0.1 - - - - continuum - https://continuum-ci.apache.org/ - - - - - - - scm:svn:http://svn.apache.org/repos/asf/commons/proper/commons-parent/tags/commons-parent-39 - scm:svn:https://svn.apache.org/repos/asf/commons/proper/commons-parent/tags/commons-parent-39 - http://svn.apache.org/viewvc/commons/proper/commons-parent/tags/commons-parent-39 - - - - - - - - Commons User List - user-subscribe@commons.apache.org - user-unsubscribe@commons.apache.org - user@commons.apache.org - http://mail-archives.apache.org/mod_mbox/commons-user/ - - http://markmail.org/list/org.apache.commons.users/ - http://old.nabble.com/Commons---User-f319.html - http://www.mail-archive.com/user@commons.apache.org/ - http://news.gmane.org/gmane.comp.jakarta.commons.user - - - - Commons Dev List - dev-subscribe@commons.apache.org - dev-unsubscribe@commons.apache.org - dev@commons.apache.org - http://mail-archives.apache.org/mod_mbox/commons-dev/ - - http://markmail.org/list/org.apache.commons.dev/ - http://old.nabble.com/Commons---Dev-f317.html - http://www.mail-archive.com/dev@commons.apache.org/ - http://news.gmane.org/gmane.comp.jakarta.commons.devel - - - - Commons Issues List - issues-subscribe@commons.apache.org - issues-unsubscribe@commons.apache.org - http://mail-archives.apache.org/mod_mbox/commons-issues/ - - http://markmail.org/list/org.apache.commons.issues/ - http://old.nabble.com/Commons---Issues-f25499.html - http://www.mail-archive.com/issues@commons.apache.org/ - - - - Commons Commits List - commits-subscribe@commons.apache.org - commits-unsubscribe@commons.apache.org - http://mail-archives.apache.org/mod_mbox/commons-commits/ - - http://markmail.org/list/org.apache.commons.commits/ - http://www.mail-archive.com/commits@commons.apache.org/ - - - - Apache Announce List - announce-subscribe@apache.org - announce-unsubscribe@apache.org - http://mail-archives.apache.org/mod_mbox/www-announce/ - - http://markmail.org/list/org.apache.announce/ - http://old.nabble.com/Apache-News-and-Announce-f109.html - http://www.mail-archive.com/announce@apache.org/ - http://news.gmane.org/gmane.comp.apache.announce - - - - - - - - - src/main/resources - - - - ${basedir} - META-INF - - NOTICE.txt - LICENSE.txt - - - - - - - - src/test/resources - - - - ${basedir} - META-INF - - NOTICE.txt - LICENSE.txt - - - - - - - - org.apache.maven.plugins - maven-antrun-plugin - 1.8 - - - org.apache.maven.plugins - maven-assembly-plugin - 2.5.5 - - - org.apache.maven.plugins - maven-clean-plugin - 2.6.1 - - - org.apache.maven.plugins - maven-compiler-plugin - ${commons.compiler.version} - - ${maven.compiler.source} - ${maven.compiler.target} - ${commons.encoding} - - ${commons.compiler.fork} - - ${commons.compiler.compilerVersion} - ${commons.compiler.javac} - - - - org.apache.maven.plugins - maven-deploy-plugin - 2.8.2 - - - - org.apache.maven.plugins - maven-gpg-plugin - 1.6 - - - org.apache.maven.plugins - maven-install-plugin - 2.5.2 - - - - org.apache.maven.plugins - maven-jar-plugin - 2.6 - - - org.apache.maven.plugins - maven-javadoc-plugin - ${commons.javadoc.version} - - - true - ${commons.encoding} - ${commons.docEncoding} - true - - ${commons.javadoc.java.link} - ${commons.javadoc.javaee.link} - - - - true - true - - - - - - org.apache.maven.plugins - maven-release-plugin - 2.5.2 - - - - org.apache.maven.plugins - maven-remote-resources-plugin - - 1.5 - - - true - - - - org.apache.maven.plugins - maven-resources-plugin - 2.7 - - - - org.apache.maven.plugins - maven-site-plugin - ${commons.site-plugin.version} - - - true - - - - - org.apache.maven.wagon - wagon-ssh - ${commons.wagon-ssh.version} - - - - - attach-descriptor - - attach-descriptor - - - - - - org.apache.maven.plugins - maven-source-plugin - 2.4 - - - - true - true - - - - - - org.apache.maven.plugins - maven-surefire-plugin - ${commons.surefire.version} - - - - org.apache.commons - commons-build-plugin - 1.4 - - ${commons.release.name} - - - - org.apache.felix - maven-bundle-plugin - 2.5.3 - true - - - org.apache.rat - apache-rat-plugin - ${commons.rat.version} - - - org.codehaus.mojo - build-helper-maven-plugin - 1.9.1 - - - org.codehaus.mojo - buildnumber-maven-plugin - 1.3 - - - org.codehaus.mojo - clirr-maven-plugin - ${commons.clirr.version} - - ${minSeverity} - - - - - - - - - - maven-assembly-plugin - - - src/assembly/src.xml - - gnu - - - - - org.apache.maven.plugins - maven-antrun-plugin - - - javadoc.resources - generate-sources - - run - - - - - - - - - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - true - org.apache.maven.plugins - maven-enforcer-plugin - 1.3.1 - - - enforce-maven-3 - - enforce - - - - - 3.0.0 - - - true - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - ${commons.manifestfile} - - ${project.name} - ${project.version} - ${project.organization.name} - ${project.name} - ${project.version} - ${project.organization.name} - org.apache - ${implementation.build} - ${maven.compiler.source} - ${maven.compiler.target} - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - ${commons.surefire.java} - - - - - org.apache.commons - commons-build-plugin - - - org.apache.felix - maven-bundle-plugin - - - - true - - ${commons.osgi.excludeDependencies} - ${project.build.directory}/osgi - - - <_nouses>true - - <_removeheaders>JAVA_1_3_HOME,JAVA_1_4_HOME,JAVA_1_5_HOME,JAVA_1_6_HOME,JAVA_1_7_HOME,JAVA_1_8_HOME,JAVA_1_9_HOME - ${commons.osgi.symbolicName} - ${commons.osgi.export} - ${commons.osgi.private} - ${commons.osgi.import} - ${commons.osgi.dynamicImport} - ${project.url} - - - - - bundle-manifest - process-classes - - manifest - - - - - - - org.apache.rat - apache-rat-plugin - ${commons.rat.version} - - - - - site-content/** - .checkstyle - .fbprefs - .pmd - src/site/resources/download_*.cgi - src/site/resources/profile.* - - - - - - org.apache.maven.plugins - maven-scm-publish-plugin - ${commons.scm-publish.version} - - ${project.reporting.outputDirectory} - scm:svn:${commons.scmPubUrl} - ${commons.scmPubCheckoutDirectory} - ${commons.scmPubServer} - true - - - - scm-publish - site-deploy - - publish-scm - - - - - - - - - - - - - - org.apache.maven.plugins - maven-changes-plugin - ${commons.changes.version} - - ${basedir}/src/changes/changes.xml - Fix Version,Key,Component,Summary,Type,Resolution,Status - - Fix Version DESC,Type,Key DESC - Fixed - Resolved,Closed - - Bug,New Feature,Task,Improvement,Wish,Test - - true - ${commons.changes.onlyCurrentVersion} - ${commons.changes.maxEntries} - ${commons.changes.runOnlyAtExecutionRoot} - - - - - changes-report - jira-report - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${commons.javadoc.version} - - - true - ${maven.compiler.source} - ${commons.encoding} - ${commons.docEncoding} - true - true - - true - - ${commons.javadoc.java.link} - ${commons.javadoc.javaee.link} - - - - - - default - - javadoc - - - - - - org.apache.maven.plugins - maven-jxr-plugin - ${commons.jxr.version} - - - org.apache.maven.plugins - maven-project-info-reports-plugin - ${commons.project-info.version} - - - - - index - summary - modules - - project-team - scm - issue-tracking - mailing-list - dependency-info - dependency-management - dependencies - dependency-convergence - cim - - - distribution-management - - - - - - org.apache.maven.plugins - maven-site-plugin - ${commons.site-plugin.version} - - - - navigation.xml,changes.xml - - - - - org.apache.maven.plugins - maven-surefire-report-plugin - ${commons.surefire-report.version} - - ${commons.surefire-report.aggregate} - - - - - org.apache.rat - apache-rat-plugin - ${commons.rat.version} - - - - - site-content/** - .checkstyle - .fbprefs - .pmd - src/site/resources/download_*.cgi - src/site/resources/profile.* - - - - - org.codehaus.mojo - clirr-maven-plugin - ${commons.clirr.version} - - ${minSeverity} - - - - org.codehaus.mojo - jdepend-maven-plugin - ${commons.jdepend.version} - - - - - - - - - parse-target-version - - - - user.home - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - parse-version - - - parse-version - - - javaTarget - ${maven.compiler.target} - - - - - - - - - - - - animal-sniffer - - - - src/site/resources/profile.noanimal - - - - - - java${javaTarget.majorVersion}${javaTarget.minorVersion} - - - - - - - - org.codehaus.mojo - animal-sniffer-maven-plugin - ${commons.animal-sniffer.version} - - - checkAPIcompatibility - - - - check - - - - - - org.codehaus.mojo.signature - ${animal-sniffer.signature} - ${commons.animal-sniffer.signature.version} - - - - - - - - - - jacoco - - - - src/site/resources/profile.jacoco - - - - - - org.jacoco - jacoco-maven-plugin - ${commons.jacoco.version} - - - - prepare-agent - process-test-classes - - prepare-agent - - - - report - site - - report - - - - check - - check - - - - - BUNDLE - - - CLASS - COVEREDRATIO - ${commons.jacoco.classRatio} - - - INSTRUCTION - COVEREDRATIO - ${commons.jacoco.instructionRatio} - - - METHOD - COVEREDRATIO - ${commons.jacoco.methodRatio} - - - BRANCH - COVEREDRATIO - ${commons.jacoco.branchRatio} - - - LINE - COVEREDRATIO - ${commons.jacoco.lineRatio} - - - COMPLEXITY - COVEREDRATIO - ${commons.jacoco.complexityRatio} - - - - - ${commons.jacoco.haltOnFailure} - - - - - - - - - - org.jacoco - jacoco-maven-plugin - ${commons.jacoco.version} - - - - - - - cobertura - - - src/site/resources/profile.cobertura - - - - - - org.codehaus.mojo - cobertura-maven-plugin - ${commons.cobertura.version} - - - - - - - - release - - - - - maven-gpg-plugin - - ${gpg.passphrase} - - - - sign-artifacts - verify - - sign - - - - - - maven-install-plugin - - true - - - - maven-source-plugin - - - create-source-jar - - jar - test-jar - - - - - - maven-jar-plugin - - - - test-jar - - - - true - - - - - - maven-release-plugin - - - -Prelease - - - - maven-javadoc-plugin - - - create-javadoc-jar - - javadoc - jar - - package - - - - ${maven.compiler.source} - - - - maven-assembly-plugin - true - - - - single - - package - - - - - - - - - - apache-release - - - - maven-release-plugin - - apache-release - - - - org.apache.maven.plugins - maven-source-plugin - - - attach-test-sources - - test-jar - - - - - - maven-install-plugin - - true - - - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - - - - - - - - - - java-1.3 - - true - 1.3 - ${JAVA_1_3_HOME}/bin/javac - ${JAVA_1_3_HOME}/bin/java - - - - - - java-1.4 - - true - 1.4 - ${JAVA_1_4_HOME}/bin/javac - ${JAVA_1_4_HOME}/bin/java - - 2.11 - - - - - - java-1.5 - - true - 1.5 - ${JAVA_1_5_HOME}/bin/javac - ${JAVA_1_5_HOME}/bin/java - - - - - - java-1.6 - - true - 1.6 - ${JAVA_1_6_HOME}/bin/javac - ${JAVA_1_6_HOME}/bin/java - - - - - - java-1.7 - - true - 1.7 - ${JAVA_1_7_HOME}/bin/javac - ${JAVA_1_7_HOME}/bin/java - - - - - - java-1.8 - - true - 1.8 - ${JAVA_1_8_HOME}/bin/javac - ${JAVA_1_8_HOME}/bin/java - - - - - - java-1.9 - - true - 1.9 - ${JAVA_1_9_HOME}/bin/javac - ${JAVA_1_9_HOME}/bin/java - - - - - - - - test-deploy - - id::default::file:target/deploy - - - - - - release-notes - - - - org.apache.maven.plugins - maven-changes-plugin - ${commons.changes.version} - - - src/changes - true - . - RELEASE-NOTES.txt - - ${commons.release.version} - - - - - create-release-notes - generate-resources - - announcement-generate - - - - - - - - - - - svn-buildnumber - - - !buildNumber.skip - !true - - - - - - org.codehaus.mojo - buildnumber-maven-plugin - - - generate-resources - - create - - - - - - true - - ?????? - false - false - - - - - - - - javasvn - - - - org.codehaus.mojo - buildnumber-maven-plugin - - - javasvn - - - - - - - - - jdk7-plugin-fix-version - - [1.7,) - - - - 3.0.0 - - 1.14 - - - - - - site-basic - - true - true - true - true - true - true - true - true - true - true - - - - - - - - ${project.version} - RC1 - COMMONSSITE - - - - 1.3 - 1.3 - - - false - - - - - - 2.18.1 - 2.18.1 - 2.10.3 - 0.11 - 2.11 - 2.6.1 - 2.5 - 2.8 - 2.8 - 3.4 - 0.7.5.201505241946 - 2.7 - 2.0 - 3.3 - 1.1 - 2.5.5 - - 1.11 - - 1.0 - - - ${project.artifactId}-${commons.release.version} - - -bin - ${project.artifactId}-${commons.release.2.version} - - -bin - ${project.artifactId}-${commons.release.3.version} - - -bin - - - 1.00 - 0.90 - 0.95 - 0.85 - 0.85 - 0.90 - false - - - ${project.artifactId} - - - org.apache.commons.${commons.componentid} - org.apache.commons.*;version=${project.version};-noimport:=true - * - - - true - - - ${project.build.directory}/osgi/MANIFEST.MF - - - scp - - - iso-8859-1 - - ${commons.encoding} - - ${commons.encoding} - - ${commons.encoding} - - - http://docs.oracle.com/javase/7/docs/api/ - http://docs.oracle.com/javaee/6/api/ - - - yyyy-MM-dd HH:mm:ssZ - ${scmBranch}@r${buildNumber}; ${maven.build.timestamp} - - - info - - - 100 - - - false - - - false - - 100 - - false - - - ${user.home}/commons-sites - - ${project.artifactId} - - https://svn.apache.org/repos/infra/websites/production/commons/content/proper/${project.artifactId} - ${commons.site.cache}/${commons.site.path} - commons.site - - https://analysis.apache.org/ - - - - + From 4cfda38b08626586553570716e04e594be170168 Mon Sep 17 00:00:00 2001 From: Guillaume Nodet Date: Fri, 17 Mar 2023 19:41:33 +0100 Subject: [PATCH 2/5] Fix BOM / encoding problems * enable testhst_lhs_007, testhst_lhs_008 and testhst_lhs_009 for InputStream * disable those tests on readers, as readers bypass any encoding * do not try to discover the encoding used when the input is given a Reader * add an SIO-8859-1 encoded coment in the test xml (testEncodingISO_8859_1_newReader and testEncodingISO_8859_1_InputStream_encoded tests do decode it wrongly as they use UTF-8) --- .../codehaus/plexus/util/xml/XmlReader.java | 7 +- .../plexus/util/xml/pull/MXParser.java | 19 +++- ..._BjoernHoehrmannviaHST2013_09_18_Test.java | 100 +++--------------- .../xml/test-encoding-ISO-8859-1.xml | 1 + 4 files changed, 37 insertions(+), 90 deletions(-) diff --git a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java index 08099acb..8ac13cab 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java +++ b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java @@ -523,11 +523,8 @@ else if ( bomEnc.equals( UTF_8 ) ) } else if ( bomEnc.equals( UTF_16BE ) || bomEnc.equals( UTF_16LE ) ) { - if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) ) - { - throw new IOException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ) ); - } - if ( xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) ) + if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) + || xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) ) { throw new XmlStreamReaderException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ), bomEnc, xmlGuessEnc, xmlEnc, is ); diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java index d2e67955..2dfc4c50 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java @@ -16,6 +16,8 @@ import java.io.UnsupportedEncodingException; import org.codehaus.plexus.util.xml.XmlStreamReader; +import org.codehaus.plexus.util.xml.XmlStreamReaderException; + //TODO best handling of interning issues // have isAllNewStringInterned ??? @@ -121,6 +123,7 @@ private String newStringIntern( char[] cbuf, int off, int len ) // private String elValue[]; private int elNamespaceCount[]; + private String fileEncoding = null; /** * Make sure that we have enough space to keep element stack if passed size. It will always create one additional @@ -678,7 +681,7 @@ public void setInput( java.io.InputStream inputStream, String inputEncoding ) } else { - reader = new XmlStreamReader( inputStream ); + reader = new XmlStreamReader( inputStream, false ); } } catch ( UnsupportedEncodingException une ) @@ -686,6 +689,18 @@ public void setInput( java.io.InputStream inputStream, String inputEncoding ) throw new XmlPullParserException( "could not create reader for encoding " + inputEncoding + " : " + une, this, une ); } + catch ( XmlStreamReaderException e ) + { + if ( "UTF-8".equals( e.getBomEncoding() ) ) + { + throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + e.getXmlEncoding() + " is incompatible", this, e ); + } + if ( e.getBomEncoding() != null && e.getBomEncoding().startsWith( "UTF-16" ) ) + { + throw new XmlPullParserException( "UTF-16 BOM in a " + e.getXmlEncoding() + " encoded file is incompatible", this, e ); + } + throw new XmlPullParserException( "could not create reader : " + e, this, e ); + } catch ( IOException e ) { throw new XmlPullParserException( "could not create reader : " + e, this, e ); @@ -3414,7 +3429,7 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd ) final int encodingEnd = pos - 1; // TODO reconcile with setInput encodingName - // inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart ); + inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart ); lastParsedAttr = "encoding"; diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java index 0747e07d..db55fb19 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java @@ -4,8 +4,10 @@ import static org.junit.Assert.fail; import java.io.File; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; import java.io.Reader; import java.nio.charset.StandardCharsets; @@ -206,24 +208,21 @@ public void testhst_bh_006() * Version: * * @throws java.io.IOException if there is an I/O error - * - * NOTE: This test is SKIPPED as the MXParser object alone is unable to detect whether UTF-8 file - * has a BOM or not */ - // @Test + @Test public void testhst_lhs_007() throws IOException { - try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "007.xml" ) ) ) + try ( InputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) ) ) { - parser.setInput( reader ); + parser.setInput( is, null ); while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) ; - fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" ); + fail( "UTF-8 BOM plus xml decl of ISO-8859-1 incompatible" ); } catch ( XmlPullParserException e ) { - assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" ) ); + assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of ISO-8859-1 is incompatible" ) ); } } @@ -235,21 +234,17 @@ public void testhst_lhs_007() * Version: * * @throws java.io.IOException if there is an I/O error - * - * NOTE: This test is SKIPPED as the MXParser object alone is unable to detect whether UTF-16 file - * has a BOM or not */ - // @Test - public void testhst_lhs_008_newReader() + @Test + public void testhst_lhs_008() throws IOException { - try ( Reader reader = - ReaderFactory.newReader( new File( testResourcesDir, "008.xml" ), StandardCharsets.UTF_16.name() ) ) + try ( InputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) ) ) { - parser.setInput( reader ); + parser.setInput( is, null ); while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) ; - fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" ); + fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-16 coding) incompatible" ); } catch ( XmlPullParserException e ) { @@ -257,35 +252,6 @@ public void testhst_lhs_008_newReader() } } - /** - * Test ID:
hst-lhs-008
- * Test URI:
008.xml
- * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible
- * Sections:
4.3.3
- * Version: - * - * @throws java.io.IOException if there is an I/O error - * - * NOTE: This test is SKIPPED as MXParser is unable to detect UTF-16 BOM detection when chars are read as - * UTF-8, and XmlReader in lenient mode does not throw exception. - */ - // @Test - public void testhst_lhs_008_XmlReader() - throws IOException - { - try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "008.xml" ) ) ) - { - parser.setInput( reader ); - while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) - ; - fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" ); - } - catch ( XmlPullParserException e ) - { - assertTrue( e.getMessage().contains( "UTF-16 BOM plus xml decl of utf-8 is incompatible" ) ); - } - } - /** * Test ID:
hst-lhs-009
* Test URI:
009.xml
@@ -298,52 +264,20 @@ public void testhst_lhs_008_XmlReader() * NOTE: This test is SKIPPED as MXParser is unable to detect UTF-16 BOM detection when chars are read as * UTF-8. */ - // @Test - public void testhst_lhs_009_newReader() - throws IOException - { - try ( Reader reader = - ReaderFactory.newReader( new File( testResourcesDir, "009.xml" ), StandardCharsets.UTF_16.name() ) ) - { - parser.setInput( reader ); - while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) - ; - fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" ); - } - catch ( XmlPullParserException e ) - { - assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) ); - } - } - - /** - * Test ID:
hst-lhs-009
- * Test URI:
009.xml
- * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible
- * Sections:
4.3.3
- * Version: - * - * @throws java.io.IOException if there is an I/O error - */ @Test - public void testhst_lhs_009_XmlReader() + public void testhst_lhs_009() throws IOException { - try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "009.xml" ) ) ) + try ( InputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) ) ) { - parser.setInput( reader ); + parser.setInput( is, null ); while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) ; - fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" ); - } - catch ( IOException e ) - { - // even when XmlReader is in lenient mode, it throws an IOException - assertTrue( e.getMessage().contains( "Invalid encoding, BOM [UTF-16BE] XML guess [UTF-8] XML prolog [UTF-8] encoding mismatch" ) ); + fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-8 coding) incompatible" ); } catch ( XmlPullParserException e ) { - fail( "Encoding problem should be detected by the XmlReader" ); + assertTrue( e.getMessage(), e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) ); } } diff --git a/src/test/resources/xml/test-encoding-ISO-8859-1.xml b/src/test/resources/xml/test-encoding-ISO-8859-1.xml index a740269f..e37a912c 100644 --- a/src/test/resources/xml/test-encoding-ISO-8859-1.xml +++ b/src/test/resources/xml/test-encoding-ISO-8859-1.xml @@ -1,2 +1,3 @@ + From 14b64bf4d835edab2711e8174b699fd45ded9656 Mon Sep 17 00:00:00 2001 From: Guillaume Nodet Date: Fri, 7 Apr 2023 10:35:53 +0200 Subject: [PATCH 3/5] Fix rebased code --- .../org/codehaus/plexus/util/xml/pull/MXParserTest.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java index d70de823..26e4e840 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java @@ -1058,9 +1058,8 @@ public void testEncodingISO_8859_1_StringReader() public void testEncodingISO_8859_1_newReader() throws IOException { - try ( Reader reader = - ReaderFactory.newReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ), - StandardCharsets.UTF_8.name() ) ) + try ( Reader reader = Files.newBufferedReader( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ), + StandardCharsets.UTF_8 ) ) { MXParser parser = new MXParser(); parser.setInput( reader ); @@ -1111,8 +1110,7 @@ public void testEncodingISO_8859_1_InputStream_encoded() throws IOException { public void testEncodingUTF8_newXmlReader() throws IOException { - try ( Reader reader = - ReaderFactory.newXmlReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + try ( Reader reader = new XmlStreamReader( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) { MXParser parser = new MXParser(); parser.setInput( reader ); From 6c259718bb4e70a660d88c37090f4d8a33771cb7 Mon Sep 17 00:00:00 2001 From: Guillaume Nodet Date: Fri, 7 Apr 2023 10:49:30 +0200 Subject: [PATCH 4/5] Fix test --- .../codehaus/plexus/util/xml/pull/MXParserTest.java | 10 ++++++++-- ...misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java | 3 --- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java index 26e4e840..e0d77330 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java @@ -23,8 +23,10 @@ import java.io.EOFException; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.nio.charset.StandardCharsets; @@ -1058,8 +1060,12 @@ public void testEncodingISO_8859_1_StringReader() public void testEncodingISO_8859_1_newReader() throws IOException { - try ( Reader reader = Files.newBufferedReader( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ), - StandardCharsets.UTF_8 ) ) + // NOTE: if using Files.newBufferedReader(path, StandardCharsets.UTF-8), the reader will throw an exception + // because the decoder created by new InputStreamReader() is lenient while the one created by + // Files.newBufferedReader() is not. + try ( Reader reader = new InputStreamReader( Files.newInputStream( + Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ), + StandardCharsets.UTF_8 ) ) { MXParser parser = new MXParser(); parser.setInput( reader ); diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java index db55fb19..bdd42385 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java @@ -9,9 +9,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.Reader; -import java.nio.charset.StandardCharsets; - -import org.codehaus.plexus.util.ReaderFactory; import org.junit.Before; import org.junit.Test; From 0d646eb81a268288eb2c4db58e19208bf7b8498b Mon Sep 17 00:00:00 2001 From: Guillaume Nodet Date: Fri, 7 Apr 2023 11:22:33 +0200 Subject: [PATCH 5/5] Test isn't skipped --- .../eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java index bdd42385..7d2f6299 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java @@ -257,9 +257,6 @@ public void testhst_lhs_008() * Version: * * @throws java.io.IOException if there is an I/O error - * - * NOTE: This test is SKIPPED as MXParser is unable to detect UTF-16 BOM detection when chars are read as - * UTF-8. */ @Test public void testhst_lhs_009()