diff --git a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java index 08099acb..8ac13cab 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java +++ b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java @@ -523,11 +523,8 @@ else if ( bomEnc.equals( UTF_8 ) ) } else if ( bomEnc.equals( UTF_16BE ) || bomEnc.equals( UTF_16LE ) ) { - if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) ) - { - throw new IOException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ) ); - } - if ( xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) ) + if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) + || xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) ) { throw new XmlStreamReaderException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ), bomEnc, xmlGuessEnc, xmlEnc, is ); diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java index 0a1bc9c6..2dfc4c50 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java @@ -15,10 +15,9 @@ import java.io.Reader; import java.io.UnsupportedEncodingException; -import org.codehaus.plexus.util.xml.XmlReader; import org.codehaus.plexus.util.xml.XmlStreamReader; +import org.codehaus.plexus.util.xml.XmlStreamReaderException; -//import java.util.Hashtable; //TODO best handling of interning issues // have isAllNewStringInterned ??? 
@@ -663,20 +662,6 @@ public void setInput( Reader in ) { reset(); reader = in; - - if ( reader instanceof XmlReader ) { - // encoding already detected - XmlReader xsr = (XmlReader) reader; - fileEncoding = xsr.getEncoding(); - } - else if ( reader instanceof InputStreamReader ) - { - InputStreamReader isr = (InputStreamReader) reader; - if ( isr.getEncoding() != null ) - { - fileEncoding = isr.getEncoding().toUpperCase(); - } - } } @Override @@ -696,7 +681,7 @@ public void setInput( java.io.InputStream inputStream, String inputEncoding ) } else { - reader = new XmlStreamReader( inputStream ); + reader = new XmlStreamReader( inputStream, false ); } } catch ( UnsupportedEncodingException une ) @@ -704,6 +689,18 @@ public void setInput( java.io.InputStream inputStream, String inputEncoding ) throw new XmlPullParserException( "could not create reader for encoding " + inputEncoding + " : " + une, this, une ); } + catch ( XmlStreamReaderException e ) + { + if ( "UTF-8".equals( e.getBomEncoding() ) ) + { + throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + e.getXmlEncoding() + " is incompatible", this, e ); + } + if ( e.getBomEncoding() != null && e.getBomEncoding().startsWith( "UTF-16" ) ) + { + throw new XmlPullParserException( "UTF-16 BOM in a " + e.getXmlEncoding() + " encoded file is incompatible", this, e ); + } + throw new XmlPullParserException( "could not create reader : " + e, this, e ); + } catch ( IOException e ) { throw new XmlPullParserException( "could not create reader : " + e, this, e ); @@ -3434,17 +3431,6 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd ) // TODO reconcile with setInput encodingName inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart ); - if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) ) - { - throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible", - this, null ); - } - else if 
("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" )) - { - throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible", - this, null ); - } - lastParsedAttr = "encoding"; ch = more(); diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java index 3b6d5214..e0d77330 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java @@ -23,10 +23,13 @@ import java.io.EOFException; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; @@ -968,7 +971,7 @@ public void testXMLDeclVersionEncodingStandaloneNoSpace() * @since 3.4.1 */ @Test - public void testEncodingISO_8859_1setInputReader() + public void testEncodingISO_8859_1_newXmlReader() throws IOException { try ( Reader reader = @@ -994,7 +997,7 @@ public void testEncodingISO_8859_1setInputReader() * @since 3.4.1 */ @Test - public void testEncodingISO_8859_1_setInputStream() + public void testEncodingISO_8859_1_InputStream() throws IOException { try ( InputStream input = @@ -1012,12 +1015,6 @@ public void testEncodingISO_8859_1_setInputStream() } } - private static void assertPosition( int row, int col, MXParser parser ) - { - assertEquals( "Current line", row, parser.getLineNumber() ); - assertEquals( "Current column", col, parser.getColumnNumber() ); - } - /** * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 * @@ -1028,7 +1025,7 @@ private static void assertPosition( int row, int col, MXParser parser ) * @since 3.4.2 */ @Test - public void testEncodingISO_8859_1setStringReader() + public void 
testEncodingISO_8859_1_StringReader() throws IOException { String xmlFileContents; @@ -1050,6 +1047,95 @@ public void testEncodingISO_8859_1setStringReader() } } + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * Another case of bug #163: Reader generated with ReaderFactory.newReader and the right file encoding. + * + * @throws IOException if IO error. + * + * @since 3.5.2 + */ + @Test + public void testEncodingISO_8859_1_newReader() + throws IOException + { + // NOTE: if using Files.newBufferedReader(path, StandardCharsets.UTF_8), the reader will throw an exception + // because the decoder created by new InputStreamReader() is lenient while the one created by + // Files.newBufferedReader() is not. + try ( Reader reader = new InputStreamReader( Files.newInputStream( + Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ), + StandardCharsets.UTF_8 ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * Another case of bug #163: InputStream supplied with the right file encoding. + * + * @throws IOException if IO error. 
+ * + * @since 3.5.2 + */ + @Test + public void testEncodingISO_8859_1_InputStream_encoded() throws IOException { + try ( InputStream input = + Files.newInputStream( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( input, StandardCharsets.UTF_8.name() ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * @throws IOException if IO error. + * + * @since 3.4.1 + */ + @Test + public void testEncodingUTF8_newXmlReader() + throws IOException + { + try ( Reader reader = new XmlStreamReader( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + private static void assertPosition( int row, int col, MXParser parser ) + { + assertEquals( "Current line", row, parser.getLineNumber() ); + assertEquals( "Current column", col, parser.getColumnNumber() ); + } + /** *
* Test custom Entity not found.
diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
index 854fb494..7d2f6299 100644
--- a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
+++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
@@ -7,10 +7,8 @@
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
-import java.io.InputStreamReader;
+import java.io.InputStream;
import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-
import org.junit.Before;
import org.junit.Test;
@@ -212,17 +210,16 @@ public void testhst_bh_006()
public void testhst_lhs_007()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
+ try ( InputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) ) )
{
- parser.setInput( reader );
+ parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
- fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" );
+ fail( "UTF-8 BOM plus xml decl of ISO-8859-1 incompatible" );
}
catch ( XmlPullParserException e )
{
- assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" ) );
+ assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of ISO-8859-1 is incompatible" ) );
}
}
@@ -239,17 +236,16 @@ public void testhst_lhs_007()
public void testhst_lhs_008()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) )
+ try ( InputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) ) )
{
- parser.setInput( reader );
+ parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
- fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" );
+ fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-16 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
- assertTrue( e.getMessage().contains( "UTF-16 BOM plus xml decl of utf-8 is incompatible" ) );
+ assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
}
}
@@ -266,17 +262,16 @@ public void testhst_lhs_008()
public void testhst_lhs_009()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
- {
- parser.setInput( reader );
+ try ( InputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) ) )
+ {
+ parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
- fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" );
+ fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-8 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
- assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
+ assertTrue( e.getMessage(), e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
}
}
diff --git a/src/test/resources/xml/test-encoding-ISO-8859-1.xml b/src/test/resources/xml/test-encoding-ISO-8859-1.xml
index ae0aefe7..e37a912c 100644
--- a/src/test/resources/xml/test-encoding-ISO-8859-1.xml
+++ b/src/test/resources/xml/test-encoding-ISO-8859-1.xml
@@ -1,1503 +1,3 @@
-
-