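Fix logic error when detecting the next valid UTF-8 sequence: the sequence-length range check in UTF8Decoder::isValidNext used `&&` instead of `||`, so an invalid length was never rejected.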

Alan Wright · Alan Wright · commit 5170153646cb · 2012-09-30T23:15:53.000+02:00
Closes #31.
diff --git a/src/core/util/UTF8Stream.cpp b/src/core/util/UTF8Stream.cpp
@@ -16,22 +16,22 @@ namespace Lucene
     const uint16_t UTF8Base::TRAIL_SURROGATE_MAX = 0xdfffu;
     const uint16_t UTF8Base::LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
     const uint32_t UTF8Base::SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
-    
+
     // Maximum valid value for a Unicode code point
     const uint32_t UTF8Base::CODE_POINT_MAX = 0x0010ffffu;
-    
+
     #ifdef LPP_UNICODE_CHAR_SIZE_2
     const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0xfffd;
     const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0xffff;
     #else
     const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0x0001fffd;
     const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0x0001ffff;
     #endif
-    
+
     UTF8Base::~UTF8Base()
     {
     }
-    
+
     inline uint8_t UTF8Base::mask8(uint32_t b)
     {
         return static_cast<uint8_t>(0xff & b);
@@ -46,7 +46,7 @@ namespace Lucene
     {
         return ((mask8(b) >> 6) == 0x2);
     }
-    
+
     inline bool UTF8Base::isSurrogate(uint32_t cp)
     {
         return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
@@ -61,47 +61,47 @@ namespace Lucene
     {
         return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
     }
-    
+
     inline bool UTF8Base::isValidCodePoint(uint32_t cp)
     {
         return (cp <= CODE_POINT_MAX && !isSurrogate(cp) && cp != 0xfffe && cp != 0xffff);
     }
-    
+
     inline bool UTF8Base::isOverlongSequence(uint32_t cp, int32_t length)
     {
         if (cp < 0x80)
         {
-            if (length != 1) 
+            if (length != 1)
                 return true;
         }
         else if (cp < 0x800)
         {
-            if (length != 2) 
+            if (length != 2)
                 return true;
         }
         else if (cp < 0x10000)
         {
-            if (length != 3) 
+            if (length != 3)
                 return true;
         }
         return false;
     }
-    
+
     UTF8Encoder::UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd)
     {
         this->unicodeBegin = unicodeBegin;
         this->unicodeEnd = unicodeEnd;
     }
-    
+
     UTF8Encoder::~UTF8Encoder()
     {
     }
-    
+
     uint32_t UTF8Encoder::readNext()
     {
         return unicodeBegin == unicodeEnd ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*unicodeBegin++;
     }
-    
+
     inline uint8_t* UTF8Encoder::appendChar(uint8_t* utf8, uint32_t cp)
     {
         if (cp < 0x80) // one octet
@@ -126,12 +126,12 @@ namespace Lucene
         }
         return utf8;
     }
-    
+
     int32_t UTF8Encoder::utf16to8(uint8_t* utf8, int32_t length)
     {
         uint8_t* start = utf8;
         uint32_t next = readNext();
-        
+
         while (next != UNICODE_TERMINATOR)
         {
             uint32_t cp = mask16(next);
@@ -154,15 +154,15 @@ namespace Lucene
                 break;
             next = readNext();
         }
-        
+
         return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start);
     }
-    
+
     int32_t UTF8Encoder::utf32to8(uint8_t* utf8, int32_t length)
     {
         uint8_t* start = utf8;
         uint32_t next = readNext();
-        
+
         while (next != UNICODE_TERMINATOR)
         {
             if (!isValidCodePoint(next))
@@ -172,10 +172,10 @@ namespace Lucene
                 break;
             next = readNext();
         }
-        
+
         return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start);
     }
-    
+
     int32_t UTF8Encoder::encode(uint8_t* utf8, int32_t length)
     {
         #ifdef LPP_UNICODE_CHAR_SIZE_2
@@ -184,37 +184,37 @@ namespace Lucene
         return utf32to8(utf8, length);
         #endif
     }
-    
+
     UTF8EncoderStream::UTF8EncoderStream(ReaderPtr reader) : UTF8Encoder(NULL, NULL)
     {
         this->reader = reader;
     }
-    
+
     UTF8EncoderStream::~UTF8EncoderStream()
     {
     }
-    
+
     uint32_t UTF8EncoderStream::readNext()
     {
         int32_t next = reader->read();
         return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next;
     }
-    
+
     UTF8Decoder::UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End)
     {
         this->utf8Begin = utf8Begin;
         this->utf8End = utf8End;
     }
-    
+
     UTF8Decoder::~UTF8Decoder()
     {
     }
-    
+
     uint32_t UTF8Decoder::readNext()
     {
         return utf8Begin == utf8End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf8Begin++;
     }
-    
+
     inline int32_t UTF8Decoder::sequenceLength(uint32_t cp)
     {
         uint8_t lead = mask8(cp);
@@ -228,7 +228,7 @@ namespace Lucene
             return 4;
         return 0;
     }
-    
+
     inline bool UTF8Decoder::getSequence(uint32_t& cp, int32_t length)
     {
         cp = mask8(cp);
@@ -267,27 +267,27 @@ namespace Lucene
         cp += next & 0x3f;
         return true;
     }
-    
+
     inline bool UTF8Decoder::isValidNext(uint32_t& cp)
     {
         // Determine the sequence length based on the lead octet
         int32_t length = sequenceLength(cp);
-        if (length < 1 && length > 4)
+        if (length < 1 || length > 4)
             return false;
 
         // Now that we have a valid sequence length, get trail octets and calculate the code point
         if (!getSequence(cp, length))
             return false;
-        
+
         // Decoding succeeded, now security checks
         return (isValidCodePoint(cp) && !isOverlongSequence(cp, length));
     }
-    
+
     int32_t UTF8Decoder::utf8to16(wchar_t* unicode, int32_t length)
     {
         int32_t position = 0;
         uint32_t next = readNext();
-        
+
         while (next != UNICODE_TERMINATOR)
         {
             if (!isValidNext(next))
@@ -303,15 +303,15 @@ namespace Lucene
                 break;
             next = readNext();
         }
-        
+
         return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;
     }
-    
+
     int32_t UTF8Decoder::utf8to32(wchar_t* unicode, int32_t length)
     {
         int32_t position = 0;
         uint32_t next = readNext();
-        
+
         while (next != UNICODE_TERMINATOR)
         {
             if (!isValidNext(next))
@@ -321,10 +321,10 @@ namespace Lucene
                 break;
             next = readNext();
         }
-        
+
         return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;
     }
-    
+
     int32_t UTF8Decoder::decode(wchar_t* unicode, int32_t length)
     {
         #ifdef LPP_UNICODE_CHAR_SIZE_2
@@ -333,42 +333,42 @@ namespace Lucene
         return utf8to32(unicode, length);
         #endif
     }
-    
+
     UTF8DecoderStream::UTF8DecoderStream(ReaderPtr reader)  : UTF8Decoder(NULL, NULL)
     {
         this->reader = reader;
     }
-    
+
     UTF8DecoderStream::~UTF8DecoderStream()
     {
     }
-    
+
     uint32_t UTF8DecoderStream::readNext()
     {
         int32_t next = reader->read();
         return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next;
     }
-    
+
     UTF16Decoder::UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End)
     {
         this->utf16Begin = utf16Begin;
         this->utf16End = utf16End;
     }
-    
+
     UTF16Decoder::~UTF16Decoder()
     {
     }
-    
+
     uint32_t UTF16Decoder::readNext()
     {
         return utf16Begin == utf16End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf16Begin++;
     }
-    
+
     int32_t UTF16Decoder::utf16to32(wchar_t* unicode, int32_t length)
     {
         int32_t position = 0;
         uint32_t next = readNext();
-        
+
         while (next != UNICODE_TERMINATOR)
         {
             uint32_t cp = mask16(next);
@@ -390,26 +390,26 @@ namespace Lucene
                 break;
             next = readNext();
         }
-        
+
         return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;
     }
-    
+
     int32_t UTF16Decoder::utf16to16(wchar_t* unicode, int32_t length)
     {
         int32_t position = 0;
         uint32_t next = readNext();
-        
+
         while (next != UNICODE_TERMINATOR)
         {
             unicode[position++] = static_cast<wchar_t>(next);
             if (position >= length)
                 break;
             next = readNext();
         }
-        
+
         return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;
     }
-    
+
     int32_t UTF16Decoder::decode(wchar_t* unicode, int32_t length)
     {
         #ifdef LPP_UNICODE_CHAR_SIZE_2

Original file line number	Diff line number	Diff line change
`@@ -16,22 +16,22 @@ namespace Lucene`
`16`	`16`	`const uint16_t UTF8Base::TRAIL_SURROGATE_MAX = 0xdfffu;`
`17`	`17`	`const uint16_t UTF8Base::LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);`
`18`	`18`	`const uint32_t UTF8Base::SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;`
`19`		`-`
	`19`	`+`
`20`	`20`	`// Maximum valid value for a Unicode code point`
`21`	`21`	`const uint32_t UTF8Base::CODE_POINT_MAX = 0x0010ffffu;`
`22`		`-`
	`22`	`+`
`23`	`23`	`#ifdef LPP_UNICODE_CHAR_SIZE_2`
`24`	`24`	`const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0xfffd;`
`25`	`25`	`const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0xffff;`
`26`	`26`	`#else`
`27`	`27`	`const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0x0001fffd;`
`28`	`28`	`const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0x0001ffff;`
`29`	`29`	`#endif`
`30`		`-`
	`30`	`+`
`31`	`31`	`UTF8Base::~UTF8Base()`
`32`	`32`	`{`
`33`	`33`	`}`
`34`		`-`
	`34`	`+`
`35`	`35`	`inline uint8_t UTF8Base::mask8(uint32_t b)`
`36`	`36`	`{`
`37`	`37`	`return static_cast<uint8_t>(0xff & b);`
`@@ -46,7 +46,7 @@ namespace Lucene`
`46`	`46`	`{`
`47`	`47`	`return ((mask8(b) >> 6) == 0x2);`
`48`	`48`	`}`
`49`		`-`
	`49`	`+`
`50`	`50`	`inline bool UTF8Base::isSurrogate(uint32_t cp)`
`51`	`51`	`{`
`52`	`52`	`return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);`
`@@ -61,47 +61,47 @@ namespace Lucene`
`61`	`61`	`{`
`62`	`62`	`return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);`
`63`	`63`	`}`
`64`		`-`
	`64`	`+`
`65`	`65`	`inline bool UTF8Base::isValidCodePoint(uint32_t cp)`
`66`	`66`	`{`
`67`	`67`	`return (cp <= CODE_POINT_MAX && !isSurrogate(cp) && cp != 0xfffe && cp != 0xffff);`
`68`	`68`	`}`
`69`		`-`
	`69`	`+`
`70`	`70`	`inline bool UTF8Base::isOverlongSequence(uint32_t cp, int32_t length)`
`71`	`71`	`{`
`72`	`72`	`if (cp < 0x80)`
`73`	`73`	`{`
`74`		`- if (length != 1)`
	`74`	`+ if (length != 1)`
`75`	`75`	`return true;`
`76`	`76`	`}`
`77`	`77`	`else if (cp < 0x800)`
`78`	`78`	`{`
`79`		`- if (length != 2)`
	`79`	`+ if (length != 2)`
`80`	`80`	`return true;`
`81`	`81`	`}`
`82`	`82`	`else if (cp < 0x10000)`
`83`	`83`	`{`
`84`		`- if (length != 3)`
	`84`	`+ if (length != 3)`
`85`	`85`	`return true;`
`86`	`86`	`}`
`87`	`87`	`return false;`
`88`	`88`	`}`
`89`		`-`
	`89`	`+`
`90`	`90`	`UTF8Encoder::UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd)`
`91`	`91`	`{`
`92`	`92`	`this->unicodeBegin = unicodeBegin;`
`93`	`93`	`this->unicodeEnd = unicodeEnd;`
`94`	`94`	`}`
`95`		`-`
	`95`	`+`
`96`	`96`	`UTF8Encoder::~UTF8Encoder()`
`97`	`97`	`{`
`98`	`98`	`}`
`99`		`-`
	`99`	`+`
`100`	`100`	`uint32_t UTF8Encoder::readNext()`
`101`	`101`	`{`
`102`	`102`	`return unicodeBegin == unicodeEnd ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*unicodeBegin++;`
`103`	`103`	`}`
`104`		`-`
	`104`	`+`
`105`	`105`	`inline uint8_t* UTF8Encoder::appendChar(uint8_t* utf8, uint32_t cp)`
`106`	`106`	`{`
`107`	`107`	`if (cp < 0x80) // one octet`
`@@ -126,12 +126,12 @@ namespace Lucene`
`126`	`126`	`}`
`127`	`127`	`return utf8;`
`128`	`128`	`}`
`129`		`-`
	`129`	`+`
`130`	`130`	`int32_t UTF8Encoder::utf16to8(uint8_t* utf8, int32_t length)`
`131`	`131`	`{`
`132`	`132`	`uint8_t* start = utf8;`
`133`	`133`	`uint32_t next = readNext();`
`134`		`-`
	`134`	`+`
`135`	`135`	`while (next != UNICODE_TERMINATOR)`
`136`	`136`	`{`
`137`	`137`	`uint32_t cp = mask16(next);`
`@@ -154,15 +154,15 @@ namespace Lucene`
`154`	`154`	`break;`
`155`	`155`	`next = readNext();`
`156`	`156`	`}`
`157`		`-`
	`157`	`+`
`158`	`158`	`return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start);`
`159`	`159`	`}`
`160`		`-`
	`160`	`+`
`161`	`161`	`int32_t UTF8Encoder::utf32to8(uint8_t* utf8, int32_t length)`
`162`	`162`	`{`
`163`	`163`	`uint8_t* start = utf8;`
`164`	`164`	`uint32_t next = readNext();`
`165`		`-`
	`165`	`+`
`166`	`166`	`while (next != UNICODE_TERMINATOR)`
`167`	`167`	`{`
`168`	`168`	`if (!isValidCodePoint(next))`
`@@ -172,10 +172,10 @@ namespace Lucene`
`172`	`172`	`break;`
`173`	`173`	`next = readNext();`
`174`	`174`	`}`
`175`		`-`
	`175`	`+`
`176`	`176`	`return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start);`
`177`	`177`	`}`
`178`		`-`
	`178`	`+`
`179`	`179`	`int32_t UTF8Encoder::encode(uint8_t* utf8, int32_t length)`
`180`	`180`	`{`
`181`	`181`	`#ifdef LPP_UNICODE_CHAR_SIZE_2`
`@@ -184,37 +184,37 @@ namespace Lucene`
`184`	`184`	`return utf32to8(utf8, length);`
`185`	`185`	`#endif`
`186`	`186`	`}`
`187`		`-`
	`187`	`+`
`188`	`188`	`UTF8EncoderStream::UTF8EncoderStream(ReaderPtr reader) : UTF8Encoder(NULL, NULL)`
`189`	`189`	`{`
`190`	`190`	`this->reader = reader;`
`191`	`191`	`}`
`192`		`-`
	`192`	`+`
`193`	`193`	`UTF8EncoderStream::~UTF8EncoderStream()`
`194`	`194`	`{`
`195`	`195`	`}`
`196`		`-`
	`196`	`+`
`197`	`197`	`uint32_t UTF8EncoderStream::readNext()`
`198`	`198`	`{`
`199`	`199`	`int32_t next = reader->read();`
`200`	`200`	`return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next;`
`201`	`201`	`}`
`202`		`-`
	`202`	`+`
`203`	`203`	`UTF8Decoder::UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End)`
`204`	`204`	`{`
`205`	`205`	`this->utf8Begin = utf8Begin;`
`206`	`206`	`this->utf8End = utf8End;`
`207`	`207`	`}`
`208`		`-`
	`208`	`+`
`209`	`209`	`UTF8Decoder::~UTF8Decoder()`
`210`	`210`	`{`
`211`	`211`	`}`
`212`		`-`
	`212`	`+`
`213`	`213`	`uint32_t UTF8Decoder::readNext()`
`214`	`214`	`{`
`215`	`215`	`return utf8Begin == utf8End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf8Begin++;`
`216`	`216`	`}`
`217`		`-`
	`217`	`+`
`218`	`218`	`inline int32_t UTF8Decoder::sequenceLength(uint32_t cp)`
`219`	`219`	`{`
`220`	`220`	`uint8_t lead = mask8(cp);`
`@@ -228,7 +228,7 @@ namespace Lucene`
`228`	`228`	`return 4;`
`229`	`229`	`return 0;`
`230`	`230`	`}`
`231`		`-`
	`231`	`+`
`232`	`232`	`inline bool UTF8Decoder::getSequence(uint32_t& cp, int32_t length)`
`233`	`233`	`{`
`234`	`234`	`cp = mask8(cp);`
`@@ -267,27 +267,27 @@ namespace Lucene`
`267`	`267`	`cp += next & 0x3f;`
`268`	`268`	`return true;`
`269`	`269`	`}`
`270`		`-`
	`270`	`+`
`271`	`271`	`inline bool UTF8Decoder::isValidNext(uint32_t& cp)`
`272`	`272`	`{`
`273`	`273`	`// Determine the sequence length based on the lead octet`
`274`	`274`	`int32_t length = sequenceLength(cp);`
`275`		`- if (length < 1 && length > 4)`
	`275`	`+ if (length < 1 \|\| length > 4)`
`276`	`276`	`return false;`
`277`	`277`
`278`	`278`	`// Now that we have a valid sequence length, get trail octets and calculate the code point`
`279`	`279`	`if (!getSequence(cp, length))`
`280`	`280`	`return false;`
`281`		`-`
	`281`	`+`
`282`	`282`	`// Decoding succeeded, now security checks`
`283`	`283`	`return (isValidCodePoint(cp) && !isOverlongSequence(cp, length));`
`284`	`284`	`}`
`285`		`-`
	`285`	`+`
`286`	`286`	`int32_t UTF8Decoder::utf8to16(wchar_t* unicode, int32_t length)`
`287`	`287`	`{`
`288`	`288`	`int32_t position = 0;`
`289`	`289`	`uint32_t next = readNext();`
`290`		`-`
	`290`	`+`
`291`	`291`	`while (next != UNICODE_TERMINATOR)`
`292`	`292`	`{`
`293`	`293`	`if (!isValidNext(next))`
`@@ -303,15 +303,15 @@ namespace Lucene`
`303`	`303`	`break;`
`304`	`304`	`next = readNext();`
`305`	`305`	`}`
`306`		`-`
	`306`	`+`
`307`	`307`	`return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;`
`308`	`308`	`}`
`309`		`-`
	`309`	`+`
`310`	`310`	`int32_t UTF8Decoder::utf8to32(wchar_t* unicode, int32_t length)`
`311`	`311`	`{`
`312`	`312`	`int32_t position = 0;`
`313`	`313`	`uint32_t next = readNext();`
`314`		`-`
	`314`	`+`
`315`	`315`	`while (next != UNICODE_TERMINATOR)`
`316`	`316`	`{`
`317`	`317`	`if (!isValidNext(next))`
`@@ -321,10 +321,10 @@ namespace Lucene`
`321`	`321`	`break;`
`322`	`322`	`next = readNext();`
`323`	`323`	`}`
`324`		`-`
	`324`	`+`
`325`	`325`	`return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;`
`326`	`326`	`}`
`327`		`-`
	`327`	`+`
`328`	`328`	`int32_t UTF8Decoder::decode(wchar_t* unicode, int32_t length)`
`329`	`329`	`{`
`330`	`330`	`#ifdef LPP_UNICODE_CHAR_SIZE_2`
`@@ -333,42 +333,42 @@ namespace Lucene`
`333`	`333`	`return utf8to32(unicode, length);`
`334`	`334`	`#endif`
`335`	`335`	`}`
`336`		`-`
	`336`	`+`
`337`	`337`	`UTF8DecoderStream::UTF8DecoderStream(ReaderPtr reader) : UTF8Decoder(NULL, NULL)`
`338`	`338`	`{`
`339`	`339`	`this->reader = reader;`
`340`	`340`	`}`
`341`		`-`
	`341`	`+`
`342`	`342`	`UTF8DecoderStream::~UTF8DecoderStream()`
`343`	`343`	`{`
`344`	`344`	`}`
`345`		`-`
	`345`	`+`
`346`	`346`	`uint32_t UTF8DecoderStream::readNext()`
`347`	`347`	`{`
`348`	`348`	`int32_t next = reader->read();`
`349`	`349`	`return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next;`
`350`	`350`	`}`
`351`		`-`
	`351`	`+`
`352`	`352`	`UTF16Decoder::UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End)`
`353`	`353`	`{`
`354`	`354`	`this->utf16Begin = utf16Begin;`
`355`	`355`	`this->utf16End = utf16End;`
`356`	`356`	`}`
`357`		`-`
	`357`	`+`
`358`	`358`	`UTF16Decoder::~UTF16Decoder()`
`359`	`359`	`{`
`360`	`360`	`}`
`361`		`-`
	`361`	`+`
`362`	`362`	`uint32_t UTF16Decoder::readNext()`
`363`	`363`	`{`
`364`	`364`	`return utf16Begin == utf16End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf16Begin++;`
`365`	`365`	`}`
`366`		`-`
	`366`	`+`
`367`	`367`	`int32_t UTF16Decoder::utf16to32(wchar_t* unicode, int32_t length)`
`368`	`368`	`{`
`369`	`369`	`int32_t position = 0;`
`370`	`370`	`uint32_t next = readNext();`
`371`		`-`
	`371`	`+`
`372`	`372`	`while (next != UNICODE_TERMINATOR)`
`373`	`373`	`{`
`374`	`374`	`uint32_t cp = mask16(next);`
`@@ -390,26 +390,26 @@ namespace Lucene`
`390`	`390`	`break;`
`391`	`391`	`next = readNext();`
`392`	`392`	`}`
`393`		`-`
	`393`	`+`
`394`	`394`	`return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;`
`395`	`395`	`}`
`396`		`-`
	`396`	`+`
`397`	`397`	`int32_t UTF16Decoder::utf16to16(wchar_t* unicode, int32_t length)`
`398`	`398`	`{`
`399`	`399`	`int32_t position = 0;`
`400`	`400`	`uint32_t next = readNext();`
`401`		`-`
	`401`	`+`
`402`	`402`	`while (next != UNICODE_TERMINATOR)`
`403`	`403`	`{`
`404`	`404`	`unicode[position++] = static_cast<wchar_t>(next);`
`405`	`405`	`if (position >= length)`
`406`	`406`	`break;`
`407`	`407`	`next = readNext();`
`408`	`408`	`}`
`409`		`-`
	`409`	`+`
`410`	`410`	`return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position;`
`411`	`411`	`}`
`412`		`-`
	`412`	`+`
`413`	`413`	`int32_t UTF16Decoder::decode(wchar_t* unicode, int32_t length)`
`414`	`414`	`{`
`415`	`415`	`#ifdef LPP_UNICODE_CHAR_SIZE_2`