@@ -284,39 +284,64 @@ std::wstring utf8_to_utf16_little_endian(const std::string &in)
284
284
return utf8_to_utf16 (in, swap_bytes);
285
285
}
286
286
287
- // / \par parameters: String in UTF-16LE format
288
- // / \return String in US-ASCII format, with \uxxxx escapes for other characters
287
/// Appends the Java source-code spelling of one UTF-16 code unit to a stream.
/// Newline, carriage return, form feed, backspace and tab are written as their
/// two-character escapes; printable US-ASCII characters are written literally
/// (with a backslash in front of ", \ and '); everything else is written as a
/// \\uxxxx escape. The formatting state of \p result (numeric base and fill
/// character) is left as it was found.
/// \param ch: UTF-16LE character
/// \param result: stream to receive string in US-ASCII format, with \\uxxxx
///   escapes for other characters
/// \param loc: locale to check for printable characters
static void utf16_little_endian_to_java(
  const wchar_t ch,
  std::ostringstream &result,
  const std::locale &loc)
{
  // wchar_t may be signed; compare through an unsigned value so that only
  // genuine 7-bit code units are ever considered for literal output.
  const unsigned int code_unit = static_cast<unsigned int>(ch);

  // \u unicode characters are translated very early by the Java compiler and so
  // \u000a or \u000d would become a newline character in a char constant, which
  // is illegal. Instead use \n or \r.
  if(ch == '\n')
    result << "\\n";
  else if(ch == '\r')
    result << "\\r";
  // \f, \b and \t do not need to be escaped, but this will improve readability
  // of generated tests.
  else if(ch == '\f')
    result << "\\f";
  else if(ch == '\b')
    result << "\\b";
  else if(ch == '\t')
    result << "\\t";
  // Only code units below 128 may be emitted literally: the output is meant to
  // be US-ASCII, and whether 128..255 count as printable depends on the locale
  // and platform, which would make the generated text non-portable.
  else if(code_unit < 128 && std::isprint(ch, loc))
  {
    const auto uch = static_cast<unsigned char>(ch);
    // ", \ and ' need to be escaped.
    if(uch == '"' || uch == '\\' || uch == '\'')
      result << '\\';
    result << uch;
  }
  else
  {
    // std::hex and std::setfill are sticky; remember the caller's settings so
    // that later insertions into the same stream are not silently affected.
    const std::ios_base::fmtflags saved_flags = result.flags();
    const char saved_fill = result.fill();

    // Format ch as a hexadecimal unicode character padded to four digits with
    // zeros.
    result << "\\u" << std::hex << std::setw(4) << std::setfill('0')
           << code_unit;

    result.flags(saved_flags);
    result.fill(saved_fill);
  }
}
327
+
328
+ // / \param ch: UTF-16LE character
329
+ // / \return String in US-ASCII format, with \\uxxxx escapes for other characters
330
+ std::string utf16_little_endian_to_java (const wchar_t ch)
331
+ {
332
+ std::ostringstream result;
333
+ const std::locale loc;
334
+ utf16_little_endian_to_java (ch, result, loc);
335
+ return result.str ();
336
+ }
337
+
338
+ // / \param in: String in UTF-16LE format
339
+ // / \return String in US-ASCII format, with \\uxxxx escapes for other characters
289
340
std::string utf16_little_endian_to_java (const std::wstring &in)
290
341
{
291
342
std::ostringstream result;
292
343
const std::locale loc;
293
344
for (const auto ch : in)
294
- {
295
- if (ch==' \n ' )
296
- result << " \\ n" ;
297
- else if (ch==' \r ' )
298
- result << " \\ r" ;
299
- else if (ch==' \f ' )
300
- result << " \\ f" ;
301
- else if (ch==' \b ' )
302
- result << " \\ b" ;
303
- else if (ch==' \t ' )
304
- result << " \\ t" ;
305
- else if (ch<=255 && isprint (ch, loc))
306
- {
307
- const auto uch=static_cast <unsigned char >(ch);
308
- if (uch==' "' || uch==' \\ ' )
309
- result << ' \\ ' ;
310
- result << uch;
311
- }
312
- else
313
- {
314
- result << " \\ u"
315
- << std::hex
316
- << std::setw (4 )
317
- << std::setfill (' 0' )
318
- << static_cast <unsigned int >(ch);
319
- }
320
- }
345
+ utf16_little_endian_to_java (ch, result, loc);
321
346
return result.str ();
322
347
}
0 commit comments