@@ -257,11 +257,15 @@ std::wstring utf8_to_utf16_native_endian(const std::string &in)
257
257
return result;
258
258
}
259
259
260
+ // / Escapes non-printable characters, whitespace except for spaces, double
261
+ // / quotes and backslashes. This should yield a valid Java string literal.
262
+ // / Note that this specifically does not escape single quotes, as these are not
263
+ // / required to be escaped for Java string literals.
260
264
// / \param ch: UTF-16 character in architecture-native endianness encoding
261
265
// / \param result: stream to receive string in US-ASCII format, with \\uxxxx
262
266
// / escapes for other characters
263
267
// / \param loc: locale to check for printable characters
264
- static void utf16_native_endian_to_java (
268
+ static void utf16_native_endian_to_java_string (
265
269
const wchar_t ch,
266
270
std::ostringstream &result,
267
271
const std::locale &loc)
@@ -284,8 +288,9 @@ static void utf16_native_endian_to_java(
284
288
else if (ch <= 255 && isprint (ch, loc))
285
289
{
286
290
const auto uch = static_cast <unsigned char >(ch);
287
- // ", \ and ' need to be escaped.
288
- if (uch == ' "' || uch == ' \\ ' || uch == ' \' ' )
291
+ // ", and \ need to be escaped, but not ' for java strings
292
+ // e.g. "\"\\" needs escaping but "'" does not.
293
+ if (uch == ' "' || uch == ' \\ ' )
289
294
result << ' \\ ' ;
290
295
result << uch;
291
296
}
@@ -298,6 +303,29 @@ static void utf16_native_endian_to_java(
298
303
}
299
304
}
300
305
306
+ // / Escapes non-printable characters, whitespace except for spaces, double- and
307
+ // / single-quotes and backslashes. This should yield a valid Java identifier.
308
+ // / \param ch: UTF-16 character in architecture-native endianness encoding
309
+ // / \param result: stream to receive string in US-ASCII format, with \\uxxxx
310
+ // / escapes for other characters
311
+ // / \param loc: locale to check for printable characters
312
+ static void utf16_native_endian_to_java (
313
+ const wchar_t ch,
314
+ std::ostringstream &result,
315
+ const std::locale &loc)
316
+ {
317
+ if (ch == (wchar_t )' \' ' )
318
+ {
319
+ const auto uch = static_cast <unsigned char >(ch);
320
+ // ' needs to be escaped for java characters, e.g. '\''
321
+ result << ' \\ ' << uch;
322
+ }
323
+ else
324
+ {
325
+ utf16_native_endian_to_java_string (ch, result, loc);
326
+ }
327
+ }
328
+
301
329
// / \param ch: UTF-16 character in architecture-native endianness encoding
302
330
// / \return String in US-ASCII format, with \\uxxxx escapes for other characters
303
331
std::string utf16_native_endian_to_java (const char16_t ch)
@@ -308,14 +336,19 @@ std::string utf16_native_endian_to_java(const char16_t ch)
308
336
return result.str ();
309
337
}
310
338
339
+ // / Escapes non-printable characters, whitespace except for spaces, double
340
+ // / quotes and backslashes. This should yield a valid Java string literal.
341
+ // / Note that this specifically does not escape single quotes, as these are not
342
+ // / required to be escaped for Java string literals.
311
343
// / \param in: String in UTF-16 (native endianness) format
312
- // / \return String in US-ASCII format, with \\uxxxx escapes for other characters
313
- std::string utf16_native_endian_to_java (const std::wstring &in)
344
+ // / \return Valid Java string literal in US-ASCII format, with \\uxxxx escapes
345
+ // / for other characters
346
+ std::string utf16_native_endian_to_java_string (const std::wstring &in)
314
347
{
315
348
std::ostringstream result;
316
349
const std::locale loc;
317
350
for (const auto ch : in)
318
- utf16_native_endian_to_java (ch, result, loc);
351
+ utf16_native_endian_to_java_string (ch, result, loc);
319
352
return result.str ();
320
353
}
321
354
0 commit comments