Skip to content

Commit c1a00b2

Browse files
Do not escape ' character in java strings.
When printing a Java character literal, the ' character must be escaped, but when printing it as part of a Java string literal it need not be. The escaping logic has therefore been split into two functions.
1 parent 744671a commit c1a00b2

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

src/util/unicode.cpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,11 +257,15 @@ std::wstring utf8_to_utf16_native_endian(const std::string &in)
257257
return result;
258258
}
259259

260+
/// Escapes non-printable characters, whitespace except for spaces, double
261+
/// quotes and backslashes. This should yield a valid Java string literal.
262+
/// Note that this specifically does not escape single quotes, as these are not
263+
/// required to be escaped for Java string literals.
260264
/// \param ch: UTF-16 character in architecture-native endianness encoding
261265
/// \param result: stream to receive string in US-ASCII format, with \\uxxxx
262266
/// escapes for other characters
263267
/// \param loc: locale to check for printable characters
264-
static void utf16_native_endian_to_java(
268+
static void utf16_native_endian_to_java_string(
265269
const wchar_t ch,
266270
std::ostringstream &result,
267271
const std::locale &loc)
@@ -284,8 +288,9 @@ static void utf16_native_endian_to_java(
284288
else if(ch <= 255 && isprint(ch, loc))
285289
{
286290
const auto uch = static_cast<unsigned char>(ch);
287-
// ", \ and ' need to be escaped.
288-
if(uch == '"' || uch == '\\' || uch == '\'')
291+
// ", and \ need to be escaped, but not ' for java strings
292+
// e.g. "\"\\" needs escaping but "'" does not.
293+
if(uch == '"' || uch == '\\')
289294
result << '\\';
290295
result << uch;
291296
}
@@ -298,6 +303,29 @@ static void utf16_native_endian_to_java(
298303
}
299304
}
300305

306+
/// Escapes non-printable characters, whitespace except for spaces, double- and
307+
/// single-quotes and backslashes. This should yield a valid Java identifier.
308+
/// \param ch: UTF-16 character in architecture-native endianness encoding
309+
/// \param result: stream to receive string in US-ASCII format, with \\uxxxx
310+
/// escapes for other characters
311+
/// \param loc: locale to check for printable characters
312+
static void utf16_native_endian_to_java(
313+
const wchar_t ch,
314+
std::ostringstream &result,
315+
const std::locale &loc)
316+
{
317+
if(ch == (wchar_t)'\'')
318+
{
319+
const auto uch = static_cast<unsigned char>(ch);
320+
// ' needs to be escaped for java characters, e.g. '\''
321+
result << '\\' << uch;
322+
}
323+
else
324+
{
325+
utf16_native_endian_to_java_string(ch, result, loc);
326+
}
327+
}
328+
301329
/// \param ch: UTF-16 character in architecture-native endianness encoding
302330
/// \return String in US-ASCII format, with \\uxxxx escapes for other characters
303331
std::string utf16_native_endian_to_java(const char16_t ch)
@@ -308,6 +336,22 @@ std::string utf16_native_endian_to_java(const char16_t ch)
308336
return result.str();
309337
}
310338

339+
/// Escapes non-printable characters, whitespace except for spaces, double
340+
/// quotes and backslashes. This should yield a valid Java string literal.
341+
/// Note that this specifically does not escape single quotes, as these are not
342+
/// required to be escaped for Java string literals.
343+
/// \param in: String in UTF-16 (native endianness) format
344+
/// \return Valid Java string literal in US-ASCII format, with \\uxxxx escapes
345+
/// for other characters
346+
std::string utf16_native_endian_to_java_string(const std::wstring &in)
347+
{
348+
std::ostringstream result;
349+
const std::locale loc;
350+
for(const auto ch : in)
351+
utf16_native_endian_to_java_string(ch, result, loc);
352+
return result.str();
353+
}
354+
311355
/// \param in: String in UTF-16 (native endianness) format
312356
/// \return String in US-ASCII format, with \\uxxxx escapes for other characters
313357
std::string utf16_native_endian_to_java(const std::wstring &in)

src/util/unicode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ utf32_native_endian_to_utf8(const std::basic_string<unsigned int> &s);
2828
std::wstring utf8_to_utf16_native_endian(const std::string &in);
2929
std::string utf16_native_endian_to_java(const char16_t ch);
3030
std::string utf16_native_endian_to_java(const std::wstring &in);
31+
std::string utf16_native_endian_to_java_string(const std::wstring &in);
3132

3233
std::vector<std::string> narrow_argv(int argc, const wchar_t **argv_wide);
3334

0 commit comments

Comments
 (0)