Skip to content

Commit 06e666b

Browse files
committed
Implement parsing of Unicode characters from JSON
Previously, if a JSON file contained a string in hexadecimal Unicode representation, e.g. "\u0001", the JSON parser would discard the "\u" part and store the string as "0001". This commit fixes this so the resulting string is equal to "\u0001".
1 parent cb2f5ea commit 06e666b

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

src/json/parser.y

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,15 @@ static std::string convert_TOK_STRING()
4646
case 'r': result+='\r'; break;
4747
case 't': result+='\t'; break;
4848
case 'u':
49+
{
50+
// Character in hexadecimal Unicode representation, in the format
51+
// \uABCD, i.e. the following four digits are part of this character.
52+
assert(p + 4 < yyjsontext + len - 1);
53+
std::string hex(++p, 4);
54+
result += std::stoi(hex, nullptr, 16);
55+
p += 3;
4956
break;
50-
57+
}
5158
default:; /* an error */
5259
}
5360
}

unit/json/json_parser.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,31 @@ SCENARIO("Loading JSON files")
8484
}
8585
}
8686
}
87+
GIVEN("A JSON file containing a hexadecimal Unicode character")
88+
{
89+
temporary_filet unicode_json_file("cbmc_unit_json_parser_unicode", ".json");
90+
const std::string unicode_json_path = unicode_json_file();
91+
{
92+
std::ofstream unicode_json_out(unicode_json_path);
93+
unicode_json_out << "{\n"
94+
<< " \"special character\": \"\\u0001\"\n"
95+
<< "}\n";
96+
}
97+
WHEN("Loading the JSON file with the special character")
98+
{
99+
jsont unicode_json;
100+
const auto unicode_parse_error =
101+
parse_json(unicode_json_path, null_message_handler, unicode_json);
102+
THEN("The JSON file should be parsed correctly")
103+
{
104+
REQUIRE_FALSE(unicode_parse_error);
105+
REQUIRE(unicode_json.is_object());
106+
107+
const json_objectt &json_object = to_json_object(unicode_json);
108+
REQUIRE(json_object.find("special character") != json_object.end());
109+
REQUIRE(json_object["special character"].value.size() == 1);
110+
REQUIRE(json_object["special character"].value == "\u0001");
111+
}
112+
}
113+
}
87114
}

0 commit comments

Comments
 (0)