Skip to content

Commit 08d1f64

Browse files
committed
Update gdb api to return more information about pointed-to objects
This updates the gdb API to return more information about pointers (via the method get_memory(), which returns an object of type pointer_valuet describing the pointer and the pointed-to data). Unit tests for the new functionality are included.
1 parent 1f193b1 commit 08d1f64

File tree

4 files changed

+150
-41
lines changed

4 files changed

+150
-41
lines changed

src/memory-analyzer/gdb_api.cpp

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ Author: Malte Mues <[email protected]>
2525
#include <cstring>
2626
#include <regex>
2727

28+
#include <iostream>
29+
2830
#include "gdb_api.h"
2931

3032
#include <goto-programs/goto_model.h>
@@ -358,32 +360,36 @@ std::string gdb_apit::eval_expr(const std::string &expr)
358360
/// \param expr an expression of pointer type (e.g., `&x` with `x` being of type
359361
/// `int` or `p` with `p` being of type `int *`)
360362
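/// \return a `pointer_valuet` holding the memory address in hex format and, if
///   gdb printed them, the pointee identifier, the character, and the string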
361-
std::string gdb_apit::get_memory(const std::string &expr)
363+
gdb_apit::pointer_valuet gdb_apit::get_memory(const std::string &expr)
362364
{
363365
PRECONDITION(gdb_state == gdb_statet::STOPPED);
364366

365-
std::string mem;
366-
367-
// regex matching a hex memory address followed by an optional identifier in
368-
// angle brackets (e.g., `0x601060 <x>`)
369-
std::regex regex(R"(^(0x[1-9a-f][0-9a-f]*)( <.*>)?)");
367+
std::string value = eval_expr(expr);
370368

371-
const std::string value = eval_expr(expr);
369+
std::regex regex(
370+
r_hex_addr + r_opt(' ' + r_id) + r_opt(' ' + r_or(r_char, r_string)));
372371

373372
std::smatch result;
374-
if(regex_match(value, result, regex))
375-
{
376-
// return hex address only
377-
return result[1];
378-
}
379-
else
373+
const bool b = regex_match(value, result, regex);
374+
CHECK_RETURN(b);
375+
376+
optionalt<std::string> opt_string;
377+
const std::string string = result[4];
378+
379+
if(!string.empty())
380380
{
381-
throw gdb_interaction_exceptiont(
382-
"value `" + value +
383-
"` is not a memory address or has unrecognised format");
381+
const std::size_t len = string.length();
382+
383+
INVARIANT(len >= 4, "");
384+
INVARIANT(string[0] == '\\', "");
385+
INVARIANT(string[1] == '"', "");
386+
INVARIANT(string[len - 2] == '\\', "");
387+
INVARIANT(string[len - 1] == '"', "");
388+
389+
opt_string = string.substr(2, len - 4);
384390
}
385391

386-
UNREACHABLE;
392+
return pointer_valuet(result[1], result[2], result[3], opt_string);
387393
}
388394

389395
/// Get value of the given value expression
@@ -398,25 +404,15 @@ std::string gdb_apit::get_value(const std::string &expr)
398404

399405
const std::string value = eval_expr(expr);
400406

407+
// Get char value
401408
{
402-
// get string from char pointer
403-
const std::regex regex(R"(0x[1-9a-f][0-9a-f]* \\"(.*)\\")");
409+
// matches e.g. 99 'c' and extracts c
410+
std::regex regex(R"([^ ]+ '([^']+)')");
404411

405412
std::smatch result;
406-
if(regex_match(value, result, regex))
407-
{
408-
return result[1];
409-
}
410-
}
413+
const bool b = regex_match(value, result, regex);
411414

412-
// this case will go away eventually, once client code has been refactored to
413-
// use get_memory() instead
414-
{
415-
// get void pointer address
416-
const std::regex regex(R"(0x[1-9a-f][0-9a-f]*)");
417-
418-
std::smatch result;
419-
if(regex_match(value, result, regex))
415+
if(b)
420416
{
421417
return result[1];
422418
}
@@ -500,4 +496,15 @@ void gdb_apit::check_command_accepted()
500496
CHECK_RETURN(was_accepted);
501497
}
502498

499+
/// Make a regex fragment optional.
/// \param regex the regex fragment to wrap
/// \return `regex` enclosed in a non-capturing group followed by `?`, i.e.
///   `(?:` + regex + `)?`; capture groups inside `regex` are left as they are
std::string gdb_apit::r_opt(const std::string &regex)
500+
{
501+
return R"((?:)" + regex + R"()?)";
502+
}
503+
504+
/// Build a regex fragment matching either of two alternatives.
/// \param regex_left the first alternative
/// \param regex_right the second alternative
/// \return both alternatives joined by `|` inside a non-capturing group, i.e.
///   `(?:` + regex_left + `|` + regex_right + `)`
std::string
505+
gdb_apit::r_or(const std::string &regex_left, const std::string &regex_right)
506+
{
507+
return R"((?:)" + regex_left + '|' + regex_right + R"())";
508+
}
509+
503510
#endif

src/memory-analyzer/gdb_api.h

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,31 @@ class gdb_apit
4141
explicit gdb_apit(const char *binary, const bool log = false);
4242
~gdb_apit();
4343

44+
/// Description of a pointer and the data it points to, as returned by
/// get_memory(). All members are const, so a value is fixed once constructed.
struct pointer_valuet
45+
{
46+
pointer_valuet(
47+
const std::string &address = "",
48+
const std::string &pointee = "",
49+
const std::string &character = "",
50+
const optionalt<std::string> &string = nullopt)
51+
: address(address), pointee(pointee), character(character), string(string)
52+
{
53+
}
54+
55+
// memory address in hex format, e.g. 0x601040
const std::string address;
56+
// identifier gdb printed in angle brackets after the address, without the
// brackets; empty if gdb printed none
const std::string pointee;
57+
// octal-encoded character gdb printed after the address (e.g. \\003); empty
// if gdb printed none
const std::string character;
58+
// string gdb printed after the address, with the enclosing \" delimiters
// removed; unset if gdb printed no string
const optionalt<std::string> string;
59+
};
60+
4461
void create_gdb_process();
4562
void terminate_gdb_process();
4663

4764
bool run_gdb_to_breakpoint(const std::string &breakpoint);
4865
void run_gdb_from_core(const std::string &corefile);
4966

5067
std::string get_value(const std::string &expr);
51-
std::string get_memory(const std::string &expr);
68+
pointer_valuet get_memory(const std::string &expr);
5269

5370
const commandst &get_command_log();
5471

@@ -86,6 +103,27 @@ class gdb_apit
86103
bool most_recent_line_has_tag(const std::string &tag);
87104
bool was_command_accepted();
88105
void check_command_accepted();
106+
107+
static std::string r_opt(const std::string &regex);
108+
109+
static std::string
110+
r_or(const std::string &regex_left, const std::string &regex_right);
111+
112+
// regex group for hex memory address (part of the output of gdb when printing
113+
// a pointer), matches e.g. 0x601040 and extracts 0x601040
114+
const std::string r_hex_addr = R"((0x(?:0|[1-9a-f][0-9a-f]*)))";
115+
116+
// regex group for identifier (optional part of the output of gdb when
117+
// printing a pointer), matches e.g. <abc> and extracts abc
118+
const std::string r_id = R"(<([^<>]+)>)";
119+
120+
// regex group for octal encoded char (optional part of the output of gdb when
121+
// printing a pointer), matches e.g. \"\\003\" and extracts \\003
122+
const std::string r_char = R"(\\"(\\\\[0-7]{3})\\")";
123+
124+
// regex group for string (optional part of the output of gdb when printing a
125+
// pointer), matches e.g. \"abc\" and extracts \"abc\"
126+
const std::string r_string = R"((\\".*\\"))";
89127
};
90128

91129
class gdb_interaction_exceptiont : public cprover_exception_baset

unit/memory-analyzer/gdb_api.cpp

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,14 @@ void compile_test_file()
4545

4646
/// Test-only subclass of gdb_apit. It befriends gdb_api_internals_test() and
/// re-declares r_hex_addr as public so that the test case can build a
/// std::regex from it to check the addresses returned by get_memory().
class gdb_api_testt : public gdb_apit
4747
{
48+
public:
4849
explicit gdb_api_testt(const char *binary) : gdb_apit(binary)
4950
{
5051
}
5152

5253
friend void gdb_api_internals_test();
54+
55+
// presumably r_hex_addr is not public in gdb_apit; this makes it accessible
// as gdb_api.r_hex_addr in the tests
using gdb_apit::r_hex_addr;
5356
};
5457

5558
void gdb_api_internals_test()
@@ -151,7 +154,10 @@ TEST_CASE("gdb api test", "[core][memory-analyzer]")
151154
}
152155
}
153156

154-
gdb_apit gdb_api("test");
157+
gdb_api_testt gdb_api("test");
158+
159+
std::regex hex_addr(gdb_api.r_hex_addr);
160+
155161
gdb_api.create_gdb_process();
156162

157163
SECTION("breakpoint is hit")
@@ -172,24 +178,73 @@ TEST_CASE("gdb api test", "[core][memory-analyzer]")
172178
gdb_api.run_gdb_to_breakpoint("checkpoint3"), gdb_interaction_exceptiont);
173179
}
174180

175-
SECTION("query memory")
181+
SECTION("query variables, primitive types")
176182
{
177183
const bool r = gdb_api.run_gdb_to_breakpoint("checkpoint");
178184
REQUIRE(r);
179185

180186
REQUIRE(gdb_api.get_value("x") == "8");
181-
REQUIRE(gdb_api.get_value("s") == "abc");
187+
REQUIRE(gdb_api.get_value("y") == "2.5");
188+
REQUIRE(gdb_api.get_value("z") == "c");
189+
}
190+
191+
SECTION("query pointers")
192+
{
193+
const bool r = gdb_api.run_gdb_to_breakpoint("checkpoint");
194+
REQUIRE(r);
195+
196+
{
197+
auto value = gdb_api.get_memory("s");
198+
REQUIRE(std::regex_match(value.address, hex_addr));
199+
REQUIRE(value.pointee.empty());
200+
REQUIRE(value.character.empty());
201+
REQUIRE(*value.string == "abc");
202+
}
203+
204+
{
205+
auto value = gdb_api.get_memory("p");
206+
REQUIRE(std::regex_match(value.address, hex_addr));
207+
REQUIRE(value.pointee == "x");
208+
REQUIRE(value.character.empty());
209+
REQUIRE(!value.string);
210+
}
211+
212+
{
213+
auto value = gdb_api.get_memory("vp");
214+
REQUIRE(std::regex_match(value.address, hex_addr));
215+
REQUIRE(value.pointee == "x");
216+
REQUIRE(value.character.empty());
217+
REQUIRE(!value.string);
218+
}
182219

183-
const std::regex regex(R"(0x[1-9a-f][0-9a-f]*)");
220+
{
221+
auto value = gdb_api.get_memory("np");
222+
REQUIRE(value.address == "0x0");
223+
REQUIRE(value.pointee.empty());
224+
REQUIRE(value.character.empty());
225+
REQUIRE(!value.string);
226+
}
184227

185228
{
186-
std::string address = gdb_api.get_memory("p");
187-
REQUIRE(std::regex_match(address, regex));
229+
auto value = gdb_api.get_memory("vp_string");
230+
REQUIRE(std::regex_match(value.address, hex_addr));
231+
REQUIRE(value.pointee.empty());
232+
REQUIRE(value.character.empty());
233+
REQUIRE(!value.string);
188234
}
235+
}
236+
237+
SECTION("query expressions")
238+
{
239+
const bool r = gdb_api.run_gdb_to_breakpoint("checkpoint");
240+
REQUIRE(r);
189241

190242
{
191-
std::string address = gdb_api.get_memory("vp");
192-
REQUIRE(std::regex_match(address, regex));
243+
auto value = gdb_api.get_memory("&x");
244+
REQUIRE(std::regex_match(value.address, hex_addr));
245+
REQUIRE(value.pointee == "x");
246+
REQUIRE(value.character.empty());
247+
REQUIRE(!value.string);
193248
}
194249
}
195250

unit/memory-analyzer/test.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
int x;
2+
float y;
3+
char z;
4+
25
char *s = "abc";
36
int *p;
47
void *vp;
8+
int *np = 0;
9+
void *vp_string;
510

611
void checkpoint()
712
{
@@ -18,8 +23,12 @@ void func()
1823
int main()
1924
{
2025
x = 8;
26+
y = 2.5;
27+
z = 'c';
28+
2129
p = &x;
2230
vp = (void *)&x;
31+
vp_string = s;
2332

2433
checkpoint();
2534

0 commit comments

Comments
 (0)