Skip to content

Commit 34643aa

Browse files
author
Daniel Kroening
committed
smt2 parser: track parenthesis level
The tokenizer is changed to split reading tokens and consuming them; the parser counts the parentheses.
1 parent f0c59b5 commit 34643aa

File tree

4 files changed

+86
-21
lines changed

4 files changed

+86
-21
lines changed

src/solvers/smt2/smt2_parser.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,25 @@ Author: Daniel Kroening, [email protected]
1212

1313
#include <util/arith_tools.h>
1414

15+
smt2_parsert::tokent smt2_parsert::next_token()
16+
{
17+
const auto token = smt2_tokenizert::next_token();
18+
19+
if(token == OPEN)
20+
parenthesis_level++;
21+
else if(token == CLOSE)
22+
parenthesis_level--;
23+
24+
return token;
25+
}
26+
27+
void smt2_parsert::skip_to_end_of_list()
28+
{
29+
while(parenthesis_level > 0)
30+
if(next_token() == END_OF_FILE)
31+
return;
32+
}
33+
1534
void smt2_parsert::command_sequence()
1635
{
1736
exit=false;

src/solvers/smt2/smt2_parser.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ Author: Daniel Kroening, [email protected]
1818
class smt2_parsert:public smt2_tokenizert
1919
{
2020
public:
21-
explicit smt2_parsert(std::istream &_in):
22-
smt2_tokenizert(_in),
23-
exit(false)
21+
explicit smt2_parsert(std::istream &_in)
22+
: smt2_tokenizert(_in), exit(false), parenthesis_level(0)
2423
{
2524
}
2625

@@ -43,8 +42,25 @@ class smt2_parsert:public smt2_tokenizert
4342
using id_mapt=std::map<irep_idt, idt>;
4443
id_mapt id_map;
4544

45+
struct smt2_format
46+
{
47+
explicit smt2_format(const typet &_type) : type(_type)
48+
{
49+
}
50+
51+
const typet type;
52+
};
53+
54+
/// This skips tokens until all bracketed expressions are closed
55+
void skip_to_end_of_list();
56+
4657
protected:
4758
bool exit;
59+
60+
// we override next_token to track the parenthesis level
61+
std::size_t parenthesis_level;
62+
tokent next_token() override;
63+
4864
void command_sequence();
4965

5066
virtual void command(const std::string &);

src/solvers/smt2/smt2_tokenizer.cpp

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,15 @@ smt2_tokenizert::tokent smt2_tokenizert::get_string_literal()
198198
smt2_tokenizert::tokent smt2_tokenizert::next_token()
199199
{
200200
if(peeked)
201-
return peeked=false, token;
201+
peeked = false;
202+
else
203+
get_token_from_stream();
204+
205+
return token;
206+
}
202207

208+
void smt2_tokenizert::get_token_from_stream()
209+
{
203210
char ch;
204211

205212
while(in->get(ch))
@@ -231,60 +238,76 @@ smt2_tokenizert::tokent smt2_tokenizert::next_token()
231238

232239
case '(':
233240
// produce sub-expression
234-
return token=OPEN;
241+
token = OPEN;
242+
return;
235243

236244
case ')':
237245
// done with sub-expression
238-
return token=CLOSE;
246+
token = CLOSE;
247+
return;
239248

240249
case '|': // quoted symbol
241-
return token=get_quoted_symbol();
250+
token = get_quoted_symbol();
251+
return;
242252

243253
case '"': // string literal
244-
return token=get_string_literal();
254+
token = get_string_literal();
255+
return;
245256

246257
case ':': // keyword
247-
return token=get_simple_symbol();
258+
token = get_simple_symbol();
259+
return;
248260

249261
case '#':
250262
if(in->get(ch))
251263
{
252264
if(ch=='b')
253-
return token=get_bin_numeral();
265+
{
266+
token = get_bin_numeral();
267+
return;
268+
}
254269
else if(ch=='x')
255-
return token=get_hex_numeral();
270+
{
271+
token = get_hex_numeral();
272+
return;
273+
}
256274
else
257275
{
258276
error() << "unknown numeral token" << eom;
259-
return token=ERROR;
277+
token=ERROR;
278+
return;
260279
}
261280
}
262281
else
263282
{
264283
error() << "unexpected EOF in numeral token" << eom;
265-
return token=ERROR;
284+
token=ERROR;
285+
return;
266286
}
267287
break;
268288

269289
default: // likely a simple symbol or a numeral
270290
if(isdigit(ch))
271291
{
272292
in->unget();
273-
return token=get_decimal_numeral();
293+
token = get_decimal_numeral();
294+
return;
274295
}
275296
else if(is_simple_symbol_character(ch))
276297
{
277298
in->unget();
278-
return token=get_simple_symbol();
299+
token = get_simple_symbol();
300+
return;
279301
}
280302
else
281303
{
282304
// illegal character, error
283305
error() << "unexpected character `" << ch << '\'' << eom;
284-
return token=ERROR;
306+
token=ERROR;
307+
return;
285308
}
286309
}
287310
}
288311

289-
return token=END_OF_FILE;
312+
token = END_OF_FILE;
290313
}

src/solvers/smt2/smt2_tokenizer.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ Author: Daniel Kroening, [email protected]
1616
class smt2_tokenizert:public parsert
1717
{
1818
public:
19-
explicit smt2_tokenizert(std::istream &_in):
20-
ok(true), peeked(false), token(NONE)
19+
explicit smt2_tokenizert(std::istream &_in)
20+
: ok(true), peeked(false), token(NONE)
2121
{
2222
in=&_in;
2323
line_no=1;
@@ -35,15 +35,15 @@ class smt2_tokenizert:public parsert
3535
NUMERAL, SYMBOL, OPEN, CLOSE };
3636
tokent token;
3737

38-
tokent next_token();
38+
virtual tokent next_token();
3939

4040
tokent peek()
4141
{
4242
if(peeked)
4343
return token;
4444
else
4545
{
46-
next_token();
46+
get_token_from_stream();
4747
peeked=true;
4848
return token;
4949
}
@@ -56,6 +56,10 @@ class smt2_tokenizert:public parsert
5656
return messaget::error();
5757
}
5858

59+
/// skip any tokens until all parentheses are closed
60+
/// or the end of file is reached
61+
void skip_to_end_of_list();
62+
5963
private:
6064
tokent get_decimal_numeral();
6165
tokent get_hex_numeral();
@@ -64,6 +68,9 @@ class smt2_tokenizert:public parsert
6468
tokent get_quoted_symbol();
6569
tokent get_string_literal();
6670
static bool is_simple_symbol_character(char);
71+
72+
/// read a token from the input stream and store it in 'token'
73+
void get_token_from_stream();
6774
};
6875

6976
#endif // CPROVER_SOLVERS_SMT2_SMT2_PARSER_H

0 commit comments

Comments
 (0)