Skip to content

Commit 4c78d43

Browse files
committed
feat: show location info in parse errors
- add a consistent `Display` implementation for `Location`
- show the (optional) token location in parse errors
- keep `Location` with `line == 0` as a no-op (nothing is displayed)
1 parent 4c3a4ad commit 4c78d43

File tree

2 files changed

+63
-49
lines changed

2 files changed

+63
-49
lines changed

src/parser/mod.rs

+38-31
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ pub enum ParserError {
4444

4545
// Use `Parser::expected` instead, if possible
4646
macro_rules! parser_err {
47-
($MSG:expr) => {
48-
Err(ParserError::ParserError($MSG.to_string()))
47+
($MSG:expr, $loc:expr) => {
48+
Err(ParserError::ParserError(format!("{}{}", $MSG, $loc)))
4949
};
5050
}
5151

@@ -724,6 +724,7 @@ impl<'a> Parser<'a> {
724724
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
725725
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
726726
// expression that should parse as the column name "date".
727+
let loc = self.peek_token().location;
727728
return_ok_if_some!(self.maybe_parse(|parser| {
728729
match parser.parse_data_type()? {
729730
DataType::Interval => parser.parse_interval(),
@@ -734,7 +735,7 @@ impl<'a> Parser<'a> {
734735
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
735736
// an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
736737
// `type 'string'` syntax for the custom data types at all.
737-
DataType::Custom(..) => parser_err!("dummy"),
738+
DataType::Custom(..) => parser_err!("dummy", loc),
738739
data_type => Ok(Expr::TypedString {
739740
data_type,
740741
value: parser.parse_literal_string()?,
@@ -909,7 +910,7 @@ impl<'a> Parser<'a> {
909910
let tok = self.next_token();
910911
let key = match tok.token {
911912
Token::Word(word) => word.to_ident(),
912-
_ => return parser_err!(format!("Expected identifier, found: {tok}")),
913+
_ => return parser_err!(format!("Expected identifier, found: {tok}"), tok.location),
913914
};
914915
Ok(Expr::CompositeAccess {
915916
expr: Box::new(expr),
@@ -1220,7 +1221,7 @@ impl<'a> Parser<'a> {
12201221
r#in: Box::new(from),
12211222
})
12221223
} else {
1223-
parser_err!("Position function must include IN keyword".to_string())
1224+
parser_err!("Position function must include IN keyword".to_string(), self.peek_token().location)
12241225
}
12251226
}
12261227

@@ -1884,7 +1885,7 @@ impl<'a> Parser<'a> {
18841885
}
18851886
}
18861887
// Can only happen if `get_next_precedence` got out of sync with this function
1887-
_ => parser_err!(format!("No infix parser for token {:?}", tok.token)),
1888+
_ => parser_err!(format!("No infix parser for token {:?}", tok.token), tok.location),
18881889
}
18891890
} else if Token::DoubleColon == tok {
18901891
self.parse_pg_cast(expr)
@@ -1935,7 +1936,7 @@ impl<'a> Parser<'a> {
19351936
})
19361937
} else {
19371938
// Can only happen if `get_next_precedence` got out of sync with this function
1938-
parser_err!(format!("No infix parser for token {:?}", tok.token))
1939+
parser_err!(format!("No infix parser for token {:?}", tok.token), tok.location)
19391940
}
19401941
}
19411942

@@ -2213,7 +2214,7 @@ impl<'a> Parser<'a> {
22132214

22142215
/// Report unexpected token
22152216
pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
2216-
parser_err!(format!("Expected {expected}, found: {found}"))
2217+
parser_err!(format!("Expected {expected}, found: {found}"), found.location)
22172218
}
22182219

22192220
/// Look for an expected keyword and consume it if it exists
@@ -2378,13 +2379,14 @@ impl<'a> Parser<'a> {
23782379
/// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed
23792380
/// and results in a `ParserError` if both `ALL` and `DISTINCT` are found.
23802381
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
2382+
let loc = self.peek_token().location;
23812383
let all = self.parse_keyword(Keyword::ALL);
23822384
let distinct = self.parse_keyword(Keyword::DISTINCT);
23832385
if !distinct {
23842386
return Ok(None);
23852387
}
23862388
if all {
2387-
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
2389+
return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc);
23882390
}
23892391
let on = self.parse_keyword(Keyword::ON);
23902392
if !on {
@@ -2985,75 +2987,76 @@ impl<'a> Parser<'a> {
29852987
let mut user = vec![];
29862988
let mut admin = vec![];
29872989

2988-
while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) {
2990+
while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) {
2991+
let loc = self.tokens.get(self.index - 1).map_or(Location { line: 0, column: 0 }, |t| t.location);
29892992
match keyword {
29902993
Keyword::AUTHORIZATION => {
29912994
if authorization_owner.is_some() {
2992-
parser_err!("Found multiple AUTHORIZATION")
2995+
parser_err!("Found multiple AUTHORIZATION", loc)
29932996
} else {
29942997
authorization_owner = Some(self.parse_object_name()?);
29952998
Ok(())
29962999
}
29973000
}
29983001
Keyword::LOGIN | Keyword::NOLOGIN => {
29993002
if login.is_some() {
3000-
parser_err!("Found multiple LOGIN or NOLOGIN")
3003+
parser_err!("Found multiple LOGIN or NOLOGIN", loc)
30013004
} else {
30023005
login = Some(keyword == Keyword::LOGIN);
30033006
Ok(())
30043007
}
30053008
}
30063009
Keyword::INHERIT | Keyword::NOINHERIT => {
30073010
if inherit.is_some() {
3008-
parser_err!("Found multiple INHERIT or NOINHERIT")
3011+
parser_err!("Found multiple INHERIT or NOINHERIT", loc)
30093012
} else {
30103013
inherit = Some(keyword == Keyword::INHERIT);
30113014
Ok(())
30123015
}
30133016
}
30143017
Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => {
30153018
if bypassrls.is_some() {
3016-
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS")
3019+
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc)
30173020
} else {
30183021
bypassrls = Some(keyword == Keyword::BYPASSRLS);
30193022
Ok(())
30203023
}
30213024
}
30223025
Keyword::CREATEDB | Keyword::NOCREATEDB => {
30233026
if create_db.is_some() {
3024-
parser_err!("Found multiple CREATEDB or NOCREATEDB")
3027+
parser_err!("Found multiple CREATEDB or NOCREATEDB", loc)
30253028
} else {
30263029
create_db = Some(keyword == Keyword::CREATEDB);
30273030
Ok(())
30283031
}
30293032
}
30303033
Keyword::CREATEROLE | Keyword::NOCREATEROLE => {
30313034
if create_role.is_some() {
3032-
parser_err!("Found multiple CREATEROLE or NOCREATEROLE")
3035+
parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc)
30333036
} else {
30343037
create_role = Some(keyword == Keyword::CREATEROLE);
30353038
Ok(())
30363039
}
30373040
}
30383041
Keyword::SUPERUSER | Keyword::NOSUPERUSER => {
30393042
if superuser.is_some() {
3040-
parser_err!("Found multiple SUPERUSER or NOSUPERUSER")
3043+
parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc)
30413044
} else {
30423045
superuser = Some(keyword == Keyword::SUPERUSER);
30433046
Ok(())
30443047
}
30453048
}
30463049
Keyword::REPLICATION | Keyword::NOREPLICATION => {
30473050
if replication.is_some() {
3048-
parser_err!("Found multiple REPLICATION or NOREPLICATION")
3051+
parser_err!("Found multiple REPLICATION or NOREPLICATION", loc)
30493052
} else {
30503053
replication = Some(keyword == Keyword::REPLICATION);
30513054
Ok(())
30523055
}
30533056
}
30543057
Keyword::PASSWORD => {
30553058
if password.is_some() {
3056-
parser_err!("Found multiple PASSWORD")
3059+
parser_err!("Found multiple PASSWORD", loc)
30573060
} else {
30583061
password = if self.parse_keyword(Keyword::NULL) {
30593062
Some(Password::NullPassword)
@@ -3066,7 +3069,7 @@ impl<'a> Parser<'a> {
30663069
Keyword::CONNECTION => {
30673070
self.expect_keyword(Keyword::LIMIT)?;
30683071
if connection_limit.is_some() {
3069-
parser_err!("Found multiple CONNECTION LIMIT")
3072+
parser_err!("Found multiple CONNECTION LIMIT", loc)
30703073
} else {
30713074
connection_limit = Some(Expr::Value(self.parse_number_value()?));
30723075
Ok(())
@@ -3075,7 +3078,7 @@ impl<'a> Parser<'a> {
30753078
Keyword::VALID => {
30763079
self.expect_keyword(Keyword::UNTIL)?;
30773080
if valid_until.is_some() {
3078-
parser_err!("Found multiple VALID UNTIL")
3081+
parser_err!("Found multiple VALID UNTIL", loc)
30793082
} else {
30803083
valid_until = Some(Expr::Value(self.parse_value()?));
30813084
Ok(())
@@ -3084,14 +3087,14 @@ impl<'a> Parser<'a> {
30843087
Keyword::IN => {
30853088
if self.parse_keyword(Keyword::ROLE) {
30863089
if !in_role.is_empty() {
3087-
parser_err!("Found multiple IN ROLE")
3090+
parser_err!("Found multiple IN ROLE", loc)
30883091
} else {
30893092
in_role = self.parse_comma_separated(Parser::parse_identifier)?;
30903093
Ok(())
30913094
}
30923095
} else if self.parse_keyword(Keyword::GROUP) {
30933096
if !in_group.is_empty() {
3094-
parser_err!("Found multiple IN GROUP")
3097+
parser_err!("Found multiple IN GROUP", loc)
30953098
} else {
30963099
in_group = self.parse_comma_separated(Parser::parse_identifier)?;
30973100
Ok(())
@@ -3102,23 +3105,23 @@ impl<'a> Parser<'a> {
31023105
}
31033106
Keyword::ROLE => {
31043107
if !role.is_empty() {
3105-
parser_err!("Found multiple ROLE")
3108+
parser_err!("Found multiple ROLE", loc)
31063109
} else {
31073110
role = self.parse_comma_separated(Parser::parse_identifier)?;
31083111
Ok(())
31093112
}
31103113
}
31113114
Keyword::USER => {
31123115
if !user.is_empty() {
3113-
parser_err!("Found multiple USER")
3116+
parser_err!("Found multiple USER", loc)
31143117
} else {
31153118
user = self.parse_comma_separated(Parser::parse_identifier)?;
31163119
Ok(())
31173120
}
31183121
}
31193122
Keyword::ADMIN => {
31203123
if !admin.is_empty() {
3121-
parser_err!("Found multiple ADMIN")
3124+
parser_err!("Found multiple ADMIN", loc)
31223125
} else {
31233126
admin = self.parse_comma_separated(Parser::parse_identifier)?;
31243127
Ok(())
@@ -3181,14 +3184,16 @@ impl<'a> Parser<'a> {
31813184
// specifying multiple objects to delete in a single statement
31823185
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
31833186
let names = self.parse_comma_separated(Parser::parse_object_name)?;
3187+
3188+
let loc = self.peek_token().location;
31843189
let cascade = self.parse_keyword(Keyword::CASCADE);
31853190
let restrict = self.parse_keyword(Keyword::RESTRICT);
31863191
let purge = self.parse_keyword(Keyword::PURGE);
31873192
if cascade && restrict {
3188-
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP");
3193+
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc);
31893194
}
31903195
if object_type == ObjectType::Role && (cascade || restrict || purge) {
3191-
return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE");
3196+
return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", loc);
31923197
}
31933198
Ok(Statement::Drop {
31943199
object_type,
@@ -4446,7 +4451,8 @@ impl<'a> Parser<'a> {
44464451
fn parse_literal_char(&mut self) -> Result<char, ParserError> {
44474452
let s = self.parse_literal_string()?;
44484453
if s.len() != 1 {
4449-
return parser_err!(format!("Expect a char, found {s:?}"));
4454+
let loc = self.tokens.get(self.index - 1).map_or(Location { line: 0, column: 0 }, |t| t.location);
4455+
return parser_err!(format!("Expect a char, found {s:?}"), loc);
44504456
}
44514457
Ok(s.chars().next().unwrap())
44524458
}
@@ -4525,7 +4531,7 @@ impl<'a> Parser<'a> {
45254531
// (i.e., it returns the input string).
45264532
Token::Number(ref n, l) => match n.parse() {
45274533
Ok(n) => Ok(Value::Number(n, l)),
4528-
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")),
4534+
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location),
45294535
},
45304536
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
45314537
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
@@ -6465,10 +6471,11 @@ impl<'a> Parser<'a> {
64656471
.parse_keywords(&[Keyword::GRANTED, Keyword::BY])
64666472
.then(|| self.parse_identifier().unwrap());
64676473

6474+
let loc = self.peek_token().location;
64686475
let cascade = self.parse_keyword(Keyword::CASCADE);
64696476
let restrict = self.parse_keyword(Keyword::RESTRICT);
64706477
if cascade && restrict {
6471-
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE");
6478+
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc);
64726479
}
64736480

64746481
Ok(Statement::Revoke {

src/tokenizer.rs

+25-18
Original file line numberDiff line numberDiff line change
@@ -350,14 +350,28 @@ impl fmt::Display for Whitespace {
350350
}
351351

352352
/// Location in input string
353-
#[derive(Debug, Eq, PartialEq, Clone)]
353+
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
354354
pub struct Location {
355355
/// Line number, starting from 1
356356
pub line: u64,
357357
/// Line column, starting from 1
358358
pub column: u64,
359359
}
360360

361+
impl fmt::Display for Location {
362+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
363+
if self.line == 0 {
364+
return Ok(());
365+
}
366+
write!(
367+
f,
368+
// TODO: use standard compiler location syntax (<path>:<line>:<col>)
369+
" at Line: {}, Column {}",
370+
self.line, self.column,
371+
)
372+
}
373+
}
374+
361375
/// A [Token] with [Location] attached to it
362376
#[derive(Debug, Eq, PartialEq, Clone)]
363377
pub struct TokenWithLocation {
@@ -400,17 +414,12 @@ impl fmt::Display for TokenWithLocation {
400414
#[derive(Debug, PartialEq, Eq)]
401415
pub struct TokenizerError {
402416
pub message: String,
403-
pub line: u64,
404-
pub col: u64,
417+
pub location: Location,
405418
}
406419

407420
impl fmt::Display for TokenizerError {
408421
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
409-
write!(
410-
f,
411-
"{} at Line: {}, Column {}",
412-
self.message, self.line, self.col
413-
)
422+
write!(f, "{}{}", self.message, self.location,)
414423
}
415424
}
416425

@@ -1122,8 +1131,7 @@ impl<'a> Tokenizer<'a> {
11221131
) -> Result<R, TokenizerError> {
11231132
Err(TokenizerError {
11241133
message: message.into(),
1125-
col: loc.column,
1126-
line: loc.line,
1134+
location: loc,
11271135
})
11281136
}
11291137

@@ -1368,8 +1376,7 @@ mod tests {
13681376
fn tokenizer_error_impl() {
13691377
let err = TokenizerError {
13701378
message: "test".into(),
1371-
line: 1,
1372-
col: 1,
1379+
location: Location { line: 1, column: 1 },
13731380
};
13741381
#[cfg(feature = "std")]
13751382
{
@@ -1694,8 +1701,7 @@ mod tests {
16941701
tokenizer.tokenize(),
16951702
Err(TokenizerError {
16961703
message: "Unterminated string literal".to_string(),
1697-
line: 1,
1698-
col: 8
1704+
location: Location { line: 1, column: 8 },
16991705
})
17001706
);
17011707
}
@@ -1710,8 +1716,10 @@ mod tests {
17101716
tokenizer.tokenize(),
17111717
Err(TokenizerError {
17121718
message: "Unterminated string literal".to_string(),
1713-
line: 1,
1714-
col: 35
1719+
location: Location {
1720+
line: 1,
1721+
column: 35
1722+
}
17151723
})
17161724
);
17171725
}
@@ -1873,8 +1881,7 @@ mod tests {
18731881
tokenizer.tokenize(),
18741882
Err(TokenizerError {
18751883
message: "Expected close delimiter '\"' before EOF.".to_string(),
1876-
line: 1,
1877-
col: 1
1884+
location: Location { line: 1, column: 1 },
18781885
})
18791886
);
18801887
}

0 commit comments

Comments (0)