Skip to content

Commit b02c3f8

Browse files
authored
feat: show location info in parse errors (#958)
1 parent 4c3a4ad commit b02c3f8

File tree

4 files changed

+116
-71
lines changed

4 files changed

+116
-71
lines changed

src/parser/mod.rs

+66-35
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ pub enum ParserError {
4444

4545
// Use `Parser::expected` instead, if possible
4646
macro_rules! parser_err {
47-
($MSG:expr) => {
48-
Err(ParserError::ParserError($MSG.to_string()))
47+
($MSG:expr, $loc:expr) => {
48+
Err(ParserError::ParserError(format!("{}{}", $MSG, $loc)))
4949
};
5050
}
5151

@@ -368,8 +368,8 @@ impl<'a> Parser<'a> {
368368
debug!("Parsing sql '{}'...", sql);
369369
let tokens = Tokenizer::new(self.dialect, sql)
370370
.with_unescape(self.options.unescape)
371-
.tokenize()?;
372-
Ok(self.with_tokens(tokens))
371+
.tokenize_with_location()?;
372+
Ok(self.with_tokens_with_locations(tokens))
373373
}
374374

375375
/// Parse potentially multiple statements
@@ -724,6 +724,7 @@ impl<'a> Parser<'a> {
724724
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
725725
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
726726
// expression that should parse as the column name "date".
727+
let loc = self.peek_token().location;
727728
return_ok_if_some!(self.maybe_parse(|parser| {
728729
match parser.parse_data_type()? {
729730
DataType::Interval => parser.parse_interval(),
@@ -734,7 +735,7 @@ impl<'a> Parser<'a> {
734735
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
735736
// an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
736737
// `type 'string'` syntax for the custom data types at all.
737-
DataType::Custom(..) => parser_err!("dummy"),
738+
DataType::Custom(..) => parser_err!("dummy", loc),
738739
data_type => Ok(Expr::TypedString {
739740
data_type,
740741
value: parser.parse_literal_string()?,
@@ -909,7 +910,12 @@ impl<'a> Parser<'a> {
909910
let tok = self.next_token();
910911
let key = match tok.token {
911912
Token::Word(word) => word.to_ident(),
912-
_ => return parser_err!(format!("Expected identifier, found: {tok}")),
913+
_ => {
914+
return parser_err!(
915+
format!("Expected identifier, found: {tok}"),
916+
tok.location
917+
)
918+
}
913919
};
914920
Ok(Expr::CompositeAccess {
915921
expr: Box::new(expr),
@@ -1220,7 +1226,10 @@ impl<'a> Parser<'a> {
12201226
r#in: Box::new(from),
12211227
})
12221228
} else {
1223-
parser_err!("Position function must include IN keyword".to_string())
1229+
parser_err!(
1230+
"Position function must include IN keyword".to_string(),
1231+
self.peek_token().location
1232+
)
12241233
}
12251234
}
12261235

@@ -1884,7 +1893,10 @@ impl<'a> Parser<'a> {
18841893
}
18851894
}
18861895
// Can only happen if `get_next_precedence` got out of sync with this function
1887-
_ => parser_err!(format!("No infix parser for token {:?}", tok.token)),
1896+
_ => parser_err!(
1897+
format!("No infix parser for token {:?}", tok.token),
1898+
tok.location
1899+
),
18881900
}
18891901
} else if Token::DoubleColon == tok {
18901902
self.parse_pg_cast(expr)
@@ -1935,7 +1947,10 @@ impl<'a> Parser<'a> {
19351947
})
19361948
} else {
19371949
// Can only happen if `get_next_precedence` got out of sync with this function
1938-
parser_err!(format!("No infix parser for token {:?}", tok.token))
1950+
parser_err!(
1951+
format!("No infix parser for token {:?}", tok.token),
1952+
tok.location
1953+
)
19391954
}
19401955
}
19411956

@@ -2213,7 +2228,10 @@ impl<'a> Parser<'a> {
22132228

22142229
/// Report unexpected token
22152230
pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
2216-
parser_err!(format!("Expected {expected}, found: {found}"))
2231+
parser_err!(
2232+
format!("Expected {expected}, found: {found}"),
2233+
found.location
2234+
)
22172235
}
22182236

22192237
/// Look for an expected keyword and consume it if it exists
@@ -2378,13 +2396,14 @@ impl<'a> Parser<'a> {
23782396
/// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed
23792397
/// and results in a `ParserError` if both `ALL` and `DISTINCT` are found.
23802398
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
2399+
let loc = self.peek_token().location;
23812400
let all = self.parse_keyword(Keyword::ALL);
23822401
let distinct = self.parse_keyword(Keyword::DISTINCT);
23832402
if !distinct {
23842403
return Ok(None);
23852404
}
23862405
if all {
2387-
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
2406+
return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc);
23882407
}
23892408
let on = self.parse_keyword(Keyword::ON);
23902409
if !on {
@@ -2986,74 +3005,78 @@ impl<'a> Parser<'a> {
29863005
let mut admin = vec![];
29873006

29883007
while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) {
3008+
let loc = self
3009+
.tokens
3010+
.get(self.index - 1)
3011+
.map_or(Location { line: 0, column: 0 }, |t| t.location);
29893012
match keyword {
29903013
Keyword::AUTHORIZATION => {
29913014
if authorization_owner.is_some() {
2992-
parser_err!("Found multiple AUTHORIZATION")
3015+
parser_err!("Found multiple AUTHORIZATION", loc)
29933016
} else {
29943017
authorization_owner = Some(self.parse_object_name()?);
29953018
Ok(())
29963019
}
29973020
}
29983021
Keyword::LOGIN | Keyword::NOLOGIN => {
29993022
if login.is_some() {
3000-
parser_err!("Found multiple LOGIN or NOLOGIN")
3023+
parser_err!("Found multiple LOGIN or NOLOGIN", loc)
30013024
} else {
30023025
login = Some(keyword == Keyword::LOGIN);
30033026
Ok(())
30043027
}
30053028
}
30063029
Keyword::INHERIT | Keyword::NOINHERIT => {
30073030
if inherit.is_some() {
3008-
parser_err!("Found multiple INHERIT or NOINHERIT")
3031+
parser_err!("Found multiple INHERIT or NOINHERIT", loc)
30093032
} else {
30103033
inherit = Some(keyword == Keyword::INHERIT);
30113034
Ok(())
30123035
}
30133036
}
30143037
Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => {
30153038
if bypassrls.is_some() {
3016-
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS")
3039+
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc)
30173040
} else {
30183041
bypassrls = Some(keyword == Keyword::BYPASSRLS);
30193042
Ok(())
30203043
}
30213044
}
30223045
Keyword::CREATEDB | Keyword::NOCREATEDB => {
30233046
if create_db.is_some() {
3024-
parser_err!("Found multiple CREATEDB or NOCREATEDB")
3047+
parser_err!("Found multiple CREATEDB or NOCREATEDB", loc)
30253048
} else {
30263049
create_db = Some(keyword == Keyword::CREATEDB);
30273050
Ok(())
30283051
}
30293052
}
30303053
Keyword::CREATEROLE | Keyword::NOCREATEROLE => {
30313054
if create_role.is_some() {
3032-
parser_err!("Found multiple CREATEROLE or NOCREATEROLE")
3055+
parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc)
30333056
} else {
30343057
create_role = Some(keyword == Keyword::CREATEROLE);
30353058
Ok(())
30363059
}
30373060
}
30383061
Keyword::SUPERUSER | Keyword::NOSUPERUSER => {
30393062
if superuser.is_some() {
3040-
parser_err!("Found multiple SUPERUSER or NOSUPERUSER")
3063+
parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc)
30413064
} else {
30423065
superuser = Some(keyword == Keyword::SUPERUSER);
30433066
Ok(())
30443067
}
30453068
}
30463069
Keyword::REPLICATION | Keyword::NOREPLICATION => {
30473070
if replication.is_some() {
3048-
parser_err!("Found multiple REPLICATION or NOREPLICATION")
3071+
parser_err!("Found multiple REPLICATION or NOREPLICATION", loc)
30493072
} else {
30503073
replication = Some(keyword == Keyword::REPLICATION);
30513074
Ok(())
30523075
}
30533076
}
30543077
Keyword::PASSWORD => {
30553078
if password.is_some() {
3056-
parser_err!("Found multiple PASSWORD")
3079+
parser_err!("Found multiple PASSWORD", loc)
30573080
} else {
30583081
password = if self.parse_keyword(Keyword::NULL) {
30593082
Some(Password::NullPassword)
@@ -3066,7 +3089,7 @@ impl<'a> Parser<'a> {
30663089
Keyword::CONNECTION => {
30673090
self.expect_keyword(Keyword::LIMIT)?;
30683091
if connection_limit.is_some() {
3069-
parser_err!("Found multiple CONNECTION LIMIT")
3092+
parser_err!("Found multiple CONNECTION LIMIT", loc)
30703093
} else {
30713094
connection_limit = Some(Expr::Value(self.parse_number_value()?));
30723095
Ok(())
@@ -3075,7 +3098,7 @@ impl<'a> Parser<'a> {
30753098
Keyword::VALID => {
30763099
self.expect_keyword(Keyword::UNTIL)?;
30773100
if valid_until.is_some() {
3078-
parser_err!("Found multiple VALID UNTIL")
3101+
parser_err!("Found multiple VALID UNTIL", loc)
30793102
} else {
30803103
valid_until = Some(Expr::Value(self.parse_value()?));
30813104
Ok(())
@@ -3084,14 +3107,14 @@ impl<'a> Parser<'a> {
30843107
Keyword::IN => {
30853108
if self.parse_keyword(Keyword::ROLE) {
30863109
if !in_role.is_empty() {
3087-
parser_err!("Found multiple IN ROLE")
3110+
parser_err!("Found multiple IN ROLE", loc)
30883111
} else {
30893112
in_role = self.parse_comma_separated(Parser::parse_identifier)?;
30903113
Ok(())
30913114
}
30923115
} else if self.parse_keyword(Keyword::GROUP) {
30933116
if !in_group.is_empty() {
3094-
parser_err!("Found multiple IN GROUP")
3117+
parser_err!("Found multiple IN GROUP", loc)
30953118
} else {
30963119
in_group = self.parse_comma_separated(Parser::parse_identifier)?;
30973120
Ok(())
@@ -3102,23 +3125,23 @@ impl<'a> Parser<'a> {
31023125
}
31033126
Keyword::ROLE => {
31043127
if !role.is_empty() {
3105-
parser_err!("Found multiple ROLE")
3128+
parser_err!("Found multiple ROLE", loc)
31063129
} else {
31073130
role = self.parse_comma_separated(Parser::parse_identifier)?;
31083131
Ok(())
31093132
}
31103133
}
31113134
Keyword::USER => {
31123135
if !user.is_empty() {
3113-
parser_err!("Found multiple USER")
3136+
parser_err!("Found multiple USER", loc)
31143137
} else {
31153138
user = self.parse_comma_separated(Parser::parse_identifier)?;
31163139
Ok(())
31173140
}
31183141
}
31193142
Keyword::ADMIN => {
31203143
if !admin.is_empty() {
3121-
parser_err!("Found multiple ADMIN")
3144+
parser_err!("Found multiple ADMIN", loc)
31223145
} else {
31233146
admin = self.parse_comma_separated(Parser::parse_identifier)?;
31243147
Ok(())
@@ -3181,14 +3204,19 @@ impl<'a> Parser<'a> {
31813204
// specifying multiple objects to delete in a single statement
31823205
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
31833206
let names = self.parse_comma_separated(Parser::parse_object_name)?;
3207+
3208+
let loc = self.peek_token().location;
31843209
let cascade = self.parse_keyword(Keyword::CASCADE);
31853210
let restrict = self.parse_keyword(Keyword::RESTRICT);
31863211
let purge = self.parse_keyword(Keyword::PURGE);
31873212
if cascade && restrict {
3188-
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP");
3213+
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc);
31893214
}
31903215
if object_type == ObjectType::Role && (cascade || restrict || purge) {
3191-
return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE");
3216+
return parser_err!(
3217+
"Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE",
3218+
loc
3219+
);
31923220
}
31933221
Ok(Statement::Drop {
31943222
object_type,
@@ -4446,7 +4474,11 @@ impl<'a> Parser<'a> {
44464474
fn parse_literal_char(&mut self) -> Result<char, ParserError> {
44474475
let s = self.parse_literal_string()?;
44484476
if s.len() != 1 {
4449-
return parser_err!(format!("Expect a char, found {s:?}"));
4477+
let loc = self
4478+
.tokens
4479+
.get(self.index - 1)
4480+
.map_or(Location { line: 0, column: 0 }, |t| t.location);
4481+
return parser_err!(format!("Expect a char, found {s:?}"), loc);
44504482
}
44514483
Ok(s.chars().next().unwrap())
44524484
}
@@ -4525,7 +4557,7 @@ impl<'a> Parser<'a> {
45254557
// (i.e., it returns the input string).
45264558
Token::Number(ref n, l) => match n.parse() {
45274559
Ok(n) => Ok(Value::Number(n, l)),
4528-
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")),
4560+
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location),
45294561
},
45304562
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
45314563
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
@@ -6465,10 +6497,11 @@ impl<'a> Parser<'a> {
64656497
.parse_keywords(&[Keyword::GRANTED, Keyword::BY])
64666498
.then(|| self.parse_identifier().unwrap());
64676499

6500+
let loc = self.peek_token().location;
64686501
let cascade = self.parse_keyword(Keyword::CASCADE);
64696502
let restrict = self.parse_keyword(Keyword::RESTRICT);
64706503
if cascade && restrict {
6471-
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE");
6504+
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc);
64726505
}
64736506

64746507
Ok(Statement::Revoke {
@@ -7929,14 +7962,12 @@ mod tests {
79297962

79307963
#[test]
79317964
fn test_parser_error_loc() {
7932-
// TODO: Once we thread token locations through the parser, we should update this
7933-
// test to assert the locations of the referenced token
79347965
let sql = "SELECT this is a syntax error";
79357966
let ast = Parser::parse_sql(&GenericDialect, sql);
79367967
assert_eq!(
79377968
ast,
79387969
Err(ParserError::ParserError(
7939-
"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a"
7970+
"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16"
79407971
.to_string()
79417972
))
79427973
);

src/test_utils.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use core::fmt::Debug;
2828

2929
use crate::dialect::*;
3030
use crate::parser::{Parser, ParserError};
31+
use crate::tokenizer::Tokenizer;
3132
use crate::{ast::*, parser::ParserOptions};
3233

3334
/// Tests use the methods on this struct to invoke the parser on one or
@@ -82,8 +83,13 @@ impl TestedDialects {
8283
/// the result is the same for all tested dialects.
8384
pub fn parse_sql_statements(&self, sql: &str) -> Result<Vec<Statement>, ParserError> {
8485
self.one_of_identical_results(|dialect| {
86+
let mut tokenizer = Tokenizer::new(dialect, sql);
87+
if let Some(options) = &self.options {
88+
tokenizer = tokenizer.with_unescape(options.unescape);
89+
}
90+
let tokens = tokenizer.tokenize()?;
8591
self.new_parser(dialect)
86-
.try_with_sql(sql)?
92+
.with_tokens(tokens)
8793
.parse_statements()
8894
})
8995
// To fail the `ensure_multiple_dialects_are_tested` test:

0 commit comments

Comments
 (0)