
Commit 315ffc7

MartinNowak authored and serprex committed
feat: show location info in parse errors (apache#958)
1 parent 5cc9827 commit 315ffc7
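
What this changes for callers: parser errors now carry the line and column of the offending token. A minimal, hypothetical usage sketch (the before/after messages are copied from the updated test_parser_error_loc below; the sketch itself is not part of the commit):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Invalid SQL: `is` is not followed by NULL, TRUE|FALSE, or DISTINCT FROM.
        let result = Parser::parse_sql(&GenericDialect, "SELECT this is a syntax error");

        // The payload of the returned ParserError::ParserError changes from
        //   "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a"
        // to
        //   "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16"
        println!("{:?}", result.unwrap_err());
    }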

4 files changed (+116, -71 lines)

src/parser/mod.rs (+66, -35)
@@ -44,8 +44,8 @@ pub enum ParserError {
 
 // Use `Parser::expected` instead, if possible
 macro_rules! parser_err {
-    ($MSG:expr) => {
-        Err(ParserError::ParserError($MSG.to_string()))
+    ($MSG:expr, $loc:expr) => {
+        Err(ParserError::ParserError(format!("{}{}", $MSG, $loc)))
     };
 }
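
The macro change above is the core mechanism: every call site now passes a location, and the macro appends it to the message through Location's Display impl. That impl lives in src/tokenizer.rs (one of the four changed files, not shown in this excerpt), so the sketch below only illustrates the format implied by the updated test and the `Location { line: 0, column: 0 }` fallbacks later in this diff; it is not the crate's actual code:

    use std::fmt;

    // Hypothetical stand-in for sqlparser's tokenizer::Location, for illustration only.
    struct Location {
        line: u64,
        column: u64,
    }

    impl fmt::Display for Location {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            if self.line == 0 {
                // Unknown position (the { line: 0, column: 0 } fallback): append nothing,
                // leaving the error message unchanged.
                return Ok(());
            }
            write!(f, " at Line: {}, Column {}", self.line, self.column)
        }
    }

    fn main() {
        // parser_err! builds format!("{}{}", msg, loc), which concatenates the two:
        let msg = "Expected identifier, found: EOF";
        println!("{}{}", msg, Location { line: 1, column: 7 });
        // -> Expected identifier, found: EOF at Line: 1, Column 7
    }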

@@ -368,8 +368,8 @@ impl<'a> Parser<'a> {
         debug!("Parsing sql '{}'...", sql);
         let tokens = Tokenizer::new(self.dialect, sql)
             .with_unescape(self.options.unescape)
-            .tokenize()?;
-        Ok(self.with_tokens(tokens))
+            .tokenize_with_location()?;
+        Ok(self.with_tokens_with_locations(tokens))
     }
 
     /// Parse potentially multiple statements
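
Because try_with_sql now tokenizes with locations, anything that goes through the builder-style entry point picks up located errors automatically. A minimal sketch of that entry point, assuming the crate's public Parser and GenericDialect API (not code from this commit):

    use sqlparser::ast::Statement;
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::{Parser, ParserError};

    fn parse(sql: &str) -> Result<Vec<Statement>, ParserError> {
        let dialect = GenericDialect {};
        // try_with_sql() now calls tokenize_with_location(), so any error returned by
        // parse_statements() can report the offending token's line and column.
        Parser::new(&dialect).try_with_sql(sql)?.parse_statements()
    }
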
@@ -728,6 +728,7 @@ impl<'a> Parser<'a> {
         // Note also that naively `SELECT date` looks like a syntax error because the `date` type
         // name is not followed by a string literal, but in fact in PostgreSQL it is a valid
         // expression that should parse as the column name "date".
+        let loc = self.peek_token().location;
         return_ok_if_some!(self.maybe_parse(|parser| {
             match parser.parse_data_type()? {
                 DataType::Interval => parser.parse_interval(),
@@ -738,7 +739,7 @@ impl<'a> Parser<'a> {
                 // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
                 // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
                 // `type 'string'` syntax for the custom data types at all.
-                DataType::Custom(..) => parser_err!("dummy"),
+                DataType::Custom(..) => parser_err!("dummy", loc),
                 data_type => Ok(Expr::TypedString {
                     data_type,
                     value: parser.parse_literal_string()?,
@@ -913,7 +914,12 @@ impl<'a> Parser<'a> {
         let tok = self.next_token();
         let key = match tok.token {
             Token::Word(word) => word.to_ident(),
-            _ => return parser_err!(format!("Expected identifier, found: {tok}")),
+            _ => {
+                return parser_err!(
+                    format!("Expected identifier, found: {tok}"),
+                    tok.location
+                )
+            }
         };
         Ok(Expr::CompositeAccess {
             expr: Box::new(expr),
@@ -1224,7 +1230,10 @@ impl<'a> Parser<'a> {
                 r#in: Box::new(from),
             })
         } else {
-            parser_err!("Position function must include IN keyword".to_string())
+            parser_err!(
+                "Position function must include IN keyword".to_string(),
+                self.peek_token().location
+            )
         }
     }

@@ -1888,7 +1897,10 @@ impl<'a> Parser<'a> {
                     }
                 }
                 // Can only happen if `get_next_precedence` got out of sync with this function
-                _ => parser_err!(format!("No infix parser for token {:?}", tok.token)),
+                _ => parser_err!(
+                    format!("No infix parser for token {:?}", tok.token),
+                    tok.location
+                ),
             }
         } else if Token::DoubleColon == tok {
             self.parse_pg_cast(expr)
@@ -1939,7 +1951,10 @@ impl<'a> Parser<'a> {
             })
         } else {
             // Can only happen if `get_next_precedence` got out of sync with this function
-            parser_err!(format!("No infix parser for token {:?}", tok.token))
+            parser_err!(
+                format!("No infix parser for token {:?}", tok.token),
+                tok.location
+            )
         }
     }

@@ -2217,7 +2232,10 @@ impl<'a> Parser<'a> {
 
     /// Report unexpected token
     pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
-        parser_err!(format!("Expected {expected}, found: {found}"))
+        parser_err!(
+            format!("Expected {expected}, found: {found}"),
+            found.location
+        )
     }
 
     /// Look for an expected keyword and consume it if it exists
@@ -2382,13 +2400,14 @@ impl<'a> Parser<'a> {
     /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed
     /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found.
    pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
+        let loc = self.peek_token().location;
         let all = self.parse_keyword(Keyword::ALL);
         let distinct = self.parse_keyword(Keyword::DISTINCT);
         if !distinct {
             return Ok(None);
         }
         if all {
-            return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
+            return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc);
         }
         let on = self.parse_keyword(Keyword::ON);
         if !on {
@@ -2994,74 +3013,78 @@ impl<'a> Parser<'a> {
         let mut admin = vec![];
 
         while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) {
+            let loc = self
+                .tokens
+                .get(self.index - 1)
+                .map_or(Location { line: 0, column: 0 }, |t| t.location);
             match keyword {
                 Keyword::AUTHORIZATION => {
                     if authorization_owner.is_some() {
-                        parser_err!("Found multiple AUTHORIZATION")
+                        parser_err!("Found multiple AUTHORIZATION", loc)
                     } else {
                         authorization_owner = Some(self.parse_object_name()?);
                         Ok(())
                     }
                 }
                 Keyword::LOGIN | Keyword::NOLOGIN => {
                     if login.is_some() {
-                        parser_err!("Found multiple LOGIN or NOLOGIN")
+                        parser_err!("Found multiple LOGIN or NOLOGIN", loc)
                     } else {
                         login = Some(keyword == Keyword::LOGIN);
                         Ok(())
                     }
                 }
                 Keyword::INHERIT | Keyword::NOINHERIT => {
                     if inherit.is_some() {
-                        parser_err!("Found multiple INHERIT or NOINHERIT")
+                        parser_err!("Found multiple INHERIT or NOINHERIT", loc)
                     } else {
                         inherit = Some(keyword == Keyword::INHERIT);
                         Ok(())
                     }
                 }
                 Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => {
                     if bypassrls.is_some() {
-                        parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS")
+                        parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc)
                     } else {
                         bypassrls = Some(keyword == Keyword::BYPASSRLS);
                         Ok(())
                     }
                 }
                 Keyword::CREATEDB | Keyword::NOCREATEDB => {
                     if create_db.is_some() {
-                        parser_err!("Found multiple CREATEDB or NOCREATEDB")
+                        parser_err!("Found multiple CREATEDB or NOCREATEDB", loc)
                     } else {
                         create_db = Some(keyword == Keyword::CREATEDB);
                         Ok(())
                     }
                 }
                 Keyword::CREATEROLE | Keyword::NOCREATEROLE => {
                     if create_role.is_some() {
-                        parser_err!("Found multiple CREATEROLE or NOCREATEROLE")
+                        parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc)
                     } else {
                         create_role = Some(keyword == Keyword::CREATEROLE);
                         Ok(())
                     }
                 }
                 Keyword::SUPERUSER | Keyword::NOSUPERUSER => {
                     if superuser.is_some() {
-                        parser_err!("Found multiple SUPERUSER or NOSUPERUSER")
+                        parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc)
                     } else {
                         superuser = Some(keyword == Keyword::SUPERUSER);
                         Ok(())
                     }
                 }
                 Keyword::REPLICATION | Keyword::NOREPLICATION => {
                     if replication.is_some() {
-                        parser_err!("Found multiple REPLICATION or NOREPLICATION")
+                        parser_err!("Found multiple REPLICATION or NOREPLICATION", loc)
                     } else {
                         replication = Some(keyword == Keyword::REPLICATION);
                         Ok(())
                     }
                 }
                 Keyword::PASSWORD => {
                     if password.is_some() {
-                        parser_err!("Found multiple PASSWORD")
+                        parser_err!("Found multiple PASSWORD", loc)
                     } else {
                         password = if self.parse_keyword(Keyword::NULL) {
                             Some(Password::NullPassword)
@@ -3074,7 +3097,7 @@ impl<'a> Parser<'a> {
                 Keyword::CONNECTION => {
                     self.expect_keyword(Keyword::LIMIT)?;
                     if connection_limit.is_some() {
-                        parser_err!("Found multiple CONNECTION LIMIT")
+                        parser_err!("Found multiple CONNECTION LIMIT", loc)
                     } else {
                         connection_limit = Some(Expr::Value(self.parse_number_value()?));
                         Ok(())
@@ -3083,7 +3106,7 @@ impl<'a> Parser<'a> {
                 Keyword::VALID => {
                     self.expect_keyword(Keyword::UNTIL)?;
                     if valid_until.is_some() {
-                        parser_err!("Found multiple VALID UNTIL")
+                        parser_err!("Found multiple VALID UNTIL", loc)
                     } else {
                         valid_until = Some(Expr::Value(self.parse_value()?));
                         Ok(())
@@ -3092,14 +3115,14 @@ impl<'a> Parser<'a> {
                 Keyword::IN => {
                     if self.parse_keyword(Keyword::ROLE) {
                         if !in_role.is_empty() {
-                            parser_err!("Found multiple IN ROLE")
+                            parser_err!("Found multiple IN ROLE", loc)
                         } else {
                             in_role = self.parse_comma_separated(Parser::parse_identifier)?;
                             Ok(())
                         }
                     } else if self.parse_keyword(Keyword::GROUP) {
                         if !in_group.is_empty() {
-                            parser_err!("Found multiple IN GROUP")
+                            parser_err!("Found multiple IN GROUP", loc)
                         } else {
                             in_group = self.parse_comma_separated(Parser::parse_identifier)?;
                             Ok(())
@@ -3110,23 +3133,23 @@ impl<'a> Parser<'a> {
                 }
                 Keyword::ROLE => {
                     if !role.is_empty() {
-                        parser_err!("Found multiple ROLE")
+                        parser_err!("Found multiple ROLE", loc)
                     } else {
                         role = self.parse_comma_separated(Parser::parse_identifier)?;
                         Ok(())
                     }
                 }
                 Keyword::USER => {
                     if !user.is_empty() {
-                        parser_err!("Found multiple USER")
+                        parser_err!("Found multiple USER", loc)
                     } else {
                         user = self.parse_comma_separated(Parser::parse_identifier)?;
                         Ok(())
                     }
                 }
                 Keyword::ADMIN => {
                     if !admin.is_empty() {
-                        parser_err!("Found multiple ADMIN")
+                        parser_err!("Found multiple ADMIN", loc)
                     } else {
                         admin = self.parse_comma_separated(Parser::parse_identifier)?;
                         Ok(())
@@ -3193,14 +3216,19 @@ impl<'a> Parser<'a> {
         // specifying multiple objects to delete in a single statement
         let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
         let names = self.parse_comma_separated(Parser::parse_object_name)?;
+
+        let loc = self.peek_token().location;
         let cascade = self.parse_keyword(Keyword::CASCADE);
         let restrict = self.parse_keyword(Keyword::RESTRICT);
         let purge = self.parse_keyword(Keyword::PURGE);
         if cascade && restrict {
-            return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP");
+            return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc);
         }
         if object_type == ObjectType::Role && (cascade || restrict || purge) {
-            return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE");
+            return parser_err!(
+                "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE",
+                loc
+            );
         }
         Ok(Statement::Drop {
             object_type,
@@ -4489,7 +4517,11 @@ impl<'a> Parser<'a> {
     fn parse_literal_char(&mut self) -> Result<char, ParserError> {
         let s = self.parse_literal_string()?;
         if s.len() != 1 {
-            return parser_err!(format!("Expect a char, found {s:?}"));
+            let loc = self
+                .tokens
+                .get(self.index - 1)
+                .map_or(Location { line: 0, column: 0 }, |t| t.location);
+            return parser_err!(format!("Expect a char, found {s:?}"), loc);
         }
         Ok(s.chars().next().unwrap())
     }
@@ -4568,7 +4600,7 @@ impl<'a> Parser<'a> {
             // (i.e., it returns the input string).
             Token::Number(ref n, l) => match n.parse() {
                 Ok(n) => Ok(Value::Number(n, l)),
-                Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")),
+                Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location),
             },
             Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
             Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
@@ -6512,10 +6544,11 @@ impl<'a> Parser<'a> {
             .parse_keywords(&[Keyword::GRANTED, Keyword::BY])
             .then(|| self.parse_identifier().unwrap());
 
+        let loc = self.peek_token().location;
         let cascade = self.parse_keyword(Keyword::CASCADE);
         let restrict = self.parse_keyword(Keyword::RESTRICT);
         if cascade && restrict {
-            return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE");
+            return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc);
         }
 
         Ok(Statement::Revoke {
@@ -8192,14 +8225,12 @@ mod tests {
 
     #[test]
    fn test_parser_error_loc() {
-        // TODO: Once we thread token locations through the parser, we should update this
-        // test to assert the locations of the referenced token
         let sql = "SELECT this is a syntax error";
         let ast = Parser::parse_sql(&GenericDialect, sql);
         assert_eq!(
             ast,
             Err(ParserError::ParserError(
-                "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a"
+                "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16"
                     .to_string()
             ))
         );

src/test_utils.rs (+7, -1)
@@ -28,6 +28,7 @@ use core::fmt::Debug;
 
 use crate::dialect::*;
 use crate::parser::{Parser, ParserError};
+use crate::tokenizer::Tokenizer;
 use crate::{ast::*, parser::ParserOptions};
 
 /// Tests use the methods on this struct to invoke the parser on one or
@@ -82,8 +83,13 @@ impl TestedDialects {
     /// the result is the same for all tested dialects.
     pub fn parse_sql_statements(&self, sql: &str) -> Result<Vec<Statement>, ParserError> {
         self.one_of_identical_results(|dialect| {
+            let mut tokenizer = Tokenizer::new(dialect, sql);
+            if let Some(options) = &self.options {
+                tokenizer = tokenizer.with_unescape(options.unescape);
+            }
+            let tokens = tokenizer.tokenize()?;
             self.new_parser(dialect)
-                .try_with_sql(sql)?
+                .with_tokens(tokens)
                 .parse_statements()
         })
         // To fail the `ensure_multiple_dialects_are_tested` test:
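
Note that the test helper now tokenizes with plain tokenize() and feeds the tokens through with_tokens(), i.e. without location tracking; a plausible reason is that the suite's many hard-coded error-message assertions would otherwise all need " at Line: .., Column .." suffixes. A hypothetical standalone equivalent of the updated helper, written against the public API (the helper name is illustrative, not from the commit):

    use sqlparser::ast::Statement;
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::{Parser, ParserError};
    use sqlparser::tokenizer::Tokenizer;

    // Tokenize without location tracking, then hand the plain tokens to the parser,
    // mirroring what parse_sql_statements does above for a single dialect.
    fn parse_without_locations(sql: &str) -> Result<Vec<Statement>, ParserError> {
        let dialect = GenericDialect {};
        let tokens = Tokenizer::new(&dialect, sql).tokenize()?;
        Parser::new(&dialect).with_tokens(tokens).parse_statements()
    }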
