-
Notifications
You must be signed in to change notification settings - Fork 605
Add parse_multipart_identifier function to parser #860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4705,6 +4705,92 @@ impl<'a> Parser<'a> { | |
Ok(idents) | ||
} | ||
|
||
/// Parse identifiers of form ident1[.identN]* | ||
/// | ||
/// Similar in functionality to [parse_identifiers], with difference | ||
/// being this function is much more strict about parsing a valid multipart identifier, not | ||
/// allowing extraneous tokens to be parsed, otherwise it fails. | ||
/// | ||
/// For example: | ||
/// | ||
/// ```rust | ||
/// use sqlparser::ast::Ident; | ||
/// use sqlparser::dialect::GenericDialect; | ||
/// use sqlparser::parser::Parser; | ||
/// | ||
/// let dialect = GenericDialect {}; | ||
/// let expected = vec![Ident::new("one"), Ident::new("two")]; | ||
/// | ||
/// // expected usage | ||
/// let sql = "one.two"; | ||
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); | ||
/// let actual = parser.parse_multipart_identifier().unwrap(); | ||
/// assert_eq!(&actual, &expected); | ||
/// | ||
/// // parse_identifiers is more loose on what it allows, parsing successfully | ||
/// let sql = "one + two"; | ||
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); | ||
/// let actual = parser.parse_identifiers().unwrap(); | ||
/// assert_eq!(&actual, &expected); | ||
/// | ||
/// // expected to strictly fail due to + separator | ||
/// let sql = "one + two"; | ||
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); | ||
/// let actual = parser.parse_multipart_identifier().unwrap_err(); | ||
/// assert_eq!( | ||
/// actual.to_string(), | ||
/// "sql parser error: Unexpected token in identifier: +" | ||
/// ); | ||
/// ``` | ||
/// | ||
/// [parse_identifiers]: Parser::parse_identifiers | ||
pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at this function more closely, I wonder how if we can't reuse Or maybe we could add some more documentation (like docstrings / tests) for this function to make it clearer it is designed to parse strings into identifiers 🤔 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think Though I did base There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Upon some more reflection, I think we should accept this code and update the documentation strings on I can try and find time to update the documentation maybe next week -- or @Jefffrey do you have time to do so? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've update the doc a bit, let me know any further suggestions |
||
let mut idents = vec![]; | ||
|
||
// expecting at least one word for identifier | ||
match self.next_token().token { | ||
Token::Word(w) => idents.push(w.to_ident()), | ||
Token::EOF => { | ||
return Err(ParserError::ParserError( | ||
"Empty input when parsing identifier".to_string(), | ||
))? | ||
} | ||
token => { | ||
return Err(ParserError::ParserError(format!( | ||
"Unexpected token in identifier: {token}" | ||
)))? | ||
} | ||
}; | ||
|
||
// parse optional next parts if exist | ||
loop { | ||
match self.next_token().token { | ||
// ensure that optional period is succeeded by another identifier | ||
Token::Period => match self.next_token().token { | ||
Token::Word(w) => idents.push(w.to_ident()), | ||
Token::EOF => { | ||
return Err(ParserError::ParserError( | ||
"Trailing period in identifier".to_string(), | ||
))? | ||
} | ||
token => { | ||
return Err(ParserError::ParserError(format!( | ||
"Unexpected token following period in identifier: {token}" | ||
)))? | ||
} | ||
}, | ||
Token::EOF => break, | ||
token => { | ||
return Err(ParserError::ParserError(format!( | ||
"Unexpected token in identifier: {token}" | ||
)))? | ||
} | ||
} | ||
} | ||
|
||
Ok(idents) | ||
} | ||
|
||
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword) | ||
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> { | ||
let next_token = self.next_token(); | ||
|
@@ -7453,4 +7539,85 @@ mod tests { | |
)) | ||
); | ||
} | ||
|
||
/// Inputs that `parse_multipart_identifier` must accept, checked against the
/// generic dialect only.
#[test]
fn test_parse_multipart_identifier_positive() {
    // Builds an identifier part that carries no quoting.
    let unquoted = |value: &str| Ident {
        value: value.to_string(),
        quote_style: None,
    };

    let generic_only = TestedDialects {
        dialects: vec![Box::new(GenericDialect {})],
        options: None,
    };

    let cases = [
        // parse multipart with quotes
        (
            r#"CATALOG."F(o)o. ""bar".table"#,
            vec![
                unquoted("CATALOG"),
                Ident {
                    value: "F(o)o. \"bar".to_string(),
                    quote_style: Some('"'),
                },
                unquoted("table"),
            ],
        ),
        // allow whitespace between ident parts
        (
            "CATALOG . table",
            vec![unquoted("CATALOG"), unquoted("table")],
        ),
    ];

    for (sql, expected) in &cases {
        generic_only.run_parser_method(sql, |parser| {
            let actual = parser.parse_multipart_identifier().unwrap();
            assert_eq!(expected, &actual);
        });
    }
}
|
||
/// Inputs that `parse_multipart_identifier` must reject, paired with the
/// exact error message expected from every dialect.
#[test]
fn test_parse_multipart_identifier_negative() {
    let failing_inputs = [
        ("", "sql parser error: Empty input when parsing identifier"),
        (
            "*schema.table",
            "sql parser error: Unexpected token in identifier: *",
        ),
        (
            "schema.table*",
            "sql parser error: Unexpected token in identifier: *",
        ),
        (
            "schema.table.",
            "sql parser error: Trailing period in identifier",
        ),
        (
            "schema.*",
            "sql parser error: Unexpected token following period in identifier: *",
        ),
    ];

    for (sql, expected_err) in &failing_inputs {
        all_dialects().run_parser_method(sql, |parser| {
            let actual_err = parser.parse_multipart_identifier().unwrap_err();
            assert_eq!(actual_err.to_string(), *expected_err);
        });
    }
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is great. Thank you