Skip to content

Commit 4559d87

Browse files
authored
Add parse_multipart_identifier function to parser (apache#860)
* Add parse_multipart_identifier function to parser * Update doc for parse_multipart_identifier * Fix conflict
1 parent 482a3ad commit 4559d87

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

src/parser.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4707,6 +4707,92 @@ impl<'a> Parser<'a> {
47074707
Ok(idents)
47084708
}
47094709

4710+
/// Parse identifiers of form ident1[.identN]*
4711+
///
4712+
/// Similar in functionality to [parse_identifiers], with difference
4713+
/// being this function is much more strict about parsing a valid multipart identifier, not
4714+
/// allowing extraneous tokens to be parsed, otherwise it fails.
4715+
///
4716+
/// For example:
4717+
///
4718+
/// ```rust
4719+
/// use sqlparser::ast::Ident;
4720+
/// use sqlparser::dialect::GenericDialect;
4721+
/// use sqlparser::parser::Parser;
4722+
///
4723+
/// let dialect = GenericDialect {};
4724+
/// let expected = vec![Ident::new("one"), Ident::new("two")];
4725+
///
4726+
/// // expected usage
4727+
/// let sql = "one.two";
4728+
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
4729+
/// let actual = parser.parse_multipart_identifier().unwrap();
4730+
/// assert_eq!(&actual, &expected);
4731+
///
4732+
/// // parse_identifiers is more loose on what it allows, parsing successfully
4733+
/// let sql = "one + two";
4734+
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
4735+
/// let actual = parser.parse_identifiers().unwrap();
4736+
/// assert_eq!(&actual, &expected);
4737+
///
4738+
/// // expected to strictly fail due to + separator
4739+
/// let sql = "one + two";
4740+
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
4741+
/// let actual = parser.parse_multipart_identifier().unwrap_err();
4742+
/// assert_eq!(
4743+
/// actual.to_string(),
4744+
/// "sql parser error: Unexpected token in identifier: +"
4745+
/// );
4746+
/// ```
4747+
///
4748+
/// [parse_identifiers]: Parser::parse_identifiers
4749+
pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> {
4750+
let mut idents = vec![];
4751+
4752+
// expecting at least one word for identifier
4753+
match self.next_token().token {
4754+
Token::Word(w) => idents.push(w.to_ident()),
4755+
Token::EOF => {
4756+
return Err(ParserError::ParserError(
4757+
"Empty input when parsing identifier".to_string(),
4758+
))?
4759+
}
4760+
token => {
4761+
return Err(ParserError::ParserError(format!(
4762+
"Unexpected token in identifier: {token}"
4763+
)))?
4764+
}
4765+
};
4766+
4767+
// parse optional next parts if exist
4768+
loop {
4769+
match self.next_token().token {
4770+
// ensure that optional period is succeeded by another identifier
4771+
Token::Period => match self.next_token().token {
4772+
Token::Word(w) => idents.push(w.to_ident()),
4773+
Token::EOF => {
4774+
return Err(ParserError::ParserError(
4775+
"Trailing period in identifier".to_string(),
4776+
))?
4777+
}
4778+
token => {
4779+
return Err(ParserError::ParserError(format!(
4780+
"Unexpected token following period in identifier: {token}"
4781+
)))?
4782+
}
4783+
},
4784+
Token::EOF => break,
4785+
token => {
4786+
return Err(ParserError::ParserError(format!(
4787+
"Unexpected token in identifier: {token}"
4788+
)))?
4789+
}
4790+
}
4791+
}
4792+
4793+
Ok(idents)
4794+
}
4795+
47104796
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
47114797
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
47124798
let next_token = self.next_token();
@@ -7455,4 +7541,85 @@ mod tests {
74557541
))
74567542
);
74577543
}
7544+
7545+
#[test]
fn test_parse_multipart_identifier_positive() {
    // Small constructor so each expected part fits on one line.
    let ident = |value: &str, quote_style: Option<char>| Ident {
        value: value.to_string(),
        quote_style,
    };

    let generic_only = TestedDialects {
        dialects: vec![Box::new(GenericDialect {})],
        options: None,
    };

    // A double-quoted middle part may contain periods, parentheses, spaces
    // and a doubled quote; the expected value carries a single `"` for it.
    let quoted_expected = vec![
        ident("CATALOG", None),
        ident("F(o)o. \"bar", Some('"')),
        ident("table", None),
    ];
    generic_only.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| {
        let parsed = parser.parse_multipart_identifier().unwrap();
        assert_eq!(quoted_expected, parsed);
    });

    // Whitespace around the separating period is accepted.
    let spaced_expected = vec![ident("CATALOG", None), ident("table", None)];
    generic_only.run_parser_method("CATALOG . table", |parser| {
        let parsed = parser.parse_multipart_identifier().unwrap();
        assert_eq!(spaced_expected, parsed);
    });
}
7588+
7589+
#[test]
fn test_parse_multipart_identifier_negative() {
    // Each entry pairs an invalid input with the exact error text that
    // `parse_multipart_identifier` is expected to report for it.
    let cases = [
        // nothing to parse at all
        ("", "sql parser error: Empty input when parsing identifier"),
        // first token is not a word
        (
            "*schema.table",
            "sql parser error: Unexpected token in identifier: *",
        ),
        // extraneous token after a complete identifier
        (
            "schema.table*",
            "sql parser error: Unexpected token in identifier: *",
        ),
        // period with nothing after it
        (
            "schema.table.",
            "sql parser error: Trailing period in identifier",
        ),
        // period followed by a non-word token
        (
            "schema.*",
            "sql parser error: Unexpected token following period in identifier: *",
        ),
    ];

    for (input, expected_err) in cases {
        // Every dialect must reject the input with the same message.
        all_dialects().run_parser_method(input, |parser| {
            let actual_err = parser.parse_multipart_identifier().unwrap_err();
            assert_eq!(actual_err.to_string(), expected_err);
        });
    }
}
74587625
}

0 commit comments

Comments
 (0)