Skip to content

Commit 2c20ec0

Browse files
authored
Support parsing scientific notation (such as 10e5) (#768)
1 parent 2d801c9 commit 2c20ec0

File tree

3 files changed

+118
-0
lines changed

3 files changed

+118
-0
lines changed

src/test_utils.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ pub fn all_dialects() -> TestedDialects {
144144
Box::new(RedshiftSqlDialect {}),
145145
Box::new(MySqlDialect {}),
146146
Box::new(BigQueryDialect {}),
147+
Box::new(SQLiteDialect {}),
147148
],
148149
}
149150
}

src/tokenizer.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ impl<'a> Tokenizer<'a> {
541541
chars.next(); // consume the first char
542542
let s = self.tokenize_word(ch, chars);
543543

544+
// TODO: implement parsing of exponent here
544545
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
545546
let mut inner_state = State {
546547
peekable: s.chars().peekable(),
@@ -617,6 +618,36 @@ impl<'a> Tokenizer<'a> {
617618
return Ok(Some(Token::Period));
618619
}
619620

621+
// Parse exponent as number
622+
if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
623+
let mut char_clone = chars.peekable.clone();
624+
let mut exponent_part = String::new();
625+
exponent_part.push(char_clone.next().unwrap());
626+
627+
// Optional sign
628+
match char_clone.peek() {
629+
Some(&c) if matches!(c, '+' | '-') => {
630+
exponent_part.push(c);
631+
char_clone.next();
632+
}
633+
_ => (),
634+
}
635+
636+
match char_clone.peek() {
637+
// Definitely an exponent, get original iterator up to speed and use it
638+
Some(&c) if matches!(c, '0'..='9') => {
639+
for _ in 0..exponent_part.len() {
640+
chars.next();
641+
}
642+
exponent_part +=
643+
&peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));
644+
s += exponent_part.as_str();
645+
}
646+
// Not an exponent, discard the work done
647+
_ => (),
648+
}
649+
}
650+
620651
let long = if chars.peek() == Some(&'L') {
621652
chars.next();
622653
true
@@ -1091,6 +1122,41 @@ mod tests {
10911122
compare(expected, tokens);
10921123
}
10931124

1125+
#[test]
1126+
fn tokenize_select_exponent() {
1127+
let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10");
1128+
let dialect = GenericDialect {};
1129+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
1130+
let tokens = tokenizer.tokenize().unwrap();
1131+
1132+
let expected = vec![
1133+
Token::make_keyword("SELECT"),
1134+
Token::Whitespace(Whitespace::Space),
1135+
Token::Number(String::from("1e10"), false),
1136+
Token::Comma,
1137+
Token::Whitespace(Whitespace::Space),
1138+
Token::Number(String::from("1e-10"), false),
1139+
Token::Comma,
1140+
Token::Whitespace(Whitespace::Space),
1141+
Token::Number(String::from("1e+10"), false),
1142+
Token::Comma,
1143+
Token::Whitespace(Whitespace::Space),
1144+
Token::Number(String::from("1"), false),
1145+
Token::make_word("ea", None),
1146+
Token::Comma,
1147+
Token::Whitespace(Whitespace::Space),
1148+
Token::Number(String::from("1e-10"), false),
1149+
Token::make_word("a", None),
1150+
Token::Comma,
1151+
Token::Whitespace(Whitespace::Space),
1152+
Token::Number(String::from("1e-10"), false),
1153+
Token::Minus,
1154+
Token::Number(String::from("10"), false),
1155+
];
1156+
1157+
compare(expected, tokens);
1158+
}
1159+
10941160
#[test]
10951161
fn tokenize_scalar_function() {
10961162
let sql = String::from("SELECT sqrt(1)");

tests/sqlparser_common.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,57 @@ fn parse_null_in_select() {
775775
);
776776
}
777777

778+
#[test]
779+
fn parse_exponent_in_select() -> Result<(), ParserError> {
780+
// all except Hive, as it allows numbers to start an identifier
781+
let dialects = TestedDialects {
782+
dialects: vec![
783+
Box::new(AnsiDialect {}),
784+
Box::new(BigQueryDialect {}),
785+
Box::new(ClickHouseDialect {}),
786+
Box::new(GenericDialect {}),
787+
// Box::new(HiveDialect {}),
788+
Box::new(MsSqlDialect {}),
789+
Box::new(MySqlDialect {}),
790+
Box::new(PostgreSqlDialect {}),
791+
Box::new(RedshiftSqlDialect {}),
792+
Box::new(SnowflakeDialect {}),
793+
Box::new(SQLiteDialect {}),
794+
],
795+
};
796+
let sql = "SELECT 10e-20, 1e3, 1e+3, 1e3a, 1e, 0.5e2";
797+
let mut select = dialects.parse_sql_statements(sql)?;
798+
799+
let select = match select.pop().unwrap() {
800+
Statement::Query(inner) => *inner,
801+
_ => panic!("Expected Query"),
802+
};
803+
let select = match *select.body {
804+
SetExpr::Select(inner) => *inner,
805+
_ => panic!("Expected SetExpr::Select"),
806+
};
807+
808+
assert_eq!(
809+
&vec![
810+
SelectItem::UnnamedExpr(Expr::Value(number("10e-20"))),
811+
SelectItem::UnnamedExpr(Expr::Value(number("1e3"))),
812+
SelectItem::UnnamedExpr(Expr::Value(number("1e+3"))),
813+
SelectItem::ExprWithAlias {
814+
expr: Expr::Value(number("1e3")),
815+
alias: Ident::new("a")
816+
},
817+
SelectItem::ExprWithAlias {
818+
expr: Expr::Value(number("1")),
819+
alias: Ident::new("e")
820+
},
821+
SelectItem::UnnamedExpr(Expr::Value(number("0.5e2"))),
822+
],
823+
&select.projection
824+
);
825+
826+
Ok(())
827+
}
828+
778829
#[test]
779830
fn parse_select_with_date_column_name() {
780831
let sql = "SELECT date";

0 commit comments

Comments
 (0)