Skip to content

Commit a8a8e65

Browse files
authored
PostgreSQL: GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY and GENERATED ALWAYS AS ( generation_expr ) support (apache#832)
* GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] basic impl - tests are failing. * PostgreSQL GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] and GENERATED ALWAYS AS ( generation_expr ) STORED implementation.
1 parent 4ff3aeb commit a8a8e65

17 files changed

+350
-57
lines changed

src/ast/ddl.rs

+63-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ use serde::{Deserialize, Serialize};
2424
use sqlparser_derive::{Visit, VisitMut};
2525

2626
use crate::ast::value::escape_single_quote_string;
27-
use crate::ast::{display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName};
27+
use crate::ast::{
28+
display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName, SequenceOptions,
29+
};
2830
use crate::tokenizer::Token;
2931

3032
/// An `ALTER TABLE` (`Statement::AlterTable`) operation
@@ -575,6 +577,13 @@ pub enum ColumnOption {
575577
CharacterSet(ObjectName),
576578
Comment(String),
577579
OnUpdate(Expr),
580+
/// `Generated` options are modifiers that follow a column definition in a `CREATE
581+
/// TABLE` statement.
582+
Generated {
583+
generated_as: GeneratedAs,
584+
sequence_options: Option<Vec<SequenceOptions>>,
585+
generation_expr: Option<Expr>,
586+
},
578587
}
579588

580589
impl fmt::Display for ColumnOption {
@@ -610,10 +619,63 @@ impl fmt::Display for ColumnOption {
610619
CharacterSet(n) => write!(f, "CHARACTER SET {n}"),
611620
Comment(v) => write!(f, "COMMENT '{}'", escape_single_quote_string(v)),
612621
OnUpdate(expr) => write!(f, "ON UPDATE {expr}"),
622+
Generated {
623+
generated_as,
624+
sequence_options,
625+
generation_expr,
626+
} => match generated_as {
627+
GeneratedAs::Always => {
628+
write!(f, "GENERATED ALWAYS AS IDENTITY")?;
629+
if sequence_options.is_some() {
630+
let so = sequence_options.as_ref().unwrap();
631+
if !so.is_empty() {
632+
write!(f, " (")?;
633+
}
634+
for sequence_option in so {
635+
write!(f, "{sequence_option}")?;
636+
}
637+
if !so.is_empty() {
638+
write!(f, " )")?;
639+
}
640+
}
641+
Ok(())
642+
}
643+
GeneratedAs::ByDefault => {
644+
write!(f, "GENERATED BY DEFAULT AS IDENTITY")?;
645+
if sequence_options.is_some() {
646+
let so = sequence_options.as_ref().unwrap();
647+
if !so.is_empty() {
648+
write!(f, " (")?;
649+
}
650+
for sequence_option in so {
651+
write!(f, "{sequence_option}")?;
652+
}
653+
if !so.is_empty() {
654+
write!(f, " )")?;
655+
}
656+
}
657+
Ok(())
658+
}
659+
GeneratedAs::ExpStored => {
660+
let expr = generation_expr.as_ref().unwrap();
661+
write!(f, "GENERATED ALWAYS AS ({expr}) STORED")
662+
}
663+
},
613664
}
614665
}
615666
}
616667

668+
/// `GeneratedAs` selects which form of `GENERATED ...` a `ColumnOption::Generated` represents.
669+
/// `ExpStored` is PostgreSQL-specific.
670+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
671+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
672+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
673+
pub enum GeneratedAs {
674+
Always,
675+
ByDefault,
676+
ExpStored,
677+
}
678+
617679
fn display_constraint_name(name: &'_ Option<Ident>) -> impl fmt::Display + '_ {
618680
struct ConstraintName<'a>(&'a Option<Ident>);
619681
impl<'a> fmt::Display for ConstraintName<'a> {

src/ast/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ pub use self::data_type::{
3030
};
3131
pub use self::ddl::{
3232
AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
33-
ColumnOptionDef, IndexType, KeyOrIndexDisplay, ReferentialAction, TableConstraint,
33+
ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ReferentialAction, TableConstraint,
3434
};
3535
pub use self::operator::{BinaryOperator, UnaryOperator};
3636
pub use self::query::{

src/dialect/ansi.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,10 @@ pub struct AnsiDialect {}
1717

1818
impl Dialect for AnsiDialect {
1919
fn is_identifier_start(&self, ch: char) -> bool {
20-
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch)
20+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase()
2121
}
2222

2323
fn is_identifier_part(&self, ch: char) -> bool {
24-
('a'..='z').contains(&ch)
25-
|| ('A'..='Z').contains(&ch)
26-
|| ('0'..='9').contains(&ch)
27-
|| ch == '_'
24+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
2825
}
2926
}

src/dialect/bigquery.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ impl Dialect for BigQueryDialect {
2222
}
2323

2424
fn is_identifier_start(&self, ch: char) -> bool {
25-
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
25+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
2626
}
2727

2828
fn is_identifier_part(&self, ch: char) -> bool {
29-
('a'..='z').contains(&ch)
30-
|| ('A'..='Z').contains(&ch)
31-
|| ('0'..='9').contains(&ch)
29+
ch.is_ascii_lowercase()
30+
|| ch.is_ascii_uppercase()
31+
|| ch.is_ascii_digit()
3232
|| ch == '_'
3333
|| ch == '-'
3434
}

src/dialect/clickhouse.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ pub struct ClickHouseDialect {}
1818
impl Dialect for ClickHouseDialect {
1919
fn is_identifier_start(&self, ch: char) -> bool {
2020
// See https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-identifiers
21-
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
21+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
2222
}
2323

2424
fn is_identifier_part(&self, ch: char) -> bool {
25-
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
25+
self.is_identifier_start(ch) || ch.is_ascii_digit()
2626
}
2727
}

src/dialect/generic.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,13 @@ pub struct GenericDialect;
1717

1818
impl Dialect for GenericDialect {
1919
fn is_identifier_start(&self, ch: char) -> bool {
20-
('a'..='z').contains(&ch)
21-
|| ('A'..='Z').contains(&ch)
22-
|| ch == '_'
23-
|| ch == '#'
24-
|| ch == '@'
20+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' || ch == '#' || ch == '@'
2521
}
2622

2723
fn is_identifier_part(&self, ch: char) -> bool {
28-
('a'..='z').contains(&ch)
29-
|| ('A'..='Z').contains(&ch)
30-
|| ('0'..='9').contains(&ch)
24+
ch.is_ascii_lowercase()
25+
|| ch.is_ascii_uppercase()
26+
|| ch.is_ascii_digit()
3127
|| ch == '@'
3228
|| ch == '$'
3329
|| ch == '#'

src/dialect/hive.rs

+4-7
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,13 @@ impl Dialect for HiveDialect {
2121
}
2222

2323
fn is_identifier_start(&self, ch: char) -> bool {
24-
('a'..='z').contains(&ch)
25-
|| ('A'..='Z').contains(&ch)
26-
|| ('0'..='9').contains(&ch)
27-
|| ch == '$'
24+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '$'
2825
}
2926

3027
fn is_identifier_part(&self, ch: char) -> bool {
31-
('a'..='z').contains(&ch)
32-
|| ('A'..='Z').contains(&ch)
33-
|| ('0'..='9').contains(&ch)
28+
ch.is_ascii_lowercase()
29+
|| ch.is_ascii_uppercase()
30+
|| ch.is_ascii_digit()
3431
|| ch == '_'
3532
|| ch == '$'
3633
|| ch == '{'

src/dialect/mssql.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,13 @@ impl Dialect for MsSqlDialect {
2323
fn is_identifier_start(&self, ch: char) -> bool {
2424
// See https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017#rules-for-regular-identifiers
2525
// We don't support non-latin "letters" currently.
26-
('a'..='z').contains(&ch)
27-
|| ('A'..='Z').contains(&ch)
28-
|| ch == '_'
29-
|| ch == '#'
30-
|| ch == '@'
26+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' || ch == '#' || ch == '@'
3127
}
3228

3329
fn is_identifier_part(&self, ch: char) -> bool {
34-
('a'..='z').contains(&ch)
35-
|| ('A'..='Z').contains(&ch)
36-
|| ('0'..='9').contains(&ch)
30+
ch.is_ascii_lowercase()
31+
|| ch.is_ascii_uppercase()
32+
|| ch.is_ascii_digit()
3733
|| ch == '@'
3834
|| ch == '$'
3935
|| ch == '#'

src/dialect/mysql.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,16 @@ impl Dialect for MySqlDialect {
2020
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
2121
// We don't yet support identifiers beginning with numbers, as that
2222
// makes it hard to distinguish numeric literals.
23-
('a'..='z').contains(&ch)
24-
|| ('A'..='Z').contains(&ch)
23+
ch.is_ascii_lowercase()
24+
|| ch.is_ascii_uppercase()
2525
|| ch == '_'
2626
|| ch == '$'
2727
|| ch == '@'
2828
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
2929
}
3030

3131
fn is_identifier_part(&self, ch: char) -> bool {
32-
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
32+
self.is_identifier_start(ch) || ch.is_ascii_digit()
3333
}
3434

3535
fn is_delimited_identifier_start(&self, ch: char) -> bool {

src/dialect/postgresql.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ impl Dialect for PostgreSqlDialect {
2424
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
2525
// We don't yet support identifiers beginning with "letters with
2626
// diacritical marks and non-Latin letters"
27-
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
27+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
2828
}
2929

3030
fn is_identifier_part(&self, ch: char) -> bool {
31-
('a'..='z').contains(&ch)
32-
|| ('A'..='Z').contains(&ch)
33-
|| ('0'..='9').contains(&ch)
31+
ch.is_ascii_lowercase()
32+
|| ch.is_ascii_uppercase()
33+
|| ch.is_ascii_digit()
3434
|| ch == '$'
3535
|| ch == '_'
3636
}

src/dialect/snowflake.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ pub struct SnowflakeDialect;
1818
impl Dialect for SnowflakeDialect {
1919
// see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
2020
fn is_identifier_start(&self, ch: char) -> bool {
21-
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
21+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
2222
}
2323

2424
fn is_identifier_part(&self, ch: char) -> bool {
25-
('a'..='z').contains(&ch)
26-
|| ('A'..='Z').contains(&ch)
27-
|| ('0'..='9').contains(&ch)
25+
ch.is_ascii_lowercase()
26+
|| ch.is_ascii_uppercase()
27+
|| ch.is_ascii_digit()
2828
|| ch == '$'
2929
|| ch == '_'
3030
}

src/dialect/sqlite.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ impl Dialect for SQLiteDialect {
2828

2929
fn is_identifier_start(&self, ch: char) -> bool {
3030
// See https://www.sqlite.org/draft/tokenreq.html
31-
('a'..='z').contains(&ch)
32-
|| ('A'..='Z').contains(&ch)
31+
ch.is_ascii_lowercase()
32+
|| ch.is_ascii_uppercase()
3333
|| ch == '_'
3434
|| ch == '$'
3535
|| ('\u{007f}'..='\u{ffff}').contains(&ch)
3636
}
3737

3838
fn is_identifier_part(&self, ch: char) -> bool {
39-
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
39+
self.is_identifier_start(ch) || ch.is_ascii_digit()
4040
}
4141

4242
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {

src/keywords.rs

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ define_keywords!(
7777
ALL,
7878
ALLOCATE,
7979
ALTER,
80+
ALWAYS,
8081
ANALYZE,
8182
AND,
8283
ANTI,
@@ -270,6 +271,7 @@ define_keywords!(
270271
FUNCTION,
271272
FUNCTIONS,
272273
FUSION,
274+
GENERATED,
273275
GET,
274276
GLOBAL,
275277
GRANT,

src/parser.rs

+49
Original file line numberDiff line numberDiff line change
@@ -3567,6 +3567,55 @@ impl<'a> Parser<'a> {
35673567
{
35683568
let expr = self.parse_expr()?;
35693569
Ok(Some(ColumnOption::OnUpdate(expr)))
3570+
} else if self.parse_keyword(Keyword::GENERATED) {
3571+
self.parse_optional_column_option_generated()
3572+
} else {
3573+
Ok(None)
3574+
}
3575+
}
3576+
fn parse_optional_column_option_generated(
3577+
&mut self,
3578+
) -> Result<Option<ColumnOption>, ParserError> {
3579+
if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS, Keyword::IDENTITY]) {
3580+
let mut sequence_options = vec![];
3581+
if self.expect_token(&Token::LParen).is_ok() {
3582+
sequence_options = self.parse_create_sequence_options()?;
3583+
self.expect_token(&Token::RParen)?;
3584+
}
3585+
Ok(Some(ColumnOption::Generated {
3586+
generated_as: GeneratedAs::Always,
3587+
sequence_options: Some(sequence_options),
3588+
generation_expr: None,
3589+
}))
3590+
} else if self.parse_keywords(&[
3591+
Keyword::BY,
3592+
Keyword::DEFAULT,
3593+
Keyword::AS,
3594+
Keyword::IDENTITY,
3595+
]) {
3596+
let mut sequence_options = vec![];
3597+
if self.expect_token(&Token::LParen).is_ok() {
3598+
sequence_options = self.parse_create_sequence_options()?;
3599+
self.expect_token(&Token::RParen)?;
3600+
}
3601+
Ok(Some(ColumnOption::Generated {
3602+
generated_as: GeneratedAs::ByDefault,
3603+
sequence_options: Some(sequence_options),
3604+
generation_expr: None,
3605+
}))
3606+
} else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS]) {
3607+
if self.expect_token(&Token::LParen).is_ok() {
3608+
let expr = self.parse_expr()?;
3609+
self.expect_token(&Token::RParen)?;
3610+
let _ = self.parse_keywords(&[Keyword::STORED]);
3611+
Ok(Some(ColumnOption::Generated {
3612+
generated_as: GeneratedAs::ExpStored,
3613+
sequence_options: None,
3614+
generation_expr: Some(expr),
3615+
}))
3616+
} else {
3617+
Ok(None)
3618+
}
35703619
} else {
35713620
Ok(None)
35723621
}

src/tokenizer.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ impl<'a> Tokenizer<'a> {
596596
let word = self.tokenize_word(ch, chars);
597597

598598
// TODO: implement parsing of exponent here
599-
if word.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
599+
if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
600600
let mut inner_state = State {
601601
peekable: word.chars().peekable(),
602602
line: 0,

tests/sqlparser_custom_dialect.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,13 @@ fn custom_statement_parser() -> Result<(), ParserError> {
126126
}
127127

128128
fn is_identifier_start(ch: char) -> bool {
129-
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
129+
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
130130
}
131131

132132
fn is_identifier_part(ch: char) -> bool {
133-
('a'..='z').contains(&ch)
134-
|| ('A'..='Z').contains(&ch)
135-
|| ('0'..='9').contains(&ch)
133+
ch.is_ascii_lowercase()
134+
|| ch.is_ascii_uppercase()
135+
|| ch.is_ascii_digit()
136136
|| ch == '$'
137137
|| ch == '_'
138138
}

0 commit comments

Comments
 (0)