From 25fd8499e9044e7b399fd105fd5446f382c327d6 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Thu, 10 Apr 2025 18:08:05 -0400 Subject: [PATCH 01/16] Add support for `GO` batch delimiter in SQL Server - per documentation, "not a statement" but acts like one in all other regards - since it's a batch delimiter and statements can't extend beyond a batch, it also acts as a statement delimiter --- src/ast/mod.rs | 27 ++++++++++++++++ src/ast/spans.rs | 1 + src/keywords.rs | 1 + src/parser/mod.rs | 62 +++++++++++++++++++++++++++++++++++ tests/sqlparser_mssql.rs | 70 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 161 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b60ade78b..29020550e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4097,6 +4097,12 @@ pub enum Statement { /// /// See [ReturnStatement] Return(ReturnStatement), + /// Go (MsSql) + /// + /// GO is not a Transact-SQL statement; it is a command recognized by various tools as a batch delimiter + /// + /// See: + Go(GoStatement), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -5791,6 +5797,7 @@ impl fmt::Display for Statement { Ok(()) } Statement::Print(s) => write!(f, "{s}"), + Statement::Go(s) => write!(f, "{s}"), Statement::Return(r) => write!(f, "{r}"), Statement::List(command) => write!(f, "LIST {command}"), Statement::Remove(command) => write!(f, "REMOVE {command}"), @@ -9315,6 +9322,26 @@ pub enum ReturnStatementValue { Expr(Expr), } +/// Represents a `GO` statement. +/// +/// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/language-elements/sql-server-utilities-statements-go) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct GoStatement { + pub count: Option, +} + +impl Display for GoStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(count) = self.count { + write!(f, "GO {count}") + } else { + write!(f, "GO") + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 93de5fff2..69c177b29 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -522,6 +522,7 @@ impl Spanned for Statement { Statement::RaisError { .. } => Span::empty(), Statement::Print { .. } => Span::empty(), Statement::Return { .. } => Span::empty(), + Statement::Go { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) => Span::empty(), } } diff --git a/src/keywords.rs b/src/keywords.rs index 4eaad7ed2..bbe4fd68c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -393,6 +393,7 @@ define_keywords!( GIN, GIST, GLOBAL, + GO, GRANT, GRANTED, GRANTS, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fe81b5999..70127a4c9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -475,6 +475,12 @@ impl<'a> Parser<'a> { if expecting_statement_delimiter && word.keyword == Keyword::END { break; } + // Treat batch delimiter as an end of statement + if expecting_statement_delimiter && dialect_of!(self is MsSqlDialect) { + if let Some(Statement::Go(GoStatement { count: _ })) = stmts.last() { + expecting_statement_delimiter = false; + } + } } _ => {} } @@ -613,6 +619,7 @@ impl<'a> Parser<'a> { Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(), Keyword::PRINT => self.parse_print(), Keyword::RETURN => self.parse_return(), + Keyword::GO => self.parse_go(), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -15225,6 +15232,61 @@ impl<'a> Parser<'a> { } } + /// Parse [Statement::Go] + fn parse_go(&mut self) -> Result { + // previous token should be a newline (skipping non-newline whitespace) + // see also, `previous_token` + let mut look_back_count = 2; + loop { + let prev_index = self.index.saturating_sub(look_back_count); + if prev_index == 0 { + break; + } + let prev_token = self.token_at(prev_index); + match prev_token.token { + Token::Whitespace(ref w) => match w { + Whitespace::Newline => break, + _ => look_back_count += 1, + }, + _ => { + if prev_token == self.get_current_token() { + // if we are at the start of the statement, we can skip this check + break; + } + + self.expected("newline before GO", prev_token.clone())? + } + }; + } + + let count = loop { + // using this peek function because we want to halt this statement parsing upon newline + let next_token = self.peek_token_no_skip(); + match next_token.token { + Token::EOF => break None::, + Token::Whitespace(ref w) => match w { + Whitespace::Newline => break None, + _ => _ = self.next_token_no_skip(), + }, + Token::Number(s, _) => { + let value = Some(Self::parse::(s, next_token.span.start)?); + self.advance_token(); + break value; + } + _ => self.expected("literal int or newline", next_token)?, + }; + }; + + if self.peek_token().token == Token::SemiColon { + parser_err!( + "GO may not end with a semicolon", + self.peek_token().span.start + )?; + } + + Ok(Statement::Go(GoStatement { count })) + } + /// Consume the parser and return its underlying token buffer pub fn into_tokens(self) -> Vec { self.tokens diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index b86e1a7d4..7b84d4e15 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2156,3 +2156,73 @@ fn parse_print() { let _ = ms().verified_stmt("PRINT N'Hello, ⛄️!'"); let _ = ms().verified_stmt("PRINT @my_variable"); } + +#[test] +fn parse_mssql_go_keyword() { + let single_go_keyword = "USE some_database;\nGO"; + let stmts = ms().parse_sql_statements(single_go_keyword).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None }),); + + let go_with_count = "SELECT 1;\nGO 5"; + let stmts = ms().parse_sql_statements(go_with_count).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); + + let bare_go = "GO"; + let stmts = ms().parse_sql_statements(bare_go).unwrap(); + assert_eq!(stmts.len(), 1); + assert_eq!(stmts[0], Statement::Go(GoStatement { count: None })); + + let go_then_statements = "/* whitespace */ GO\nRAISERROR('This is a test', 16, 1);"; + let stmts = ms().parse_sql_statements(go_then_statements).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[0], Statement::Go(GoStatement { count: None })); + assert_eq!( + stmts[1], + Statement::RaisError { + message: Box::new(Expr::Value( + (Value::SingleQuotedString("This is a test".to_string())).with_empty_span() + )), + severity: Box::new(Expr::Value(number("16").with_empty_span())), + state: Box::new(Expr::Value(number("1").with_empty_span())), + arguments: vec![], + options: vec![], + } + ); + + let multiple_gos = "SELECT 1;\nGO 5\nSELECT 2;\n GO"; + let stmts = ms().parse_sql_statements(multiple_gos).unwrap(); + assert_eq!(stmts.len(), 4); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); + assert_eq!(stmts[3], Statement::Go(GoStatement { count: None })); + + let comment_following_go = "USE some_database;\nGO -- okay"; + let stmts = ms().parse_sql_statements(comment_following_go).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + + let actually_column_alias = "SELECT NULL AS GO"; + let stmt = ms().verified_only_select(actually_column_alias); + assert_eq!( + only(stmt.projection), + SelectItem::ExprWithAlias { + expr: Expr::Value(Value::Null.with_empty_span()), + alias: Ident::new("GO"), + } + ); + + let invalid_go_position = "SELECT 1; GO"; + let err = ms().parse_sql_statements(invalid_go_position); + assert_eq!( + err.unwrap_err().to_string(), + "sql parser error: Expected: newline before GO, found: ;" + ); + + let invalid_go_count = "SELECT 1\nGO x"; + let err = ms().parse_sql_statements(invalid_go_count); + assert_eq!( + err.unwrap_err().to_string(), + "sql parser error: Expected: end of statement, found: x" + ); +} From 24d813b17b9d441aeab784a8898b87df735bab0f Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 14 Apr 2025 18:03:06 -0400 Subject: [PATCH 02/16] Enable `GO` to terminate an `IF` statement --- src/parser/mod.rs | 10 +++++- tests/sqlparser_mssql.rs | 75 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 70127a4c9..4e8bf1145 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -490,8 +490,16 @@ impl<'a> Parser<'a> { } let statement = self.parse_statement()?; + expecting_statement_delimiter = match &statement { + Statement::If(s) => match &s.if_block.conditional_statements { + // the `END` keyword doesn't need to be followed by a statement delimiter, so it shouldn't be expected here + ConditionalStatements::BeginEnd { .. } => false, + // parsing the statement sequence consumes the statement delimiter, so it shouldn't be expected here + ConditionalStatements::Sequence { .. } => false, + }, + _ => true, + }; stmts.push(statement); - expecting_statement_delimiter = true; } Ok(stmts) } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 7b84d4e15..ca5990db4 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -23,7 +23,8 @@ mod test_utils; use helpers::attached_token::AttachedToken; -use sqlparser::tokenizer::{Location, Span}; +use sqlparser::keywords::Keyword; +use sqlparser::tokenizer::{Location, Span, TokenWithSpan}; use test_utils::*; use sqlparser::ast::DataType::{Int, Text, Varbinary}; @@ -2226,3 +2227,75 @@ fn parse_mssql_go_keyword() { "sql parser error: Expected: end of statement, found: x" ); } + +#[test] +fn test_mssql_if_and_go() { + let sql = r#" + IF 1 = 2 + SELECT 3; + GO + "#; + let statements = ms().parse_sql_statements(sql).unwrap(); + assert_eq!(2, statements.len()); + assert_eq!( + statements[0], + Statement::If(IfStatement { + if_block: ConditionalStatementBlock { + start_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "IF".to_string(), + quote_style: None, + keyword: Keyword::IF + } + ))), + condition: Some(Expr::BinaryOp { + left: Box::new(Expr::Value((number("1")).with_empty_span())), + op: sqlparser::ast::BinaryOperator::Eq, + right: Box::new(Expr::Value((number("2")).with_empty_span())), + }), + then_token: None, + conditional_statements: ConditionalStatements::Sequence { + statements: vec![Statement::Query(Box::new(Query { + with: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + order_by: None, + settings: None, + format_clause: None, + body: Box::new(SetExpr::Select(Box::new(Select { + select_token: AttachedToken::empty(), + distinct: None, + top: None, + top_before_distinct: false, + projection: vec![SelectItem::UnnamedExpr(Expr::Value( + (number("3")).with_empty_span() + ))], + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + window_before_qualify: false, + qualify: None, + value_table_mode: None, + connect_by: None, + flavor: SelectFlavor::Standard, + }))), + }))], + }, + }, + elseif_blocks: vec![], + else_block: None, + end_token: None, + }) + ); + assert_eq!(statements[1], Statement::Go(GoStatement { count: None })); +} From f4750f0be9f9c90f70917868b3226ee107fa461f Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 14 Apr 2025 18:16:19 -0400 Subject: [PATCH 03/16] Simplify logic to have `GO` terminate a statement --- src/parser/mod.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4e8bf1145..02d91ba3b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -475,12 +475,6 @@ impl<'a> Parser<'a> { if expecting_statement_delimiter && word.keyword == Keyword::END { break; } - // Treat batch delimiter as an end of statement - if expecting_statement_delimiter && dialect_of!(self is MsSqlDialect) { - if let Some(Statement::Go(GoStatement { count: _ })) = stmts.last() { - expecting_statement_delimiter = false; - } - } } _ => {} } @@ -497,6 +491,8 @@ impl<'a> Parser<'a> { // parsing the statement sequence consumes the statement delimiter, so it shouldn't be expected here ConditionalStatements::Sequence { .. } => false, }, + // Treat batch delimiter as an end of statement, so no additional statement delimiter expected here + Statement::Go(_) => false, _ => true, }; stmts.push(statement); From 3db7fba5ec21750b1d100fb39d4bf6c201d3c76a Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Thu, 17 Apr 2025 13:14:16 -0400 Subject: [PATCH 04/16] Fix not treating `GO` as a statement delimiter next to a query --- src/dialect/mssql.rs | 26 ++++++++++++++++++++++++-- src/parser/mod.rs | 4 ++++ tests/sqlparser_mssql.rs | 30 +++++++++++++++++++++--------- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 31e324f06..be8d2dcad 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -22,7 +22,7 @@ use crate::ast::{ use crate::dialect::Dialect; use crate::keywords::{self, Keyword}; use crate::parser::{Parser, ParserError}; -use crate::tokenizer::Token; +use crate::tokenizer::{Token, Whitespace}; #[cfg(not(feature = "std"))] use alloc::{vec, vec::Vec}; @@ -118,7 +118,29 @@ impl Dialect for MsSqlDialect { true } - fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { + fn is_column_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { + // if: + // - keyword is `GO`, and + // - looking backwards there's only (any) whitespace preceded by a newline + // then: `GO` iSN'T a column alias + if kw == &Keyword::GO { + let mut look_back_count = 2; + loop { + let prev_index = parser.index().saturating_sub(look_back_count); + if prev_index == 0 { + break; + } + let prev_token = parser.token_at(prev_index); + match prev_token.token { + Token::Whitespace(ref w) => match w { + Whitespace::Newline => return false, + _ => look_back_count += 1, + }, + _ => break, + }; + } + } + !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) && !RESERVED_FOR_COLUMN_ALIAS.contains(kw) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 02d91ba3b..3323d5303 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -475,6 +475,10 @@ impl<'a> Parser<'a> { if expecting_statement_delimiter && word.keyword == Keyword::END { break; } + + if expecting_statement_delimiter && word.keyword == Keyword::GO { + expecting_statement_delimiter = false; + } } _ => {} } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index ca5990db4..f636bb8c4 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2170,6 +2170,11 @@ fn parse_mssql_go_keyword() { assert_eq!(stmts.len(), 2); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); + let go_statement_delimiter = "SELECT 1\nGO"; + let stmts = ms().parse_sql_statements(go_statement_delimiter).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + let bare_go = "GO"; let stmts = ms().parse_sql_statements(bare_go).unwrap(); assert_eq!(stmts.len(), 1); @@ -2203,15 +2208,22 @@ fn parse_mssql_go_keyword() { assert_eq!(stmts.len(), 2); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); - let actually_column_alias = "SELECT NULL AS GO"; - let stmt = ms().verified_only_select(actually_column_alias); - assert_eq!( - only(stmt.projection), - SelectItem::ExprWithAlias { - expr: Expr::Value(Value::Null.with_empty_span()), - alias: Ident::new("GO"), + let actually_column_alias = "SELECT NULL GO"; + let stmts = ms().parse_sql_statements(actually_column_alias).unwrap(); + assert_eq!(stmts.len(), 1); + match &stmts[0] { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + assert_eq!( + only(select.clone().projection), + SelectItem::ExprWithAlias { + expr: Expr::Value(Value::Null.with_empty_span()), + alias: Ident::new("GO"), + } + ); } - ); + _ => panic!("Expected Query statement"), + } let invalid_go_position = "SELECT 1; GO"; let err = ms().parse_sql_statements(invalid_go_position); @@ -2224,7 +2236,7 @@ fn parse_mssql_go_keyword() { let err = ms().parse_sql_statements(invalid_go_count); assert_eq!( err.unwrap_err().to_string(), - "sql parser error: Expected: end of statement, found: x" + "sql parser error: Expected: literal int or newline, found: x" ); } From ca0bb28755df98cf85de23b4e00eebd9139e4365 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Thu, 17 Apr 2025 13:41:25 -0400 Subject: [PATCH 05/16] Fix failing to parse `GO` after a comment --- src/parser/mod.rs | 6 ++++++ tests/sqlparser_mssql.rs | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3323d5303..6fc404ce4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15254,6 +15254,12 @@ impl<'a> Parser<'a> { match prev_token.token { Token::Whitespace(ref w) => match w { Whitespace::Newline => break, + Whitespace::SingleLineComment { comment, prefix: _ } => { + if comment.ends_with('\n') { + break; + } + look_back_count += 1; + } _ => look_back_count += 1, }, _ => { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f636bb8c4..34620bc67 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2203,6 +2203,20 @@ fn parse_mssql_go_keyword() { assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); assert_eq!(stmts[3], Statement::Go(GoStatement { count: None })); + let single_line_comment_preceding_go = "USE some_database; -- okay\nGO"; + let stmts = ms() + .parse_sql_statements(single_line_comment_preceding_go) + .unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + + let multi_line_comment_preceding_go = "USE some_database; /* okay */\nGO"; + let stmts = ms() + .parse_sql_statements(multi_line_comment_preceding_go) + .unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + let comment_following_go = "USE some_database;\nGO -- okay"; let stmts = ms().parse_sql_statements(comment_following_go).unwrap(); assert_eq!(stmts.len(), 2); From 94abfb7753dc557962481d2bfd1eb220fc363967 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Fri, 18 Apr 2025 11:18:36 -0400 Subject: [PATCH 06/16] Add additional multi line comment test --- tests/sqlparser_mssql.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 34620bc67..ffce101e1 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2217,11 +2217,18 @@ fn parse_mssql_go_keyword() { assert_eq!(stmts.len(), 2); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); - let comment_following_go = "USE some_database;\nGO -- okay"; - let stmts = ms().parse_sql_statements(comment_following_go).unwrap(); + let single_line_comment_following_go = "USE some_database;\nGO -- okay"; + let stmts = ms().parse_sql_statements(single_line_comment_following_go).unwrap(); assert_eq!(stmts.len(), 2); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + let multi_line_comment_following = "USE some_database;\nGO/* okay */42"; + let stmts = ms() + .parse_sql_statements(multi_line_comment_following) + .unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(42) })); + let actually_column_alias = "SELECT NULL GO"; let stmts = ms().parse_sql_statements(actually_column_alias).unwrap(); assert_eq!(stmts.len(), 1); From cfecd3326958ef8c4e2c5519e3c6d1b9ba3d1f98 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Fri, 18 Apr 2025 12:14:01 -0400 Subject: [PATCH 07/16] Add helper to simplify tests & add canonical string --- src/test_utils.rs | 28 +++++++++++++++++ tests/sqlparser_mssql.rs | 65 +++++++++++++++++++++------------------- 2 files changed, 62 insertions(+), 31 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 6270ac42b..dc91d0b88 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -151,6 +151,8 @@ impl TestedDialects { /// /// 2. re-serializing the result of parsing `sql` produces the same /// `canonical` sql string + /// + /// For multiple statements, use [`multiple_statements_parse_to`]. pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); @@ -166,6 +168,32 @@ impl TestedDialects { only_statement } + /// The same as [`one_statement_parses_to`] but it works for a multiple statements + pub fn multiple_statements_parse_to( + &self, + sql: &str, + statement_count: usize, + canonical: &str, + ) -> Vec { + let statements = self.parse_sql_statements(sql).expect(sql); + assert_eq!(statements.len(), statement_count); + + if !canonical.is_empty() && sql != canonical { + assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements); + } else { + assert_eq!( + sql, + statements + .iter() + .map(|s| s.to_string()) + .collect::>() + .join("; ") + ); + } + + statements + } + /// Ensures that `sql` parses as an [`Expr`], and that /// re-serializing the parse result produces canonical pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index ffce101e1..1c96f1ce6 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2161,28 +2161,27 @@ fn parse_print() { #[test] fn parse_mssql_go_keyword() { let single_go_keyword = "USE some_database;\nGO"; - let stmts = ms().parse_sql_statements(single_go_keyword).unwrap(); - assert_eq!(stmts.len(), 2); - assert_eq!(stmts[1], Statement::Go(GoStatement { count: None }),); + let stmts = ms().multiple_statements_parse_to(single_go_keyword, 2, "USE some_database\nGO"); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let go_with_count = "SELECT 1;\nGO 5"; - let stmts = ms().parse_sql_statements(go_with_count).unwrap(); - assert_eq!(stmts.len(), 2); + let stmts = ms().multiple_statements_parse_to(go_with_count, 2, "SELECT 1\nGO 5"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); let go_statement_delimiter = "SELECT 1\nGO"; - let stmts = ms().parse_sql_statements(go_statement_delimiter).unwrap(); - assert_eq!(stmts.len(), 2); + let stmts = ms().multiple_statements_parse_to(go_statement_delimiter, 2, "SELECT 1; \nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let bare_go = "GO"; - let stmts = ms().parse_sql_statements(bare_go).unwrap(); - assert_eq!(stmts.len(), 1); - assert_eq!(stmts[0], Statement::Go(GoStatement { count: None })); + let stmt = ms().one_statement_parses_to(bare_go, "GO"); + assert_eq!(stmt, Statement::Go(GoStatement { count: None })); let go_then_statements = "/* whitespace */ GO\nRAISERROR('This is a test', 16, 1);"; - let stmts = ms().parse_sql_statements(go_then_statements).unwrap(); - assert_eq!(stmts.len(), 2); + let stmts = ms().multiple_statements_parse_to( + go_then_statements, + 2, + "GO\nRAISERROR('This is a test', 16, 1)", + ); assert_eq!(stmts[0], Statement::Go(GoStatement { count: None })); assert_eq!( stmts[1], @@ -2198,41 +2197,45 @@ fn parse_mssql_go_keyword() { ); let multiple_gos = "SELECT 1;\nGO 5\nSELECT 2;\n GO"; - let stmts = ms().parse_sql_statements(multiple_gos).unwrap(); - assert_eq!(stmts.len(), 4); + let stmts = ms().multiple_statements_parse_to(multiple_gos, 4, "SELECT 1\nGO 5\nSELECT 2\nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); assert_eq!(stmts[3], Statement::Go(GoStatement { count: None })); let single_line_comment_preceding_go = "USE some_database; -- okay\nGO"; - let stmts = ms() - .parse_sql_statements(single_line_comment_preceding_go) - .unwrap(); - assert_eq!(stmts.len(), 2); + let stmts = ms().multiple_statements_parse_to( + single_line_comment_preceding_go, + 2, + "USE some_database\nGO", + ); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let multi_line_comment_preceding_go = "USE some_database; /* okay */\nGO"; - let stmts = ms() - .parse_sql_statements(multi_line_comment_preceding_go) - .unwrap(); - assert_eq!(stmts.len(), 2); + let stmts = ms().multiple_statements_parse_to( + multi_line_comment_preceding_go, + 2, + "USE some_database\nGO", + ); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let single_line_comment_following_go = "USE some_database;\nGO -- okay"; - let stmts = ms().parse_sql_statements(single_line_comment_following_go).unwrap(); - assert_eq!(stmts.len(), 2); + let stmts = ms().multiple_statements_parse_to( + single_line_comment_following_go, + 2, + "USE some_database\nGO", + ); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let multi_line_comment_following = "USE some_database;\nGO/* okay */42"; - let stmts = ms() - .parse_sql_statements(multi_line_comment_following) - .unwrap(); - assert_eq!(stmts.len(), 2); + let stmts = ms().multiple_statements_parse_to( + multi_line_comment_following, + 2, + "USE some_database\nGO 42", + ); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(42) })); let actually_column_alias = "SELECT NULL GO"; - let stmts = ms().parse_sql_statements(actually_column_alias).unwrap(); - assert_eq!(stmts.len(), 1); - match &stmts[0] { + let stmt = ms().one_statement_parses_to(actually_column_alias, "SELECT NULL AS GO"); + match &stmt { Statement::Query(query) => { let select = query.body.as_select().unwrap(); assert_eq!( From 2b282755ebc877d7b604447c77ca1b05f11a74dd Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Fri, 18 Apr 2025 14:21:10 -0400 Subject: [PATCH 08/16] Extract common logic to look back for a newline to a helper --- src/dialect/mssql.rs | 30 ++++++----------- src/parser/mod.rs | 69 +++++++++++++++++++++++----------------- tests/sqlparser_mssql.rs | 2 +- 3 files changed, 49 insertions(+), 52 deletions(-) diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index be8d2dcad..923f4c488 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -22,7 +22,7 @@ use crate::ast::{ use crate::dialect::Dialect; use crate::keywords::{self, Keyword}; use crate::parser::{Parser, ParserError}; -use crate::tokenizer::{Token, Whitespace}; +use crate::tokenizer::Token; #[cfg(not(feature = "std"))] use alloc::{vec, vec::Vec}; @@ -119,26 +119,14 @@ impl Dialect for MsSqlDialect { } fn is_column_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { - // if: - // - keyword is `GO`, and - // - looking backwards there's only (any) whitespace preceded by a newline - // then: `GO` iSN'T a column alias - if kw == &Keyword::GO { - let mut look_back_count = 2; - loop { - let prev_index = parser.index().saturating_sub(look_back_count); - if prev_index == 0 { - break; - } - let prev_token = parser.token_at(prev_index); - match prev_token.token { - Token::Whitespace(ref w) => match w { - Whitespace::Newline => return false, - _ => look_back_count += 1, - }, - _ => break, - }; - } + // if we find maybe whitespace then a newline looking backward, then `GO` ISN'T a column alias + // if we can't find a newline then we assume that `GO` IS a column alias + if kw == &Keyword::GO + && parser + .expect_previously_only_whitespace_until_newline() + .is_ok() + { + return false; } !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) && !RESERVED_FOR_COLUMN_ALIAS.contains(kw) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6fc404ce4..d825d5eb2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4065,6 +4065,44 @@ impl<'a> Parser<'a> { ) } + /// Look backwards in the token stream and expect that there was only whitespace tokens until the previous newline + pub fn expect_previously_only_whitespace_until_newline(&mut self) -> Result<(), ParserError> { + let mut look_back_count = 2; + loop { + let prev_index = self.index.saturating_sub(look_back_count); + if prev_index == 0 { + break; + } + let prev_token = self.token_at(prev_index); + match prev_token.token { + Token::Whitespace(ref w) => match w { + Whitespace::Newline => break, + // special consideration required for single line comments since that string includes the newline + Whitespace::SingleLineComment { comment, prefix: _ } => { + if comment.ends_with('\n') { + break; + } + look_back_count += 1; + } + _ => look_back_count += 1, + }, + _ => { + let current_token = self.get_current_token(); + if prev_token == current_token { + // if we are at the start of the statement, we can skip this check + break; + } + + self.expected( + &format!("newline before current token ({})", current_token), + prev_token.clone(), + )? + } + }; + } + Ok(()) + } + /// If the current token is the `expected` keyword, consume it and returns /// true. Otherwise, no tokens are consumed and returns false. #[must_use] @@ -15242,36 +15280,7 @@ impl<'a> Parser<'a> { /// Parse [Statement::Go] fn parse_go(&mut self) -> Result { - // previous token should be a newline (skipping non-newline whitespace) - // see also, `previous_token` - let mut look_back_count = 2; - loop { - let prev_index = self.index.saturating_sub(look_back_count); - if prev_index == 0 { - break; - } - let prev_token = self.token_at(prev_index); - match prev_token.token { - Token::Whitespace(ref w) => match w { - Whitespace::Newline => break, - Whitespace::SingleLineComment { comment, prefix: _ } => { - if comment.ends_with('\n') { - break; - } - look_back_count += 1; - } - _ => look_back_count += 1, - }, - _ => { - if prev_token == self.get_current_token() { - // if we are at the start of the statement, we can skip this check - break; - } - - self.expected("newline before GO", prev_token.clone())? - } - }; - } + self.expect_previously_only_whitespace_until_newline()?; let count = loop { // using this peek function because we want to halt this statement parsing upon newline diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 1c96f1ce6..6d2d931d1 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2253,7 +2253,7 @@ fn parse_mssql_go_keyword() { let err = ms().parse_sql_statements(invalid_go_position); assert_eq!( err.unwrap_err().to_string(), - "sql parser error: Expected: newline before GO, found: ;" + "sql parser error: Expected: newline before current token (GO), found: ;" ); let invalid_go_count = "SELECT 1\nGO x"; From bc55f6f94dee88b75379fac3c63db062411df5bd Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 21 Apr 2025 13:45:01 -0400 Subject: [PATCH 09/16] Rename `multiple_statements_parse_to` -> `statements_parse_to` --- src/test_utils.rs | 4 ++-- tests/sqlparser_mssql.rs | 38 +++++++++++++------------------------- 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index dc91d0b88..bfd6d3d25 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -152,7 +152,7 @@ impl TestedDialects { /// 2. re-serializing the result of parsing `sql` produces the same /// `canonical` sql string /// - /// For multiple statements, use [`multiple_statements_parse_to`]. + /// For multiple statements, use [`statements_parse_to`]. pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); @@ -169,7 +169,7 @@ impl TestedDialects { } /// The same as [`one_statement_parses_to`] but it works for a multiple statements - pub fn multiple_statements_parse_to( + pub fn statements_parse_to( &self, sql: &str, statement_count: usize, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 6d2d931d1..ef3bae975 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2161,15 +2161,15 @@ fn parse_print() { #[test] fn parse_mssql_go_keyword() { let single_go_keyword = "USE some_database;\nGO"; - let stmts = ms().multiple_statements_parse_to(single_go_keyword, 2, "USE some_database\nGO"); + let stmts = ms().statements_parse_to(single_go_keyword, 2, "USE some_database\nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let go_with_count = "SELECT 1;\nGO 5"; - let stmts = ms().multiple_statements_parse_to(go_with_count, 2, "SELECT 1\nGO 5"); + let stmts = ms().statements_parse_to(go_with_count, 2, "SELECT 1\nGO 5"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); let go_statement_delimiter = "SELECT 1\nGO"; - let stmts = ms().multiple_statements_parse_to(go_statement_delimiter, 2, "SELECT 1; \nGO"); + let stmts = ms().statements_parse_to(go_statement_delimiter, 2, "SELECT 1; \nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let bare_go = "GO"; @@ -2177,7 +2177,7 @@ fn parse_mssql_go_keyword() { assert_eq!(stmt, Statement::Go(GoStatement { count: None })); let go_then_statements = "/* whitespace */ GO\nRAISERROR('This is a test', 16, 1);"; - let stmts = ms().multiple_statements_parse_to( + let stmts = ms().statements_parse_to( go_then_statements, 2, "GO\nRAISERROR('This is a test', 16, 1)", @@ -2197,40 +2197,28 @@ fn parse_mssql_go_keyword() { ); let multiple_gos = "SELECT 1;\nGO 5\nSELECT 2;\n GO"; - let stmts = ms().multiple_statements_parse_to(multiple_gos, 4, "SELECT 1\nGO 5\nSELECT 2\nGO"); + let stmts = ms().statements_parse_to(multiple_gos, 4, "SELECT 1\nGO 5\nSELECT 2\nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); assert_eq!(stmts[3], Statement::Go(GoStatement { count: None })); let single_line_comment_preceding_go = "USE some_database; -- okay\nGO"; - let stmts = ms().multiple_statements_parse_to( - single_line_comment_preceding_go, - 2, - "USE some_database\nGO", - ); + let stmts = + ms().statements_parse_to(single_line_comment_preceding_go, 2, "USE some_database\nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let multi_line_comment_preceding_go = "USE some_database; /* okay */\nGO"; - let stmts = ms().multiple_statements_parse_to( - multi_line_comment_preceding_go, - 2, - "USE some_database\nGO", - ); + let stmts = + ms().statements_parse_to(multi_line_comment_preceding_go, 2, "USE some_database\nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let single_line_comment_following_go = "USE some_database;\nGO -- okay"; - let stmts = ms().multiple_statements_parse_to( - single_line_comment_following_go, - 2, - "USE some_database\nGO", - ); + let stmts = + ms().statements_parse_to(single_line_comment_following_go, 2, "USE some_database\nGO"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); let multi_line_comment_following = "USE some_database;\nGO/* okay */42"; - let stmts = ms().multiple_statements_parse_to( - multi_line_comment_following, - 2, - "USE some_database\nGO 42", - ); + let stmts = + ms().statements_parse_to(multi_line_comment_following, 2, "USE some_database\nGO 42"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(42) })); let actually_column_alias = "SELECT NULL GO"; From d2b15dfc5f64e48f71ac3bf52dc3d5b2deb5aafd Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 21 Apr 2025 13:46:05 -0400 Subject: [PATCH 10/16] Introduce `peek_prev_nth_token_no_skip` helper - and use it for `expect_previously_only_whitespace_until_newline` so that can be simplified accordingly --- src/parser/mod.rs | 79 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d825d5eb2..826eeb1be 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3949,6 +3949,26 @@ impl<'a> Parser<'a> { }) } + /// Return nth previous token, possibly whitespace + /// (or [`Token::EOF`] when before the beginning of the stream). + pub fn peek_prev_nth_token_no_skip(&self, n: usize) -> TokenWithSpan { + // 0 = next token, -1 = current token, -2 = previous token + let peek_index = self.index.saturating_sub(1).saturating_sub(n); + if peek_index == 0 { + return TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }; + } + self.tokens + .get(peek_index) + .cloned() + .unwrap_or(TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }) + } + /// Return true if the next tokens exactly `expected` /// /// Does not advance the current token. @@ -4065,16 +4085,15 @@ impl<'a> Parser<'a> { ) } - /// Look backwards in the token stream and expect that there was only whitespace tokens until the previous newline - pub fn expect_previously_only_whitespace_until_newline(&mut self) -> Result<(), ParserError> { - let mut look_back_count = 2; + /// Look backwards in the token stream and expect that there was only whitespace tokens until the previous newline or beginning of string + pub(crate) fn expect_previously_only_whitespace_until_newline( + &mut self, + ) -> Result<(), ParserError> { + let mut look_back_count = 1; loop { - let prev_index = self.index.saturating_sub(look_back_count); - if prev_index == 0 { - break; - } - let prev_token = self.token_at(prev_index); + let prev_token = self.peek_prev_nth_token_no_skip(look_back_count); match prev_token.token { + Token::EOF => break, Token::Whitespace(ref w) => match w { Whitespace::Newline => break, // special consideration required for single line comments since that string includes the newline @@ -4086,18 +4105,13 @@ impl<'a> Parser<'a> { } _ => look_back_count += 1, }, - _ => { - let current_token = self.get_current_token(); - if prev_token == current_token { - // if we are at the start of the statement, we can skip this check - break; - } - - self.expected( - &format!("newline before current token ({})", current_token), - prev_token.clone(), - )? - } + _ => self.expected( + &format!( + "newline before current token ({})", + self.get_current_token() + ), + prev_token.clone(), + )?, }; } Ok(()) @@ -15540,6 +15554,31 @@ mod tests { }) } + #[test] + fn test_peek_prev_nth_token_no_skip() { + all_dialects().run_parser_method( + "SELECT 1;\n-- a comment\nRAISERROR('test', 16, 0);", + |parser| { + parser.index = 1; + assert_eq!(parser.peek_prev_nth_token_no_skip(0), Token::EOF); + assert_eq!(parser.index, 1); + parser.index = 7; + assert_eq!( + parser.token_at(parser.index - 1).token, + Token::Word(Word { + value: "RAISERROR".to_string(), + quote_style: None, + keyword: Keyword::RAISERROR, + }) + ); + assert_eq!( + parser.peek_prev_nth_token_no_skip(2), + Token::Whitespace(Whitespace::Newline) + ); + }, + ); + } + #[cfg(test)] mod test_parse_data_type { use crate::ast::{ From 827dfe7c363d06ce19f8d5ff2b1bfc5459ea403e Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Mon, 21 Apr 2025 14:03:07 -0400 Subject: [PATCH 11/16] Add clarification for whitespace sensitivity parsing `GO` --- src/parser/mod.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 826eeb1be..ecd2b988c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15294,6 +15294,16 @@ impl<'a> Parser<'a> { /// Parse [Statement::Go] fn parse_go(&mut self) -> Result { + // disambiguate between GO as batch delimiter & GO as identifier (etc) + // compare: + // ```sql + // select 1 go + // ``` + // vs + // ```sql + // select 1 + // go + // ``` self.expect_previously_only_whitespace_until_newline()?; let count = loop { From 1c01035d3401c06f3117c8508cbd7b50feebdc0c Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Wed, 23 Apr 2025 17:29:01 -0400 Subject: [PATCH 12/16] Fix `GO` failing to find following newline --- src/parser/mod.rs | 26 +++++++++++++++++++++----- tests/sqlparser_mssql.rs | 13 +++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ecd2b988c..1e578fa85 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15324,11 +15324,27 @@ impl<'a> Parser<'a> { }; }; - if self.peek_token().token == Token::SemiColon { - parser_err!( - "GO may not end with a semicolon", - self.peek_token().span.start - )?; + loop { + let next_token = self.peek_token_no_skip(); + match next_token.token { + Token::EOF => break, + Token::Whitespace(ref w) => match w { + Whitespace::Newline => break, + Whitespace::SingleLineComment { comment, prefix: _ } => { + if comment.ends_with('\n') { + break; + } + _ = self.next_token_no_skip(); + } + _ => _ = self.next_token_no_skip(), + }, + _ => { + parser_err!( + "GO must be followed by a newline or EOF", + self.peek_token().span.start + )?; + } + }; } Ok(Statement::Go(GoStatement { count })) diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index ef3bae975..7ddd17619 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2221,6 +2221,12 @@ fn parse_mssql_go_keyword() { ms().statements_parse_to(multi_line_comment_following, 2, "USE some_database\nGO 42"); assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(42) })); + let cte_following_go = + "USE some_database;\nGO\n;WITH cte AS (\nSELECT 1 x\n)\nSELECT * FROM cte;"; + let stmts = ms().parse_sql_statements(cte_following_go).unwrap(); + assert_eq!(stmts.len(), 3); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + let actually_column_alias = "SELECT NULL GO"; let stmt = ms().one_statement_parses_to(actually_column_alias, "SELECT NULL AS GO"); match &stmt { @@ -2250,6 +2256,13 @@ fn parse_mssql_go_keyword() { err.unwrap_err().to_string(), "sql parser error: Expected: literal int or newline, found: x" ); + + let invalid_go_delimiter = "SELECT 1\nGO;"; + let err = ms().parse_sql_statements(invalid_go_delimiter); + assert_eq!( + err.unwrap_err().to_string(), + "sql parser error: Expected: literal int or newline, found: ;" + ); } #[test] From b46fefc4e548dca995db3d8d124c30631ce76c66 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Wed, 23 Apr 2025 17:30:18 -0400 Subject: [PATCH 13/16] Make `peek_prev_nth_token_no_skip` -> `_ref`, reduce visibility to crate --- src/parser/mod.rs | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1e578fa85..0596c22bf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3951,22 +3951,13 @@ impl<'a> Parser<'a> { /// Return nth previous token, possibly whitespace /// (or [`Token::EOF`] when before the beginning of the stream). - pub fn peek_prev_nth_token_no_skip(&self, n: usize) -> TokenWithSpan { + pub(crate) fn peek_prev_nth_token_no_skip_ref(&self, n: usize) -> &TokenWithSpan { // 0 = next token, -1 = current token, -2 = previous token let peek_index = self.index.saturating_sub(1).saturating_sub(n); if peek_index == 0 { - return TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }; + return &EOF_TOKEN; } - self.tokens - .get(peek_index) - .cloned() - .unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }) + self.tokens.get(peek_index).unwrap_or(&EOF_TOKEN) } /// Return true if the next tokens exactly `expected` @@ -4091,7 +4082,7 @@ impl<'a> Parser<'a> { ) -> Result<(), ParserError> { let mut look_back_count = 1; loop { - let prev_token = self.peek_prev_nth_token_no_skip(look_back_count); + let prev_token = self.peek_prev_nth_token_no_skip_ref(look_back_count); match prev_token.token { Token::EOF => break, Token::Whitespace(ref w) => match w { @@ -15581,12 +15572,12 @@ mod tests { } #[test] - fn test_peek_prev_nth_token_no_skip() { + fn test_peek_prev_nth_token_no_skip_ref() { all_dialects().run_parser_method( "SELECT 1;\n-- a comment\nRAISERROR('test', 16, 0);", |parser| { parser.index = 1; - assert_eq!(parser.peek_prev_nth_token_no_skip(0), Token::EOF); + assert_eq!(parser.peek_prev_nth_token_no_skip_ref(0), &Token::EOF); assert_eq!(parser.index, 1); parser.index = 7; assert_eq!( @@ -15598,8 +15589,8 @@ mod tests { }) ); assert_eq!( - parser.peek_prev_nth_token_no_skip(2), - Token::Whitespace(Whitespace::Newline) + parser.peek_prev_nth_token_no_skip_ref(2), + &Token::Whitespace(Whitespace::Newline) ); }, ); From 7863beff0087994d3c36b54f2a29caeea10b5932 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Wed, 23 Apr 2025 17:56:32 -0400 Subject: [PATCH 14/16] Refactor `expect_previously_only_whitespace_until_newline` to return a bool - in `parse_go` we don't have a direct way to find what that previous non-whitespace token was, so the error message is adjusted accordingly --- src/dialect/mssql.rs | 6 +----- src/parser/mod.rs | 26 +++++++++++--------------- tests/sqlparser_mssql.rs | 2 +- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 923f4c488..3a0861f21 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -121,11 +121,7 @@ impl Dialect for MsSqlDialect { fn is_column_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { // if we find maybe whitespace then a newline looking backward, then `GO` ISN'T a column alias // if we can't find a newline then we assume that `GO` IS a column alias - if kw == &Keyword::GO - && parser - .expect_previously_only_whitespace_until_newline() - .is_ok() - { + if kw == &Keyword::GO && parser.prev_only_whitespace_until_newline() { return false; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0596c22bf..5b69b2fd7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4077,35 +4077,26 @@ impl<'a> Parser<'a> { } /// Look backwards in the token stream and expect that there was only whitespace tokens until the previous newline or beginning of string - pub(crate) fn expect_previously_only_whitespace_until_newline( - &mut self, - ) -> Result<(), ParserError> { + pub(crate) fn prev_only_whitespace_until_newline(&mut self) -> bool { let mut look_back_count = 1; loop { let prev_token = self.peek_prev_nth_token_no_skip_ref(look_back_count); match prev_token.token { - Token::EOF => break, + Token::EOF => break true, Token::Whitespace(ref w) => match w { - Whitespace::Newline => break, + Whitespace::Newline => break true, // special consideration required for single line comments since that string includes the newline Whitespace::SingleLineComment { comment, prefix: _ } => { if comment.ends_with('\n') { - break; + break true; } look_back_count += 1; } _ => look_back_count += 1, }, - _ => self.expected( - &format!( - "newline before current token ({})", - self.get_current_token() - ), - prev_token.clone(), - )?, + _ => break false, }; } - Ok(()) } /// If the current token is the `expected` keyword, consume it and returns @@ -15295,7 +15286,12 @@ impl<'a> Parser<'a> { // select 1 // go // ``` - self.expect_previously_only_whitespace_until_newline()?; + if !self.prev_only_whitespace_until_newline() { + parser_err!( + "GO may only be preceded by whitespace on a line", + self.peek_token().span.start + )?; + } let count = loop { // using this peek function because we want to halt this statement parsing upon newline diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 7ddd17619..7730695b8 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2247,7 +2247,7 @@ fn parse_mssql_go_keyword() { let err = ms().parse_sql_statements(invalid_go_position); assert_eq!( err.unwrap_err().to_string(), - "sql parser error: Expected: newline before current token (GO), found: ;" + "sql parser error: GO may only be preceded by whitespace on a line" ); let invalid_go_count = "SELECT 1\nGO x"; From 6726d421bfcd704036d72a4a7a89df2d47d90d39 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Wed, 23 Apr 2025 17:58:23 -0400 Subject: [PATCH 15/16] Rewind the current token before `parse_go` --- src/parser/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5b69b2fd7..923f6d745 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -627,7 +627,10 @@ impl<'a> Parser<'a> { Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(), Keyword::PRINT => self.parse_print(), Keyword::RETURN => self.parse_return(), - Keyword::GO => self.parse_go(), + Keyword::GO => { + self.prev_token(); + self.parse_go() + } _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -15276,6 +15279,8 @@ impl<'a> Parser<'a> { /// Parse [Statement::Go] fn parse_go(&mut self) -> Result { + self.expect_keyword_is(Keyword::GO)?; + // disambiguate between GO as batch delimiter & GO as identifier (etc) // compare: // ```sql From fb41bfc6a52e4563f860040a7d7be5bfa78b2f43 Mon Sep 17 00:00:00 2001 From: Andrew Harper Date: Wed, 23 Apr 2025 18:11:14 -0400 Subject: [PATCH 16/16] Revert unnecessary conditional logic --- src/parser/mod.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 923f6d745..e33dfa2bf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -488,17 +488,8 @@ impl<'a> Parser<'a> { } let statement = self.parse_statement()?; - expecting_statement_delimiter = match &statement { - Statement::If(s) => match &s.if_block.conditional_statements { - // the `END` keyword doesn't need to be followed by a statement delimiter, so it shouldn't be expected here - ConditionalStatements::BeginEnd { .. } => false, - // parsing the statement sequence consumes the statement delimiter, so it shouldn't be expected here - ConditionalStatements::Sequence { .. } => false, - }, - // Treat batch delimiter as an end of statement, so no additional statement delimiter expected here - Statement::Go(_) => false, - _ => true, - }; + // Treat batch delimiter as an end of statement, so no additional statement delimiter expected here + expecting_statement_delimiter = !matches!(statement, Statement::Go(_)); stmts.push(statement); } Ok(stmts)