Skip to content

Add support for GO batch delimiter in SQL Server #1809

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4097,6 +4097,12 @@ pub enum Statement {
///
/// See [ReturnStatement]
Return(ReturnStatement),
/// Go (MsSql)
///
/// GO is not a Transact-SQL statement; it is a command recognized by various tools as a batch delimiter
///
/// See: <https://learn.microsoft.com/en-us/sql/t-sql/language-elements/sql-server-utilities-statements-go>
Go(GoStatement),
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
Expand Down Expand Up @@ -5791,6 +5797,7 @@ impl fmt::Display for Statement {
Ok(())
}
Statement::Print(s) => write!(f, "{s}"),
Statement::Go(s) => write!(f, "{s}"),
Statement::Return(r) => write!(f, "{r}"),
Statement::List(command) => write!(f, "LIST {command}"),
Statement::Remove(command) => write!(f, "REMOVE {command}"),
Expand Down Expand Up @@ -9315,6 +9322,26 @@ pub enum ReturnStatementValue {
Expr(Expr),
}

/// A `GO` batch delimiter, optionally carrying the integer written after it.
///
/// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/language-elements/sql-server-utilities-statements-go)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct GoStatement {
    /// The integer that followed `GO`, if any (`GO <count>`); see the linked
    /// documentation for how tools interpret it.
    pub count: Option<u64>,
}

impl Display for GoStatement {
    /// Renders as `GO <count>` when a count is present and as a bare `GO` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.count {
            Some(n) => write!(f, "GO {n}"),
            None => f.write_str("GO"),
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
1 change: 1 addition & 0 deletions src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@ impl Spanned for Statement {
Statement::RaisError { .. } => Span::empty(),
Statement::Print { .. } => Span::empty(),
Statement::Return { .. } => Span::empty(),
Statement::Go { .. } => Span::empty(),
Statement::List(..) | Statement::Remove(..) => Span::empty(),
}
}
Expand Down
8 changes: 7 additions & 1 deletion src/dialect/mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,13 @@ impl Dialect for MsSqlDialect {
true
}

fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
fn is_column_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool {
    // `GO` is rejected as a column alias when looking backward finds only (optional)
    // whitespace up to a newline. When no such newline is found, `GO` falls through to
    // the ordinary reserved-keyword check below.
    let go_starts_its_line =
        *kw == Keyword::GO && parser.prev_only_whitespace_until_newline();
    if go_starts_its_line {
        return false;
    }

    // A keyword may serve as a column alias only if neither the shared list nor this
    // dialect's own list reserves it.
    let is_reserved = keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
        || RESERVED_FOR_COLUMN_ALIAS.contains(kw);
    !is_reserved
}

Expand Down
1 change: 1 addition & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ define_keywords!(
GIN,
GIST,
GLOBAL,
GO,
GRANT,
GRANTED,
GRANTS,
Expand Down
135 changes: 134 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,10 @@ impl<'a> Parser<'a> {
if expecting_statement_delimiter && word.keyword == Keyword::END {
break;
}

if expecting_statement_delimiter && word.keyword == Keyword::GO {
expecting_statement_delimiter = false;
}
}
_ => {}
}
Expand All @@ -484,8 +488,9 @@ impl<'a> Parser<'a> {
}

let statement = self.parse_statement()?;
// Treat batch delimiter as an end of statement, so no additional statement delimiter expected here
expecting_statement_delimiter = !matches!(statement, Statement::Go(_));
stmts.push(statement);
expecting_statement_delimiter = true;
}
Ok(stmts)
}
Expand Down Expand Up @@ -613,6 +618,10 @@ impl<'a> Parser<'a> {
Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(),
Keyword::PRINT => self.parse_print(),
Keyword::RETURN => self.parse_return(),
Keyword::GO => {
self.prev_token();
self.parse_go()
}
_ => self.expected("an SQL statement", next_token),
},
Token::LParen => {
Expand Down Expand Up @@ -3934,6 +3943,17 @@ impl<'a> Parser<'a> {
})
}

/// Return the token `n` positions before the current one, without skipping whitespace
/// (or [`Token::EOF`] when that position is before the beginning of the stream).
///
/// `n == 0` addresses the current token (`self.index - 1`), `n == 1` the token before
/// it, and so on. The parser position is not changed.
///
/// NOTE(review): the subtraction saturates at zero and a resulting index of `0` is
/// always reported as EOF, so the first token of the stream is never returned by this
/// method. Confirm that callers rely on this.
pub(crate) fn peek_prev_nth_token_no_skip_ref(&self, n: usize) -> &TokenWithSpan {
    match self.index.saturating_sub(1).saturating_sub(n) {
        0 => &EOF_TOKEN,
        idx => self.tokens.get(idx).unwrap_or(&EOF_TOKEN),
    }
}

/// Return true if the next tokens exactly `expected`
///
/// Does not advance the current token.
Expand Down Expand Up @@ -4050,6 +4070,29 @@ impl<'a> Parser<'a> {
)
}

/// Look backwards in the token stream and report whether only whitespace tokens lie
/// between the current token and the previous newline (or the beginning of the stream).
///
/// Returns `true` when the backward scan reaches a newline, a single-line comment whose
/// text ends with `\n`, or [`Token::EOF`] before meeting any non-whitespace token, and
/// `false` as soon as a non-whitespace token is met. Only peeking is performed, so the
/// parser position is left unchanged.
pub(crate) fn prev_only_whitespace_until_newline(&mut self) -> bool {
// Start at 1 so that the scan begins with the token just before the current one.
let mut look_back_count = 1;
loop {
let prev_token = self.peek_prev_nth_token_no_skip_ref(look_back_count);
match prev_token.token {
Token::EOF => break true,
Token::Whitespace(ref w) => match w {
Whitespace::Newline => break true,
// special consideration required for single line comments since that string includes the newline
Whitespace::SingleLineComment { comment, prefix: _ } => {
if comment.ends_with('\n') {
Comment on lines +4082 to +4084
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

double checking: is there a scenario where a single line comment doesn't end with a new line? spontaneously sounds like that should always hold true so that the manual newline check would not be required

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

double checking: is there a scenario where a single line comment doesn't end with a new line? spontaneously sounds like that should always hold true so that the manual newline check would not be required

I actually don't know, I was surprised to find that the newline is actually part of the comment text in this library. It seemed prudent to be defensive in this new code, since that comment parsing behavior maybe isn't particularly intentional.

break true;
}
look_back_count += 1;
}
_ => look_back_count += 1,
},
_ => break false,
};
}
}

/// If the current token is the `expected` keyword, consume it and returns
/// true. Otherwise, no tokens are consumed and returns false.
#[must_use]
Expand Down Expand Up @@ -15225,6 +15268,71 @@ impl<'a> Parser<'a> {
}
}

/// Parse [Statement::Go]: the `GO` keyword, optionally followed on the same line by an
/// integer count.
///
/// # Errors
///
/// Returns a [`ParserError`] when `GO` is not preceded solely by whitespace on its line,
/// when the first non-whitespace token after `GO` is neither a number nor the end of
/// input, when the count does not parse as a `u64`, or when anything other than
/// whitespace follows before the next newline or end of input.
fn parse_go(&mut self) -> Result<Statement, ParserError> {
self.expect_keyword_is(Keyword::GO)?;

// disambiguate between GO as batch delimiter & GO as identifier (etc)
// compare:
// ```sql
// select 1 go
// ```
// vs
// ```sql
// select 1
// go
// ```
if !self.prev_only_whitespace_until_newline() {
parser_err!(
"GO may only be preceded by whitespace on a line",
self.peek_token().span.start
)?;
}

// Scan the rest of the line for an optional count; `None` means a newline or the end
// of input was reached before any number.
// NOTE(review): the catch-all whitespace arm below also consumes
// `Whitespace::SingleLineComment`, whereas the trailing loop further down treats a
// comment ending in `\n` as the end of the line. A `GO -- comment` line therefore
// appears to let this scan continue onto the following line. Confirm whether that is
// intended.
let count = loop {
// using this peek function because we want to halt this statement parsing upon newline
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we include the example you had in the comment earlier, explicitly highlighting why this statement is special? I think otherwise it would not be obvious to folks that come across the code later on why the current code is a special case

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I can add that comment. However there is also test coverage for this behavior, so it should be protected from future refactoring

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done 👍

let next_token = self.peek_token_no_skip();
match next_token.token {
Token::EOF => break None::<u64>,
Token::Whitespace(ref w) => match w {
Whitespace::Newline => break None,
_ => _ = self.next_token_no_skip(),
},
Token::Number(s, _) => {
let value = Some(Self::parse::<u64>(s, next_token.span.start)?);
self.advance_token();
break value;
}
_ => self.expected("literal int or newline", next_token)?,
};
};

// Consume trailing whitespace up to (but not including) the end of the line; any
// other token on the same line is an error.
loop {
let next_token = self.peek_token_no_skip();
match next_token.token {
Token::EOF => break,
Token::Whitespace(ref w) => match w {
Whitespace::Newline => break,
Whitespace::SingleLineComment { comment, prefix: _ } => {
if comment.ends_with('\n') {
break;
}
_ = self.next_token_no_skip();
}
_ => _ = self.next_token_no_skip(),
},
_ => {
parser_err!(
"GO must be followed by a newline or EOF",
self.peek_token().span.start
)?;
}
};
}

Ok(Statement::Go(GoStatement { count }))
}

/// Consume the parser and return its underlying token buffer
pub fn into_tokens(self) -> Vec<TokenWithSpan> {
self.tokens
Expand Down Expand Up @@ -15455,6 +15563,31 @@ mod tests {
})
}

/// Exercises `peek_prev_nth_token_no_skip_ref` at the start of the stream and across a
/// newline / single-line comment boundary.
#[test]
fn test_peek_prev_nth_token_no_skip_ref() {
    let sql = "SELECT 1;\n-- a comment\nRAISERROR('test', 16, 0);";
    all_dialects().run_parser_method(sql, |p| {
        // With the cursor at 1 the look-back lands on index 0, which is reported as EOF.
        p.index = 1;
        assert_eq!(p.peek_prev_nth_token_no_skip_ref(0), &Token::EOF);
        // Peeking must leave the cursor where it was.
        assert_eq!(p.index, 1);

        // Position the cursor so that the current token is `RAISERROR`.
        p.index = 7;
        let expected_current = Token::Word(Word {
            value: "RAISERROR".to_string(),
            quote_style: None,
            keyword: Keyword::RAISERROR,
        });
        assert_eq!(p.token_at(p.index - 1).token, expected_current);

        // Two tokens further back is the newline that ends the `SELECT 1;` line.
        assert_eq!(
            p.peek_prev_nth_token_no_skip_ref(2),
            &Token::Whitespace(Whitespace::Newline)
        );
    });
}

#[cfg(test)]
mod test_parse_data_type {
use crate::ast::{
Expand Down
28 changes: 28 additions & 0 deletions src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ impl TestedDialects {
///
/// 2. re-serializing the result of parsing `sql` produces the same
/// `canonical` sql string
///
/// For multiple statements, use [`statements_parse_to`].
pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement {
let mut statements = self.parse_sql_statements(sql).expect(sql);
assert_eq!(statements.len(), 1);
Expand All @@ -166,6 +168,32 @@ impl TestedDialects {
only_statement
}

/// Counterpart of [`Self::one_statement_parses_to`] for SQL containing multiple statements.
///
/// Asserts that `sql` parses into exactly `statement_count` statements, then:
///
/// 1. if `canonical` is non-empty and differs from `sql`, asserts that parsing
///    `canonical` yields the same statements;
/// 2. otherwise, asserts that re-serializing the statements and joining them with
///    `"; "` reproduces `sql`.
///
/// Returns the parsed statements.
pub fn statements_parse_to(
    &self,
    sql: &str,
    statement_count: usize,
    canonical: &str,
) -> Vec<Statement> {
    let parsed = self.parse_sql_statements(sql).expect(sql);
    assert_eq!(parsed.len(), statement_count);

    let has_distinct_canonical = !canonical.is_empty() && sql != canonical;
    if has_distinct_canonical {
        let reparsed = self.parse_sql_statements(canonical).unwrap();
        assert_eq!(reparsed, parsed);
    } else {
        let rendered = parsed
            .iter()
            .map(|stmt| stmt.to_string())
            .collect::<Vec<_>>();
        assert_eq!(sql, rendered.join("; "));
    }

    parsed
}

/// Ensures that `sql` parses as an [`Expr`], and that
/// re-serializing the parse result produces canonical
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
Expand Down
Loading