diff --git a/CHANGELOG.md b/CHANGELOG.md index 23b2fdde1..d552e942f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), Given that the parser produces a typed AST, any changes to the AST will technically be breaking and thus will result in a `0.(N+1)` version. We document changes that break via addition as "Added". ## [Unreleased] -Nothing here yet! Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes. +Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes. +### Changed +- Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit! +- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem! + +### Added +- Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo! +- Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson! +- Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno! + +### Fixed +- Report an error for unterminated string literals (#165) -## [0.5.0] - 2019-10-10 +## [0.5.0] - 2019-10-10 ### Changed - Replace the `Value::Long(u64)` and `Value::Double(f64)` variants with `Value::Number(String)` to avoid losing precision when parsing decimal literals (#130) - thanks @benesch! diff --git a/examples/cli.rs b/examples/cli.rs index d0ba9a578..917629e43 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -12,8 +12,6 @@ #![warn(clippy::all)] -use simple_logger; - ///! A small command-line app to run the parser.
/// Run with `cargo run --example cli` use std::fs; diff --git a/src/parser.rs b/src/parser.rs index 0939b7e39..7c6a401d5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1324,6 +1324,16 @@ impl Parser { } // MSSQL supports single-quoted strings as aliases for columns // We accept them as table aliases too, although MSSQL does not. + // + // Note, that this conflicts with an obscure rule from the SQL + // standard, which we don't implement: + // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s + // "[Obscure Rule] SQL allows you to break a long <character + // string literal> up into two or more smaller <character string + // literal>s, split by a <separator> that includes a newline + // character. When it sees such a <literal>, your DBMS will + // ignore the <separator> and treat the multiple strings as + // a single <literal>." Some(Token::SingleQuotedString(ref s)) => Ok(Some(Ident::with_quote('\'', s.clone()))), not_an_ident => { if after_as { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 96c9535ea..86452a445 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -278,7 +278,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // N'...' - a <national string literal> - let s = self.tokenize_single_quoted_string(chars); + let s = self.tokenize_single_quoted_string(chars)?; Ok(Some(Token::NationalStringLiteral(s))) } _ => { @@ -295,7 +295,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // X'...' - a <hexadecimal string literal> - let s = self.tokenize_single_quoted_string(chars); + let s = self.tokenize_single_quoted_string(chars)?; Ok(Some(Token::HexStringLiteral(s))) } _ => { @@ -313,7 +313,7 @@ impl<'a> Tokenizer<'a> { } // string '\'' => { - let s = self.tokenize_single_quoted_string(chars); + let s = self.tokenize_single_quoted_string(chars)?; Ok(Some(Token::SingleQuotedString(s))) } // delimited (quoted) identifier @@ -431,11 +431,10 @@ impl<'a> Tokenizer<'a> { } /// Read a single quoted string, starting with the opening quote.
- fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars<'_>>) -> String { - //TODO: handle escaped quotes in string - //TODO: handle newlines in string - //TODO: handle EOF before terminating quote - //TODO: handle 'string' 'string continuation' + fn tokenize_single_quoted_string( + &self, + chars: &mut Peekable<Chars<'_>>, + ) -> Result<String, TokenizerError> { let mut s = String::new(); chars.next(); // consume the opening quote while let Some(&ch) = chars.peek() { @@ -447,7 +446,7 @@ impl<'a> Tokenizer<'a> { s.push('\''); chars.next(); } else { - break; + return Ok(s); } } _ => { @@ -456,7 +455,10 @@ impl<'a> Tokenizer<'a> { } } } - s + Err(TokenizerError(format!( + "Unterminated string literal at Line: {}, Col: {}", + self.line, self.col + ))) } fn tokenize_multiline_comment( @@ -640,6 +642,31 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_newline_in_string_literal() { + let sql = String::from("'foo\r\nbar\nbaz'"); + + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; + compare(expected, tokens); + } + + #[test] + fn tokenize_unterminated_string_literal() { + let sql = String::from("select 'foo"); + + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + assert_eq!( + tokenizer.tokenize(), + Err(TokenizerError( + "Unterminated string literal at Line: 1, Col: 8".to_string() + )) + ); + } + #[test] fn tokenize_invalid_string_cols() { let sql = String::from("\n\nSELECT * FROM table\tمصطفىh"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c62fc86d7..41ceeae54 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1031,7 +1031,7 @@ fn parse_create_external_table() { name VARCHAR(100) NOT NULL,\ lat DOUBLE NULL,\ lng DOUBLE)\ - STORED AS TEXTFILE LOCATION '/tmp/example.csv"; + STORED AS TEXTFILE LOCATION '/tmp/example.csv'"; let ast =
one_statement_parses_to( sql, "CREATE EXTERNAL TABLE uk_cities (\