diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index a2c28c810..03a64ed58 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -2977,7 +2977,12 @@ impl fmt::Display for Statement {
                     Some(HiveRowFormat::SERDE { class }) => {
                         write!(f, " ROW FORMAT SERDE '{class}'")?
                     }
-                    Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?,
+                    Some(HiveRowFormat::DELIMITED { delimiters }) => {
+                        write!(f, " ROW FORMAT DELIMITED")?;
+                        if !delimiters.is_empty() {
+                            write!(f, " {}", display_separated(delimiters, " "))?;
+                        }
+                    }
                     None => (),
                 }
                 match storage {
@@ -4566,7 +4571,58 @@ pub enum HiveDistributionStyle {
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
 pub enum HiveRowFormat {
     SERDE { class: String },
-    DELIMITED,
+    DELIMITED { delimiters: Vec<HiveRowDelimiter> },
+}
+
+/// One delimiter clause of `ROW FORMAT DELIMITED`: the clause kind plus its value.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct HiveRowDelimiter {
+    /// Which clause this delimiter belongs to.
+    pub delimiter: HiveDelimiter,
+    /// The value that follows the clause keywords.
+    pub char: Ident,
+}
+
+impl fmt::Display for HiveRowDelimiter {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{} ", self.delimiter)?;
+        write!(f, "{}", self.char)
+    }
+}
+
+/// The kind of a `ROW FORMAT DELIMITED` clause; `Display` writes its keyword sequence.
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum HiveDelimiter {
+    /// `FIELDS TERMINATED BY`
+    FieldsTerminatedBy,
+    /// `ESCAPED BY`
+    FieldsEscapedBy,
+    /// `COLLECTION ITEMS TERMINATED BY`
+    CollectionItemsTerminatedBy,
+    /// `MAP KEYS TERMINATED BY`
+    MapKeysTerminatedBy,
+    /// `LINES TERMINATED BY`
+    LinesTerminatedBy,
+    /// `NULL DEFINED AS`
+    NullDefinedAs,
+}
+
+impl fmt::Display for HiveDelimiter {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use HiveDelimiter::*;
+        f.write_str(match self {
+            FieldsTerminatedBy => "FIELDS TERMINATED BY",
+            FieldsEscapedBy => "ESCAPED BY",
+            CollectionItemsTerminatedBy => "COLLECTION ITEMS TERMINATED BY",
+            MapKeysTerminatedBy => "MAP KEYS TERMINATED BY",
+            LinesTerminatedBy => "LINES TERMINATED BY",
+            NullDefinedAs => "NULL DEFINED AS",
+        })
+    }
 }
 
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
diff --git a/src/keywords.rs b/src/keywords.rs
index f14b92b76..6c47bf4cc 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -153,6 +153,7 @@ define_keywords!(
     COLLATE,
     COLLATION,
     COLLECT,
+    COLLECTION,
     COLUMN,
     COLUMNS,
     COMMENT,
@@ -212,6 +213,7 @@ define_keywords!(
     DEFAULT,
     DEFERRABLE,
     DEFERRED,
+    DEFINED,
     DELAYED,
     DELETE,
     DELIMITED,
@@ -258,6 +260,7 @@ define_keywords!(
     EQUALS,
     ERROR,
     ESCAPE,
+    ESCAPED,
     EVENT,
     EVERY,
     EXCEPT,
@@ -366,6 +369,7 @@ define_keywords!(
     ISOLATION,
     ISOWEEK,
     ISOYEAR,
+    ITEMS,
     JAR,
     JOIN,
     JSON,
@@ -374,6 +378,7 @@ define_keywords!(
     JSON_TABLE,
     JULIAN,
     KEY,
+    KEYS,
     KILL,
     LAG,
     LANGUAGE,
@@ -388,6 +393,7 @@ define_keywords!(
     LIKE,
     LIKE_REGEX,
     LIMIT,
+    LINES,
     LISTAGG,
     LN,
     LOCAL,
@@ -402,6 +408,7 @@ define_keywords!(
     LOW_PRIORITY,
     MACRO,
     MANAGEDLOCATION,
+    MAP,
     MATCH,
     MATCHED,
     MATERIALIZED,
@@ -648,6 +655,7 @@ define_keywords!(
     TBLPROPERTIES,
     TEMP,
     TEMPORARY,
+    TERMINATED,
     TEXT,
     TEXTFILE,
     THEN,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 36ac2fd28..b63346ff8 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -4128,7 +4128,99 @@ impl<'a> Parser<'a> {
                 let class = self.parse_literal_string()?;
                 Ok(HiveRowFormat::SERDE { class })
             }
-            _ => Ok(HiveRowFormat::DELIMITED),
+            _ => {
+                let mut row_delimiters = vec![];
+
+                // Collect delimiter clauses in source order until the next token is
+                // not one of the leading keywords listed below.
+                loop {
+                    match self.parse_one_of_keywords(&[
+                        Keyword::FIELDS,
+                        Keyword::COLLECTION,
+                        Keyword::MAP,
+                        Keyword::LINES,
+                        Keyword::NULL,
+                    ]) {
+                        Some(Keyword::FIELDS) => {
+                            if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) {
+                                row_delimiters.push(HiveRowDelimiter {
+                                    delimiter: HiveDelimiter::FieldsTerminatedBy,
+                                    char: self.parse_identifier(false)?,
+                                });
+
+                                // `ESCAPED BY` is only looked for directly after
+                                // `FIELDS TERMINATED BY <char>`.
+                                if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) {
+                                    row_delimiters.push(HiveRowDelimiter {
+                                        delimiter: HiveDelimiter::FieldsEscapedBy,
+                                        char: self.parse_identifier(false)?,
+                                    });
+                                }
+                            } else {
+                                // NOTE(review): `FIELDS` has presumably been consumed
+                                // already; confirm that stopping without an error is
+                                // intended (likewise in the branches below).
+                                break;
+                            }
+                        }
+                        Some(Keyword::COLLECTION) => {
+                            if self.parse_keywords(&[
+                                Keyword::ITEMS,
+                                Keyword::TERMINATED,
+                                Keyword::BY,
+                            ]) {
+                                row_delimiters.push(HiveRowDelimiter {
+                                    delimiter: HiveDelimiter::CollectionItemsTerminatedBy,
+                                    char: self.parse_identifier(false)?,
+                                });
+                            } else {
+                                break;
+                            }
+                        }
+                        Some(Keyword::MAP) => {
+                            if self.parse_keywords(&[
+                                Keyword::KEYS,
+                                Keyword::TERMINATED,
+                                Keyword::BY,
+                            ]) {
+                                row_delimiters.push(HiveRowDelimiter {
+                                    delimiter: HiveDelimiter::MapKeysTerminatedBy,
+                                    char: self.parse_identifier(false)?,
+                                });
+                            } else {
+                                break;
+                            }
+                        }
+                        Some(Keyword::LINES) => {
+                            if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) {
+                                row_delimiters.push(HiveRowDelimiter {
+                                    delimiter: HiveDelimiter::LinesTerminatedBy,
+                                    char: self.parse_identifier(false)?,
+                                });
+                            } else {
+                                break;
+                            }
+                        }
+                        Some(Keyword::NULL) => {
+                            if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) {
+                                row_delimiters.push(HiveRowDelimiter {
+                                    delimiter: HiveDelimiter::NullDefinedAs,
+                                    char: self.parse_identifier(false)?,
+                                });
+                            } else {
+                                break;
+                            }
+                        }
+                        _ => {
+                            break;
+                        }
+                    }
+                }
+
+                Ok(HiveRowFormat::DELIMITED {
+                    delimiters: row_delimiters,
+                })
+            }
         }
     }
 
diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs
index 66eef09e1..0cd77d814 100644
--- a/tests/sqlparser_hive.rs
+++ b/tests/sqlparser_hive.rs
@@ -191,6 +191,12 @@ fn create_temp_table() {
     hive().one_statement_parses_to(query2, query);
 }
 
+#[test]
+fn create_delimited_table() {
+    let query = "CREATE TABLE tab (cola STRING, colb BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ESCAPED BY '\"' MAP KEYS TERMINATED BY '\"'";
+    hive().verified_stmt(query);
+}
+
 #[test]
 fn create_local_directory() {
     let query =