Skip to content

Table time travel clause support, add visit_table_factor to Visitor #951

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ pub use self::query::{
JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition,
NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement,
ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table,
TableAlias, TableFactor, TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
TableAlias, TableFactor, TableVersion, TableWithJoins, Top, Values, WildcardAdditionalOptions,
With,
};
pub use self::value::{
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
Expand Down
24 changes: 24 additions & 0 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,7 @@ impl fmt::Display for TableWithJoins {
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
#[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))]
pub enum TableFactor {
Table {
#[cfg_attr(feature = "visitor", visit(with = "visit_relation"))]
Expand All @@ -661,6 +662,9 @@ pub enum TableFactor {
args: Option<Vec<FunctionArg>>,
/// MSSQL-specific `WITH (...)` hints such as NOLOCK.
with_hints: Vec<Expr>,
/// Optional version qualifier to facilitate table time-travel, as
/// supported by BigQuery and MSSQL.
version: Option<TableVersion>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

given it is so specific maybe this could be called system_time: Option<SystemTime> or something? I don't feel strongly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, I imagine this field would evolve into something capable of capturing other qualifiers beyond time, e.g. something like explicit version numbers as supported by delta-rs. To that end, I'd vote to keep it more generic than system_time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps I could rename it to temporal_qualifier: Option<TemporalQualifier>,? Feels more SQL-y, and at the same time sufficiently generic such that it could also store explicit version number references (besides timestamps and intervals).

},
Derived {
lateral: bool,
Expand Down Expand Up @@ -720,6 +724,7 @@ impl fmt::Display for TableFactor {
alias,
args,
with_hints,
version,
} => {
write!(f, "{name}")?;
if let Some(args) = args {
Expand All @@ -731,6 +736,9 @@ impl fmt::Display for TableFactor {
if !with_hints.is_empty() {
write!(f, " WITH ({})", display_comma_separated(with_hints))?;
}
if let Some(version) = version {
write!(f, "{version}")?;
}
Ok(())
}
TableFactor::Derived {
Expand Down Expand Up @@ -835,6 +843,22 @@ impl fmt::Display for TableAlias {
}
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableVersion {
ForSystemTimeAsOf(Expr),
}

impl Display for TableVersion {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TableVersion::ForSystemTimeAsOf(e) => write!(f, " FOR SYSTEM_TIME AS OF {e}")?,
}
Ok(())
}
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
Expand Down
82 changes: 73 additions & 9 deletions src/ast/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

//! Recursive visitors for ast Nodes. See [`Visitor`] for more details.

use crate::ast::{Expr, ObjectName, Statement};
use crate::ast::{Expr, ObjectName, Statement, TableFactor};
use core::ops::ControlFlow;

/// A type that can be visited by a [`Visitor`]. See [`Visitor`] for
Expand Down Expand Up @@ -115,8 +115,8 @@ visit_noop!(bigdecimal::BigDecimal);

/// A visitor that can be used to walk an AST tree.
///
/// `previst_` methods are invoked before visiting all children of the
/// node and `postvisit_` methods are invoked after visiting all
/// `pre_visit_` methods are invoked before visiting all children of the
/// node and `post_visit_` methods are invoked after visiting all
/// children of the node.
///
/// # See also
Expand All @@ -139,7 +139,7 @@ visit_noop!(bigdecimal::BigDecimal);
/// }
///
/// // Visit relations and exprs before children are visited (depth first walk)
/// // Note you can also visit statements and visit exprs after children have been visitoed
/// // Note you can also visit statements and visit exprs after children have been visited
/// impl Visitor for V {
/// type Break = ();
///
Expand Down Expand Up @@ -189,6 +189,16 @@ pub trait Visitor {
ControlFlow::Continue(())
}

/// Invoked for any table factors that appear in the AST before visiting children
fn pre_visit_table_factor(&mut self, _table_factor: &TableFactor) -> ControlFlow<Self::Break> {
ControlFlow::Continue(())
}

/// Invoked for any table factors that appear in the AST after visiting children
fn post_visit_table_factor(&mut self, _table_factor: &TableFactor) -> ControlFlow<Self::Break> {
ControlFlow::Continue(())
}

/// Invoked for any expressions that appear in the AST before visiting children
fn pre_visit_expr(&mut self, _expr: &Expr) -> ControlFlow<Self::Break> {
ControlFlow::Continue(())
Expand All @@ -212,8 +222,8 @@ pub trait Visitor {

/// A visitor that can be used to mutate an AST tree.
///
/// `previst_` methods are invoked before visiting all children of the
/// node and `postvisit_` methods are invoked after visiting all
/// `pre_visit_` methods are invoked before visiting all children of the
/// node and `post_visit_` methods are invoked after visiting all
/// children of the node.
///
/// # See also
Expand Down Expand Up @@ -267,6 +277,22 @@ pub trait VisitorMut {
ControlFlow::Continue(())
}

/// Invoked for any table factors that appear in the AST before visiting children
fn pre_visit_table_factor(
&mut self,
_table_factor: &mut TableFactor,
) -> ControlFlow<Self::Break> {
ControlFlow::Continue(())
}

/// Invoked for any table factors that appear in the AST after visiting children
fn post_visit_table_factor(
&mut self,
_table_factor: &mut TableFactor,
) -> ControlFlow<Self::Break> {
ControlFlow::Continue(())
}

/// Invoked for any expressions that appear in the AST before visiting children
fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
ControlFlow::Continue(())
Expand Down Expand Up @@ -609,6 +635,24 @@ mod tests {
ControlFlow::Continue(())
}

fn pre_visit_table_factor(
&mut self,
table_factor: &TableFactor,
) -> ControlFlow<Self::Break> {
self.visited
.push(format!("PRE: TABLE FACTOR: {table_factor}"));
ControlFlow::Continue(())
}

fn post_visit_table_factor(
&mut self,
table_factor: &TableFactor,
) -> ControlFlow<Self::Break> {
self.visited
.push(format!("POST: TABLE FACTOR: {table_factor}"));
ControlFlow::Continue(())
}

fn pre_visit_expr(&mut self, expr: &Expr) -> ControlFlow<Self::Break> {
self.visited.push(format!("PRE: EXPR: {expr}"));
ControlFlow::Continue(())
Expand Down Expand Up @@ -647,22 +691,28 @@ mod tests {
fn test_sql() {
let tests = vec![
(
"SELECT * from table_name",
"SELECT * from table_name as my_table",
vec![
"PRE: STATEMENT: SELECT * FROM table_name",
"PRE: STATEMENT: SELECT * FROM table_name AS my_table",
"PRE: TABLE FACTOR: table_name AS my_table",
"PRE: RELATION: table_name",
"POST: RELATION: table_name",
"POST: STATEMENT: SELECT * FROM table_name",
"POST: TABLE FACTOR: table_name AS my_table",
"POST: STATEMENT: SELECT * FROM table_name AS my_table",
],
),
(
"SELECT * from t1 join t2 on t1.id = t2.t1_id",
vec![
"PRE: STATEMENT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id",
"PRE: TABLE FACTOR: t1",
"PRE: RELATION: t1",
"POST: RELATION: t1",
"POST: TABLE FACTOR: t1",
"PRE: TABLE FACTOR: t2",
"PRE: RELATION: t2",
"POST: RELATION: t2",
"POST: TABLE FACTOR: t2",
"PRE: EXPR: t1.id = t2.t1_id",
"PRE: EXPR: t1.id",
"POST: EXPR: t1.id",
Expand All @@ -676,13 +726,17 @@ mod tests {
"SELECT * from t1 where EXISTS(SELECT column from t2)",
vec![
"PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)",
"PRE: TABLE FACTOR: t1",
"PRE: RELATION: t1",
"POST: RELATION: t1",
"POST: TABLE FACTOR: t1",
"PRE: EXPR: EXISTS (SELECT column FROM t2)",
"PRE: EXPR: column",
"POST: EXPR: column",
"PRE: TABLE FACTOR: t2",
"PRE: RELATION: t2",
"POST: RELATION: t2",
"POST: TABLE FACTOR: t2",
"POST: EXPR: EXISTS (SELECT column FROM t2)",
"POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)",
],
Expand All @@ -691,13 +745,17 @@ mod tests {
"SELECT * from t1 where EXISTS(SELECT column from t2)",
vec![
"PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)",
"PRE: TABLE FACTOR: t1",
"PRE: RELATION: t1",
"POST: RELATION: t1",
"POST: TABLE FACTOR: t1",
"PRE: EXPR: EXISTS (SELECT column FROM t2)",
"PRE: EXPR: column",
"POST: EXPR: column",
"PRE: TABLE FACTOR: t2",
"PRE: RELATION: t2",
"POST: RELATION: t2",
"POST: TABLE FACTOR: t2",
"POST: EXPR: EXISTS (SELECT column FROM t2)",
"POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)",
],
Expand All @@ -706,16 +764,22 @@ mod tests {
"SELECT * from t1 where EXISTS(SELECT column from t2) UNION SELECT * from t3",
vec![
"PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3",
"PRE: TABLE FACTOR: t1",
"PRE: RELATION: t1",
"POST: RELATION: t1",
"POST: TABLE FACTOR: t1",
"PRE: EXPR: EXISTS (SELECT column FROM t2)",
"PRE: EXPR: column",
"POST: EXPR: column",
"PRE: TABLE FACTOR: t2",
"PRE: RELATION: t2",
"POST: RELATION: t2",
"POST: TABLE FACTOR: t2",
"POST: EXPR: EXISTS (SELECT column FROM t2)",
"PRE: TABLE FACTOR: t3",
"PRE: RELATION: t3",
"POST: RELATION: t3",
"POST: TABLE FACTOR: t3",
"POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3",
],
),
Expand Down
18 changes: 18 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6198,6 +6198,9 @@ impl<'a> Parser<'a> {
} else {
let name = self.parse_object_name()?;

// Parse potential version qualifier
let version = self.parse_table_version()?;

// Postgres, MSSQL: table-valued functions:
let args = if self.consume_token(&Token::LParen) {
Some(self.parse_optional_args()?)
Expand Down Expand Up @@ -6228,10 +6231,25 @@ impl<'a> Parser<'a> {
alias,
args,
with_hints,
version,
})
}
}

/// Parse a given table version specifier.
///
/// For now it only supports timestamp versioning for BigQuery and MSSQL dialects.
pub fn parse_table_version(&mut self) -> Result<Option<TableVersion>, ParserError> {
if dialect_of!(self is BigQueryDialect | MsSqlDialect)
&& self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF])
{
let expr = self.parse_expr()?;
Ok(Some(TableVersion::ForSystemTimeAsOf(expr)))
} else {
Ok(None)
}
}

pub fn parse_derived_table_factor(
&mut self,
lateral: IsLateral,
Expand Down
1 change: 1 addition & 0 deletions src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ pub fn table(name: impl Into<String>) -> TableFactor {
alias: None,
args: None,
with_hints: vec![],
version: None,
}
}

Expand Down
26 changes: 26 additions & 0 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ fn parse_table_identifiers() {
alias: None,
args: None,
with_hints: vec![],
version: None,
},
joins: vec![]
},]
Expand Down Expand Up @@ -143,6 +144,31 @@ fn parse_table_identifiers() {
test_table_ident("abc5.GROUP", vec![Ident::new("abc5"), Ident::new("GROUP")]);
}

#[test]
fn parse_table_time_travel() {
let version = "2023-08-18 23:08:18".to_string();
let sql = format!("SELECT 1 FROM t1 FOR SYSTEM_TIME AS OF '{version}'");
let select = bigquery().verified_only_select(&sql);
assert_eq!(
select.from,
vec![TableWithJoins {
relation: TableFactor::Table {
name: ObjectName(vec![Ident::new("t1")]),
alias: None,
args: None,
with_hints: vec![],
version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value(
Value::SingleQuotedString(version)
))),
},
joins: vec![]
},]
);

let sql = "SELECT 1 FROM t1 FOR SYSTEM TIME AS OF 'some_timestamp'".to_string();
assert!(bigquery().parse_sql_statements(&sql).is_err());
}

#[test]
fn parse_join_constraint_unnest_alias() {
assert_eq!(
Expand Down
3 changes: 3 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ fn parse_map_access_expr() {
alias: None,
args: None,
with_hints: vec![],
version: None,
},
joins: vec![]
}],
Expand Down Expand Up @@ -169,11 +170,13 @@ fn parse_delimited_identifiers() {
alias,
args,
with_hints,
version,
} => {
assert_eq!(vec![Ident::with_quote('"', "a table")], name.0);
assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name);
assert!(args.is_none());
assert!(with_hints.is_empty());
assert!(version.is_none());
}
_ => panic!("Expecting TableFactor::Table"),
}
Expand Down
Loading