Skip to content

Commit 316bb14

Browse files
authored
Add support for TABLESAMPLE (#1580)
1 parent 7bc6ddb commit 316bb14

20 files changed

+546
-458
lines changed

src/ast/mod.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,11 @@ pub use self::query::{
6969
OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem,
7070
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
7171
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table,
72-
TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins,
73-
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
72+
TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSample,
73+
TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier,
74+
TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion,
75+
TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
76+
WithFill,
7477
};
7578

7679
pub use self::trigger::{

src/ast/query.rs

+188
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,9 @@ pub enum TableFactor {
10021002
partitions: Vec<Ident>,
10031003
/// Optional PartiQL JsonPath: <https://partiql.org/dql/from.html>
10041004
json_path: Option<JsonPath>,
1005+
/// Optional table sample modifier
1006+
/// See: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#sample-clause>
1007+
sample: Option<TableSampleKind>,
10051008
},
10061009
Derived {
10071010
lateral: bool,
@@ -1146,6 +1149,184 @@ pub enum TableFactor {
11461149
},
11471150
}
11481151

1152+
/// The table sample modifier options
1153+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1154+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1155+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1156+
1157+
pub enum TableSampleKind {
1158+
/// Table sample located before the table alias option
1159+
BeforeTableAlias(Box<TableSample>),
1160+
/// Table sample located after the table alias option
1161+
AfterTableAlias(Box<TableSample>),
1162+
}
1163+
1164+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1165+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1166+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1167+
pub struct TableSample {
1168+
pub modifier: TableSampleModifier,
1169+
pub name: Option<TableSampleMethod>,
1170+
pub quantity: Option<TableSampleQuantity>,
1171+
pub seed: Option<TableSampleSeed>,
1172+
pub bucket: Option<TableSampleBucket>,
1173+
pub offset: Option<Expr>,
1174+
}
1175+
1176+
/// The keyword that opens a table sample clause.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableSampleModifier {
    /// Displayed as `SAMPLE`.
    Sample,
    /// Displayed as `TABLESAMPLE`.
    TableSample,
}

impl fmt::Display for TableSampleModifier {
    /// Writes the SQL keyword for this modifier.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            Self::Sample => "SAMPLE",
            Self::TableSample => "TABLESAMPLE",
        };
        f.write_str(keyword)
    }
}
1193+
1194+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1195+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1196+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1197+
pub struct TableSampleQuantity {
1198+
pub parenthesized: bool,
1199+
pub value: Expr,
1200+
pub unit: Option<TableSampleUnit>,
1201+
}
1202+
1203+
impl fmt::Display for TableSampleQuantity {
1204+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1205+
if self.parenthesized {
1206+
write!(f, "(")?;
1207+
}
1208+
write!(f, "{}", self.value)?;
1209+
if let Some(unit) = &self.unit {
1210+
write!(f, " {}", unit)?;
1211+
}
1212+
if self.parenthesized {
1213+
write!(f, ")")?;
1214+
}
1215+
Ok(())
1216+
}
1217+
}
1218+
1219+
/// The sampling method named in a table sample clause.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableSampleMethod {
    /// Displayed as `ROW`.
    Row,
    /// Displayed as `BERNOULLI`.
    Bernoulli,
    /// Displayed as `SYSTEM`.
    System,
    /// Displayed as `BLOCK`.
    Block,
}

impl fmt::Display for TableSampleMethod {
    /// Writes the SQL keyword for this sampling method.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Row => "ROW",
            Self::Bernoulli => "BERNOULLI",
            Self::System => "SYSTEM",
            Self::Block => "BLOCK",
        })
    }
}
1240+
1241+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1242+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1243+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1244+
pub struct TableSampleSeed {
1245+
pub modifier: TableSampleSeedModifier,
1246+
pub value: Value,
1247+
}
1248+
1249+
impl fmt::Display for TableSampleSeed {
1250+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1251+
write!(f, "{} ({})", self.modifier, self.value)?;
1252+
Ok(())
1253+
}
1254+
}
1255+
1256+
/// The keyword that introduces the seed of a table sample clause.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableSampleSeedModifier {
    /// Displayed as `REPEATABLE`.
    Repeatable,
    /// Displayed as `SEED`.
    Seed,
}

impl fmt::Display for TableSampleSeedModifier {
    /// Writes the SQL keyword for this seed modifier.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            Self::Repeatable => "REPEATABLE",
            Self::Seed => "SEED",
        };
        f.write_str(keyword)
    }
}
1272+
1273+
/// The unit attached to a table sample quantity.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableSampleUnit {
    /// Displayed as `ROWS`.
    Rows,
    /// Displayed as `PERCENT`.
    Percent,
}

impl fmt::Display for TableSampleUnit {
    /// Writes the SQL keyword for this unit.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Rows => "ROWS",
            Self::Percent => "PERCENT",
        })
    }
}
1289+
1290+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1291+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1292+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1293+
pub struct TableSampleBucket {
1294+
pub bucket: Value,
1295+
pub total: Value,
1296+
pub on: Option<Expr>,
1297+
}
1298+
1299+
impl fmt::Display for TableSampleBucket {
1300+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1301+
write!(f, "BUCKET {} OUT OF {}", self.bucket, self.total)?;
1302+
if let Some(on) = &self.on {
1303+
write!(f, " ON {}", on)?;
1304+
}
1305+
Ok(())
1306+
}
1307+
}
1308+
impl fmt::Display for TableSample {
1309+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1310+
write!(f, " {}", self.modifier)?;
1311+
if let Some(name) = &self.name {
1312+
write!(f, " {}", name)?;
1313+
}
1314+
if let Some(quantity) = &self.quantity {
1315+
write!(f, " {}", quantity)?;
1316+
}
1317+
if let Some(seed) = &self.seed {
1318+
write!(f, " {}", seed)?;
1319+
}
1320+
if let Some(bucket) = &self.bucket {
1321+
write!(f, " ({})", bucket)?;
1322+
}
1323+
if let Some(offset) = &self.offset {
1324+
write!(f, " OFFSET {}", offset)?;
1325+
}
1326+
Ok(())
1327+
}
1328+
}
1329+
11491330
/// The source of values in a `PIVOT` operation.
11501331
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
11511332
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -1404,6 +1585,7 @@ impl fmt::Display for TableFactor {
14041585
partitions,
14051586
with_ordinality,
14061587
json_path,
1588+
sample,
14071589
} => {
14081590
write!(f, "{name}")?;
14091591
if let Some(json_path) = json_path {
@@ -1426,6 +1608,9 @@ impl fmt::Display for TableFactor {
14261608
if *with_ordinality {
14271609
write!(f, " WITH ORDINALITY")?;
14281610
}
1611+
if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample {
1612+
write!(f, "{sample}")?;
1613+
}
14291614
if let Some(alias) = alias {
14301615
write!(f, " AS {alias}")?;
14311616
}
@@ -1435,6 +1620,9 @@ impl fmt::Display for TableFactor {
14351620
if let Some(version) = version {
14361621
write!(f, "{version}")?;
14371622
}
1623+
if let Some(TableSampleKind::AfterTableAlias(sample)) = sample {
1624+
write!(f, "{sample}")?;
1625+
}
14381626
Ok(())
14391627
}
14401628
TableFactor::Derived {

src/ast/spans.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1699,6 +1699,7 @@ impl Spanned for TableFactor {
16991699
with_ordinality: _,
17001700
partitions: _,
17011701
json_path: _,
1702+
sample: _,
17021703
} => union_spans(
17031704
name.0
17041705
.iter()

src/dialect/hive.rs

+5
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,9 @@ impl Dialect for HiveDialect {
6161
fn supports_load_data(&self) -> bool {
6262
true
6363
}
64+
65+
/// See Hive <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
66+
fn supports_table_sample_before_alias(&self) -> bool {
67+
true
68+
}
6469
}

src/dialect/mod.rs

+11
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,17 @@ pub trait Dialect: Debug + Any {
707707
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
708708
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
709709
}
710+
711+
/// Returns true if this dialect supports the `TABLESAMPLE` option
712+
/// before the table alias option. For example:
713+
///
714+
/// Table sample before alias: `SELECT * FROM tbl AS t TABLESAMPLE (10)`
715+
/// Table sample after alias: `SELECT * FROM tbl TABLESAMPLE (10) AS t`
716+
///
717+
/// <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#_7_6_table_reference>
718+
fn supports_table_sample_before_alias(&self) -> bool {
719+
false
720+
}
710721
}
711722

712723
/// This represents the operators for which precedence must be defined

src/keywords.rs

+8
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ define_keywords!(
120120
BEGIN,
121121
BEGIN_FRAME,
122122
BEGIN_PARTITION,
123+
BERNOULLI,
123124
BETWEEN,
124125
BIGDECIMAL,
125126
BIGINT,
@@ -128,12 +129,14 @@ define_keywords!(
128129
BINDING,
129130
BIT,
130131
BLOB,
132+
BLOCK,
131133
BLOOMFILTER,
132134
BOOL,
133135
BOOLEAN,
134136
BOTH,
135137
BROWSE,
136138
BTREE,
139+
BUCKET,
137140
BUCKETS,
138141
BY,
139142
BYPASSRLS,
@@ -680,6 +683,7 @@ define_keywords!(
680683
RUN,
681684
SAFE,
682685
SAFE_CAST,
686+
SAMPLE,
683687
SAVEPOINT,
684688
SCHEMA,
685689
SCHEMAS,
@@ -690,6 +694,7 @@ define_keywords!(
690694
SECONDARY,
691695
SECRET,
692696
SECURITY,
697+
SEED,
693698
SELECT,
694699
SEMI,
695700
SENSITIVE,
@@ -932,6 +937,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
932937
Keyword::CONNECT,
933938
// Reserved for Snowflake MATCH_RECOGNIZE
934939
Keyword::MATCH_RECOGNIZE,
940+
// Reserved for Snowflake table sample
941+
Keyword::SAMPLE,
942+
Keyword::TABLESAMPLE,
935943
];
936944

937945
/// Can't be used as a column alias, so that `SELECT <expr> alias`

0 commit comments

Comments
 (0)