Skip to content

Commit 1c03cb3

Browse files
authored
feat(glue): add ExternalTable for use with connections (#24753)
Changes the table structure to introduce an initial `TableBase` abstract class from which tables for different data sources can be derived. Initially there are two subclasses, `S3Table` and `ExternalTable`. - `S3Table`: The current table structure that has been used throughout the previous versions of the CDK - `ExternalTable`: The new glue table that will be used to store metadata about external data sources. This subclass will contain an `externalDataLocation` property to explicitly specify the `Location` property of the underlying `CfnTable` L1 construct - `Table`: This is now `@deprecated` to shift the usage towards `S3Table` Closes #24741. ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 23fba8a commit 1c03cb3

28 files changed

+4396
-330
lines changed

Diff for: packages/@aws-cdk/aws-glue-alpha/README.md

+36-14
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ A Glue table describes a table of data in S3: its structure (column names and ty
211211

212212
```ts
213213
declare const myDatabase: glue.Database;
214-
new glue.Table(this, 'MyTable', {
214+
new glue.S3Table(this, 'MyTable', {
215215
database: myDatabase,
216216
columns: [{
217217
name: 'col1',
@@ -230,7 +230,7 @@ By default, a S3 bucket will be created to store the table's data but you can ma
230230
```ts
231231
declare const myBucket: s3.Bucket;
232232
declare const myDatabase: glue.Database;
233-
new glue.Table(this, 'MyTable', {
233+
new glue.S3Table(this, 'MyTable', {
234234
bucket: myBucket,
235235
s3Prefix: 'my-table/',
236236
// ...
@@ -247,7 +247,7 @@ Glue tables can be configured to contain user-defined properties, to describe th
247247

248248
```ts
249249
declare const myDatabase: glue.Database;
250-
new glue.Table(this, 'MyTable', {
250+
new glue.S3Table(this, 'MyTable', {
251251
storageParameters: [
252252
glue.StorageParameter.skipHeaderLineCount(1),
253253
glue.StorageParameter.compressionType(glue.CompressionType.GZIP),
@@ -269,7 +269,7 @@ To improve query performance, a table can specify `partitionKeys` on which data
269269

270270
```ts
271271
declare const myDatabase: glue.Database;
272-
new glue.Table(this, 'MyTable', {
272+
new glue.S3Table(this, 'MyTable', {
273273
database: myDatabase,
274274
columns: [{
275275
name: 'col1',
@@ -300,7 +300,7 @@ property:
300300

301301
```ts
302302
declare const myDatabase: glue.Database;
303-
new glue.Table(this, 'MyTable', {
303+
new glue.S3Table(this, 'MyTable', {
304304
database: myDatabase,
305305
columns: [{
306306
name: 'col1',
@@ -337,7 +337,7 @@ If you have a table with a large number of partitions that grows over time, cons
337337

338338
```ts
339339
declare const myDatabase: glue.Database;
340-
new glue.Table(this, 'MyTable', {
340+
new glue.S3Table(this, 'MyTable', {
341341
database: myDatabase,
342342
columns: [{
343343
name: 'col1',
@@ -355,6 +355,28 @@ new glue.Table(this, 'MyTable', {
355355
});
356356
```
357357

358+
### Glue Connections
359+
360+
Glue connections allow external data connections to third-party databases and data warehouses. These connections can also be assigned to Glue Tables, allowing you to query external data sources using the Glue Data Catalog.
361+
362+
Whereas `S3Table` will point to (and if needed, create) a bucket to store the table's data, `ExternalTable` will point to an existing table in a data source. For example, to create a table in Glue that points to a table in Redshift:
363+
364+
```ts
365+
declare const myConnection: glue.Connection;
366+
declare const myDatabase: glue.Database;
367+
new glue.ExternalTable(this, 'MyTable', {
368+
connection: myConnection,
369+
externalDataLocation: 'default_db_public_example', // A table in Redshift
370+
// ...
371+
database: myDatabase,
372+
columns: [{
373+
name: 'col1',
374+
type: glue.Schema.STRING,
375+
}],
376+
dataFormat: glue.DataFormat.JSON,
377+
});
378+
```
379+
358380
## [Encryption](https://docs.aws.amazon.com/athena/latest/ug/encryption.html)
359381

360382
You can enable encryption on a Table's data:
@@ -363,7 +385,7 @@ You can enable encryption on a Table's data:
363385

364386
```ts
365387
declare const myDatabase: glue.Database;
366-
new glue.Table(this, 'MyTable', {
388+
new glue.S3Table(this, 'MyTable', {
367389
encryption: glue.TableEncryption.S3_MANAGED,
368390
// ...
369391
database: myDatabase,
@@ -380,7 +402,7 @@ new glue.Table(this, 'MyTable', {
380402
```ts
381403
declare const myDatabase: glue.Database;
382404
// KMS key is created automatically
383-
new glue.Table(this, 'MyTable', {
405+
new glue.S3Table(this, 'MyTable', {
384406
encryption: glue.TableEncryption.KMS,
385407
// ...
386408
database: myDatabase,
@@ -392,7 +414,7 @@ new glue.Table(this, 'MyTable', {
392414
});
393415

394416
// with an explicit KMS key
395-
new glue.Table(this, 'MyTable', {
417+
new glue.S3Table(this, 'MyTable', {
396418
encryption: glue.TableEncryption.KMS,
397419
encryptionKey: new kms.Key(this, 'MyKey'),
398420
// ...
@@ -409,7 +431,7 @@ new glue.Table(this, 'MyTable', {
409431

410432
```ts
411433
declare const myDatabase: glue.Database;
412-
new glue.Table(this, 'MyTable', {
434+
new glue.S3Table(this, 'MyTable', {
413435
encryption: glue.TableEncryption.KMS_MANAGED,
414436
// ...
415437
database: myDatabase,
@@ -426,7 +448,7 @@ new glue.Table(this, 'MyTable', {
426448
```ts
427449
declare const myDatabase: glue.Database;
428450
// KMS key is created automatically
429-
new glue.Table(this, 'MyTable', {
451+
new glue.S3Table(this, 'MyTable', {
430452
encryption: glue.TableEncryption.CLIENT_SIDE_KMS,
431453
// ...
432454
database: myDatabase,
@@ -438,7 +460,7 @@ new glue.Table(this, 'MyTable', {
438460
});
439461

440462
// with an explicit KMS key
441-
new glue.Table(this, 'MyTable', {
463+
new glue.S3Table(this, 'MyTable', {
442464
encryption: glue.TableEncryption.CLIENT_SIDE_KMS,
443465
encryptionKey: new kms.Key(this, 'MyKey'),
444466
// ...
@@ -451,15 +473,15 @@ new glue.Table(this, 'MyTable', {
451473
});
452474
```
453475

454-
*Note: you cannot provide a `Bucket` when creating the `Table` if you wish to use server-side encryption (`KMS`, `KMS_MANAGED` or `S3_MANAGED`)*.
476+
*Note: you cannot provide a `Bucket` when creating the `S3Table` if you wish to use server-side encryption (`KMS`, `KMS_MANAGED` or `S3_MANAGED`)*.
455477

456478
## Types
457479

458480
A table's schema is a collection of columns, each of which have a `name` and a `type`. Types are recursive structures, consisting of primitive and complex types:
459481

460482
```ts
461483
declare const myDatabase: glue.Database;
462-
new glue.Table(this, 'MyTable', {
484+
new glue.S3Table(this, 'MyTable', {
463485
columns: [{
464486
name: 'primitive_column',
465487
type: glue.Schema.STRING,
+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import { CfnTable } from 'aws-cdk-lib/aws-glue';
2+
import * as iam from 'aws-cdk-lib/aws-iam';
3+
import { Construct } from 'constructs';
4+
import { IConnection } from './connection';
5+
import { Column } from './schema';
6+
import { PartitionIndex, TableBase, TableBaseProps } from './table-base';
7+
8+
/**
 * Properties for defining an `ExternalTable`.
 *
 * Both properties declared here are required; everything else comes from `TableBaseProps`.
 */
export interface ExternalTableProps extends TableBaseProps {
  /**
   * The connection the table will use when performing reads and writes.
   *
   * Its `connectionName` is written to the table's `connectionName` parameter.
   */
  readonly connection: IConnection;

  /**
   * The data source location of the glue table, (e.g. `default_db_public_example` for Redshift).
   *
   * This value is used verbatim as the `location` of the table's storage descriptor.
   */
  readonly externalDataLocation: string;
}
25+
26+
/**
 * A Glue table that targets an external data location (e.g. A table in a Redshift Cluster).
 *
 * The table is rendered as a `CfnTable` of type `EXTERNAL_TABLE` whose storage
 * location is `props.externalDataLocation` and whose `connectionName` parameter
 * is taken from `props.connection`.
 */
export class ExternalTable extends TableBase {
  /**
   * Name of this table.
   */
  public readonly tableName: string;

  /**
   * ARN of this table.
   */
  public readonly tableArn: string;

  /**
   * The connection associated to this table
   */
  public readonly connection: IConnection;

  /**
   * This table's partition indexes.
   */
  public readonly partitionIndexes?: PartitionIndex[];

  /**
   * The underlying `CfnTable` resource; also registered as this construct's default child.
   */
  protected readonly tableResource: CfnTable;

  /**
   * @param scope the construct scope this table is defined in
   * @param id the construct id
   * @param props table configuration, including the connection and external data location
   * @throws Error if `props.storageParameters` contains the same key more than once
   */
  constructor(scope: Construct, id: string, props: ExternalTableProps) {
    super(scope, id, props);
    this.connection = props.connection;
    this.tableResource = new CfnTable(this, 'Table', {
      catalogId: props.database.catalogId,

      databaseName: props.database.databaseName,

      tableInput: {
        name: this.physicalName,
        description: props.description || `${this.physicalName} generated by CDK`,

        partitionKeys: renderColumns(props.partitionKeys),

        parameters: {
          'classification': props.dataFormat.classificationString?.value,
          // Always reported as encrypted for external tables.
          'has_encrypted_data': true,
          'partition_filtering.enabled': props.enablePartitionFiltering,
          'connectionName': props.connection.connectionName,
        },
        storageDescriptor: {
          location: props.externalDataLocation,
          compressed: this.compressed,
          storedAsSubDirectories: props.storedAsSubDirectories ?? false,
          columns: renderColumns(props.columns),
          inputFormat: props.dataFormat.inputFormat.className,
          outputFormat: props.dataFormat.outputFormat.className,
          serdeInfo: {
            serializationLibrary: props.dataFormat.serializationLibrary.className,
          },
          parameters: props.storageParameters ? props.storageParameters.reduce((acc, param) => {
            const key = param.key;
            // Own-property check only: the `in` operator would also match names
            // inherited from Object.prototype (e.g. `constructor`, `toString`)
            // and wrongly report them as duplicates.
            if (Object.prototype.hasOwnProperty.call(acc, key)) {
              throw new Error(`Duplicate storage parameter key: ${key}`);
            }
            acc[key] = param.value;
            return acc;
          }, {} as { [key: string]: string }) : undefined,
        },

        tableType: 'EXTERNAL_TABLE',
      },
    });

    this.tableName = this.getResourceNameAttribute(this.tableResource.ref);
    this.tableArn = this.stack.formatArn({
      service: 'glue',
      resource: 'table',
      resourceName: `${this.database.databaseName}/${this.tableName}`,
    });
    this.node.defaultChild = this.tableResource;

    // Partition index creation relies on created table.
    if (props.partitionIndexes) {
      this.partitionIndexes = props.partitionIndexes;
      this.partitionIndexes.forEach((index) => this.addPartitionIndex(index));
    }
  }

  /**
   * Grant read permissions to the table
   *
   * @param grantee the principal
   * @returns the grant produced for the read actions
   */
  public grantRead(grantee: iam.IGrantable): iam.Grant {
    return this.grant(grantee, readPermissions);
  }

  /**
   * Grant write permissions to the table
   *
   * @param grantee the principal
   * @returns the grant produced for the write actions
   */
  public grantWrite(grantee: iam.IGrantable): iam.Grant {
    return this.grant(grantee, writePermissions);
  }

  /**
   * Grant read and write permissions to the table
   *
   * @param grantee the principal
   * @returns the grant produced for the combined read and write actions
   */
  public grantReadWrite(grantee: iam.IGrantable): iam.Grant {
    return this.grant(grantee, [...readPermissions, ...writePermissions]);
  }
}
141+
142+
/**
 * Glue actions granted by `grantRead` (and, combined with the write actions, by `grantReadWrite`).
 */
const readPermissions = [
  'glue:BatchGetPartition',
  'glue:GetPartition',
  'glue:GetPartitions',
  'glue:GetTable',
  'glue:GetTables',
  'glue:GetTableVersion',
  'glue:GetTableVersions',
];
151+
152+
/**
 * Glue actions granted by `grantWrite` (and, combined with the read actions, by `grantReadWrite`).
 */
const writePermissions = [
  'glue:BatchCreatePartition',
  'glue:BatchDeletePartition',
  'glue:CreatePartition',
  'glue:DeletePartition',
  'glue:UpdatePartition',
];
159+
160+
/**
 * Converts column definitions into plain `{ name, type, comment }` objects,
 * where `type` is the column type's `inputString`.
 *
 * @param columns the columns to render, if any
 * @returns one rendered entry per column in the same order, or `undefined` when no columns are given
 */
function renderColumns(columns?: Column[]) {
  return columns?.map((column) => ({
    name: column.name,
    type: column.type.inputString,
    comment: column.comment,
  }));
}

Diff for: packages/@aws-cdk/aws-glue-alpha/lib/index.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@ export * from './connection';
55
export * from './data-format';
66
export * from './data-quality-ruleset';
77
export * from './database';
8+
export * from './external-table';
89
export * from './job';
910
export * from './job-executable';
11+
export * from './s3-table';
1012
export * from './schema';
1113
export * from './security-configuration';
1214
export * from './storage-parameter';
13-
export * from './table';
15+
export * from './table-base';
16+
export * from './table-deprecated';

0 commit comments

Comments
 (0)