1
1
import logging
2
2
import random
3
3
import string
4
+ from decimal import Decimal
4
5
5
6
import boto3
6
7
import pandas as pd
@@ -114,8 +115,7 @@ def test_postgresql_param():
114
115
assert df ["col0" ].iloc [0 ] == 1
115
116
116
117
117
- def test_redshift_copy_unload (bucket , databases_parameters ):
118
- path = f"s3://{ bucket } /test_redshift_copy/"
118
+ def test_redshift_copy_unload (path , databases_parameters ):
119
119
df = get_df ().drop (["iint8" , "binary" ], axis = 1 , inplace = False )
120
120
engine = wr .catalog .get_engine (connection = "aws-data-wrangler-redshift" )
121
121
wr .db .copy_to_redshift (
@@ -258,10 +258,9 @@ def test_redshift_copy_upsert(bucket, databases_parameters):
258
258
(None , None , wr .exceptions .InvalidRedshiftSortstyle , "foo" , ["id" ]),
259
259
],
260
260
)
261
- def test_redshift_exceptions (bucket , databases_parameters , diststyle , distkey , sortstyle , sortkey , exc ):
261
+ def test_redshift_exceptions (path , databases_parameters , diststyle , distkey , sortstyle , sortkey , exc ):
262
262
df = pd .DataFrame ({"id" : [1 ], "name" : "joe" })
263
263
engine = wr .catalog .get_engine (connection = "aws-data-wrangler-redshift" )
264
- path = f"s3://{ bucket } /test_redshift_exceptions_{ random .randint (0 , 1_000_000 )} /"
265
264
with pytest .raises (exc ):
266
265
wr .db .copy_to_redshift (
267
266
df = df ,
@@ -280,9 +279,8 @@ def test_redshift_exceptions(bucket, databases_parameters, diststyle, distkey, s
280
279
wr .s3 .delete_objects (path = path )
281
280
282
281
283
- def test_redshift_spectrum (bucket , glue_database , redshift_external_schema ):
282
+ def test_redshift_spectrum (path , glue_database , redshift_external_schema ):
284
283
df = pd .DataFrame ({"id" : [1 , 2 , 3 , 4 , 5 ], "col_str" : ["foo" , None , "bar" , None , "xoo" ], "par_int" : [0 , 1 , 0 , 1 , 1 ]})
285
- path = f"s3://{ bucket } /test_redshift_spectrum/"
286
284
paths = wr .s3 .to_parquet (
287
285
df = df ,
288
286
path = path ,
@@ -305,8 +303,7 @@ def test_redshift_spectrum(bucket, glue_database, redshift_external_schema):
305
303
assert wr .catalog .delete_table_if_exists (database = glue_database , table = "test_redshift_spectrum" ) is True
306
304
307
305
308
- def test_redshift_category (bucket , databases_parameters ):
309
- path = f"s3://{ bucket } /test_redshift_category/"
306
+ def test_redshift_category (path , databases_parameters ):
310
307
df = get_df_category ().drop (["binary" ], axis = 1 , inplace = False )
311
308
engine = wr .catalog .get_engine (connection = "aws-data-wrangler-redshift" )
312
309
wr .db .copy_to_redshift (
@@ -341,10 +338,9 @@ def test_redshift_category(bucket, databases_parameters):
341
338
wr .s3 .delete_objects (path = path )
342
339
343
340
344
- def test_redshift_unload_extras (bucket , databases_parameters , kms_key_id ):
341
+ def test_redshift_unload_extras (bucket , path , databases_parameters , kms_key_id ):
345
342
table = "test_redshift_unload_extras"
346
343
schema = databases_parameters ["redshift" ]["schema" ]
347
- path = f"s3://{ bucket } /{ table } /"
348
344
wr .s3 .delete_objects (path = path )
349
345
engine = wr .catalog .get_engine (connection = "aws-data-wrangler-redshift" )
350
346
df = pd .DataFrame ({"id" : [1 , 2 ], "name" : ["foo" , "boo" ]})
@@ -529,3 +525,52 @@ def test_redshift_copy_unload_long_string(path, databases_parameters):
529
525
)
530
526
assert len (df2 .index ) == 2
531
527
assert len (df2 .columns ) == 2
528
+
529
+
530
def _assert_decimal_cast(df):
    """Assert the shared expectations for the decimal-cast frame.

    Expects a 2x5 frame (rows c0=1 and c0=2); drops the c0==2 row (the
    all-NULL row) and checks the remaining row carries the exact
    decimal(11,5) values, with c4 (all-NULL column) read back as None.
    """
    assert df.shape == (2, 5)
    # Keep only the populated row (c0 == 1); it sits at index 0 afterwards.
    df = df.drop(df[df.c0 == 2].index)
    # Decimal((sign, digits, exponent)) pins the exact scale-5 representation.
    assert df.c1[0] == Decimal((0, (1, 0, 0, 0, 0, 0), -5))
    assert df.c2[0] == Decimal((0, (2, 2, 2, 2, 2, 2), -5))
    assert df.c3[0] == Decimal((0, (3, 3, 3, 3, 3, 3), -5))
    assert df.c4[0] is None


def test_spectrum_decimal_cast(path, path2, glue_table, glue_database, redshift_external_schema, databases_parameters):
    """Write int/float/str/None columns as decimal(11,5) Parquet and verify
    the values round-trip identically through Athena, Redshift Spectrum
    (direct read), and Redshift Spectrum UNLOAD.
    """
    df = pd.DataFrame(
        {"c0": [1, 2], "c1": [1, None], "c2": [2.22222, None], "c3": ["3.33333", None], "c4": [None, None]}
    )
    # Cast every payload column to the same decimal type on write.
    paths = wr.s3.to_parquet(
        df=df,
        path=path,
        database=glue_database,
        table=glue_table,
        dataset=True,
        dtype={"c1": "decimal(11,5)", "c2": "decimal(11,5)", "c3": "decimal(11,5)", "c4": "decimal(11,5)"},
    )["paths"]
    wr.s3.wait_objects_exist(paths=paths, use_threads=False)

    # Athena
    df2 = wr.athena.read_sql_table(table=glue_table, database=glue_database)
    _assert_decimal_cast(df2)

    # Redshift Spectrum (direct read through the external schema)
    engine = wr.catalog.get_engine(connection="aws-data-wrangler-redshift")
    df2 = wr.db.read_sql_table(table=glue_table, schema=redshift_external_schema, con=engine)
    _assert_decimal_cast(df2)

    # Redshift Spectrum Unload (reuses the engine created above)
    df2 = wr.db.unload_redshift(
        sql=f"SELECT * FROM {redshift_external_schema}.{glue_table}",
        con=engine,
        iam_role=databases_parameters["redshift"]["role"],
        path=path2,
    )
    _assert_decimal_cast(df2)
0 commit comments