@@ -283,10 +283,6 @@ def test_nunique(self):
283
283
self .assert_eq (psdf .nunique (), pdf .nunique ())
284
284
self .assert_eq (psdf .nunique (dropna = False ), pdf .nunique (dropna = False ))
285
285
286
- @unittest .skipIf (
287
- LooseVersion (pd .__version__ ) >= LooseVersion ("2.0.0" ),
288
- "TODO(SPARK-43810): Enable DataFrameSlowTests.test_quantile for pandas 2.0.0." ,
289
- )
290
286
def test_quantile (self ):
291
287
pdf , psdf = self .df_pair
292
288
@@ -332,59 +328,57 @@ def test_quantile(self):
332
328
pdf = pd .DataFrame ({"x" : ["a" , "b" , "c" ]})
333
329
psdf = ps .from_pandas (pdf )
334
330
335
- self .assert_eq (psdf .quantile (0.5 ), pdf .quantile (0.5 ))
336
- self .assert_eq (psdf .quantile ([0.25 , 0.5 , 0.75 ]), pdf .quantile ([0.25 , 0.5 , 0.75 ]))
331
+ self .assert_eq (psdf .quantile (0.5 ), pdf .quantile (0.5 , numeric_only = True ))
332
+ self .assert_eq (
333
+ psdf .quantile ([0.25 , 0.5 , 0.75 ]), pdf .quantile ([0.25 , 0.5 , 0.75 ], numeric_only = True )
334
+ )
337
335
338
336
with self .assertRaisesRegex (TypeError , "Could not convert object \\ (string\\ ) to numeric" ):
339
337
psdf .quantile (0.5 , numeric_only = False )
340
338
with self .assertRaisesRegex (TypeError , "Could not convert object \\ (string\\ ) to numeric" ):
341
339
psdf .quantile ([0.25 , 0.5 , 0.75 ], numeric_only = False )
342
340
343
- @unittest .skipIf (
344
- LooseVersion (pd .__version__ ) >= LooseVersion ("2.0.0" ),
345
- "TODO(SPARK-43558): Enable DataFrameSlowTests.test_product for pandas 2.0.0." ,
346
- )
347
341
def test_product (self ):
348
342
pdf = pd .DataFrame (
349
343
{"A" : [1 , 2 , 3 , 4 , 5 ], "B" : [10 , 20 , 30 , 40 , 50 ], "C" : ["a" , "b" , "c" , "d" , "e" ]}
350
344
)
351
345
psdf = ps .from_pandas (pdf )
352
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index ())
346
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index ())
353
347
354
348
# Named columns
355
349
pdf .columns .name = "Koalas"
356
350
psdf = ps .from_pandas (pdf )
357
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index ())
351
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index ())
358
352
359
353
# MultiIndex columns
360
354
pdf .columns = pd .MultiIndex .from_tuples ([("a" , "x" ), ("b" , "y" ), ("c" , "z" )])
361
355
psdf = ps .from_pandas (pdf )
362
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index ())
356
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index ())
363
357
364
358
# Named MultiIndex columns
365
359
pdf .columns .names = ["Hello" , "Koalas" ]
366
360
psdf = ps .from_pandas (pdf )
367
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index ())
361
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index ())
368
362
369
363
# No numeric columns
370
364
pdf = pd .DataFrame ({"key" : ["a" , "b" , "c" ], "val" : ["x" , "y" , "z" ]})
371
365
psdf = ps .from_pandas (pdf )
372
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index ())
366
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index ())
373
367
374
368
# No numeric named columns
375
369
pdf .columns .name = "Koalas"
376
370
psdf = ps .from_pandas (pdf )
377
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index (), almost = True )
371
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index (), almost = True )
378
372
379
373
# No numeric MultiIndex columns
380
374
pdf .columns = pd .MultiIndex .from_tuples ([("a" , "x" ), ("b" , "y" )])
381
375
psdf = ps .from_pandas (pdf )
382
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index (), almost = True )
376
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index (), almost = True )
383
377
384
378
# No numeric named MultiIndex columns
385
379
pdf .columns .names = ["Hello" , "Koalas" ]
386
380
psdf = ps .from_pandas (pdf )
387
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index (), almost = True )
381
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index (), almost = True )
388
382
389
383
# All NaN columns
390
384
pdf = pd .DataFrame (
@@ -395,22 +389,22 @@ def test_product(self):
395
389
}
396
390
)
397
391
psdf = ps .from_pandas (pdf )
398
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index (), check_exact = False )
392
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index (), check_exact = False )
399
393
400
394
# All NaN named columns
401
395
pdf .columns .name = "Koalas"
402
396
psdf = ps .from_pandas (pdf )
403
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index (), check_exact = False )
397
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index (), check_exact = False )
404
398
405
399
# All NaN MultiIndex columns
406
400
pdf .columns = pd .MultiIndex .from_tuples ([("a" , "x" ), ("b" , "y" ), ("c" , "z" )])
407
401
psdf = ps .from_pandas (pdf )
408
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index (), check_exact = False )
402
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index (), check_exact = False )
409
403
410
404
# All NaN named MultiIndex columns
411
405
pdf .columns .names = ["Hello" , "Koalas" ]
412
406
psdf = ps .from_pandas (pdf )
413
- self .assert_eq (pdf .prod (), psdf .prod ().sort_index (), check_exact = False )
407
+ self .assert_eq (pdf .prod (numeric_only = True ), psdf .prod ().sort_index (), check_exact = False )
414
408
415
409
416
410
class FrameComputeTests (FrameComputeMixin , ComparisonTestBase , SQLTestUtils ):
0 commit comments