2
2
from datetime import datetime
3
3
from itertools import chain
4
4
import operator
5
+ import re
5
6
import warnings
6
7
7
8
import numpy as np
14
15
import pandas ._testing as tm
15
16
from pandas .core .apply import frame_apply
16
17
from pandas .core .base import SpecificationError
18
+ from pandas .core .groupby .base import transformation_kernels
17
19
18
20
19
21
@pytest .fixture
@@ -1131,9 +1133,29 @@ def test_agg_transform(self, axis, float_frame):
1131
1133
result = float_frame .transform ([np .abs , "sqrt" ], axis = axis )
1132
1134
tm .assert_frame_equal (result , expected )
1133
1135
1136
+ # UDF via apply
1137
+ def func (x ):
1138
+ if isinstance (x , DataFrame ):
1139
+ raise ValueError
1140
+ return x + 1
1141
+
1142
+ result = float_frame .transform (func , axis = axis )
1143
+ expected = float_frame + 1
1144
+ tm .assert_frame_equal (result , expected )
1145
+
1146
+ # UDF that maps DataFrame -> DataFrame
1147
+ def func (x ):
1148
+ if not isinstance (x , DataFrame ):
1149
+ raise ValueError
1150
+ return x + 1
1151
+
1152
+ result = float_frame .transform (func , axis = axis )
1153
+ expected = float_frame + 1
1154
+ tm .assert_frame_equal (result , expected )
1155
+
1134
1156
def test_transform_and_agg_err (self , axis , float_frame ):
1135
1157
# cannot both transform and agg
1136
- msg = "transforms cannot produce aggregated results "
1158
+ msg = "Function did not transform "
1137
1159
with pytest .raises (ValueError , match = msg ):
1138
1160
float_frame .transform (["max" , "min" ], axis = axis )
1139
1161
@@ -1142,6 +1164,7 @@ def test_transform_and_agg_err(self, axis, float_frame):
1142
1164
with np .errstate (all = "ignore" ):
1143
1165
float_frame .agg (["max" , "sqrt" ], axis = axis )
1144
1166
1167
+ msg = "Function did not transform"
1145
1168
with pytest .raises (ValueError , match = msg ):
1146
1169
with np .errstate (all = "ignore" ):
1147
1170
float_frame .transform (["max" , "sqrt" ], axis = axis )
@@ -1221,6 +1244,9 @@ def test_agg_dict_nested_renaming_depr(self):
1221
1244
with pytest .raises (SpecificationError , match = msg ):
1222
1245
df .agg ({"A" : {"foo" : "min" }, "B" : {"bar" : "max" }})
1223
1246
1247
+ with pytest .raises (SpecificationError , match = msg ):
1248
+ df .transform ({"A" : {"foo" : "min" }, "B" : {"bar" : "max" }})
1249
+
1224
1250
def test_agg_reduce (self , axis , float_frame ):
1225
1251
other_axis = 1 if axis in {0 , "index" } else 0
1226
1252
name1 , name2 = float_frame .axes [other_axis ].unique ()[:2 ].sort_values ()
@@ -1550,3 +1576,88 @@ def test_apply_empty_list_reduce():
1550
1576
result = df .apply (lambda x : [], result_type = "reduce" )
1551
1577
expected = pd .Series ({"a" : [], "b" : []}, dtype = object )
1552
1578
tm .assert_series_equal (result , expected )
1579
+
1580
+
1581
def test_transform_reducer_raises(all_reductions):
    # DataFrame.transform is expected to raise ValueError for each value
    # supplied through ``all_reductions``, however the function is spelled.
    reducer = all_reductions
    frame = pd.DataFrame({"A": [1, 2, 3]})
    msg = "Function did not transform"

    # bare function, list, dict -> function, dict -> list of functions
    for spec in (reducer, [reducer], {"A": reducer}, {"A": [reducer]}):
        with pytest.raises(ValueError, match=msg):
            frame.transform(spec)
1593
+
1594
+
1595
+ # mypy doesn't allow adding lists of different types
1596
+ # https://github.com/python/mypy/issues/5492
1597
@pytest.mark.parametrize("op", [*transformation_kernels, lambda x: x + 1])
def test_transform_bad_dtype(op):
    # A column holding the ``object`` type itself: most transforms cannot
    # operate on it, so every calling form is expected to raise ValueError.
    frame = pd.DataFrame({"A": 3 * [object]})

    # These kernels do not depend on the values and therefore succeed.
    dtype_agnostic = ("backfill", "shift", "pad", "bfill", "ffill")
    if op in dtype_agnostic:
        pytest.xfail("Transform function works on any datatype")

    msg = "Transform function failed"
    # bare function, list, dict -> function, dict -> list of functions
    for spec in (op, [op], {"A": op}, {"A": [op]}):
        with pytest.raises(ValueError, match=msg):
            frame.transform(spec)
1611
+
1612
+
1613
@pytest.mark.parametrize("op", transformation_kernels)
def test_transform_multi_dtypes(op):
    # Transforming a mixed-dtype frame must give the same result as
    # transforming only the columns on which ``op`` succeeds.
    df = pd.DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]})

    # Probe each column on its own; a failure of any kind just means the
    # column is excluded from the comparison below.
    usable = []
    for name in df:
        try:
            df[name].transform(op)
        except Exception:
            continue
        else:
            usable.append(name)

    # Nothing to compare when ``op`` fails on every column.
    if not usable:
        return

    # list, dict -> function, dict -> list of functions
    specs = (
        [op],
        dict.fromkeys(usable, op),
        {name: [op] for name in usable},
    )
    for spec in specs:
        expected = df[usable].transform(spec)
        result = df.transform(spec)
        tm.assert_equal(result, expected)
1638
+
1639
+
1640
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply):
    # transform evaluates a UDF either through apply or by handing it the
    # whole DataFrame; extra positional/keyword arguments must arrive intact
    # on whichever path is being exercised.
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # The apply path is in use exactly when x is not a DataFrame.
        received_frame = isinstance(x, DataFrame)
        if received_frame == use_apply:
            # Wrong path for this parametrization: force the fallback.
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    frame = pd.DataFrame([1])
    frame.transform(f, 0, *expected_args, **expected_kwargs)
1656
+
1657
+
1658
@pytest.mark.parametrize("axis", [0, "index", 1, "columns"])
def test_transform_missing_columns(axis):
    # A dict spec naming a label that is absent must raise
    # SpecificationError for every accepted spelling of ``axis``.
    df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
    match = re.escape("Column(s) ['C'] do not exist")
    with pytest.raises(SpecificationError, match=match):
        # Forward the parametrized axis; previously it was ignored, so all
        # four cases exercised only the default axis=0 path.
        df.transform({"C": "cumsum"}, axis=axis)
0 commit comments