@@ -1895,61 +1895,49 @@ def test_rank_apply(self):
1895
1895
expected = expected .reindex (result .index )
1896
1896
assert_series_equal (result , expected )
1897
1897
1898
+ @pytest .mark .parametrize ("grps" , [
1899
+ ['qux' ], ['qux' , 'quux' ]])
1898
1900
@pytest .mark .parametrize ("vals" , [
1899
1901
[2 , 2 , 8 , 2 , 6 ], ['bar' , 'bar' , 'foo' , 'bar' , 'baz' ],
1900
1902
[pd .Timestamp ('2018-01-02' ), pd .Timestamp ('2018-01-02' ),
1901
1903
pd .Timestamp ('2018-01-08' ), pd .Timestamp ('2018-01-02' ),
1902
1904
pd .Timestamp ('2018-01-06' )]])
1903
1905
@pytest .mark .parametrize ("ties_method,ascending,pct,exp" , [
1904
- ('average' , True , False , DataFrame (
1905
- [2. , 2. , 5. , 2. , 4. ], columns = ['val' ])),
1906
- ('average' , True , True , DataFrame (
1907
- [0.4 , 0.4 , 1.0 , 0.4 , 0.8 ], columns = ['val' ])),
1908
- ('average' , False , False , DataFrame (
1909
- [4. , 4. , 1. , 4. , 2. ], columns = ['val' ])),
1910
- ('average' , False , True , DataFrame (
1911
- [.8 , .8 , .2 , .8 , .4 ], columns = ['val' ])),
1912
- ('min' , True , False , DataFrame (
1913
- [1. , 1. , 5. , 1. , 4. ], columns = ['val' ])),
1914
- ('min' , True , True , DataFrame (
1915
- [0.2 , 0.2 , 1.0 , 0.2 , 0.8 ], columns = ['val' ])),
1916
- ('min' , False , False , DataFrame (
1917
- [3. , 3. , 1. , 3. , 2. ], columns = ['val' ])),
1918
- ('min' , False , True , DataFrame (
1919
- [.6 , .6 , .2 , .6 , .4 ], columns = ['val' ])),
1920
- ('max' , True , False , DataFrame (
1921
- [3. , 3. , 5. , 3. , 4. ], columns = ['val' ])),
1922
- ('max' , True , True , DataFrame (
1923
- [0.6 , 0.6 , 1.0 , 0.6 , 0.8 ], columns = ['val' ])),
1924
- ('max' , False , False , DataFrame (
1925
- [5. , 5. , 1. , 5. , 2. ], columns = ['val' ])),
1926
- ('max' , False , True , DataFrame (
1927
- [1. , 1. , .2 , 1. , .4 ], columns = ['val' ])),
1928
- ('first' , True , False , DataFrame (
1929
- [1. , 2. , 5. , 3. , 4. ], columns = ['val' ])),
1930
- ('first' , True , True , DataFrame (
1931
- [0.2 , 0.4 , 1.0 , 0.6 , 0.8 ], columns = ['val' ])),
1932
- ('first' , False , False , DataFrame (
1933
- [3. , 4. , 1. , 5. , 2. ], columns = ['val' ])),
1934
- ('first' , False , True , DataFrame (
1935
- [.6 , .8 , .2 , 1. , .4 ], columns = ['val' ])),
1936
- ('dense' , True , False , DataFrame (
1937
- [1. , 1. , 3. , 1. , 2. ], columns = ['val' ])),
1938
- ('dense' , True , True , DataFrame (
1939
- [0.2 , 0.2 , 0.6 , 0.2 , 0.4 ], columns = ['val' ])),
1940
- ('dense' , False , False , DataFrame (
1941
- [3. , 3. , 1. , 3. , 2. ], columns = ['val' ])),
1942
- ('dense' , False , True , DataFrame (
1943
- [.6 , .6 , .2 , .6 , .4 ], columns = ['val' ])),
1906
+ ('average' , True , False , [2. , 2. , 5. , 2. , 4. ]),
1907
+ ('average' , True , True , [0.4 , 0.4 , 1.0 , 0.4 , 0.8 ]),
1908
+ ('average' , False , False , [4. , 4. , 1. , 4. , 2. ]),
1909
+ ('average' , False , True , [.8 , .8 , .2 , .8 , .4 ]),
1910
+ ('min' , True , False , [1. , 1. , 5. , 1. , 4. ]),
1911
+ ('min' , True , True , [0.2 , 0.2 , 1.0 , 0.2 , 0.8 ]),
1912
+ ('min' , False , False , [3. , 3. , 1. , 3. , 2. ]),
1913
+ ('min' , False , True , [.6 , .6 , .2 , .6 , .4 ]),
1914
+ ('max' , True , False , [3. , 3. , 5. , 3. , 4. ]),
1915
+ ('max' , True , True , [0.6 , 0.6 , 1.0 , 0.6 , 0.8 ]),
1916
+ ('max' , False , False , [5. , 5. , 1. , 5. , 2. ]),
1917
+ ('max' , False , True , [1. , 1. , .2 , 1. , .4 ]),
1918
+ ('first' , True , False , [1. , 2. , 5. , 3. , 4. ]),
1919
+ ('first' , True , True , [0.2 , 0.4 , 1.0 , 0.6 , 0.8 ]),
1920
+ ('first' , False , False , [3. , 4. , 1. , 5. , 2. ]),
1921
+ ('first' , False , True , [.6 , .8 , .2 , 1. , .4 ]),
1922
+ ('dense' , True , False , [1. , 1. , 3. , 1. , 2. ]),
1923
+ ('dense' , True , True , [0.2 , 0.2 , 0.6 , 0.2 , 0.4 ]),
1924
+ ('dense' , False , False , [3. , 3. , 1. , 3. , 2. ]),
1925
+ ('dense' , False , True , [.6 , .6 , .2 , .6 , .4 ]),
1944
1926
])
1945
- def test_rank_args (self , vals , ties_method , ascending , pct , exp ):
1927
+ def test_rank_args (self , grps , vals , ties_method , ascending , pct , exp ):
1946
1928
if ties_method == 'first' and vals [0 ] == 'bar' :
1947
1929
pytest .xfail ("See GH 19482" )
1948
- df = DataFrame ({'key' : ['foo' ]* 5 , 'val' : vals })
1930
+ key = np .repeat (grps , len (vals ))
1931
+ vals = vals * len (grps )
1932
+ df = DataFrame ({'key' : key , 'val' : vals })
1949
1933
result = df .groupby ('key' ).rank (method = ties_method , ascending = ascending ,
1950
1934
pct = pct )
1951
- assert_frame_equal (result , exp )
1952
1935
1936
+ exp_df = DataFrame (exp * len (grps ), columns = ['val' ])
1937
+ assert_frame_equal (result , exp_df )
1938
+
1939
+ @pytest .mark .parametrize ("grps" , [
1940
+ ['qux' ], ['qux' , 'quux' ]])
1953
1941
@pytest .mark .parametrize ("vals" , [
1954
1942
[2 , 2 , np .nan , 8 , 2 , 6 , np .nan , np .nan ], # floats
1955
1943
['bar' , 'bar' , np .nan , 'foo' , 'bar' , 'baz' , np .nan , np .nan ], # objects
@@ -1958,110 +1946,89 @@ def test_rank_args(self, vals, ties_method, ascending, pct, exp):
1958
1946
pd .Timestamp ('2018-01-06' ), np .nan , np .nan ]
1959
1947
])
1960
1948
@pytest .mark .parametrize ("ties_method,ascending,na_option,pct,exp" , [
1961
- ('average' , True , 'keep' , False , DataFrame (
1962
- [2. , 2. , np .nan , 5. , 2. , 4. , np .nan , np .nan ], columns = ['val' ])),
1963
- ('average' , True , 'keep' , True , DataFrame (
1964
- [0.4 , 0.4 , np .nan , 1.0 , 0.4 , 0.8 , np .nan , np .nan ],
1965
- columns = ['val' ])),
1966
- ('average' , False , 'keep' , False , DataFrame (
1967
- [4. , 4. , np .nan , 1. , 4. , 2. , np .nan , np .nan ], columns = ['val' ])),
1968
- ('average' , False , 'keep' , True , DataFrame (
1969
- [.8 , 0.8 , np .nan , 0.2 , 0.8 , 0.4 , np .nan , np .nan ], columns = ['val' ])),
1970
- ('min' , True , 'keep' , False , DataFrame (
1971
- [1. , 1. , np .nan , 5. , 1. , 4. , np .nan , np .nan ], columns = ['val' ])),
1972
- ('min' , True , 'keep' , True , DataFrame (
1973
- [0.2 , 0.2 , np .nan , 1.0 , 0.2 , 0.8 , np .nan , np .nan ],
1974
- columns = ['val' ])),
1975
- ('min' , False , 'keep' , False , DataFrame (
1976
- [3. , 3. , np .nan , 1. , 3. , 2. , np .nan , np .nan ], columns = ['val' ])),
1977
- ('min' , False , 'keep' , True , DataFrame (
1978
- [.6 , 0.6 , np .nan , 0.2 , 0.6 , 0.4 , np .nan , np .nan ], columns = ['val' ])),
1979
- ('max' , True , 'keep' , False , DataFrame (
1980
- [3. , 3. , np .nan , 5. , 3. , 4. , np .nan , np .nan ], columns = ['val' ])),
1981
- ('max' , True , 'keep' , True , DataFrame (
1982
- [0.6 , 0.6 , np .nan , 1.0 , 0.6 , 0.8 , np .nan , np .nan ],
1983
- columns = ['val' ])),
1984
- ('max' , False , 'keep' , False , DataFrame (
1985
- [5. , 5. , np .nan , 1. , 5. , 2. , np .nan , np .nan ], columns = ['val' ])),
1986
- ('max' , False , 'keep' , True , DataFrame (
1987
- [1. , 1. , np .nan , 0.2 , 1. , 0.4 , np .nan , np .nan ], columns = ['val' ])),
1988
- ('first' , True , 'keep' , False , DataFrame (
1989
- [1. , 2. , np .nan , 5. , 3. , 4. , np .nan , np .nan ], columns = ['val' ])),
1990
- ('first' , True , 'keep' , True , DataFrame (
1991
- [0.2 , 0.4 , np .nan , 1.0 , 0.6 , 0.8 , np .nan , np .nan ],
1992
- columns = ['val' ])),
1993
- ('first' , False , 'keep' , False , DataFrame (
1994
- [3. , 4. , np .nan , 1. , 5. , 2. , np .nan , np .nan ], columns = ['val' ])),
1995
- ('first' , False , 'keep' , True , DataFrame (
1996
- [.6 , 0.8 , np .nan , 0.2 , 1. , 0.4 , np .nan , np .nan ], columns = ['val' ])),
1997
- ('dense' , True , 'keep' , False , DataFrame (
1998
- [1. , 1. , np .nan , 3. , 1. , 2. , np .nan , np .nan ], columns = ['val' ])),
1999
- ('dense' , True , 'keep' , True , DataFrame (
2000
- [0.2 , 0.2 , np .nan , 0.6 , 0.2 , 0.4 , np .nan , np .nan ],
2001
- columns = ['val' ])),
2002
- ('dense' , False , 'keep' , False , DataFrame (
2003
- [3. , 3. , np .nan , 1. , 3. , 2. , np .nan , np .nan ], columns = ['val' ])),
2004
- ('dense' , False , 'keep' , True , DataFrame (
2005
- [.6 , 0.6 , np .nan , 0.2 , 0.6 , 0.4 , np .nan , np .nan ], columns = ['val' ])),
2006
- ('average' , True , 'no_na' , False , DataFrame (
2007
- [2. , 2. , 7. , 5. , 2. , 4. , 7. , 7. ], columns = ['val' ])),
2008
- ('average' , True , 'no_na' , True , DataFrame (
2009
- [0.25 , 0.25 , 0.875 , 0.625 , 0.25 , 0.5 , 0.875 , 0.875 ],
2010
- columns = ['val' ])),
2011
- ('average' , False , 'no_na' , False , DataFrame (
2012
- [4. , 4. , 7.0 , 1. , 4. , 2. , 7.0 , 7.0 ], columns = ['val' ])),
2013
- ('average' , False , 'no_na' , True , DataFrame (
2014
- [0.5 , 0.5 , 0.875 , 0.125 , 0.5 , 0.25 , 0.875 , 0.875 ],
2015
- columns = ['val' ])),
2016
- ('min' , True , 'no_na' , False , DataFrame (
2017
- [1. , 1. , 6. , 5. , 1. , 4. , 6. , 6. ], columns = ['val' ])),
2018
- ('min' , True , 'no_na' , True , DataFrame (
2019
- [0.125 , 0.125 , 0.75 , 0.625 , 0.125 , 0.5 , 0.75 , 0.75 ],
2020
- columns = ['val' ])),
2021
- ('min' , False , 'no_na' , False , DataFrame (
2022
- [3. , 3. , 6. , 1. , 3. , 2. , 6. , 6. ], columns = ['val' ])),
2023
- ('min' , False , 'no_na' , True , DataFrame (
2024
- [0.375 , 0.375 , 0.75 , 0.125 , 0.375 , 0.25 , 0.75 , 0.75 ],
2025
- columns = ['val' ])),
2026
- ('max' , True , 'no_na' , False , DataFrame (
2027
- [3. , 3. , 8. , 5. , 3. , 4. , 8. , 8. ], columns = ['val' ])),
2028
- ('max' , True , 'no_na' , True , DataFrame (
2029
- [0.375 , 0.375 , 1. , 0.625 , 0.375 , 0.5 , 1. , 1. ], columns = ['val' ])),
2030
- ('max' , False , 'no_na' , False , DataFrame (
2031
- [5. , 5. , 8. , 1. , 5. , 2. , 8. , 8. ], columns = ['val' ])),
2032
- ('max' , False , 'no_na' , True , DataFrame (
2033
- [0.625 , 0.625 , 1. , 0.125 , 0.625 , 0.25 , 1. , 1. ], columns = ['val' ])),
2034
- ('first' , True , 'no_na' , False , DataFrame (
2035
- [1. , 2. , 6. , 5. , 3. , 4. , 7. , 8. ], columns = ['val' ])),
2036
- ('first' , True , 'no_na' , True , DataFrame (
2037
- [0.125 , 0.25 , 0.75 , 0.625 , 0.375 , 0.5 , 0.875 , 1. ],
2038
- columns = ['val' ])),
2039
- ('first' , False , 'no_na' , False , DataFrame (
2040
- [3. , 4. , 6. , 1. , 5. , 2. , 7. , 8. ], columns = ['val' ])),
2041
- ('first' , False , 'no_na' , True , DataFrame (
2042
- [0.375 , 0.5 , 0.75 , 0.125 , 0.625 , 0.25 , 0.875 , 1. ],
2043
- columns = ['val' ])),
2044
- ('dense' , True , 'no_na' , False , DataFrame (
2045
- [1. , 1. , 4. , 3. , 1. , 2. , 4. , 4. ], columns = ['val' ])),
2046
- ('dense' , True , 'no_na' , True , DataFrame (
2047
- [0.125 , 0.125 , 0.5 , 0.375 , 0.125 , 0.25 , 0.5 , 0.5 ],
2048
- columns = ['val' ])),
2049
- ('dense' , False , 'no_na' , False , DataFrame (
2050
- [3. , 3. , 4. , 1. , 3. , 2. , 4. , 4. ], columns = ['val' ])),
2051
- ('dense' , False , 'no_na' , True , DataFrame (
2052
- [0.375 , 0.375 , 0.5 , 0.125 , 0.375 , 0.25 , 0.5 , 0.5 ],
2053
- columns = ['val' ])),
1949
+ ('average' , True , 'keep' , False ,
1950
+ [2. , 2. , np .nan , 5. , 2. , 4. , np .nan , np .nan ]),
1951
+ ('average' , True , 'keep' , True ,
1952
+ [0.4 , 0.4 , np .nan , 1.0 , 0.4 , 0.8 , np .nan , np .nan ]),
1953
+ ('average' , False , 'keep' , False ,
1954
+ [4. , 4. , np .nan , 1. , 4. , 2. , np .nan , np .nan ]),
1955
+ ('average' , False , 'keep' , True ,
1956
+ [.8 , 0.8 , np .nan , 0.2 , 0.8 , 0.4 , np .nan , np .nan ]),
1957
+ ('min' , True , 'keep' , False ,
1958
+ [1. , 1. , np .nan , 5. , 1. , 4. , np .nan , np .nan ]),
1959
+ ('min' , True , 'keep' , True ,
1960
+ [0.2 , 0.2 , np .nan , 1.0 , 0.2 , 0.8 , np .nan , np .nan ]),
1961
+ ('min' , False , 'keep' , False ,
1962
+ [3. , 3. , np .nan , 1. , 3. , 2. , np .nan , np .nan ]),
1963
+ ('min' , False , 'keep' , True ,
1964
+ [.6 , 0.6 , np .nan , 0.2 , 0.6 , 0.4 , np .nan , np .nan ]),
1965
+ ('max' , True , 'keep' , False ,
1966
+ [3. , 3. , np .nan , 5. , 3. , 4. , np .nan , np .nan ]),
1967
+ ('max' , True , 'keep' , True ,
1968
+ [0.6 , 0.6 , np .nan , 1.0 , 0.6 , 0.8 , np .nan , np .nan ]),
1969
+ ('max' , False , 'keep' , False ,
1970
+ [5. , 5. , np .nan , 1. , 5. , 2. , np .nan , np .nan ]),
1971
+ ('max' , False , 'keep' , True ,
1972
+ [1. , 1. , np .nan , 0.2 , 1. , 0.4 , np .nan , np .nan ]),
1973
+ ('first' , True , 'keep' , False ,
1974
+ [1. , 2. , np .nan , 5. , 3. , 4. , np .nan , np .nan ]),
1975
+ ('first' , True , 'keep' , True ,
1976
+ [0.2 , 0.4 , np .nan , 1.0 , 0.6 , 0.8 , np .nan , np .nan ]),
1977
+ ('first' , False , 'keep' , False ,
1978
+ [3. , 4. , np .nan , 1. , 5. , 2. , np .nan , np .nan ]),
1979
+ ('first' , False , 'keep' , True ,
1980
+ [.6 , 0.8 , np .nan , 0.2 , 1. , 0.4 , np .nan , np .nan ]),
1981
+ ('dense' , True , 'keep' , False ,
1982
+ [1. , 1. , np .nan , 3. , 1. , 2. , np .nan , np .nan ]),
1983
+ ('dense' , True , 'keep' , True ,
1984
+ [0.2 , 0.2 , np .nan , 0.6 , 0.2 , 0.4 , np .nan , np .nan ]),
1985
+ ('dense' , False , 'keep' , False ,
1986
+ [3. , 3. , np .nan , 1. , 3. , 2. , np .nan , np .nan ]),
1987
+ ('dense' , False , 'keep' , True ,
1988
+ [.6 , 0.6 , np .nan , 0.2 , 0.6 , 0.4 , np .nan , np .nan ]),
1989
+ ('average' , True , 'no_na' , False , [2. , 2. , 7. , 5. , 2. , 4. , 7. , 7. ]),
1990
+ ('average' , True , 'no_na' , True ,
1991
+ [0.25 , 0.25 , 0.875 , 0.625 , 0.25 , 0.5 , 0.875 , 0.875 ]),
1992
+ ('average' , False , 'no_na' , False , [4. , 4. , 7. , 1. , 4. , 2. , 7. , 7. ]),
1993
+ ('average' , False , 'no_na' , True ,
1994
+ [0.5 , 0.5 , 0.875 , 0.125 , 0.5 , 0.25 , 0.875 , 0.875 ]),
1995
+ ('min' , True , 'no_na' , False , [1. , 1. , 6. , 5. , 1. , 4. , 6. , 6. ]),
1996
+ ('min' , True , 'no_na' , True ,
1997
+ [0.125 , 0.125 , 0.75 , 0.625 , 0.125 , 0.5 , 0.75 , 0.75 ]),
1998
+ ('min' , False , 'no_na' , False , [3. , 3. , 6. , 1. , 3. , 2. , 6. , 6. ]),
1999
+ ('min' , False , 'no_na' , True ,
2000
+ [0.375 , 0.375 , 0.75 , 0.125 , 0.375 , 0.25 , 0.75 , 0.75 ]),
2001
+ ('max' , True , 'no_na' , False , [3. , 3. , 8. , 5. , 3. , 4. , 8. , 8. ]),
2002
+ ('max' , True , 'no_na' , True ,
2003
+ [0.375 , 0.375 , 1. , 0.625 , 0.375 , 0.5 , 1. , 1. ]),
2004
+ ('max' , False , 'no_na' , False , [5. , 5. , 8. , 1. , 5. , 2. , 8. , 8. ]),
2005
+ ('max' , False , 'no_na' , True ,
2006
+ [0.625 , 0.625 , 1. , 0.125 , 0.625 , 0.25 , 1. , 1. ]),
2007
+ ('first' , True , 'no_na' , False , [1. , 2. , 6. , 5. , 3. , 4. , 7. , 8. ]),
2008
+ ('first' , True , 'no_na' , True ,
2009
+ [0.125 , 0.25 , 0.75 , 0.625 , 0.375 , 0.5 , 0.875 , 1. ]),
2010
+ ('first' , False , 'no_na' , False , [3. , 4. , 6. , 1. , 5. , 2. , 7. , 8. ]),
2011
+ ('first' , False , 'no_na' , True ,
2012
+ [0.375 , 0.5 , 0.75 , 0.125 , 0.625 , 0.25 , 0.875 , 1. ]),
2013
+ ('dense' , True , 'no_na' , False , [1. , 1. , 4. , 3. , 1. , 2. , 4. , 4. ]),
2014
+ ('dense' , True , 'no_na' , True ,
2015
+ [0.125 , 0.125 , 0.5 , 0.375 , 0.125 , 0.25 , 0.5 , 0.5 ]),
2016
+ ('dense' , False , 'no_na' , False , [3. , 3. , 4. , 1. , 3. , 2. , 4. , 4. ]),
2017
+ ('dense' , False , 'no_na' , True ,
2018
+ [0.375 , 0.375 , 0.5 , 0.125 , 0.375 , 0.25 , 0.5 , 0.5 ])
2054
2019
])
2055
- def test_rank_args_missing (self , vals , ties_method , ascending , na_option ,
2056
- pct , exp ):
2020
+ def test_rank_args_missing (self , grps , vals , ties_method , ascending ,
2021
+ na_option , pct , exp ):
2057
2022
if ties_method == 'first' and vals [0 ] == 'bar' :
2058
2023
pytest .xfail ("See GH 19482" )
2059
-
2060
- df = DataFrame ({'key' : ['foo' ]* 8 , 'val' : vals })
2024
+ key = np .repeat (grps , len (vals ))
2025
+ vals = vals * len (grps )
2026
+ df = DataFrame ({'key' : key , 'val' : vals })
2061
2027
result = df .groupby ('key' ).rank (method = ties_method , ascending = ascending ,
2062
2028
na_option = na_option , pct = pct )
2063
2029
2064
- assert_frame_equal (result , exp )
2030
+ exp_df = DataFrame (exp * len (grps ), columns = ['val' ])
2031
+ assert_frame_equal (result , exp_df )
2065
2032
2066
2033
def test_dont_clobber_name_column (self ):
2067
2034
df = DataFrame ({'key' : ['a' , 'a' , 'a' , 'b' , 'b' , 'b' ],
0 commit comments