Skip to content

Commit d08a792

Browse files
authored
Complex Dtype Support for Hashmap Algos (#36482)
1 parent a0d0cf1 commit d08a792

File tree

12 files changed

+177
-1
lines changed

12 files changed

+177
-1
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ Other enhancements
101101
- :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
102102
- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
103103
- :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
104+
- Methods that rely on hashmap-based algorithms, such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize`, now support complex dtypes; previously the imaginary component of complex numbers was ignored (:issue:`17927`)
104105

105106
.. ---------------------------------------------------------------------------
106107

pandas/_libs/algos.pyi

+2
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ def diff_2d(
150150
) -> None: ...
151151
def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ...
152152
def ensure_object(arr: object) -> npt.NDArray[np.object_]: ...
153+
def ensure_complex64(arr: object, copy=True) -> npt.NDArray[np.complex64]: ...
154+
def ensure_complex128(arr: object, copy=True) -> npt.NDArray[np.complex128]: ...
153155
def ensure_float64(arr: object, copy=True) -> npt.NDArray[np.float64]: ...
154156
def ensure_float32(arr: object, copy=True) -> npt.NDArray[np.float32]: ...
155157
def ensure_int8(arr: object, copy=True) -> npt.NDArray[np.int8]: ...

pandas/_libs/algos.pyx

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import numpy as np
1515

1616
cimport numpy as cnp
1717
from numpy cimport (
18+
NPY_COMPLEX64,
19+
NPY_COMPLEX128,
1820
NPY_FLOAT32,
1921
NPY_FLOAT64,
2022
NPY_INT8,

pandas/_libs/algos_common_helper.pxi.in

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
4747
('uint16', 'UINT16', 'uint16'),
4848
('uint32', 'UINT32', 'uint32'),
4949
('uint64', 'UINT64', 'uint64'),
50+
('complex64', 'COMPLEX64', 'complex64'),
51+
('complex128', 'COMPLEX128', 'complex128')
5052
# ('platform_int', 'INT', 'int_'),
5153
# ('object', 'OBJECT', 'object_'),
5254
]

pandas/core/dtypes/common.py

+2
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ def ensure_float(arr):
9797
ensure_int32 = algos.ensure_int32
9898
ensure_int16 = algos.ensure_int16
9999
ensure_int8 = algos.ensure_int8
100+
ensure_complex64 = algos.ensure_complex64
101+
ensure_complex128 = algos.ensure_complex128
100102
ensure_platform_int = algos.ensure_platform_int
101103
ensure_object = algos.ensure_object
102104

pandas/tests/groupby/test_groupby.py

+24
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,30 @@ def test_groupby_complex():
10091009
tm.assert_series_equal(result, expected)
10101010

10111011

1012+
def test_groupby_complex_numbers():
1013+
# GH 17927
1014+
df = DataFrame(
1015+
[
1016+
{"a": 1, "b": 1 + 1j},
1017+
{"a": 1, "b": 1 + 2j},
1018+
{"a": 4, "b": 1},
1019+
]
1020+
)
1021+
expected = DataFrame(
1022+
np.array([1, 1, 1], dtype=np.int64),
1023+
index=Index([(1 + 1j), (1 + 2j), (1 + 0j)], dtype="object", name="b"),
1024+
columns=Index(["a"], dtype="object"),
1025+
)
1026+
result = df.groupby("b", sort=False).count()
1027+
tm.assert_frame_equal(result, expected)
1028+
1029+
# Sorted lexicographically: by real part, then by imaginary part
1030+
# Complex Index dtype is cast to object
1031+
expected.index = Index([(1 + 0j), (1 + 1j), (1 + 2j)], dtype="object", name="b")
1032+
result = df.groupby("b", sort=True).count()
1033+
tm.assert_frame_equal(result, expected)
1034+
1035+
10121036
def test_groupby_series_indexed_differently():
10131037
s1 = Series(
10141038
[5.0, -9.0, 4.0, 100.0, -5.0, 55.0, 6.7],

pandas/tests/indexes/multi/test_duplicates.py

+32
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas import (
99
DatetimeIndex,
1010
MultiIndex,
11+
Series,
1112
)
1213
import pandas._testing as tm
1314

@@ -299,6 +300,37 @@ def test_duplicated_drop_duplicates():
299300
tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)
300301

301302

303+
@pytest.mark.parametrize(
304+
"dtype",
305+
[
306+
np.complex64,
307+
np.complex128,
308+
],
309+
)
310+
def test_duplicated_series_complex_numbers(dtype):
311+
# GH 17927
312+
expected = Series(
313+
[False, False, False, True, False, False, False, True, False, True],
314+
dtype=bool,
315+
)
316+
result = Series(
317+
[
318+
np.nan + np.nan * 1j,
319+
0,
320+
1j,
321+
1j,
322+
1,
323+
1 + 1j,
324+
1 + 2j,
325+
1 + 1j,
326+
np.nan,
327+
np.nan + np.nan * 1j,
328+
],
329+
dtype=dtype,
330+
).duplicated()
331+
tm.assert_series_equal(result, expected)
332+
333+
302334
def test_multi_drop_duplicates_pos_args_deprecation():
303335
# GH#41485
304336
idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]])

pandas/tests/indexes/period/methods/test_factorize.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import numpy as np
22

3-
from pandas import PeriodIndex
3+
from pandas import (
4+
PeriodIndex,
5+
factorize,
6+
)
47
import pandas._testing as tm
58

69

@@ -35,3 +38,15 @@ def test_factorize(self):
3538
arr, idx = idx2.factorize()
3639
tm.assert_numpy_array_equal(arr, exp_arr)
3740
tm.assert_index_equal(idx, exp_idx)
41+
42+
def test_factorize_complex(self):
43+
# GH 17927
44+
array = [1, 2, 2 + 1j]
45+
labels, uniques = factorize(array)
46+
47+
expected_labels = np.array([0, 1, 2], dtype=np.intp)
48+
tm.assert_numpy_array_equal(labels, expected_labels)
49+
50+
# Should return a complex dtype in the future
51+
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object)
52+
tm.assert_numpy_array_equal(uniques, expected_uniques)

pandas/tests/reductions/test_reductions.py

+47
Original file line numberDiff line numberDiff line change
@@ -1487,3 +1487,50 @@ def test_mode_boolean_with_na(self):
14871487
result = ser.mode()
14881488
expected = Series({0: True}, dtype="boolean")
14891489
tm.assert_series_equal(result, expected)
1490+
1491+
@pytest.mark.parametrize(
1492+
"array,expected,dtype",
1493+
[
1494+
(
1495+
[0, 1j, 1, 1, 1 + 1j, 1 + 2j],
1496+
Series([1], dtype=np.complex128),
1497+
np.complex128,
1498+
),
1499+
(
1500+
[0, 1j, 1, 1, 1 + 1j, 1 + 2j],
1501+
Series([1], dtype=np.complex64),
1502+
np.complex64,
1503+
),
1504+
(
1505+
[1 + 1j, 2j, 1 + 1j],
1506+
Series([1 + 1j], dtype=np.complex128),
1507+
np.complex128,
1508+
),
1509+
],
1510+
)
1511+
def test_single_mode_value_complex(self, array, expected, dtype):
1512+
result = Series(array, dtype=dtype).mode()
1513+
tm.assert_series_equal(result, expected)
1514+
1515+
@pytest.mark.parametrize(
1516+
"array,expected,dtype",
1517+
[
1518+
(
1519+
# no repeated values: every value occurs once, so all are returned as modes
1520+
[0, 1j, 1, 1 + 1j, 1 + 2j],
1521+
Series([0j, 1j, 1 + 0j, 1 + 1j, 1 + 2j], dtype=np.complex128),
1522+
np.complex128,
1523+
),
1524+
(
1525+
[1 + 1j, 2j, 1 + 1j, 2j, 3],
1526+
Series([2j, 1 + 1j], dtype=np.complex64),
1527+
np.complex64,
1528+
),
1529+
],
1530+
)
1531+
def test_multimode_complex(self, array, expected, dtype):
1532+
# GH 17927
1533+
# mode tries to sort multimodal series.
1534+
# Complex numbers are sorted lexicographically (real part first, then
# imaginary part), not by magnitude: 2j sorts before 1 + 1j
1535+
result = Series(array, dtype=dtype).mode()
1536+
tm.assert_series_equal(result, expected)

pandas/tests/series/methods/test_isin.py

+15
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,18 @@ def test_isin_large_series_mixed_dtypes_and_nan():
186186
result = ser.isin({"foo", "bar"})
187187
expected = Series([False] * 3 * 1_000_000)
188188
tm.assert_series_equal(result, expected)
189+
190+
191+
@pytest.mark.parametrize(
192+
"array,expected",
193+
[
194+
(
195+
[0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
196+
Series([False, True, True, False, True, True, True], dtype=bool),
197+
)
198+
],
199+
)
200+
def test_isin_complex_numbers(array, expected):
201+
# GH 17927
202+
result = Series(array).isin([1j, 1 + 1j, 1 + 2j])
203+
tm.assert_series_equal(result, expected)

pandas/tests/series/methods/test_value_counts.py

+19
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,22 @@ def test_value_counts_bool_with_nan(self, ser, dropna, exp):
207207
# GH32146
208208
out = ser.value_counts(dropna=dropna)
209209
tm.assert_series_equal(out, exp)
210+
211+
@pytest.mark.parametrize(
212+
"input_array,expected",
213+
[
214+
(
215+
[1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
216+
Series([3, 2, 1], index=pd.Index([3j, 1 + 1j, 1], dtype=np.complex128)),
217+
),
218+
(
219+
[1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
220+
Series([3, 2, 1], index=pd.Index([3j, 1 + 1j, 1], dtype=np.complex64)),
221+
),
222+
],
223+
)
224+
def test_value_counts_complex_numbers(self, input_array, expected):
225+
# GH 17927
226+
# Complex Index dtype is cast to object
227+
result = Series(input_array).value_counts()
228+
tm.assert_series_equal(result, expected)

pandas/tests/test_algos.py

+15
Original file line numberDiff line numberDiff line change
@@ -1513,6 +1513,21 @@ def test_unique_tuples(self, arr, uniques):
15131513
result = pd.unique(arr)
15141514
tm.assert_numpy_array_equal(result, expected)
15151515

1516+
@pytest.mark.parametrize(
1517+
"array,expected",
1518+
[
1519+
(
1520+
[1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
1521+
# Should return a complex dtype in the future
1522+
np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object),
1523+
)
1524+
],
1525+
)
1526+
def test_unique_complex_numbers(self, array, expected):
1527+
# GH 17927
1528+
result = pd.unique(array)
1529+
tm.assert_numpy_array_equal(result, expected)
1530+
15161531

15171532
class TestHashTable:
15181533
def test_string_hashtable_set_item_signature(self):

0 commit comments

Comments
 (0)