-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
[WIP] Quick fix to provide complex data type support for hashmap based algorithms #27599
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
0fbd5d9
e3933ef
665e7a6
a1421fe
7a16823
0bf66a9
85edadb
0a1e59a
13afa7e
a8fba28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
import pandas as pd | ||
from pandas import ( | ||
Index, | ||
Series, | ||
DataFrame, | ||
) | ||
import pandas.core.algorithms as algos | ||
import pandas.util.testing as tm | ||
|
||
|
||
class TestComplexSupportBasic: | ||
@pytest.mark.parametrize(
    "array,expected",
    [
        (
            [1 + 1j, 0, 1, 1j, 1 + 2j],
            Series([1, 1, 1, 1, 1], index=[1 + 2j, 1 + 1j, 1j, 1, 0]),
        ),
        (
            [1 + 2j, 0, 1j, 1, 1j, 1 + 1j],
            # by default the index is ordered by count, largest first
            Series([2, 1, 1, 1, 1], index=[1j, 1 + 2j, 1 + 1j, 1, 0]),
        ),
    ],
)
def test_value_counts(self, array, expected):
    """Compare ``algos.value_counts`` on complex input with the expected Series."""
    counts = algos.value_counts(array)
    tm.assert_series_equal(counts, expected)
|
||
@pytest.mark.parametrize("array,expected", [ | ||
Reviewer comment: For any that are just one set of parameters here, you can just define them in the function body. No need to parametrize. Author reply: This is just an initial set of test cases. I plan on adding more test cases for each of the algos. |
||
( | ||
[1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j], | ||
np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object) | ||
), | ||
]) | ||
def test_unique(self, array, expected): | ||
result = algos.unique(array) | ||
assert np.array_equal(result, expected) | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@pytest.mark.parametrize(
    "array,expected",
    [
        (
            [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
            Series([False, False, True, False, False, False, True], dtype=bool),
        ),
    ],
)
def test_duplicated(self, array, expected):
    """Compare ``Series.duplicated`` on complex values with the expected mask."""
    series = Series(array)
    mask = series.duplicated()
    tm.assert_series_equal(mask, expected)
|
||
@pytest.mark.parametrize(
    "array,expected",
    [
        (
            [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
            Series([False, True, True, False, True, True, True], dtype=bool),
        ),
    ],
)
def test_isin(self, array, expected):
    """Compare ``Series.isin`` for a fixed set of complex values with the expected mask."""
    # Values whose membership is being tested.
    candidates = [1j, 1 + 1j, 1 + 2j]
    membership = Series(array).isin(candidates)
    tm.assert_series_equal(membership, expected)
|
||
@pytest.mark.parametrize(
    "array,expected",
    [
        (
            [1, 2, 2 + 1j],
            (
                np.array([0, 1, 2]),
                np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object),
            ),
        ),
    ],
)
def test_factorize(self, array, expected):
    """Check both elements returned by ``pd.factorize`` on mixed real/complex input.

    ``expected`` is a 2-tuple; each element is compared with the matching
    element of the result using ``np.array_equal``.
    """
    result = pd.factorize(array)
    assert len(result) == 2

    # Review feedback on this test: comparing the two elements directly is
    # easier to read at a quick glance than an enumerating loop.
    codes, uniques = result
    expected_codes, expected_uniques = expected
    assert np.array_equal(codes, expected_codes)
    assert np.array_equal(uniques, expected_uniques)
|
||
@pytest.mark.parametrize(
    "frame,expected",
    [
        (
            DataFrame([{"a": 1, "b": 1 + 1j}, {"a": 1, "b": 1 + 2j}]),
            DataFrame(
                np.array([1, 1]),
                index=Index([(1 + 1j), (1 + 2j)], dtype='object', name='b'),
                columns=Index(['a'], dtype='object'),
            ),
        ),
    ],
)
def test_groupby(self, frame, expected):
    """Group by the complex-valued column ``b`` and compare the per-group counts."""
    grouped = frame.groupby("b")
    counts = grouped.count()
    tm.assert_frame_equal(counts, expected)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't just return non-object values? Regardless, definitely don't leave the commented-out stuff above; just delete it.