Skip to content

TST/REF: misplaced tests in frame.test_dtypes #37424

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from datetime import timedelta

import numpy as np
import pytest

from pandas.core.dtypes.dtypes import DatetimeTZDtype

Expand Down Expand Up @@ -89,16 +88,7 @@ def test_dtypes_gh8722(self, float_string_frame):
result = df.dtypes
tm.assert_series_equal(result, Series({0: np.dtype("int64")}))

def test_singlerow_slice_categoricaldtype_gives_series(self):
# GH29521
df = DataFrame({"x": pd.Categorical("a b c d e".split())})
result = df.iloc[0]
raw_cat = pd.Categorical(["a"], categories=["a", "b", "c", "d", "e"])
expected = Series(raw_cat, index=["x"], name=0, dtype="category")

tm.assert_series_equal(result, expected)

def test_timedeltas(self):
def test_dtypes_timedeltas(self):
df = DataFrame(
dict(
A=Series(date_range("2012-1-1", periods=3, freq="D")),
Expand Down Expand Up @@ -136,95 +126,3 @@ def test_timedeltas(self):
index=list("ABCD"),
)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
"input_vals",
[
([1, 2]),
(["1", "2"]),
(list(pd.date_range("1/1/2011", periods=2, freq="H"))),
(list(pd.date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))),
([pd.Interval(left=0, right=5)]),
],
)
def test_constructor_list_str(self, input_vals, string_dtype):
# GH 16605
# Ensure that data elements are converted to strings when
# dtype is str, 'str', or 'U'

result = DataFrame({"A": input_vals}, dtype=string_dtype)
expected = DataFrame({"A": input_vals}).astype({"A": string_dtype})
tm.assert_frame_equal(result, expected)

def test_constructor_list_str_na(self, string_dtype):

result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype)
expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"data, expected",
[
# empty
(DataFrame(), True),
# multi-same
(DataFrame({"A": [1, 2], "B": [1, 2]}), True),
# multi-object
(
DataFrame(
{
"A": np.array([1, 2], dtype=object),
"B": np.array(["a", "b"], dtype=object),
}
),
True,
),
# multi-extension
(
DataFrame(
{"A": pd.Categorical(["a", "b"]), "B": pd.Categorical(["a", "b"])}
),
True,
),
# differ types
(DataFrame({"A": [1, 2], "B": [1.0, 2.0]}), False),
# differ sizes
(
DataFrame(
{
"A": np.array([1, 2], dtype=np.int32),
"B": np.array([1, 2], dtype=np.int64),
}
),
False,
),
# multi-extension differ
(
DataFrame(
{"A": pd.Categorical(["a", "b"]), "B": pd.Categorical(["b", "c"])}
),
False,
),
],
)
def test_is_homogeneous_type(self, data, expected):
assert data._is_homogeneous_type is expected

def test_asarray_homogenous(self):
df = DataFrame({"A": pd.Categorical([1, 2]), "B": pd.Categorical([1, 2])})
result = np.asarray(df)
# may change from object in the future
expected = np.array([[1, 1], [2, 2]], dtype="object")
tm.assert_numpy_array_equal(result, expected)

def test_str_to_small_float_conversion_type(self):
# GH 20388
np.random.seed(13)
col_data = [str(np.random.random() * 1e-12) for _ in range(5)]
result = DataFrame(col_data, columns=["A"])
expected = DataFrame(col_data, columns=["A"], dtype=object)
tm.assert_frame_equal(result, expected)
# change the dtype of the elements from object to float one by one
result.loc[result.index, "A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)
49 changes: 49 additions & 0 deletions pandas/tests/frame/methods/test_is_homogeneous_dtype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import numpy as np
import pytest

from pandas import Categorical, DataFrame


@pytest.mark.parametrize(
    "data, expected",
    [
        # frame with no columns
        (DataFrame(), True),
        # several columns sharing one dtype
        (DataFrame({"A": [1, 2], "B": [1, 2]}), True),
        # several object columns
        (
            DataFrame(
                {
                    "A": np.array([1, 2], dtype=object),
                    "B": np.array(["a", "b"], dtype=object),
                }
            ),
            True,
        ),
        # several extension columns with equal dtype
        (
            DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["a", "b"])}),
            True,
        ),
        # columns of different types
        (DataFrame({"A": [1, 2], "B": [1.0, 2.0]}), False),
        # same kind of integer, different item sizes
        (
            DataFrame(
                {
                    "A": np.array([1, 2], dtype=np.int32),
                    "B": np.array([1, 2], dtype=np.int64),
                }
            ),
            False,
        ),
        # extension columns whose dtypes differ
        (
            DataFrame({"A": Categorical(["a", "b"]), "B": Categorical(["b", "c"])}),
            False,
        ),
    ],
)
def test_is_homogeneous_type(data, expected):
    """``DataFrame._is_homogeneous_type`` is exactly the expected bool."""
    observed = data._is_homogeneous_type
    assert observed is expected
25 changes: 25 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2697,6 +2697,31 @@ def test_frame_ctor_datetime64_column(self):
df = DataFrame({"A": np.random.randn(len(rng)), "B": dates})
assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]"))

@pytest.mark.parametrize(
"input_vals",
[
([1, 2]),
(["1", "2"]),
(list(date_range("1/1/2011", periods=2, freq="H"))),
(list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))),
([pd.Interval(left=0, right=5)]),
],
)
def test_constructor_list_str(self, input_vals, string_dtype):
# GH#16605
# Ensure that data elements are converted to strings when
# dtype is str, 'str', or 'U'

result = DataFrame({"A": input_vals}, dtype=string_dtype)
expected = DataFrame({"A": input_vals}).astype({"A": string_dtype})
tm.assert_frame_equal(result, expected)

def test_constructor_list_str_na(self, string_dtype):

result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype)
expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object)
tm.assert_frame_equal(result, expected)


class TestDataFrameConstructorWithDatetimeTZ:
def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/test_npfuncs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Tests for np.foo applied to DataFrame, not necessarily ufuncs.
"""
import numpy as np

from pandas import Categorical, DataFrame
import pandas._testing as tm


class TestAsArray:
    """Checks for ``np.asarray`` applied to a DataFrame."""

    def test_asarray_homogenous(self):
        """Two equal-dtype categorical columns convert to an object array."""
        cat_columns = {label: Categorical([1, 2]) for label in ("A", "B")}

        as_ndarray = np.asarray(DataFrame(cat_columns))

        # may change from object in the future
        expected = np.array([[1, 1], [2, 2]], dtype="object")
        tm.assert_numpy_array_equal(as_ndarray, expected)
9 changes: 9 additions & 0 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,15 @@ def test_iloc_with_boolean_operation(self):
expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]])
tm.assert_frame_equal(result, expected)

def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self):
# GH#29521
df = DataFrame({"x": pd.Categorical("a b c d e".split())})
result = df.iloc[0]
raw_cat = pd.Categorical(["a"], categories=["a", "b", "c", "d", "e"])
expected = Series(raw_cat, index=["x"], name=0, dtype="category")

tm.assert_series_equal(result, expected)


class TestILocSetItemDuplicateColumns:
def test_iloc_setitem_scalar_duplicate_columns(self):
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,19 @@ def test_loc_reverse_assignment(self):

tm.assert_series_equal(result, expected)

def test_loc_setitem_str_to_small_float_conversion_type(self):
# GH#20388
np.random.seed(13)
col_data = [str(np.random.random() * 1e-12) for _ in range(5)]
result = DataFrame(col_data, columns=["A"])
expected = DataFrame(col_data, columns=["A"], dtype=object)
tm.assert_frame_equal(result, expected)

# change the dtype of the elements from object to float one by one
result.loc[result.index, "A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)


class TestLocWithMultiIndex:
@pytest.mark.parametrize(
Expand Down