Skip to content

TST (string dtype): update all tests in tests/frame/indexing #60193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions pandas/tests/frame/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
import pytest

-from pandas._config import using_string_dtype
-
import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -84,14 +82,18 @@ def test_6942(indexer_al):
assert df.iloc[0, 0] == t2


-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_26395(indexer_al):
# .at case fixed by GH#45121 (best guess)
df = DataFrame(index=["A", "B", "C"])
df["D"] = 0

indexer_al(df)["C", "D"] = 2
-expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
+expected = DataFrame(
+{"D": [0, 0, 2]},
+index=["A", "B", "C"],
+columns=pd.Index(["D"], dtype=object),
+dtype=np.int64,
+)
tm.assert_frame_equal(df, expected)

with pytest.raises(TypeError, match="Invalid value"):
Expand Down
16 changes: 5 additions & 11 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from pandas._config import using_string_dtype

from pandas._libs import iNaT
-from pandas.compat import HAS_PYARROW
from pandas.errors import InvalidIndexError

from pandas.core.dtypes.common import is_integer
Expand Down Expand Up @@ -506,17 +505,16 @@ def test_setitem_ambig(self, using_infer_string):
assert dm[2].dtype == np.object_

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_setitem_None(self, float_frame, using_infer_string):
+def test_setitem_None(self, float_frame):
# GH #766
float_frame[None] = float_frame["A"]
-key = None if not using_infer_string else np.nan
tm.assert_series_equal(
float_frame.iloc[:, -1], float_frame["A"], check_names=False
)
tm.assert_series_equal(
-float_frame.loc[:, key], float_frame["A"], check_names=False
+float_frame.loc[:, None], float_frame["A"], check_names=False
)
-tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False)
+tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)

def test_loc_setitem_boolean_mask_allfalse(self):
# GH 9596
Expand Down Expand Up @@ -1126,7 +1124,6 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self):
df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]]
tm.assert_series_equal(df["dates"], column)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_loc_setitem_datetimelike_with_inference(self):
# GH 7592
# assignment of timedeltas with NaT
Expand All @@ -1145,13 +1142,10 @@ def test_loc_setitem_datetimelike_with_inference(self):
result = df.dtypes
expected = Series(
[np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2,
-index=list("ABCDEFGH"),
+index=Index(list("ABCDEFGH"), dtype=object),
)
tm.assert_series_equal(result, expected)

-@pytest.mark.xfail(
-using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
-)
def test_getitem_boolean_indexing_mixed(self):
df = DataFrame(
{
Expand Down Expand Up @@ -1193,7 +1187,7 @@ def test_getitem_boolean_indexing_mixed(self):
tm.assert_frame_equal(df2, expected)

df["foo"] = "test"
-msg = "not supported between instances|unorderable types"
+msg = "not supported between instances|unorderable types|Invalid comparison"

with pytest.raises(TypeError, match=msg):
df[df > 0.3] = 1
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import numpy as np
import pytest

-from pandas._config import using_string_dtype
-
from pandas.errors import PerformanceWarning

from pandas import (
Expand Down Expand Up @@ -63,15 +61,15 @@ def test_insert_column_bug_4032(self):
expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
tm.assert_frame_equal(result, expected)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_insert_with_columns_dups(self):
# GH#14291
df = DataFrame()
df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
exp = DataFrame(
-[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
+[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]],
+columns=Index(["A", "A", "A"], dtype=object),
)
tm.assert_frame_equal(df, exp)

Expand Down
26 changes: 12 additions & 14 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

-from pandas._config import using_string_dtype
-
from pandas.core.dtypes.base import _registry as ea_registry
from pandas.core.dtypes.common import is_object_dtype
from pandas.core.dtypes.dtypes import (
Expand Down Expand Up @@ -146,13 +144,16 @@ def test_setitem_different_dtype(self):
)
tm.assert_series_equal(result, expected)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_empty_columns(self):
# GH 13522
df = DataFrame(index=["A", "B", "C"])
df["X"] = df.index
df["X"] = ["x", "y", "z"]
-exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
+exp = DataFrame(
+data={"X": ["x", "y", "z"]},
+index=["A", "B", "C"],
+columns=Index(["X"], dtype=object),
+)
tm.assert_frame_equal(df, exp)

def test_setitem_dt64_index_empty_columns(self):
Expand All @@ -162,14 +163,15 @@ def test_setitem_dt64_index_empty_columns(self):
df["A"] = rng
assert df["A"].dtype == np.dtype("M8[ns]")

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_timestamp_empty_columns(self):
# GH#19843
df = DataFrame(index=range(3))
df["now"] = Timestamp("20130101", tz="UTC")

expected = DataFrame(
-[[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"]
+[[Timestamp("20130101", tz="UTC")]] * 3,
+index=range(3),
+columns=Index(["now"], dtype=object),
)
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -202,14 +204,13 @@ def test_setitem_with_unaligned_sparse_value(self):
expected = Series(SparseArray([1, 0, 0]), name="new_column")
tm.assert_series_equal(df["new_column"], expected)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_period_preserves_dtype(self):
# GH: 26861
data = [Period("2003-12", "D")]
result = DataFrame([])
result["a"] = data

-expected = DataFrame({"a": data})
+expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object))

tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -672,11 +673,10 @@ def test_setitem_iloc_two_dimensional_generator(self):
expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
tm.assert_frame_equal(df, expected)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_dtypes_bytes_type_to_object(self):
# GH 20734
index = Series(name="id", dtype="S24")
-df = DataFrame(index=index)
+df = DataFrame(index=index, columns=Index([], dtype="str"))
df["a"] = Series(name="a", index=index, dtype=np.uint32)
df["b"] = Series(name="b", index=index, dtype="S64")
df["c"] = Series(name="c", index=index, dtype="S64")
Expand Down Expand Up @@ -705,7 +705,6 @@ def test_setitem_ea_dtype_rhs_series(self):
expected = DataFrame({"a": [1, 2]}, dtype="Int64")
tm.assert_frame_equal(df, expected)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_npmatrix_2d(self):
# GH#42376
# for use-case df["x"] = sparse.random((10, 10)).mean(axis=1)
Expand All @@ -714,7 +713,7 @@ def test_setitem_npmatrix_2d(self):
)

a = np.ones((10, 1))
-df = DataFrame(index=np.arange(10))
+df = DataFrame(index=np.arange(10), columns=Index([], dtype="str"))
df["np-array"] = a

# Instantiation of `np.matrix` gives PendingDeprecationWarning
Expand Down Expand Up @@ -927,12 +926,11 @@ def test_setitem_with_expansion_categorical_dtype(self):
ser.name = "E"
tm.assert_series_equal(result2.sort_index(), ser.sort_index())

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_scalars_no_index(self):
# GH#16823 / GH#17894
df = DataFrame()
df["foo"] = 1
-expected = DataFrame(columns=["foo"]).astype(np.int64)
+expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64)
tm.assert_frame_equal(df, expected)

def test_setitem_newcol_tuple_key(self, float_frame):
Expand Down
18 changes: 12 additions & 6 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def is_ok(s):


class TestDataFrameIndexingWhere:
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_where_get(self, where_frame, float_string_frame):
def _check_get(df, cond, check_dtypes=True):
other1 = _safe_add(df)
Expand All @@ -68,7 +67,10 @@ def _check_get(df, cond, check_dtypes=True):
# check getting
df = where_frame
if df is float_string_frame:
-msg = "'>' not supported between instances of 'str' and 'int'"
+msg = (
+"'>' not supported between instances of 'str' and 'int'"
+"|Invalid comparison"
+)
with pytest.raises(TypeError, match=msg):
df > 0
return
Expand Down Expand Up @@ -101,7 +103,6 @@ def test_where_upcasting(self):

tm.assert_series_equal(result, expected)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_where_alignment(self, where_frame, float_string_frame):
# aligning
def _check_align(df, cond, other, check_dtypes=True):
Expand Down Expand Up @@ -133,7 +134,10 @@ def _check_align(df, cond, other, check_dtypes=True):

df = where_frame
if df is float_string_frame:
-msg = "'>' not supported between instances of 'str' and 'int'"
+msg = (
+"'>' not supported between instances of 'str' and 'int'"
+"|Invalid comparison"
+)
with pytest.raises(TypeError, match=msg):
df > 0
return
Expand Down Expand Up @@ -176,7 +180,6 @@ def test_where_invalid(self):
with pytest.raises(ValueError, match=msg):
df.mask(0)

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
# where inplace

Expand All @@ -198,7 +201,10 @@ def _check_set(df, cond, check_dtypes=True):

df = where_frame
if df is float_string_frame:
-msg = "'>' not supported between instances of 'str' and 'int'"
+msg = (
+"'>' not supported between instances of 'str' and 'int'"
+"|Invalid comparison"
+)
with pytest.raises(TypeError, match=msg):
df > 0
return
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/frame/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

-from pandas._config import using_string_dtype
-
from pandas import (
DataFrame,
Index,
Expand Down Expand Up @@ -74,10 +72,9 @@ def test_xs_other(self, float_frame):
tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
assert not (expected == 5).all()

-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_xs_corner(self):
# pathological mixed-type reordering case
-df = DataFrame(index=[0])
+df = DataFrame(index=[0], columns=Index([], dtype="str"))
df["A"] = 1.0
df["B"] = "foo"
df["C"] = 2.0
Expand Down
Loading