Skip to content

TST (string dtype): resolve xfails in pandas/tests/series #60233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pandas/tests/series/accessors/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs.tslibs.timezones import maybe_get_tz

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -556,7 +554,6 @@ def test_strftime(self):
)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_strftime_dt64_days(self):
ser = Series(date_range("20130101", periods=5))
ser.iloc[0] = pd.NaT
Expand All @@ -571,7 +568,6 @@ def test_strftime_dt64_days(self):

expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype=np.object_,
)
# dtype may be S10 or U10 depending on python version
tm.assert_index_equal(result, expected)
Expand Down
21 changes: 15 additions & 6 deletions pandas/tests/series/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import IndexingError

from pandas import (
Expand Down Expand Up @@ -251,18 +249,29 @@ def test_slice(string_series, object_series):
tm.assert_series_equal(string_series, original)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_timedelta_assignment():
# GH 8209
s = Series([], dtype=object)
s.loc["B"] = timedelta(1)
tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))
expected = Series(
Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object)
)
tm.assert_series_equal(s, expected)

s = s.reindex(s.index.insert(0, "A"))
tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))
expected = Series(
[np.nan, Timedelta("1 days")],
dtype="timedelta64[ns]",
index=Index(["A", "B"], dtype=object),
)
tm.assert_series_equal(s, expected)

s.loc["A"] = timedelta(1)
expected = Series(Timedelta("1 days"), index=["A", "B"])
expected = Series(
Timedelta("1 days"),
dtype="timedelta64[ns]",
index=Index(["A", "B"], dtype=object),
)
tm.assert_series_equal(s, expected)


Expand Down
47 changes: 28 additions & 19 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
HAS_PYARROW,
WASM,
)
from pandas.compat import WASM
from pandas.compat.numpy import np_version_gte1p24
from pandas.errors import IndexingError

Expand All @@ -32,6 +27,7 @@
NaT,
Period,
Series,
StringDtype,
Timedelta,
Timestamp,
array,
Expand Down Expand Up @@ -535,14 +531,16 @@ def test_append_timedelta_does_not_cast(self, td, using_infer_string, request):
tm.assert_series_equal(ser, expected)
assert isinstance(ser["td"], Timedelta)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_with_expansion_type_promotion(self):
# GH#12599
ser = Series(dtype=object)
ser["a"] = Timestamp("2016-01-01")
ser["b"] = 3.0
ser["c"] = "foo"
expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
expected = Series(
[Timestamp("2016-01-01"), 3.0, "foo"],
index=Index(["a", "b", "c"], dtype=object),
)
tm.assert_series_equal(ser, expected)

def test_setitem_not_contained(self, string_series):
Expand Down Expand Up @@ -826,11 +824,6 @@ def test_mask_key(self, obj, key, expected, raises, val, indexer_sli):
else:
indexer_sli(obj)[mask] = val

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW,
reason="TODO(infer_string)",
strict=False,
)
def test_series_where(self, obj, key, expected, raises, val, is_inplace):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True
Expand All @@ -846,6 +839,11 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace):
obj = obj.copy()
arr = obj._values

if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
obj.where(~mask, val)
return

res = obj.where(~mask, val)

if val is NA and res.dtype == object:
Expand All @@ -858,25 +856,23 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace):

self._check_inplace(is_inplace, orig, arr, obj)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_index_where(self, obj, key, expected, raises, val, using_infer_string):
def test_index_where(self, obj, key, expected, raises, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if using_infer_string and obj.dtype == object:
if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).where(~mask, val)
else:
res = Index(obj).where(~mask, val)
expected_idx = Index(expected, dtype=expected.dtype)
tm.assert_index_equal(res, expected_idx)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string):
def test_index_putmask(self, obj, key, expected, raises, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if using_infer_string and obj.dtype == object:
if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).putmask(mask, val)
else:
Expand Down Expand Up @@ -1372,6 +1368,19 @@ def raises(self):
return False


@pytest.mark.parametrize(
    "val,exp_dtype,raises",
    [
        (1, object, True),
        ("e", StringDtype(na_value=np.nan), False),
    ],
)
class TestCoercionString(CoercionTest):
    """Run the shared coercion checks against a NaN-backed string Series.

    Parametrized with an integer value (flagged ``raises=True``) and a
    string value (flagged ``raises=False``, expected to keep the string
    dtype).  How ``exp_dtype`` is consumed is decided by the inherited
    ``CoercionTest`` machinery -- NOTE(review): confirm against the base
    class when changing these parameters.
    """

    @pytest.fixture
    def obj(self):
        """Return the four-element string Series used as the test subject."""
        str_dtype = StringDtype(na_value=np.nan)
        return Series(["a", "b", "c", "d"], dtype=str_dtype)


@pytest.mark.parametrize(
"val,exp_dtype,raises",
[
Expand Down
17 changes: 7 additions & 10 deletions pandas/tests/series/indexing/test_where.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.common import is_integer

import pandas as pd
Expand Down Expand Up @@ -231,7 +229,6 @@ def test_where_ndframe_align():
tm.assert_series_equal(out, expected)


@pytest.mark.xfail(using_string_dtype(), reason="can't set ints into string")
def test_where_setitem_invalid():
# GH 2702
# make sure correct exceptions are raised on invalid list assignment
Expand All @@ -241,7 +238,7 @@ def test_where_setitem_invalid():
"different length than the value"
)
# slice
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[0:3] = list(range(27))
Expand All @@ -251,18 +248,18 @@ def test_where_setitem_invalid():
tm.assert_series_equal(s.astype(np.int64), expected)

# slice with step
s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[0:4:2] = list(range(27))

s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)
s[0:4:2] = list(range(2))
expected = Series([0, "b", 1, "d", "e", "f"])
tm.assert_series_equal(s, expected)

# neg slices
s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[:-1] = list(range(27))
Expand All @@ -272,18 +269,18 @@ def test_where_setitem_invalid():
tm.assert_series_equal(s, expected)

# list
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("list-like")):
s[[0, 1, 2]] = list(range(27))

s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("list-like")):
s[[0, 1, 2]] = list(range(2))

# scalar
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)
s[0] = list(range(10))
expected = Series([list(range(10)), "b", "c"])
tm.assert_series_equal(s, expected)
Expand Down
29 changes: 16 additions & 13 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
Expand Down Expand Up @@ -628,15 +626,23 @@ def test_replace_nullable_numeric(self):
with pytest.raises(TypeError, match="Invalid value"):
ints.replace(1, 9.5)

@pytest.mark.xfail(using_string_dtype(), reason="can't fill 1 in string")
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series(self, regex):
# GH-48644
series = pd.Series(["0"])
series = pd.Series(["0"], dtype=object)
expected = pd.Series([1], dtype=object)
result = series.replace(to_replace="0", value=1, regex=regex)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series_string(self, regex, using_infer_string):
    """Replacing a string with an int in a ``str``-dtype Series must raise.

    Runs with and without ``regex`` and expects ``TypeError`` matching
    "Invalid value" in both cases.
    """
    # Without string inference, dtype="str" is just object dtype, and that
    # case is already exercised by test_replace_regex_dtype_series; there is
    # nothing further to check here.
    if using_infer_string:
        ser = pd.Series(["0"], dtype="str")
        with pytest.raises(TypeError, match="Invalid value"):
            ser.replace(to_replace="0", value=1, regex=regex)

def test_replace_different_int_types(self, any_int_numpy_dtype):
# GH#45311
labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
Expand All @@ -656,21 +662,18 @@ def test_replace_value_none_dtype_numeric(self, val):
expected = pd.Series([1, None], dtype=object)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_replace_change_dtype_series(self, using_infer_string):
def test_replace_change_dtype_series(self):
# GH#25797
df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
warn = FutureWarning if using_infer_string else None
with tm.assert_produces_warning(warn, match="Downcasting"):
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
tm.assert_frame_equal(df, expected)

df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([None], [np.nan])
tm.assert_frame_equal(df, expected)

df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].fillna(np.nan)
tm.assert_frame_equal(df, expected)

Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/series/methods/test_unstack.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -136,11 +134,10 @@ def test_unstack_mixed_type_name_in_multiindex(
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_unstack_multi_index_categorical_values():
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
mi = df.stack().index.rename(["major", "minor"])
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def test_logical_ops_label_based(self, using_infer_string):
for e in [Series(["z"])]:
if using_infer_string:
# TODO(infer_string) should this behave differently?
# -> https://github.com/pandas-dev/pandas/issues/60234
with pytest.raises(
TypeError, match="not supported for dtype|unsupported operand type"
):
Expand Down