Skip to content

REGR: Fix to_csv with IntervalIndex #28229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Missing
I/O
^^^

-
- Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`)
-
-

Expand Down
8 changes: 2 additions & 6 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1096,12 +1096,8 @@ def _format_with_header(self, header, **kwargs):
return header + list(self._format_native_types(**kwargs))

def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs):
    """
    Format the values of this index, delegating to the parent class.

    The only thing this override contributes is a different default for
    ``na_rep``; every argument is forwarded unchanged to the parent
    implementation.

    Parameters
    ----------
    na_rep : str, default "NaN"
        Representation to use for missing values.
    quoting : optional
        Forwarded as-is to the parent ``_format_native_types``.
    **kwargs
        Any further keyword arguments, forwarded as-is to the parent.

    Returns
    -------
    Whatever the parent ``_format_native_types`` returns
    (presumably an array of strings -- confirm against the base class).
    """
    # GH 28210: use base method but with different default na_rep.
    # The previous ExtensionArrayFormatter-based body never forwarded
    # ``quoting`` or ``**kwargs`` and shadowed this return, leaving the
    # fix unreachable; it is reported as the cause of the to_csv
    # TypeError regression for interval-indexed objects.
    return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs)

def _format_data(self, name=None):

Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,20 @@ def _make_frame(names=None):
tm.assert_index_equal(recons.columns, exp.columns)
assert len(recons) == 0

def test_to_csv_interval_index(self):
    """Write an interval-indexed frame to CSV and compare what is read back."""
    # GH 28210
    frame = DataFrame(
        {"A": list("abc"), "B": range(3)},
        index=pd.interval_range(0, 3),
    )

    # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
    expected = frame.copy()
    expected.index = expected.index.astype(str)

    with ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
        frame.to_csv(path)
        result = self.read_csv(path, index_col=0)

    assert_frame_equal(result, expected)

def test_to_csv_float32_nanrep(self):
df = DataFrame(np.random.randn(1, 4).astype(np.float32))
df[1] = np.nan
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,46 @@ def test_repr_missing(self, constructor, expected):
result = repr(obj)
assert result == expected

@pytest.mark.parametrize(
    "tuples, closed, expected_data",
    [
        ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
        (
            [(0.5, 1.0), np.nan, (2.0, 3.0)],
            "right",
            ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
        ),
        (
            [
                (Timestamp("20180101"), Timestamp("20180102")),
                np.nan,
                (Timestamp("20180102"), Timestamp("20180103")),
            ],
            "both",
            ["[2018-01-01, 2018-01-02]", "NaN", "[2018-01-02, 2018-01-03]"],
        ),
        (
            [
                (Timedelta("0 days"), Timedelta("1 days")),
                (Timedelta("1 days"), Timedelta("2 days")),
                np.nan,
            ],
            "neither",
            [
                "(0 days 00:00:00, 1 days 00:00:00)",
                "(1 days 00:00:00, 2 days 00:00:00)",
                "NaN",
            ],
        ),
    ],
)
def test_to_native_types(self, tuples, closed, expected_data):
    """Check ``to_native_types`` output against the expected strings."""
    # GH 28210
    expected = np.array(expected_data)
    result = IntervalIndex.from_tuples(tuples, closed=closed).to_native_types()
    tm.assert_numpy_array_equal(result, expected)

def test_get_item(self, closed):
i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
assert i[0] == Interval(0.0, 1.0, closed=closed)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/series/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,20 @@ def test_to_csv_compression(self, s, encoding, compression):
s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding)
)

def test_to_csv_interval_index(self):
    """Write an interval-indexed Series to CSV and compare what is read back."""
    # GH 28210
    ser = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))

    # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
    expected = ser.copy()
    expected.index = expected.index.astype(str)

    with ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
        ser.to_csv(path, header=False)
        result = self.read_csv(path, index_col=0, squeeze=True)

    assert_series_equal(result, expected)


class TestSeriesIO:
def test_to_frame(self, datetime_series):
Expand Down