pandas/tests/series/methods/test_to_csv.py

from datetime import datetime
from io import StringIO

import numpy as np
import pytest

import pandas as pd
from pandas import Series
import pandas._testing as tm

from pandas.io.common import get_handle


class TestSeriesToCSV:
    def read_csv(self, path, **kwargs):
        params = {"index_col": 0, "header": None, "parse_dates": True}
        params.update(**kwargs)

        header = params.get("header")
        out = pd.read_csv(path, **params).squeeze("columns")

        if header is None:
            out.name = out.index.name = None

        return out

    def test_from_csv(self, datetime_series, string_series):
        # freq doesn't round-trip
        datetime_series.index = datetime_series.index._with_freq(None)

        with tm.ensure_clean() as path:
            datetime_series.to_csv(path, header=False)
            ts = self.read_csv(path)
            tm.assert_series_equal(datetime_series, ts, check_names=False)

            assert ts.name is None
            assert ts.index.name is None

            # see gh-10483
            datetime_series.to_csv(path, header=True)
            ts_h = self.read_csv(path, header=0)
            assert ts_h.name == "ts"

            string_series.to_csv(path, header=False)
            series = self.read_csv(path)
            tm.assert_series_equal(string_series, series, check_names=False)

            assert series.name is None
            assert series.index.name is None

            string_series.to_csv(path, header=True)
            series_h = self.read_csv(path, header=0)
            assert series_h.name == "series"

            with open(path, "w") as outfile:
                outfile.write("1998-01-01|1.0\n1999-01-01|2.0")

            series = self.read_csv(path, sep="|")
            check_series = Series(
                {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
            )
            tm.assert_series_equal(check_series, series)

            series = self.read_csv(path, sep="|", parse_dates=False)
            check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
            tm.assert_series_equal(check_series, series)

    def test_to_csv(self, datetime_series):

        with tm.ensure_clean() as path:
            datetime_series.to_csv(path, header=False)

            with open(path, newline=None) as f:
                lines = f.readlines()
            assert lines[1] != "\n"

            datetime_series.to_csv(path, index=False, header=False)
            arr = np.loadtxt(path)
            tm.assert_almost_equal(arr, datetime_series.values)

    def test_to_csv_unicode_index(self):
        buf = StringIO()
        s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])

        s.to_csv(buf, encoding="UTF-8", header=False)
        buf.seek(0)

        s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
        tm.assert_series_equal(s, s2)

    def test_to_csv_float_format(self):

        with tm.ensure_clean() as filename:
            ser = Series([0.123456, 0.234567, 0.567567])
            ser.to_csv(filename, float_format="%.2f", header=False)

            rs = self.read_csv(filename)
            xp = Series([0.12, 0.23, 0.57])
            tm.assert_series_equal(rs, xp)

    def test_to_csv_list_entries(self):
        s = Series(["jack and jill", "jesse and frank"])

        split = s.str.split(r"\s+and\s+")

        buf = StringIO()
        split.to_csv(buf, header=False)

    def test_to_csv_path_is_none(self):
        # GH 8215
        # Series.to_csv() was returning None, inconsistent with
        # DataFrame.to_csv() which returned string
        s = Series([1, 2, 3])
        csv_str = s.to_csv(path_or_buf=None, header=False)
        assert isinstance(csv_str, str)

    @pytest.mark.parametrize(
        "s,encoding",
        [
            (
                Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
                None,
            ),
            # GH 21241, 21118
            (Series(["abc", "def", "ghi"], name="X"), "ascii"),
            (Series(["123", "你好", "世界"], name="中文"), "gb2312"),
            (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"),
        ],
    )
    def test_to_csv_compression(self, s, encoding, compression):

        with tm.ensure_clean() as filename:

            s.to_csv(filename, compression=compression, encoding=encoding, header=True)
            # test the round trip - to_csv -> read_csv
            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
            ).squeeze("columns")
            tm.assert_series_equal(s, result)

            # test the round trip using file handle - to_csv -> read_csv
            with get_handle(
                filename, "w", compression=compression, encoding=encoding
            ) as handles:
                s.to_csv(handles.handle, encoding=encoding, header=True)

            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
            ).squeeze("columns")
            tm.assert_series_equal(s, result)

            # explicitly ensure file was compressed
            with tm.decompress_file(filename, compression) as fh:
                text = fh.read().decode(encoding or "utf8")
                assert s.name in text

            with tm.decompress_file(filename, compression) as fh:
                tm.assert_series_equal(
                    s,
                    pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
                )

    def test_to_csv_interval_index(self):
        # GH 28210
        s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))

        with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
            s.to_csv(path, header=False)
            result = self.read_csv(path, index_col=0)

            # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
            expected = s.copy()
            expected.index = expected.index.astype(str)

            tm.assert_series_equal(result, expected)