Skip to content

Commit 0aa913e

Browse files
mohitanand001Blake Hawkins
authored and
Blake Hawkins
committed
To string with encoding (pandas-dev#28951)
1 parent 9a98680 commit 0aa913e

File tree

4 files changed

+43
-11
lines changed

4 files changed

+43
-11
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ Other enhancements
109109
(:issue:`28368`)
110110
- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`)
111111
- :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`)
112+
- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ASCII text (:issue:`28766`)
112113
- Added ``encoding`` argument to :meth:`DataFrame.to_html` for non-ASCII text (:issue:`28663`)
113114

114115
Build Changes

pandas/core/frame.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,7 @@ def to_string(
754754
decimal: str = ".",
755755
line_width: Optional[int] = None,
756756
max_colwidth: Optional[int] = None,
757+
encoding: Optional[str] = None,
757758
) -> Optional[str]:
758759
"""
759760
Render a DataFrame to a console-friendly tabular output.
@@ -764,6 +765,10 @@ def to_string(
764765
Max width to truncate each column in characters. By default, no limit.
765766
766767
.. versionadded:: 1.0.0
768+
encoding : str, default "utf-8"
769+
Set character encoding. Only valid when ``buf`` is a file name; otherwise a ``ValueError`` is raised.
770+
771+
.. versionadded:: 1.0.0
767772
%(returns)s
768773
See Also
769774
--------
@@ -802,7 +807,7 @@ def to_string(
802807
decimal=decimal,
803808
line_width=line_width,
804809
)
805-
return formatter.to_string(buf=buf)
810+
return formatter.to_string(buf=buf, encoding=encoding)
806811

807812
# ----------------------------------------------------------------------
808813

pandas/io/formats/format.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,8 @@ def get_buffer(
486486

487487
if encoding is None:
488488
encoding = "utf-8"
489+
elif not isinstance(buf, str):
490+
raise ValueError("buf is not a file name and encoding is specified.")
489491

490492
if hasattr(buf, "write"):
491493
yield buf
@@ -896,8 +898,12 @@ def _join_multiline(self, *args) -> str:
896898
st = ed
897899
return "\n\n".join(str_lst)
898900

899-
def to_string(self, buf: Optional[FilePathOrBuffer[str]] = None) -> Optional[str]:
900-
return self.get_result(buf=buf)
901+
def to_string(
902+
self,
903+
buf: Optional[FilePathOrBuffer[str]] = None,
904+
encoding: Optional[str] = None,
905+
) -> Optional[str]:
906+
return self.get_result(buf=buf, encoding=encoding)
901907

902908
def to_latex(
903909
self,

pandas/tests/io/formats/test_format.py

+28-8
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,19 @@ def filepath_or_buffer(filepath_or_buffer_id, tmp_path):
7373

7474

7575
@pytest.fixture
76-
def assert_filepath_or_buffer_equals(filepath_or_buffer, filepath_or_buffer_id):
76+
def assert_filepath_or_buffer_equals(
77+
filepath_or_buffer, filepath_or_buffer_id, encoding
78+
):
7779
"""
7880
Assertion helper for checking filepath_or_buffer.
7981
"""
8082

8183
def _assert_filepath_or_buffer_equals(expected):
8284
if filepath_or_buffer_id == "string":
83-
with open(filepath_or_buffer) as f:
85+
with open(filepath_or_buffer, encoding=encoding) as f:
8486
result = f.read()
8587
elif filepath_or_buffer_id == "pathlike":
86-
result = filepath_or_buffer.read_text()
88+
result = filepath_or_buffer.read_text(encoding=encoding)
8789
elif filepath_or_buffer_id == "buffer":
8890
result = filepath_or_buffer.getvalue()
8991
assert result == expected
@@ -3240,14 +3242,32 @@ def test_repr_html_ipython_config(ip):
32403242

32413243

32423244
@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"])
3245+
@pytest.mark.parametrize(
3246+
"encoding, data",
3247+
[(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")],
3248+
)
32433249
def test_filepath_or_buffer_arg(
3244-
float_frame, method, filepath_or_buffer, assert_filepath_or_buffer_equals
3250+
method,
3251+
filepath_or_buffer,
3252+
assert_filepath_or_buffer_equals,
3253+
encoding,
3254+
data,
3255+
filepath_or_buffer_id,
32453256
):
3246-
df = float_frame
3247-
expected = getattr(df, method)()
3257+
df = DataFrame([data])
32483258

3249-
getattr(df, method)(buf=filepath_or_buffer)
3250-
assert_filepath_or_buffer_equals(expected)
3259+
if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None:
3260+
with pytest.raises(
3261+
ValueError, match="buf is not a file name and encoding is specified."
3262+
):
3263+
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding)
3264+
elif encoding == "foo":
3265+
with pytest.raises(LookupError, match="unknown encoding"):
3266+
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding)
3267+
else:
3268+
expected = getattr(df, method)()
3269+
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding)
3270+
assert_filepath_or_buffer_equals(expected)
32513271

32523272

32533273
@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"])

0 commit comments

Comments
 (0)