Skip to content

Commit 7cf51a0

Browse files
jbrockmendel authored and proost committed
DEPR: remove encoding kwarg from read_stata, DataFrame.to_stata (pandas-dev#29722)
1 parent 577cf40 commit 7cf51a0

File tree

4 files changed

+4
-34
lines changed

4 files changed

+4
-34
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
334334
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`)
335335
- Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`)
336336
- Removed previously deprecated "v" argument from :meth:`FrozenNDarray.searchsorted`, use "value" instead (:issue:`22672`)
337+
- :func:`read_stata` and :meth:`DataFrame.to_stata` no longer support the "encoding" argument (:issue:`21400`)
337338
- Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
338339
- Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
339340
-

pandas/core/frame.py

+1-10
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,7 @@
3535

3636
from pandas._libs import algos as libalgos, lib
3737
from pandas.compat.numpy import function as nv
38-
from pandas.util._decorators import (
39-
Appender,
40-
Substitution,
41-
deprecate_kwarg,
42-
rewrite_axis_style_signature,
43-
)
38+
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
4439
from pandas.util._validators import (
4540
validate_axis_style_args,
4641
validate_bool_kwarg,
@@ -1853,13 +1848,11 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
18531848
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
18541849
return cls(mgr)
18551850

1856-
@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
18571851
def to_stata(
18581852
self,
18591853
fname,
18601854
convert_dates=None,
18611855
write_index=True,
1862-
encoding="latin-1",
18631856
byteorder=None,
18641857
time_stamp=None,
18651858
data_label=None,
@@ -1889,8 +1882,6 @@ def to_stata(
18891882
a datetime column has timezone information.
18901883
write_index : bool
18911884
Write the index to Stata dataset.
1892-
encoding : str
1893-
Default is latin-1. Unicode is not supported.
18941885
byteorder : str
18951886
Can be ">", "<", "little", or "big". default is `sys.byteorder`.
18961887
time_stamp : datetime

pandas/io/stata.py

-18
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,6 @@
5858
convert_categoricals : bool, default True
5959
Read value labels and convert columns to Categorical/Factor variables."""
6060

61-
_encoding_params = """\
62-
encoding : str, None or encoding
63-
Encoding used to parse the files. None defaults to latin-1."""
64-
6561
_statafile_processing_params2 = """\
6662
index_col : str, optional
6763
Column to set as index.
@@ -108,7 +104,6 @@
108104
%s
109105
%s
110106
%s
111-
%s
112107
113108
Returns
114109
-------
@@ -132,7 +127,6 @@
132127
... do_something(chunk)
133128
""" % (
134129
_statafile_processing_params1,
135-
_encoding_params,
136130
_statafile_processing_params2,
137131
_chunksize_params,
138132
_iterator_params,
@@ -189,23 +183,19 @@
189183
%s
190184
%s
191185
%s
192-
%s
193186
""" % (
194187
_statafile_processing_params1,
195188
_statafile_processing_params2,
196-
_encoding_params,
197189
_chunksize_params,
198190
)
199191

200192

201193
@Appender(_read_stata_doc)
202-
@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
203194
@deprecate_kwarg(old_arg_name="index", new_arg_name="index_col")
204195
def read_stata(
205196
filepath_or_buffer,
206197
convert_dates=True,
207198
convert_categoricals=True,
208-
encoding=None,
209199
index_col=None,
210200
convert_missing=False,
211201
preserve_dtypes=True,
@@ -1044,7 +1034,6 @@ def __init__(self):
10441034
class StataReader(StataParser, BaseIterator):
10451035
__doc__ = _stata_reader_doc
10461036

1047-
@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
10481037
@deprecate_kwarg(old_arg_name="index", new_arg_name="index_col")
10491038
def __init__(
10501039
self,
@@ -1056,7 +1045,6 @@ def __init__(
10561045
preserve_dtypes=True,
10571046
columns=None,
10581047
order_categoricals=True,
1059-
encoding=None,
10601048
chunksize=None,
10611049
):
10621050
super().__init__()
@@ -2134,14 +2122,12 @@ class StataWriter(StataParser):
21342122

21352123
_max_string_length = 244
21362124

2137-
@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
21382125
def __init__(
21392126
self,
21402127
fname,
21412128
data,
21422129
convert_dates=None,
21432130
write_index=True,
2144-
encoding="latin-1",
21452131
byteorder=None,
21462132
time_stamp=None,
21472133
data_label=None,
@@ -2859,8 +2845,6 @@ class StataWriter117(StataWriter):
28592845
timezone information
28602846
write_index : bool
28612847
Write the index to Stata dataset.
2862-
encoding : str
2863-
Default is latin-1. Only latin-1 and ascii are supported.
28642848
byteorder : str
28652849
Can be ">", "<", "little", or "big". default is `sys.byteorder`
28662850
time_stamp : datetime
@@ -2912,14 +2896,12 @@ class StataWriter117(StataWriter):
29122896

29132897
_max_string_length = 2045
29142898

2915-
@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
29162899
def __init__(
29172900
self,
29182901
fname,
29192902
data,
29202903
convert_dates=None,
29212904
write_index=True,
2922-
encoding="latin-1",
29232905
byteorder=None,
29242906
time_stamp=None,
29252907
data_label=None,

pandas/tests/io/test_stata.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -383,19 +383,15 @@ def test_encoding(self, version):
383383

384384
# GH 4626, proper encoding handling
385385
raw = read_stata(self.dta_encoding)
386-
with tm.assert_produces_warning(FutureWarning):
387-
encoded = read_stata(self.dta_encoding, encoding="latin-1")
386+
encoded = read_stata(self.dta_encoding)
388387
result = encoded.kreis1849[0]
389388

390389
expected = raw.kreis1849[0]
391390
assert result == expected
392391
assert isinstance(result, str)
393392

394393
with tm.ensure_clean() as path:
395-
with tm.assert_produces_warning(FutureWarning):
396-
encoded.to_stata(
397-
path, write_index=False, version=version, encoding="latin-1"
398-
)
394+
encoded.to_stata(path, write_index=False, version=version)
399395
reread_encoded = read_stata(path)
400396
tm.assert_frame_equal(encoded, reread_encoded)
401397

0 commit comments

Comments
 (0)