We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 9767da6, commit 2154ad3 (Copy full SHA for 2154ad3)
doc/source/whatsnew/v1.0.2.rst
@@ -38,8 +38,10 @@ Bug fixes
38
- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
39
40
41
+
42
**Experimental dtypes**
43
44
+- Fixed bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`).
45
- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`)
46
47
.. ---------------------------------------------------------------------------
pandas/_libs/lib.pyx
@@ -1005,7 +1005,7 @@ _TYPE_MAP = {
1005
'complex64': 'complex',
1006
'complex128': 'complex',
1007
'c': 'complex',
1008
- 'string': 'bytes',
+ 'string': 'string',
1009
'S': 'bytes',
1010
'U': 'string',
1011
'bool': 'boolean',
pandas/conftest.py
@@ -744,6 +744,7 @@ def any_numpy_dtype(request):
744
# categoricals are handled separately
745
_any_skipna_inferred_dtype = [
746
("string", ["a", np.nan, "c"]),
747
+ ("string", ["a", pd.NA, "c"]),
748
("bytes", [b"a", np.nan, b"c"]),
749
("empty", [np.nan, np.nan, np.nan]),
750
("empty", []),
@@ -754,6 +755,7 @@ def any_numpy_dtype(request):
754
755
("mixed-integer-float", [1, np.nan, 2.0]),
756
("decimal", [Decimal(1), np.nan, Decimal(2)]),
757
("boolean", [True, np.nan, False]),
758
+ ("boolean", [True, pd.NA, False]),
759
("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
760
("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
761
("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
pandas/tests/dtypes/test_inference.py
@@ -1200,6 +1200,24 @@ def test_interval(self):
1200
inferred = lib.infer_dtype(pd.Series(idx), skipna=False)
1201
assert inferred == "interval"
1202
1203
+ @pytest.mark.parametrize("klass", [pd.array, pd.Series])
1204
+ @pytest.mark.parametrize("skipna", [True, False])
1205
+ @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]])
1206
+ def test_string_dtype(self, data, skipna, klass):
1207
+ # StringArray
1208
+ val = klass(data, dtype="string")
1209
+ inferred = lib.infer_dtype(val, skipna=skipna)
1210
+ assert inferred == "string"
1211
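1212
+ @pytest.mark.parametrize("klass", [pd.array, pd.Series])
1213
+ @pytest.mark.parametrize("skipna", [True, False])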
1214
+ @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]])
1215
+ def test_boolean_dtype(self, data, skipna, klass):
1216
+ # BooleanArray
1217
+ val = klass(data, dtype="boolean")
1218
+ inferred = lib.infer_dtype(val, skipna=skipna)
1219
+ assert inferred == "boolean"
1220
1221
1222
class TestNumberScalar:
1223
def test_is_number(self):
pandas/tests/series/methods/test_convert_dtypes.py
@@ -246,3 +246,12 @@ def test_convert_dtypes(self, data, maindtype, params, answerdict):
246
247
# Make sure original not changed
248
tm.assert_series_equal(series, copy)
249
250
+ def test_convert_string_dtype(self):
251
+ # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
252
+ # that are already string dtype
253
+ df = pd.DataFrame(
254
+ {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype="string"
255
+ )
256
+ result = df.convert_dtypes()
257
+ tm.assert_frame_equal(df, result)
pandas/tests/test_strings.py
@@ -7,6 +7,7 @@
7
8
from pandas._libs import lib
9
10
+import pandas as pd
11
from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, notna
12
import pandas._testing as tm
13
import pandas.core.strings as strings
@@ -207,6 +208,9 @@ def test_api_per_dtype(self, index_or_series, dtype, any_skipna_inferred_dtype):
207
208
box = index_or_series
209
inferred_dtype, values = any_skipna_inferred_dtype
210
211
+ if dtype == "category" and len(values) and values[1] is pd.NA:
212
+ pytest.xfail(reason="Categorical does not yet support pd.NA")
213
214
t = box(values, dtype=dtype) # explicit dtype to avoid casting
215
216
# TODO: get rid of these xfails
0 commit comments