@@ -5879,6 +5879,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
5879
5879
to_datetime : Convert argument to datetime.
5880
5880
to_timedelta : Convert argument to timedelta.
5881
5881
to_numeric : Convert argument to numeric type.
5882
+ convert_dtypes : Convert argument to best possible dtype.
5882
5883
5883
5884
Examples
5884
5885
--------
@@ -5907,6 +5908,142 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
5907
5908
)
5908
5909
).__finalize__ (self )
5909
5910
5911
def convert_dtypes(
    self: FrameOrSeries,
    infer_objects: bool_t = True,
    convert_string: bool_t = True,
    convert_integer: bool_t = True,
    convert_boolean: bool_t = True,
) -> FrameOrSeries:
    """
    Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    infer_objects : bool, default True
        Whether object dtypes should be converted to the best possible types.
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, default True
        Whether object dtypes should be converted to ``BooleanDtype()``.

    Returns
    -------
    Series or DataFrame
        Copy of input object with new dtype. A DataFrame without any
        columns is returned as a plain copy.

    See Also
    --------
    infer_objects : Infer dtypes of objects.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    to_numeric : Convert argument to a numeric type.

    Notes
    -----
    By default, ``convert_dtypes`` will attempt to convert a Series (or each
    Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
    ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is
    possible to turn off individual conversions to ``StringDtype``, the integer
    extension types or ``BooleanDtype``, respectively.

    For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
    rules as during normal Series/DataFrame construction. Then, if possible,
    convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension
    type, otherwise leave as ``object``.

    If the dtype is integer, convert to an appropriate integer extension type.

    If the dtype is numeric, and consists of all integers, convert to an
    appropriate integer extension type.

    In the future, as new dtypes are added that support ``pd.NA``, the results
    of this method will change to support those new dtypes.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
    ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
    ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
    ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
    ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
    ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
    ...     }
    ... )

    Start with a DataFrame with default dtypes.

    >>> df
       a  b      c    d     e      f
    0  1  x   True    h  10.0    NaN
    1  2  y  False    i   NaN  100.5
    2  3  z    NaN  NaN  20.0  200.0

    >>> df.dtypes
    a      int32
    b     object
    c     object
    d     object
    e    float64
    f    float64
    dtype: object

    Convert the DataFrame to use best possible dtypes.

    >>> dfn = df.convert_dtypes()
    >>> dfn
       a  b      c     d     e      f
    0  1  x   True     h    10    NaN
    1  2  y  False     i  <NA>  100.5
    2  3  z   <NA>  <NA>    20  200.0

    >>> dfn.dtypes
    a      Int32
    b     string
    c    boolean
    d     string
    e      Int64
    f    float64
    dtype: object

    Start with a Series of strings and missing data represented by ``np.nan``.

    >>> s = pd.Series(["a", "b", np.nan])
    >>> s
    0      a
    1      b
    2    NaN
    dtype: object

    Obtain a Series with dtype ``StringDtype``.

    >>> s.convert_dtypes()
    0       a
    1       b
    2    <NA>
    dtype: string
    """
    # One-dimensional input (a Series): delegate straight to the
    # per-Series implementation.
    if self.ndim == 1:
        return self._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )

    # Two-dimensional input: convert every column independently, then
    # stitch the converted columns back together side by side.
    results = [
        col._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )
        for _, col in self.items()
    ]
    if not results:
        # ``pd.concat`` raises ValueError when given nothing to concatenate,
        # so a frame without columns is simply copied (there is no dtype
        # to convert, and the index is preserved).
        return self.copy()
    return pd.concat(results, axis=1, copy=False)
6046
+
5910
6047
# ----------------------------------------------------------------------
5911
6048
# Filling NA's
5912
6049
0 commit comments