Skip to content

Commit aba927f

Browse files
Kevin SheppardSheppard, Kevin
Kevin Sheppard
authored and
Sheppard, Kevin
committed
ENH: Restore original convert_objects and add _convert
Restores the v0.16 behavior of convert_objects and moves the new version to _convert. Adds to_numeric for directly converting numeric data. Closes #11116; closes #11133.
1 parent 6ab626f commit aba927f

17 files changed

+370
-186
lines changed

pandas/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from pandas.tools.pivot import pivot_table, crosstab
5353
from pandas.tools.plotting import scatter_matrix, plot_params
5454
from pandas.tools.tile import cut, qcut
55+
from pandas.tools.util import to_numeric
5556
from pandas.core.reshape import melt
5657
from pandas.util.print_versions import show_versions
5758
import pandas.util.testing

pandas/core/common.py

+73-16
Original file line numberDiff line numberDiff line change
@@ -1857,23 +1857,84 @@ def _maybe_box_datetimelike(value):
18571857

18581858
_values_from_object = lib.values_from_object
18591859

1860-
1861-
def _possibly_convert_objects(values,
1862-
datetime=True,
1863-
numeric=True,
1864-
timedelta=True,
1865-
coerce=False,
1860+
# TODO: Remove in 0.18 or 2017, whichever is sooner
1861+
def _possibly_convert_objects(values, convert_dates=True,
1862+
convert_numeric=True,
1863+
convert_timedeltas=True,
18661864
copy=True):
18671865
""" if we have an object dtype, try to coerce dates and/or numbers """
18681866

1867+
# if we have passed in a list or scalar
1868+
if isinstance(values, (list, tuple)):
1869+
values = np.array(values, dtype=np.object_)
1870+
if not hasattr(values, 'dtype'):
1871+
values = np.array([values], dtype=np.object_)
1872+
1873+
# convert dates
1874+
if convert_dates and values.dtype == np.object_:
1875+
1876+
# we take an aggressive stance and convert to datetime64[ns]
1877+
if convert_dates == 'coerce':
1878+
new_values = _possibly_cast_to_datetime(
1879+
values, 'M8[ns]', errors='coerce')
1880+
1881+
# if we are all nans then leave me alone
1882+
if not isnull(new_values).all():
1883+
values = new_values
1884+
1885+
else:
1886+
values = lib.maybe_convert_objects(
1887+
values, convert_datetime=convert_dates)
1888+
1889+
# convert timedeltas
1890+
if convert_timedeltas and values.dtype == np.object_:
1891+
1892+
if convert_timedeltas == 'coerce':
1893+
from pandas.tseries.timedeltas import to_timedelta
1894+
new_values = to_timedelta(values, coerce=True)
1895+
1896+
# if we are all nans then leave me alone
1897+
if not isnull(new_values).all():
1898+
values = new_values
1899+
1900+
else:
1901+
values = lib.maybe_convert_objects(
1902+
values, convert_timedelta=convert_timedeltas)
1903+
1904+
# convert to numeric
1905+
if values.dtype == np.object_:
1906+
if convert_numeric:
1907+
try:
1908+
new_values = lib.maybe_convert_numeric(
1909+
values, set(), coerce_numeric=True)
1910+
1911+
# if we are all nans then leave me alone
1912+
if not isnull(new_values).all():
1913+
values = new_values
1914+
1915+
except:
1916+
pass
1917+
else:
1918+
# soft-conversion
1919+
values = lib.maybe_convert_objects(values)
1920+
1921+
values = values.copy() if copy else values
1922+
1923+
return values
1924+
1925+
1926+
def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
1927+
coerce=False, copy=True):
1928+
""" if we have an object dtype, try to coerce dates and/or numbers """
1929+
18691930
conversion_count = sum((datetime, numeric, timedelta))
18701931
if conversion_count == 0:
1871-
import warnings
1872-
warnings.warn('Must explicitly pass type for conversion. Defaulting to '
1873-
'pre-0.17 behavior where datetime=True, numeric=True, '
1874-
'timedelta=True and coerce=False', DeprecationWarning)
1875-
datetime = numeric = timedelta = True
1876-
coerce = False
1932+
raise ValueError('At least one of datetime, numeric or timedelta must '
1933+
'be True.')
1934+
elif conversion_count > 1 and coerce:
1935+
raise ValueError("Only one of 'datetime', 'numeric' or "
1936+
"'timedelta' can be True when when coerce=True.")
1937+
18771938

18781939
if isinstance(values, (list, tuple)):
18791940
# List or scalar
@@ -1887,10 +1948,6 @@ def _possibly_convert_objects(values,
18871948

18881949
# If 1 flag is coerce, ensure 2 others are False
18891950
if coerce:
1890-
if conversion_count > 1:
1891-
raise ValueError("Only one of 'datetime', 'numeric' or "
1892-
"'timedelta' can be True when when coerce=True.")
1893-
18941951
# Immediate return if coerce
18951952
if datetime:
18961953
return pd.to_datetime(values, errors='coerce', box=False)

pandas/core/frame.py

+4-7
Original file line numberDiff line numberDiff line change
@@ -3543,9 +3543,8 @@ def combine(self, other, func, fill_value=None, overwrite=True):
35433543
# convert_objects just in case
35443544
return self._constructor(result,
35453545
index=new_index,
3546-
columns=new_columns).convert_objects(
3547-
datetime=True,
3548-
copy=False)
3546+
columns=new_columns)._convert(datetime=True,
3547+
copy=False)
35493548

35503549
def combine_first(self, other):
35513550
"""
@@ -4026,9 +4025,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
40264025

40274026
if axis == 1:
40284027
result = result.T
4029-
result = result.convert_objects(datetime=True,
4030-
timedelta=True,
4031-
copy=False)
4028+
result = result._convert(datetime=True, timedelta=True, copy=False)
40324029

40334030
else:
40344031

@@ -4158,7 +4155,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
41584155
other = DataFrame(other.values.reshape((1, len(other))),
41594156
index=index,
41604157
columns=combined_columns)
4161-
other = other.convert_objects(datetime=True, timedelta=True)
4158+
other = other._convert(datetime=True, timedelta=True)
41624159

41634160
if not self.columns.equals(combined_columns):
41644161
self = self.reindex(columns=combined_columns)

pandas/core/generic.py

+41-27
Original file line numberDiff line numberDiff line change
@@ -2534,11 +2534,8 @@ def copy(self, deep=True):
25342534
data = self._data.copy(deep=deep)
25352535
return self._constructor(data).__finalize__(self)
25362536

2537-
@deprecate_kwarg(old_arg_name='convert_dates', new_arg_name='datetime')
2538-
@deprecate_kwarg(old_arg_name='convert_numeric', new_arg_name='numeric')
2539-
@deprecate_kwarg(old_arg_name='convert_timedeltas', new_arg_name='timedelta')
2540-
def convert_objects(self, datetime=False, numeric=False,
2541-
timedelta=False, coerce=False, copy=True):
2537+
def _convert(self, datetime=False, numeric=False, timedelta=False,
2538+
coerce=False, copy=True):
25422539
"""
25432540
Attempt to infer better dtype for object columns
25442541
@@ -2563,31 +2560,48 @@ def convert_objects(self, datetime=False, numeric=False,
25632560
-------
25642561
converted : same as input object
25652562
"""
2563+
return self._constructor(
2564+
self._data.convert(datetime=datetime,
2565+
numeric=numeric,
2566+
timedelta=timedelta,
2567+
coerce=coerce,
2568+
copy=copy)).__finalize__(self)
2569+
2570+
# TODO: Remove in 0.18 or 2017, whichever is sooner
2571+
def convert_objects(self, convert_dates=True, convert_numeric=False,
2572+
convert_timedeltas=True, copy=True):
2573+
"""
2574+
Attempt to infer better dtype for object columns
2575+
2576+
Parameters
2577+
----------
2578+
convert_dates : boolean, default True
2579+
If True, convert to date where possible. If 'coerce', force
2580+
conversion, with unconvertible values becoming NaT.
2581+
convert_numeric : boolean, default False
2582+
If True, attempt to coerce to numbers (including strings), with
2583+
unconvertible values becoming NaN.
2584+
convert_timedeltas : boolean, default True
2585+
If True, convert to timedelta where possible. If 'coerce', force
2586+
conversion, with unconvertible values becoming NaT.
2587+
copy : boolean, default True
2588+
If True, return a copy even if no copy is necessary (e.g. no
2589+
conversion was done). Note: This is meant for internal use, and
2590+
should not be confused with inplace.
25662591
2567-
# Deprecation code to handle usage change
2568-
issue_warning = False
2569-
if datetime == 'coerce':
2570-
datetime = coerce = True
2571-
numeric = timedelta = False
2572-
issue_warning = True
2573-
elif numeric == 'coerce':
2574-
numeric = coerce = True
2575-
datetime = timedelta = False
2576-
issue_warning = True
2577-
elif timedelta == 'coerce':
2578-
timedelta = coerce = True
2579-
datetime = numeric = False
2580-
issue_warning = True
2581-
if issue_warning:
2582-
warnings.warn("The use of 'coerce' as an input is deprecated. "
2583-
"Instead set coerce=True.",
2584-
FutureWarning)
2592+
Returns
2593+
-------
2594+
converted : same as input object
2595+
"""
2596+
from warnings import warn
2597+
warn("convert_objects is deprecated. Use the data-type specific "
2598+
"converters pd.to_datetime, pd.to_timestamp and pd.to_numeric.",
2599+
FutureWarning, stacklevel=2)
25852600

25862601
return self._constructor(
2587-
self._data.convert(datetime=datetime,
2588-
numeric=numeric,
2589-
timedelta=timedelta,
2590-
coerce=coerce,
2602+
self._data.convert(convert_dates=convert_dates,
2603+
convert_numeric=convert_numeric,
2604+
convert_timedeltas=convert_timedeltas,
25912605
copy=copy)).__finalize__(self)
25922606

25932607
#----------------------------------------------------------------------

pandas/core/groupby.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def f(self):
112112
except Exception:
113113
result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
114114
if _convert:
115-
result = result.convert_objects(datetime=True)
115+
result = result._convert(datetime=True)
116116
return result
117117

118118
f.__doc__ = "Compute %s of group values" % name
@@ -2882,7 +2882,7 @@ def aggregate(self, arg, *args, **kwargs):
28822882
self._insert_inaxis_grouper_inplace(result)
28832883
result.index = np.arange(len(result))
28842884

2885-
return result.convert_objects(datetime=True)
2885+
return result._convert(datetime=True)
28862886

28872887
def _aggregate_multiple_funcs(self, arg):
28882888
from pandas.tools.merge import concat
@@ -3123,22 +3123,22 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
31233123
# as we are stacking can easily have object dtypes here
31243124
if (self._selected_obj.ndim == 2 and
31253125
self._selected_obj.dtypes.isin(_DATELIKE_DTYPES).any()):
3126-
result = result.convert_objects(numeric=True)
3126+
result = result._convert(numeric=True)
31273127
date_cols = self._selected_obj.select_dtypes(
31283128
include=list(_DATELIKE_DTYPES)).columns
31293129
result[date_cols] = (result[date_cols]
3130-
.convert_objects(datetime=True,
3130+
._convert(datetime=True,
31313131
coerce=True))
31323132
else:
3133-
result = result.convert_objects(datetime=True)
3133+
result = result._convert(datetime=True)
31343134

31353135
return self._reindex_output(result)
31363136

31373137
else:
31383138
# only coerce dates if we find at least 1 datetime
31393139
coerce = True if any([ isinstance(v,Timestamp) for v in values ]) else False
31403140
return (Series(values, index=key_index)
3141-
.convert_objects(datetime=True,
3141+
._convert(datetime=True,
31423142
coerce=coerce))
31433143

31443144
else:
@@ -3243,7 +3243,7 @@ def transform(self, func, *args, **kwargs):
32433243
results = self._try_cast(results, obj[result.columns])
32443244

32453245
return (DataFrame(results,columns=result.columns,index=obj.index)
3246-
.convert_objects(datetime=True))
3246+
._convert(datetime=True))
32473247

32483248
def _define_paths(self, func, *args, **kwargs):
32493249
if isinstance(func, compat.string_types):
@@ -3436,7 +3436,7 @@ def _wrap_aggregated_output(self, output, names=None):
34363436
if self.axis == 1:
34373437
result = result.T
34383438

3439-
return self._reindex_output(result).convert_objects(datetime=True)
3439+
return self._reindex_output(result)._convert(datetime=True)
34403440

34413441
def _wrap_agged_blocks(self, items, blocks):
34423442
if not self.as_index:
@@ -3454,7 +3454,7 @@ def _wrap_agged_blocks(self, items, blocks):
34543454
if self.axis == 1:
34553455
result = result.T
34563456

3457-
return self._reindex_output(result).convert_objects(datetime=True)
3457+
return self._reindex_output(result)._convert(datetime=True)
34583458

34593459
def _reindex_output(self, result):
34603460
"""

pandas/core/internals.py

+27-24
Original file line numberDiff line numberDiff line change
@@ -1517,14 +1517,34 @@ def is_bool(self):
15171517
"""
15181518
return lib.is_bool_array(self.values.ravel())
15191519

1520-
def convert(self, datetime=True, numeric=True, timedelta=True, coerce=False,
1521-
copy=True, by_item=True):
1520+
# TODO: Refactor when convert_objects is removed since there will be 1 path
1521+
def convert(self, *args, **kwargs):
15221522
""" attempt to coerce any object types to better types
15231523
return a copy of the block (if copy = True)
15241524
by definition we ARE an ObjectBlock!!!!!
15251525
15261526
can return multiple blocks!
15271527
"""
1528+
if args:
1529+
raise NotImplementedError
1530+
by_item = True if 'by_item' not in kwargs else kwargs['by_item']
1531+
1532+
new_inputs = ['coerce','datetime','numeric','timedelta']
1533+
new_style = False
1534+
for kw in new_inputs:
1535+
new_style |= kw in kwargs
1536+
1537+
if new_style:
1538+
fn = com._soft_convert_objects
1539+
fn_inputs = new_inputs + ['copy']
1540+
else:
1541+
fn = com._possibly_convert_objects
1542+
fn_inputs = ['convert_dates','convert_numeric','convert_timedeltas']
1543+
1544+
fn_kwargs = {}
1545+
for key in fn_inputs:
1546+
if key in kwargs:
1547+
fn_kwargs[key] = kwargs[key]
15281548

15291549
# attempt to create new type blocks
15301550
blocks = []
@@ -1533,30 +1553,14 @@ def convert(self, datetime=True, numeric=True, timedelta=True, coerce=False,
15331553
for i, rl in enumerate(self.mgr_locs):
15341554
values = self.iget(i)
15351555

1536-
values = com._possibly_convert_objects(
1537-
values.ravel(),
1538-
datetime=datetime,
1539-
numeric=numeric,
1540-
timedelta=timedelta,
1541-
coerce=coerce,
1542-
copy=copy
1543-
).reshape(values.shape)
1556+
values = fn(values.ravel(), **fn_kwargs).reshape(values.shape)
15441557
values = _block_shape(values, ndim=self.ndim)
1545-
newb = self.make_block(values,
1546-
placement=[rl])
1558+
newb = make_block(values, ndim=self.ndim, placement=[rl])
15471559
blocks.append(newb)
15481560

15491561
else:
1550-
1551-
values = com._possibly_convert_objects(
1552-
self.values.ravel(),
1553-
datetime=datetime,
1554-
numeric=numeric,
1555-
timedelta=timedelta,
1556-
coerce=coerce,
1557-
copy=copy
1558-
).reshape(self.values.shape)
1559-
blocks.append(self.make_block(values))
1562+
values = fn(self.values.ravel(), **fn_kwargs).reshape(self.values.shape)
1563+
blocks.append(make_block(values, ndim=self.ndim, placement=self.mgr_locs))
15601564

15611565
return blocks
15621566

@@ -1597,8 +1601,7 @@ def _maybe_downcast(self, blocks, downcast=None):
15971601
# split and convert the blocks
15981602
result_blocks = []
15991603
for blk in blocks:
1600-
result_blocks.extend(blk.convert(datetime=True,
1601-
numeric=False))
1604+
result_blocks.extend(blk.convert(datetime=True, numeric=False))
16021605
return result_blocks
16031606

16041607
def _can_hold_element(self, element):

0 commit comments

Comments
 (0)