|
| 1 | +""" |
| 2 | +Functions for converting objects to other types |
| 3 | +""" |
| 4 | + |
| 5 | +import numpy as np |
| 6 | + |
| 7 | +import pandas as pd |
| 8 | +from pandas.core.common import (_possibly_cast_to_datetime, is_object_dtype, |
| 9 | + isnull) |
| 10 | +import pandas.lib as lib |
| 11 | + |
| 12 | +# TODO: Remove in 0.18 or 2017, whichever is sooner |
def _possibly_convert_objects(values, convert_dates=True,
                              convert_numeric=True,
                              convert_timedeltas=True,
                              copy=True):
    """
    If we have an object dtype, try to coerce dates and/or numbers.

    Conversions are attempted in the order dates, timedeltas, numbers; each
    step only runs while ``values`` still has object dtype.

    Parameters
    ----------
    values : ndarray, list, tuple or scalar
        A list/tuple is wrapped in an object ndarray. Anything without a
        ``dtype`` attribute is wrapped in a length-1 object ndarray.
    convert_dates : bool or 'coerce', default True
        If ``'coerce'``, cast through ``_possibly_cast_to_datetime`` with
        ``errors='coerce'`` and keep the result unless it is entirely null.
        Any other truthy value is forwarded to
        ``lib.maybe_convert_objects(convert_datetime=...)``.
    convert_numeric : bool, default True
        If truthy, try ``lib.maybe_convert_numeric`` with
        ``coerce_numeric=True`` and keep the result unless it is entirely
        null; failures are ignored. If falsy, a plain
        ``lib.maybe_convert_objects`` soft conversion is applied instead.
    convert_timedeltas : bool or 'coerce', default True
        If ``'coerce'``, convert with ``to_timedelta(errors='coerce')`` and
        keep the result unless it is entirely null. Any other truthy value
        is forwarded to
        ``lib.maybe_convert_objects(convert_timedelta=...)``.
    copy : bool, default True
        If True, return ``values.copy()``; otherwise return ``values`` as is.

    Returns
    -------
    The (possibly converted) values.
    """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == 'coerce':
            new_values = _possibly_cast_to_datetime(
                values, 'M8[ns]', errors='coerce')

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(
                values, convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == 'coerce':
            from pandas.tseries.timedeltas import to_timedelta
            # same spelling as _soft_convert_objects uses (errors='coerce')
            new_values = to_timedelta(values, errors='coerce')

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(
                values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(
                    values, set(), coerce_numeric=True)

                # if we are all nans then leave me alone
                if not isnull(new_values).all():
                    values = new_values

            except Exception:
                # best-effort: leave values untouched if conversion fails,
                # but let KeyboardInterrupt / SystemExit propagate
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
| 76 | + |
| 77 | + |
def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                          coerce=False, copy=True):
    """
    If we have an object dtype, try to coerce dates and/or numbers.

    Parameters
    ----------
    values : ndarray, list, tuple or scalar
        A list/tuple is wrapped in an object ndarray. Anything without a
        ``dtype`` attribute is wrapped in a length-1 object ndarray. Input
        that already has a non-object dtype is returned without conversion.
    datetime, numeric, timedelta : bool, default True
        Which conversions to attempt. Without ``coerce`` they are tried in
        the order datetime, timedelta, numeric, each only while the values
        are still object dtype.
    coerce : bool, default False
        If True, exactly one of the three flags must be set, and the result
        of the matching ``pd.to_datetime`` / ``pd.to_timedelta`` /
        ``pd.to_numeric`` call with ``errors='coerce'`` is returned directly.
    copy : bool, default True
        Applied on the non-object early return and in the numeric soft
        conversion branch only; other paths return whatever the
        conversion routine produced.

    Returns
    -------
    The (possibly converted) values.

    Raises
    ------
    ValueError
        If none of ``datetime``, ``numeric``, ``timedelta`` is True, or if
        more than one is True while ``coerce`` is True.
    """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        return values.copy() if copy else values

    # The validation above guarantees that exactly one flag is set here
    if coerce:
        # Immediate return if coerce
        if datetime:
            return pd.to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            return pd.to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            return pd.to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values,
                                           convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values,
                                           convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            # best-effort: keep the unconverted values on failure, but let
            # KeyboardInterrupt / SystemExit propagate
            pass

    return values
0 commit comments