From d8f2d70b728516d3ae1cb93c542013788089dd16 Mon Sep 17 00:00:00 2001 From: peterpanmj Date: Thu, 7 Jun 2018 20:16:10 +0800 Subject: [PATCH 1/6] BUG: align logic between replace dict using integers and using strings (# 20656) --- doc/source/whatsnew/v0.24.0.txt | 3 +- pandas/core/internals/blocks.py | 85 ++++++++++++++++++++++++++++- pandas/core/internals/managers.py | 48 +++++++++------- pandas/tests/series/test_replace.py | 7 +++ 4 files changed, 118 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 42e286f487a7d..1431745c4f59d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -573,6 +573,5 @@ Other - :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. 
(:issue:`21258`) - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - :meth: `~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) -- -- +- Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ffa2267dd6877..8874fd3486401 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1689,6 +1689,44 @@ def _nanpercentile(values, q, axis, **kw): placement=np.arange(len(result)), ndim=ndim) + def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True, + convert=False, regex=False, mgr=None): + """ + Replace value corresponding to the given boolean array with another + value. + + Parameters + ---------- + mask : array_like of bool + The mask of values to replace. + src : object + The value to replace. It is ignored if regex is False. + dst : object + The value to be replaced with. + convert : bool + If true, try to coerce any object types to better types. + regex : bool + If true, search for element matching with the pattern in src. + Masked element is ignored. + mgr : BlockPlacement, optional + + Returns + ------- + A new block if there is anything to replace or the original block. 
+ """ + + if mask.any(): + if not regex: + self = self.coerce_to_target_dtype(dst) + return self.putmask(mask, dst, inplace=inplace) + else: + return self._replace_single(src, dst, inplace=inplace, + regex=regex, + convert=convert, + mask=mask, + mgr=mgr) + return self + class ScalarBlock(Block): """ @@ -2464,7 +2502,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, regex=regex, mgr=mgr) def _replace_single(self, to_replace, value, inplace=False, filter=None, - regex=False, convert=True, mgr=None): + regex=False, convert=True, mgr=None, mask=None): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -2531,15 +2569,56 @@ def re_replacer(s): else: filt = self.mgr_locs.isin(filter).nonzero()[0] - new_values[filt] = f(new_values[filt]) + if mask is None: + new_values[filt] = f(new_values[filt]) + else: + new_values[filt][mask] = f(new_values[filt][mask]) # convert block = self.make_block(new_values) if convert: block = block.convert(by_item=True, numeric=False) - return block + def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True, + convert=False, regex=False, mgr=None): + """ + Replace value corresponding to the given boolean array with another + value. + + Parameters + ---------- + mask : array_like of bool + The mask of values to replace. + src : object + The value to replace. It is ignored if regex is False. + dst : object + The value to be replaced with. + convert : bool + If true, try to coerce any object types to better types. + regex : bool + If true, search for element matching with the pattern in src. + Masked element is ignored. + mgr : BlockPlacement, optional + + Returns + ------- + A new block if there is anything to replace or the original block. 
+ """ + if mask.any(): + block = super(ObjectBlock, self)._replace_coerce(mask=mask, + src=src, + dst=dst, + inplace=inplace, + convert=convert, + regex=regex, + mgr=mgr) + if convert: + block = [b.convert(by_item=True, numeric=False, copy=True) + for b in block] + return block + return self + class CategoricalBlock(ExtensionBlock): __slots__ = () diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e7b7cb463a27b..5d61e8ea8eb9a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,6 +3,7 @@ from functools import partial import itertools import operator +import re import numpy as np @@ -19,11 +20,13 @@ is_datetimelike_v_numeric, is_numeric_v_string_like, is_extension_type, is_extension_array_dtype, - is_scalar) + is_scalar, + is_re_compilable) from pandas.core.dtypes.cast import ( maybe_promote, infer_dtype_from_scalar, - find_common_type) + find_common_type, + maybe_convert_objects) from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries, ABCExtensionArray @@ -571,12 +574,17 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False, # figure out our mask a-priori to avoid repeated replacements values = self.as_array() - def comp(s): + def comp(s, reg=False): if isna(s): return isna(values) - return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) + if hasattr(s, 'asm8'): + return _maybe_compare(maybe_convert_objects(values), + getattr(s, 'asm8'), reg) + if reg and is_re_compilable(s): + return _maybe_compare(values, s, reg) + return _maybe_compare(values, s, reg) - masks = [comp(s) for i, s in enumerate(src_list)] + masks = [comp(s, regex) for i, s in enumerate(src_list)] result_blocks = [] src_len = len(src_list) - 1 @@ -588,20 +596,16 @@ def comp(s): for i, (s, d) in enumerate(zip(src_list, dest_list)): new_rb = [] for b in rb: - if b.dtype == np.object_: - convert = i == src_len - 
result = b.replace(s, d, inplace=inplace, regex=regex, - mgr=mgr, convert=convert) + m = masks[i][b.mgr_locs.indexer] + convert = i == src_len + result = b._replace_coerce(mask=m, src=s, dst=d, + inplace=inplace, + convert=convert, regex=regex, + mgr=mgr) + if m.any(): new_rb = _extend_blocks(result, new_rb) else: - # get our mask for this element, sized to this - # particular block - m = masks[i][b.mgr_locs.indexer] - if m.any(): - b = b.coerce_to_target_dtype(d) - new_rb.extend(b.putmask(m, d, inplace=True)) - else: - new_rb.append(b) + new_rb.append(b) rb = new_rb result_blocks.extend(rb) @@ -1890,7 +1894,12 @@ def _consolidate(blocks): return new_blocks -def _maybe_compare(a, b, op): +def _maybe_compare(a, b, regex=False): + if not regex: + op = lambda x: operator.eq(x, b) + else: + op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str) + else False) is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) @@ -1902,9 +1911,8 @@ def _maybe_compare(a, b, op): # numpy deprecation warning if comparing numeric vs string-like elif is_numeric_v_string_like(a, b): result = False - else: - result = op(a, b) + result = op(a) if is_scalar(result) and (is_a_array or is_b_array): type_names = [type(a).__name__, type(b).__name__] diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index a3b92798879f5..850d160156f41 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -243,6 +243,13 @@ def test_replace_string_with_number(self): expected = pd.Series([1, 2, 3]) tm.assert_series_equal(expected, result) + def test_repace_intertwined_key_value_dict(self): + # GH 20656 + s = pd.Series(['a', 'b']) + expected = pd.Series(['b', 'a']) + result = s.replace({'a': 'b', 'b': 'a'}) + tm.assert_series_equal(expected, result) + def test_replace_unicode_with_number(self): # GH 15743 s = pd.Series([1, 2, 3]) From 3afd287cbbfbe562d87be0c0adfb702213f664b7 Mon Sep 17 00:00:00 2001 From: 
peterpanmj Date: Tue, 31 Jul 2018 11:15:41 +0800 Subject: [PATCH 2/6] remove unused condition in BlockManager --- pandas/core/internals/managers.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5d61e8ea8eb9a..f68e94295d96a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -20,8 +20,7 @@ is_datetimelike_v_numeric, is_numeric_v_string_like, is_extension_type, is_extension_array_dtype, - is_scalar, - is_re_compilable) + is_scalar) from pandas.core.dtypes.cast import ( maybe_promote, infer_dtype_from_scalar, @@ -580,8 +579,6 @@ def comp(s, reg=False): if hasattr(s, 'asm8'): return _maybe_compare(maybe_convert_objects(values), getattr(s, 'asm8'), reg) - if reg and is_re_compilable(s): - return _maybe_compare(values, s, reg) return _maybe_compare(values, s, reg) masks = [comp(s, regex) for i, s in enumerate(src_list)] From 2bafaaaf62b9518624dfef2f5c49c83af510ce75 Mon Sep 17 00:00:00 2001 From: peterpanmj Date: Tue, 31 Jul 2018 20:22:41 +0800 Subject: [PATCH 3/6] add docstring for ObjectBlock._replace_single and BlockManager._maybe_compare --- pandas/core/internals/blocks.py | 23 +++++++++++++++++++++ pandas/core/internals/managers.py | 16 ++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8874fd3486401..6f9bb7ba715ea 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2503,7 +2503,30 @@ def replace(self, to_replace, value, inplace=False, filter=None, def _replace_single(self, to_replace, value, inplace=False, filter=None, regex=False, convert=True, mgr=None, mask=None): + """ + Replace elements by the given value. + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object.
+ inplace : bool + Perform inplace modification, default is False. + filter : list + regex : bool + If true, perform regular expression substitution. + convert : bool + If true, try to coerce any object types to better types. + mgr : BlockManager + mask : array-like of bool + True indicate corresponding element is ignored. + + Returns + ------- + a new block, the result after replacing + """ inplace = validate_bool_kwarg(inplace, 'inplace') # to_replace is regex compilable diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f68e94295d96a..e199d41ecc30f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1892,6 +1892,22 @@ def _consolidate(blocks): def _maybe_compare(a, b, regex=False): + """ + Compare two array_like inputs of the same shape or two scalar values + + Calls operator.eq or re.match, depending on regex argument. If regex is + True, perform an element-wise regex matching. + + Parameters + ---------- + a : array_like or scalar + b : array_like or scalar + regex : bool + + Returns + ------- + mask : array_like of bool + """ if not regex: op = lambda x: operator.eq(x, b) else: From f76b2e2b0eaf6bb3be993104bd67b816f0f89609 Mon Sep 17 00:00:00 2001 From: peterpanmj Date: Tue, 31 Jul 2018 20:24:15 +0800 Subject: [PATCH 4/6] update whatsnew entry, move to reshaping --- doc/source/whatsnew/v0.24.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1431745c4f59d..469111b10b0eb 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -558,7 +558,7 @@ Reshaping - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) - Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals
(:issue:`21891`) -- +- Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - Build Changes @@ -573,5 +573,4 @@ Other - :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - :meth: `~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) -- Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - From 6f836b44a4f2ba9e5520965691b1333d784dbe54 Mon Sep 17 00:00:00 2001 From: Peter Li Date: Fri, 3 Aug 2018 23:04:00 +0800 Subject: [PATCH 5/6] cosmetic changes and doc-string enhancing --- pandas/core/internals/blocks.py | 84 ++++++++++++++--------------- pandas/core/internals/managers.py | 18 ++++--- pandas/tests/series/test_replace.py | 3 +- 3 files changed, 54 insertions(+), 51 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6f9bb7ba715ea..fcbdf816bddf4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1689,26 +1689,27 @@ def _nanpercentile(values, q, axis, **kw): placement=np.arange(len(result)), ndim=ndim) - def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True, - convert=False, regex=False, mgr=None): + def 
_replace_coerce(self, to_replace, value, inplace=True, regex=False, + convert=False, mgr=None, mask=None): """ Replace value corresponding to the given boolean array with another value. Parameters ---------- - mask : array_like of bool - The mask of values to replace. - src : object - The value to replace. It is ignored if regex is False. - dst : object - The value to be replaced with. - convert : bool + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + regex : bool, default False + If true, perform regular expression substitution. + convert : bool, default True If true, try to coerce any object types to better types. - regex : bool - If true, search for element matching with the pattern in src. - Masked element is ignored. - mgr : BlockPlacement, optional + mgr : BlockManager, optional + mask : array-like of bool, optional + True indicate corresponding element is ignored. Returns ------- @@ -1717,10 +1718,10 @@ def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True, if mask.any(): if not regex: - self = self.coerce_to_target_dtype(dst) - return self.putmask(mask, dst, inplace=inplace) + self = self.coerce_to_target_dtype(value) + return self.putmask(mask, value, inplace=inplace) else: - return self._replace_single(src, dst, inplace=inplace, + return self._replace_single(to_replace, value, inplace=inplace, regex=regex, convert=convert, mask=mask, @@ -2512,15 +2513,15 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, Scalar to replace or regular expression to match. value : object Replacement object. - inplace : bool - Perform inplace modification, default is False. - filter : list - regex : bool + inplace : bool, default False + Perform inplace modification. + filter : list, optional + regex : bool, default False If true, perform regular expression substitution. 
- convert : bool + convert : bool, default True If true, try to coerce any object types to better types. - mgr : BlockManager - mask : array-like of bool + mgr : BlockManager, optional + mask : array-like of bool, optional True indicate corresponding element is ignored. Returns @@ -2603,39 +2604,36 @@ def re_replacer(s): block = block.convert(by_item=True, numeric=False) return block - def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True, - convert=False, regex=False, mgr=None): + def _replace_coerce(self, to_replace, value, inplace=True, regex=False, + convert=False, mgr=None, mask=None): """ Replace value corresponding to the given boolean array with another value. Parameters ---------- - mask : array_like of bool - The mask of values to replace. - src : object - The value to replace. It is ignored if regex is False. - dst : object - The value to be replaced with. - convert : bool + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + regex : bool, default False + If true, perform regular expression substitution. + convert : bool, default True If true, try to coerce any object types to better types. - regex : bool - If true, search for element matching with the pattern in src. - Masked element is ignored. - mgr : BlockPlacement, optional + mgr : BlockManager, optional + mask : array-like of bool, optional + True indicate corresponding element is ignored. Returns ------- A new block if there is anything to replace or the original block. 
""" if mask.any(): - block = super(ObjectBlock, self)._replace_coerce(mask=mask, - src=src, - dst=dst, - inplace=inplace, - convert=convert, - regex=regex, - mgr=mgr) + block = super(ObjectBlock, self)._replace_coerce( + to_replace=to_replace, value=value, inplace=inplace, + regex=regex, convert=convert, mgr=mgr, mask=mask) if convert: block = [b.convert(by_item=True, numeric=False, copy=True) for b in block] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e199d41ecc30f..9b0732e222d37 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -573,13 +573,17 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False, # figure out our mask a-priori to avoid repeated replacements values = self.as_array() - def comp(s, reg=False): + def comp(s, regex=False): + """ + Generate a bool array by perform an equality check, or perform + an element-wise regular expression matching + """ if isna(s): return isna(values) if hasattr(s, 'asm8'): - return _maybe_compare(maybe_convert_objects(values), - getattr(s, 'asm8'), reg) - return _maybe_compare(values, s, reg) + return _compare_or_regex_match(maybe_convert_objects(values), + getattr(s, 'asm8'), regex) + return _compare_or_regex_match(values, s, regex) masks = [comp(s, regex) for i, s in enumerate(src_list)] @@ -595,7 +599,7 @@ def comp(s, reg=False): for b in rb: m = masks[i][b.mgr_locs.indexer] convert = i == src_len - result = b._replace_coerce(mask=m, src=s, dst=d, + result = b._replace_coerce(mask=m, to_replace=s, value=d, inplace=inplace, convert=convert, regex=regex, mgr=mgr) @@ -1891,7 +1895,7 @@ def _consolidate(blocks): return new_blocks -def _maybe_compare(a, b, regex=False): +def _compare_or_regex_match(a, b, regex=False): """ Compare two array_like inputs of the same shape or two scalar values @@ -1902,7 +1906,7 @@ def _maybe_compare(a, b, regex=False): ---------- a : array_like or scalar b : array_like or scalar - regex : bool 
+ regex : bool, default False Returns ------- diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 850d160156f41..e61f47679a994 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -243,8 +243,9 @@ def test_replace_string_with_number(self): expected = pd.Series([1, 2, 3]) tm.assert_series_equal(expected, result) - def test_repace_intertwined_key_value_dict(self): + def test_replace_replacer_equals_replacement(self): # GH 20656 + # make sure all replacers are matching against original values s = pd.Series(['a', 'b']) expected = pd.Series(['b', 'a']) result = s.replace({'a': 'b', 'b': 'a'}) From dd916d2663a23752c77da95943fda6bf4b52118c Mon Sep 17 00:00:00 2001 From: Peter Li Date: Fri, 3 Aug 2018 23:40:29 +0800 Subject: [PATCH 6/6] pull and update whatsnew --- doc/source/whatsnew/v0.24.0.txt | 120 ++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 469111b10b0eb..722e72aa3ca30 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -177,7 +177,8 @@ Other Enhancements - :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`) - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. 
This keeps all ties for the nth largest/smallest value (:issue:`16818`) - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) -- :func:`~DataFrame.to_csv` and :func:`~DataFrame.to_json` now support ``compression='infer'`` to infer compression based on filename (:issue:`15008`) +- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). + The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) @@ -224,6 +225,99 @@ For situations where you need an ``ndarray`` of ``Interval`` objects, use np.asarray(idx) idx.values.astype(object) +.. _whatsnew_0240.api.timezone_offset_parsing: + +Parsing Datetime Strings with Timezone Offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, parsing datetime strings with UTC offsets with :func:`to_datetime` +or :class:`DatetimeIndex` would automatically convert the datetime to UTC +without timezone localization. This is inconsistent from parsing the same +datetime string with :class:`Timestamp` which would preserve the UTC +offset in the ``tz`` attribute. Now, :func:`to_datetime` preserves the UTC +offset in the ``tz`` attribute when all the datetime strings have the same +UTC offset (:issue:`17697`, :issue:`11736`) + +*Previous Behavior*: + +.. 
code-block:: ipython + + + In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30") + Out[2]: Timestamp('2015-11-18 10:00:00') + + In [3]: pd.Timestamp("2015-11-18 15:30:00+05:30") + Out[3]: Timestamp('2015-11-18 15:30:00+0530', tz='pytz.FixedOffset(330)') + + # Different UTC offsets would automatically convert the datetimes to UTC (without a UTC timezone) + In [4]: pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + Out[4]: DatetimeIndex(['2015-11-18 10:00:00', '2015-11-18 10:00:00'], dtype='datetime64[ns]', freq=None) + +*Current Behavior*: + +.. ipython:: python + + pd.to_datetime("2015-11-18 15:30:00+05:30") + pd.Timestamp("2015-11-18 15:30:00+05:30") + +Parsing datetime strings with the same UTC offset will preserve the UTC offset in the ``tz`` + +.. ipython:: python + + pd.to_datetime(["2015-11-18 15:30:00+05:30"] * 2) + +Parsing datetime strings with different UTC offsets will now create an Index of +``datetime.datetime`` objects with different UTC offsets + +.. ipython:: python + + idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + idx + idx[0] + idx[1] + +Passing ``utc=True`` will mimic the previous behavior but will correctly indicate +that the dates have been converted to UTC + +.. ipython:: python + pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) + +.. _whatsnew_0240.api_breaking.period_end_time: + +Time values in ``dt.end_time`` and ``to_timestamp(how='end')`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The time values in :class:`Period` and :class:`PeriodIndex` objects are now set +to '23:59:59.999999999' when calling :attr:`Series.dt.end_time`, :attr:`Period.end_time`, +:attr:`PeriodIndex.end_time`, :func:`Period.to_timestamp()` with ``how='end'``, +or :func:`PeriodIndex.to_timestamp()` with ``how='end'`` (:issue:`17157`) + +Previous Behavior: + +.. 
code-block:: ipython + + In [2]: p = pd.Period('2017-01-01', 'D') + In [3]: pi = pd.PeriodIndex([p]) + + In [4]: pd.Series(pi).dt.end_time[0] + Out[4]: Timestamp(2017-01-01 00:00:00) + + In [5]: p.end_time + Out[5]: Timestamp(2017-01-01 23:59:59.999999999) + +Current Behavior: + +Calling :attr:`Series.dt.end_time` will now result in a time of '23:59:59.999999999' as +is the case with :attr:`Period.end_time`, for example + +.. ipython:: python + + p = pd.Period('2017-01-01', 'D') + pi = pd.PeriodIndex([p]) + + pd.Series(pi).dt.end_time[0] + + p.end_time .. _whatsnew_0240.api.datetimelike.normalize: @@ -383,6 +477,8 @@ Deprecations - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) +- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) +- :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`) .. _whatsnew_0240.prior_deprecations: @@ -391,7 +487,7 @@ Removal of prior version deprecations/changes - The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) - Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`) -- +- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`) - .. _whatsnew_0240.performance: @@ -430,15 +526,17 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- -- -- +- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in `codes` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of `.from_codes([1.1, 2.0])`. 
Datetimelike ^^^^^^^^^^^^ - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) - Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`,:issue:`21365`) +- Bug in :func:`to_datetime` that did not consistently return an :class:`Index` when ``box=True`` was specified (:issue:`21864`) +- Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`) Timedelta ^^^^^^^^^ @@ -461,6 +559,8 @@ Timezones - Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) - Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`) - Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) +- Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) +- Bug in ``DataFrame`` assignment with a timezone-aware scalar (:issue:`19843`) Offsets ^^^^^^^ @@ -478,6 +578,7 @@ Numeric - Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` where, when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``), a ``TypeError`` was wrongly raised. For all three methods such calculation are now done correctly. (:issue:`16679`). 
+- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`) - Strings @@ -530,8 +631,8 @@ I/O Plotting ^^^^^^^^ -- Bug in :func:'DataFrame.plot.scatter' and :func:'DataFrame.plot.hexbin' caused x-axis label and ticklabels to disappear when colorbar was on in IPython inline backend (:issue:`10611`, :issue:`10678`, and :issue:`20455`) -- +- Bug in :func:`DataFrame.plot.scatter` and :func:`DataFrame.plot.hexbin` caused x-axis label and ticklabels to disappear when colorbar was on in IPython inline backend (:issue:`10611`, :issue:`10678`, and :issue:`20455`) +- Bug in plotting a Series with datetimes using :func:`matplotlib.axes.Axes.scatter` (:issue:`22039`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -558,6 +659,9 @@ Reshaping - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) - Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`) +- Bug in :meth:`DataFrame.replace` raises RecursionError when converting OutOfBounds ``datetime64[ns, tz]`` (:issue:`20380`) +- :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`) +- Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`) - Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - @@ -574,3 +678,5 @@ Other - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - :meth: `~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with 
``axis=None`` (:issue:`15204`) - +- +-