From eb4a23951269965eaac8af95f00ce9ecc765f08a Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Wed, 3 Oct 2018 17:03:33 -0700 Subject: [PATCH 001/122] BUG-22984 Fix truncation of DataFrame representations --- pandas/io/formats/format.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index db86409adc2b0..0c382141a3eb6 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -620,7 +620,6 @@ def to_string(self): # Size of last col determines dot col size. See # `self._to_str_columns size_tr_col = len(headers[self.tr_size_col]) - max_len += size_tr_col # Need to make space for largest row # plus truncate dot col dif = max_len - self.w # '+ 1' to avoid too wide repr (GH PR #17023) From 8e82c823f2ed368000cd6a2d87b3fad4c6bc6230 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Wed, 3 Oct 2018 17:05:41 -0700 Subject: [PATCH 002/122] BUG-22984 Fix flake8 issues --- pandas/io/formats/format.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 0c382141a3eb6..ee5a1733623fc 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -616,10 +616,6 @@ def to_string(self): else: # max_cols == 0. Try to fit frame to terminal text = self.adj.adjoin(1, *strcols).split('\n') max_len = Series(text).str.len().max() - headers = [ele[0] for ele in strcols] - # Size of last col determines dot col size. See - # `self._to_str_columns - size_tr_col = len(headers[self.tr_size_col]) # plus truncate dot col dif = max_len - self.w # '+ 1' to avoid too wide repr (GH PR #17023) From 448153d089d3ea3a0e01d274c406f4e5e0bd275c Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sat, 6 Oct 2018 11:42:55 -0700 Subject: [PATCH 003/122] BUG-22984 Fix whatsnew and add test --- doc/source/whatsnew/v0.24.0.txt | 4 ++++ pandas/tests/io/formats/test_format.py | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 64cc098ccaa94..a5e3162561b32 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1273,8 +1273,12 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) +<<<<<<< HEAD - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) +======= +- Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) +>>>>>>> a3ace8012... 
BUG-22984 Fix whatsnew and add test Plotting ^^^^^^^^ diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 28aa8a92cc410..7d9bb5906c7fe 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -343,6 +343,16 @@ def test_repr_truncates_terminal_size(self): assert df2.columns[0] in result.split('\n')[0] + # GH 22984 ensure entire window is filled + terminal_size = (80, 24) + df = pd.DataFrame(np.random.rand(1,7)) + p1 = mock.patch('pandas.io.formats.console.get_terminal_size', + return_value=terminal_size) + p2 = mock.patch('pandas.io.formats.format.get_terminal_size', + return_value=terminal_size) + with p1, p2: + assert "..." not in str(df) + def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() if term_width < 10 or term_height < 10: From 244b29533c204b06c767fed969c4cef3e6b9361e Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sat, 6 Oct 2018 11:42:55 -0700 Subject: [PATCH 004/122] BUG-22984 Fix whatsnew and add test --- doc/source/whatsnew/v0.24.0.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a5e3162561b32..a0d675e24ce89 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1274,11 +1274,15 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) <<<<<<< HEAD +<<<<<<< HEAD - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) ======= - Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) >>>>>>> a3ace8012... BUG-22984 Fix whatsnew and add test +======= +- Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) +>>>>>>> 6271148c6... 
BUG-22984 Fix whatsnew and add test

 Plotting
 ^^^^^^^^

From aa867b0827ec608223ba218db1eebdc2b83845ba Mon Sep 17 00:00:00 2001
From: JustinZhengBC
Date: Sat, 6 Oct 2018 18:58:50 -0700
Subject: [PATCH 005/122] BUG-22984 Fix linting issue

---
 pandas/tests/io/formats/test_format.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 7d9bb5906c7fe..148ef9cf83b85 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -345,7 +345,7 @@ def test_repr_truncates_terminal_size(self):

         # GH 22984 ensure entire window is filled
         terminal_size = (80, 24)
-        df = pd.DataFrame(np.random.rand(1,7))
+        df = pd.DataFrame(np.random.rand(1, 7))
         p1 = mock.patch('pandas.io.formats.console.get_terminal_size',
                         return_value=terminal_size)
         p2 = mock.patch('pandas.io.formats.format.get_terminal_size',

From 34b464f728060b24c11d78daebd7c101d7aeaf78 Mon Sep 17 00:00:00 2001
From: HubertKl <39779339+HubertKl@users.noreply.github.com>
Date: Sat, 3 Nov 2018 07:04:42 +0000
Subject: [PATCH 006/122] DOC: Updating the docstring of Series.dot (#22890)

---
 pandas/core/series.py | 45 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6cc5acc4a61d0..cb8371ba086ba 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2063,16 +2063,53 @@ def autocorr(self, lag=1):

     def dot(self, other):
         """
-        Matrix multiplication with DataFrame or inner-product with Series
-        objects. Can also be called using `self @ other` in Python >= 3.5.
+        Compute the dot product between the Series and the columns of other.
+
+        This method computes the dot product between the Series and another
+        one, or between the Series and each column of a DataFrame, or between
+        the Series and each column of an array.
+
+        It can also be called using `self @ other` in Python >= 3.5.

         Parameters
         ----------
-        other : Series or DataFrame
+        other : Series, DataFrame or array-like
+            The other object to compute the dot product with its columns.

         Returns
         -------
-        dot_product : scalar or Series
+        scalar, Series or numpy.ndarray
+            Return the dot product of the Series and other if other is a
+            Series, a Series of the dot products with each column of other
+            if other is a DataFrame, or a numpy.ndarray of the dot products
+            with each column of the numpy array.
+
+        See Also
+        --------
+        DataFrame.dot: Compute the matrix product with the DataFrame.
+        Series.mul: Multiplication of series and other, element-wise.
+
+        Notes
+        -----
+        The Series and other have to share the same index if other is a Series
+        or a DataFrame.
+
+        Examples
+        --------
+        >>> s = pd.Series([0, 1, 2, 3])
+        >>> other = pd.Series([-1, 2, -3, 4])
+        >>> s.dot(other)
+        8
+        >>> s @ other
+        8
+        >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
+        >>> s.dot(df)
+        0    24
+        1    14
+        dtype: int64
+        >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
+        >>> s.dot(arr)
+        array([24, 14])
         """
         from pandas.core.frame import DataFrame
         if isinstance(other, (Series, DataFrame)):

From 4629504e245c92c3710698eb6152355a4b056480 Mon Sep 17 00:00:00 2001
From: Marc Garcia
Date: Sat, 3 Nov 2018 13:09:46 +0000
Subject: [PATCH 007/122] Fixing flake8 problems new to flake8 3.6.0 (#23472)

---
 doc/source/conf.py                    | 6 +++---
 scripts/find_commits_touching_func.py | 6 +++---
 scripts/find_undoc_args.py            | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/doc/source/conf.py b/doc/source/conf.py
index 29f947e1144ea..e8d87d4c8368c 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -99,7 +99,7 @@
 # JP: added from sphinxdocs
 autosummary_generate = False

-if any(re.match("\s*api\s*", l) for l in index_rst_lines):
+if any(re.match(r"\s*api\s*", l) for l in index_rst_lines):
     autosummary_generate = True

 # numpydoc
@@ -341,8 +341,8 @@
 # file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
     ('index', 'pandas.tex',
-     u'pandas: powerful Python data analysis toolkit',
-     u'Wes McKinney\n\& PyData Development Team', 'manual'),
+     'pandas: powerful Python data analysis toolkit',
+     r'Wes McKinney\n\& PyData Development Team', 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of
diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py
index e144f5187ac9f..a4583155b1bde 100755
--- a/scripts/find_commits_touching_func.py
+++ b/scripts/find_commits_touching_func.py
@@ -31,7 +31,7 @@ argparser.add_argument('funcname', metavar='FUNCNAME',
                        help='Name of function/method to search for changes on')
 argparser.add_argument('-f', '--file-masks', metavar='f_re(,f_re)*',
-                       default=["\.py.?$"],
+                       default=[r"\.py.?$"],
                        help='comma separated list of regexes to match '
                        'filenames against\ndefaults all .py? 
files') argparser.add_argument('-d', '--dir-masks', metavar='d_re(,d_re)*', @@ -80,7 +80,7 @@ def get_hits(defname, files=()): try: r = sh.git('blame', '-L', - '/def\s*{start}/,/def/'.format(start=defname), + r'/def\s*{start}/,/def/'.format(start=defname), f, _tty_out=False) except sh.ErrorReturnCode_128: @@ -89,7 +89,7 @@ def get_hits(defname, files=()): lines = r.strip().splitlines()[:-1] # remove comment lines - lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#", x)] + lines = [x for x in lines if not re.search(r"^\w+\s*\(.+\)\s*#", x)] hits = set(map(lambda x: x.split(" ")[0], lines)) cs.update({Hit(commit=c, path=f) for c in hits}) diff --git a/scripts/find_undoc_args.py b/scripts/find_undoc_args.py index a135c8e5171a1..ea9541bfaed3a 100755 --- a/scripts/find_undoc_args.py +++ b/scripts/find_undoc_args.py @@ -65,10 +65,10 @@ def build_loc(f): sig_names = set(inspect.getargspec(f).args) # XXX numpydoc can be used to get the list of parameters doc = f.__doc__.lower() - doc = re.split('^\s*parameters\s*', doc, 1, re.M)[-1] - doc = re.split('^\s*returns*', doc, 1, re.M)[0] + doc = re.split(r'^\s*parameters\s*', doc, 1, re.M)[-1] + doc = re.split(r'^\s*returns*', doc, 1, re.M)[0] doc_names = {x.split(":")[0].strip() for x in doc.split('\n') - if re.match('\s+[\w_]+\s*:', x)} + if re.match(r'\s+[\w_]+\s*:', x)} sig_names.discard('self') doc_names.discard('kwds') doc_names.discard('kwargs') From 696e8c7f00f6e4cc85e90f79d1c2eef82860335b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 3 Nov 2018 06:12:49 -0700 Subject: [PATCH 008/122] strictness and checks for Timedelta _simple_new (#23433) --- pandas/core/arrays/timedeltas.py | 30 ++++++++++++++++-------------- pandas/core/indexes/timedeltas.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 397297c1b88d0..9653121879c0d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -11,7 +11,7 @@ from pandas import compat from pandas.core.dtypes.common import ( - _TD_DTYPE, ensure_int64, is_timedelta64_dtype, is_list_like) + _TD_DTYPE, is_list_like) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna @@ -111,16 +111,16 @@ def dtype(self): _attributes = ["freq"] @classmethod - def _simple_new(cls, values, freq=None, **kwargs): - values = np.array(values, copy=False) - if values.dtype == np.object_: - values = array_to_timedelta64(values) - if values.dtype != _TD_DTYPE: - if is_timedelta64_dtype(values): - # non-nano unit - values = values.astype(_TD_DTYPE) - else: - values = ensure_int64(values).view(_TD_DTYPE) + def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): + # `dtype` is passed by _shallow_copy in corner cases, should always + # be timedelta64[ns] if present + assert dtype == _TD_DTYPE + assert isinstance(values, np.ndarray), type(values) + + if values.dtype == 'i8': + values = values.view('m8[ns]') + + assert values.dtype == 'm8[ns]' result = object.__new__(cls) result._data = values @@ -131,6 +131,10 @@ def __new__(cls, values, freq=None): freq, freq_infer = dtl.maybe_infer_freq(freq) + values = np.array(values, copy=False) + if values.dtype == np.object_: + values = array_to_timedelta64(values) + result = cls._simple_new(values, freq=freq) if freq_infer: inferred = result.inferred_freq @@ -166,17 +170,15 @@ def _generate_range(cls, start, end, periods, freq, closed=None): if freq is not None: index = 
_generate_regular_range(start, end, periods, freq) - index = cls._simple_new(index, freq=freq) else: index = np.linspace(start.value, end.value, periods).astype('i8') - index = cls._simple_new(index, freq=freq) if not left_closed: index = index[1:] if not right_closed: index = index[:-1] - return index + return cls._simple_new(index, freq=freq) # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index e5da21478d0a4..22ecefae8cbe2 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -35,6 +35,7 @@ to_timedelta, _coerce_scalar_to_timedelta_type) from pandas._libs import (lib, index as libindex, join as libjoin, Timedelta, NaT) +from pandas._libs.tslibs.timedeltas import array_to_timedelta64 class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, @@ -166,6 +167,19 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, elif copy: data = np.array(data, copy=True) + data = np.array(data, copy=False) + if data.dtype == np.object_: + data = array_to_timedelta64(data) + if data.dtype != _TD_DTYPE: + if is_timedelta64_dtype(data): + # non-nano unit + # TODO: watch out for overflows + data = data.astype(_TD_DTYPE) + else: + data = ensure_int64(data).view(_TD_DTYPE) + + assert data.dtype == 'm8[ns]', data.dtype + subarr = cls._simple_new(data, name=name, freq=freq) # check that we are matching freqs if verify_integrity and len(subarr) > 0: @@ -180,12 +194,23 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, return subarr @classmethod - def _simple_new(cls, values, name=None, freq=None, **kwargs): - result = super(TimedeltaIndex, cls)._simple_new(values, freq, **kwargs) + def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): + # `dtype` is passed by _shallow_copy in corner cases, should always + # be timedelta64[ns] if present + assert dtype == _TD_DTYPE + + assert isinstance(values, np.ndarray), type(values) + if values.dtype == 'i8': + values = values.view('m8[ns]') + assert values.dtype == 'm8[ns]', values.dtype + + result = super(TimedeltaIndex, cls)._simple_new(values, freq) result.name = name result._reset_identity() return result + _shallow_copy = Index._shallow_copy + @property def _formatter_func(self): from pandas.io.formats.format import _get_format_timedelta64 From 3faf1a909ea838f3a3e09eb29349b27f718cb0bf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 3 Nov 2018 06:24:18 -0700 Subject: [PATCH 009/122] REF: cython cleanup, typing, optimizations (#23464) * Easy bits of #23382 * Easy parts of #23368 --- pandas/_libs/algos.pyx | 4 +- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/groupby_helper.pxi.in | 20 ----- pandas/_libs/hashtable_class_helper.pxi.in | 26 +++---- pandas/_libs/hashtable_func_helper.pxi.in | 22 +++--- pandas/_libs/join.pyx | 4 +- pandas/_libs/lib.pyx | 90 ++++++++++++---------- pandas/_libs/tslibs/ccalendar.pyx | 2 +- pandas/_libs/tslibs/conversion.pyx | 11 ++- pandas/_libs/tslibs/fields.pyx | 36 ++++----- pandas/_libs/tslibs/offsets.pyx | 7 +- pandas/_libs/tslibs/timedeltas.pyx | 5 ++ pandas/_libs/window.pyx | 4 +- 13 files changed, 117 insertions(+), 116 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 3ba4c2375b4e8..02815dce156fb 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -77,6 +77,8 @@ class NegInfinity(object): __ge__ = lambda self, other: isinstance(other, NegInfinity) 
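(Context for the directives being added just below: ``boundscheck(False)`` and ``wraparound(False)`` drop Cython's per-access index checks inside hot loops. A rough pure-Python-mode sketch of how such directives are applied, with ``pairwise_deltas`` as a hypothetical stand-in for the kind of loop being tuned:)

    import cython

    @cython.boundscheck(False)  # skip per-access bounds checks
    @cython.wraparound(False)   # disable negative-index wrapping
    def pairwise_deltas(arr):
        # hypothetical stand-in; the real unique_deltas also de-duplicates
        return [arr[i + 1] - arr[i] for i in range(len(arr) - 1)]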
+@cython.wraparound(False)
+@cython.boundscheck(False)
 cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
     """
     Efficiently find the unique first-differences of the given array.
@@ -793,7 +795,7 @@ arrmap_bool = arrmap["uint8_t"]

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def is_monotonic(ndarray[algos_t] arr, bint timelike):
+def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
     """
     Returns
     -------
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index d683c93c9b32e..2894e014b84b8 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -353,7 +353,7 @@ def group_any_all(ndarray[uint8_t] out,
     The returned values will either be 0 or 1 (False or True, respectively).
     """
     cdef:
-        Py_ssize_t i, N=len(labels)
+        Py_ssize_t i, N = len(labels)
         int64_t lab
         uint8_t flag_val
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 335c8ee5c2340..315cfea56896e 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -667,11 +667,6 @@ def group_max(ndarray[groupby_t, ndim=2] out,
                 out[i, j] = maxx[i, j]


-group_max_float64 = group_max["float64_t"]
-group_max_float32 = group_max["float32_t"]
-group_max_int64 = group_max["int64_t"]
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_min(ndarray[groupby_t, ndim=2] out,
@@ -734,11 +729,6 @@ def group_min(ndarray[groupby_t, ndim=2] out,
                 out[i, j] = minx[i, j]


-group_min_float64 = group_min["float64_t"]
-group_min_float32 = group_min["float32_t"]
-group_min_int64 = group_min["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummin(ndarray[groupby_t, ndim=2] out,
@@ -787,11 +777,6 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,
                 out[i, j] = mval


-group_cummin_float64 = group_cummin["float64_t"]
-group_cummin_float32 = group_cummin["float32_t"]
-group_cummin_int64 = group_cummin["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummax(ndarray[groupby_t, ndim=2] out,
@@ -837,8 +822,3 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
                 if val > mval:
                     accum[lab, j] = mval = val
                 out[i, j] = mval
-
-
-group_cummax_float64 = group_cummax["float64_t"]
-group_cummax_float32 = group_cummax["float32_t"]
-group_cummax_int64 = group_cummax["int64_t"]
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 27758234c0cf1..1fdd8e3b1987f 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -86,12 +86,12 @@ cdef class {{name}}Vector:
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
         self.ao = np.empty(self.data.m, dtype={{idtype}})
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data

     cdef resize(self):
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
         self.ao.resize(self.data.m, refcheck=False)
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data

     def __dealloc__(self):
         if self.data is not NULL:
@@ -140,7 +140,7 @@ cdef class StringVector:
         self.external_view_exists = False
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
         if not self.data.data:
             raise MemoryError()
@@ -153,7 +153,7 @@ cdef class StringVector:
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)

         orig_data = self.data.data
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
        if not self.data.data:
            raise MemoryError()
        for i in range(m):
@@ -208,22 +208,22 @@ cdef class ObjectVector:
         self.n = 0
         self.m = _INIT_VEC_CAP
         self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
-        self.data = <PyObject**> self.ao.data
+        self.data = <PyObject**>self.ao.data

     def __len__(self):
         return self.n

-    cdef inline append(self, object o):
+    cdef inline append(self, object obj):
         if self.n == self.m:
             if self.external_view_exists:
                 raise ValueError("external reference but "
                                  "Vector.resize() needed")
             self.m = max(self.m * 2, _INIT_VEC_CAP)
             self.ao.resize(self.m, refcheck=False)
-            self.data = <PyObject**> self.ao.data
+            self.data = <PyObject**>self.ao.data

-        Py_INCREF(o)
-        self.data[self.n] = <PyObject*> o
+        Py_INCREF(obj)
+        self.data[self.n] = <PyObject*>obj
         self.n += 1

     def to_array(self):
@@ -768,7 +768,7 @@ cdef class StringHashTable(HashTable):
         use_na_value = na_value is not None

         # assign pointers and pre-filter out missing
-        vecs = <const char **> malloc(n * sizeof(char *))
+        vecs = <const char **>malloc(n * sizeof(char *))
         for i in range(n):
             val = values[i]
@@ -844,9 +844,9 @@ cdef class PyObjectHashTable(HashTable):

     def sizeof(self, deep=False):
         """ return the size of my table in bytes """
-        return self.table.n_buckets * (sizeof(PyObject *) + # keys
-                                       sizeof(Py_ssize_t) + # vals
-                                       sizeof(uint32_t)) # flags
+        return self.table.n_buckets * (sizeof(PyObject *) +  # keys
+                                       sizeof(Py_ssize_t) +  # vals
+                                       sizeof(uint32_t))  # flags

     cpdef get_item(self, object val):
         cdef khiter_t k
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 801c67832d8b9..80d864c65d087 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -45,11 +45,11 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
             val = values[i]

             if not checknull(val) or not dropna:
-                k = kh_get_{{ttype}}(table, <PyObject*> val)
+                k = kh_get_{{ttype}}(table, <PyObject*>val)
                 if k != table.n_buckets:
                     table.vals[k] += 1
                 else:
-                    k = kh_put_{{ttype}}(table, <PyObject*> val, &ret)
+                    k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
                     table.vals[k] = 1
     {{else}}
         with nogil:
@@ -103,7 +103,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
     {{if dtype == 'object'}}
         for k in range(table.n_buckets):
             if kh_exist_{{ttype}}(table, k):
-                result_keys[i] = <{{dtype}}> table.keys[k]
+                result_keys[i] = <{{dtype}}>table.keys[k]
                 result_counts[i] = table.vals[k]
                 i += 1
     {{else}}
@@ -152,7 +152,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     if keep == 'last':
     {{if dtype == 'object'}}
         for i from n > i >= 0:
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
             out[i] = ret == 0
     {{else}}
         with nogil:
@@ -163,7 +163,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     elif keep == 'first':
     {{if dtype == 'object'}}
         for i in range(n):
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
             out[i] = ret == 0
     {{else}}
         with nogil:
@@ -175,13 +175,13 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     {{if dtype == 'object'}}
         for i in range(n):
             value = values[i]
-            k = kh_get_{{ttype}}(table, <PyObject*> value)
+            k = kh_get_{{ttype}}(table, <PyObject*>value)
             if k != table.n_buckets:
                 out[table.vals[k]] = 1
                 out[i] = 1
             else:
-                k = kh_put_{{ttype}}(table, <PyObject*> value, &ret)
-                table.keys[k] = <PyObject*> value
+                k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
+                table.keys[k] = <PyObject*>value
                 table.vals[k] = i
                 out[i] = 0
     {{else}}
@@ -245,7 +245,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):

     {{if dtype == 'object'}}
         for i in range(n):
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
    {{else}}
        with nogil:
            for i in range(n):
@@ -259,7 +259,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
     {{if dtype == 'object'}}
         for i in range(n):
             val = arr[i]
-            k = kh_get_{{ttype}}(table, <PyObject*> val)
+            k = kh_get_{{ttype}}(table, <PyObject*>val)
             result[i] = (k != table.n_buckets)
     {{else}}
         with nogil:
@@ -342,7 +342,7 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
                 else:
                     continue

-                modes[j] = <object> table.keys[k]
+                modes[j] = <object>table.keys[k]
     {{endif}}

     kh_destroy_{{table_type}}(table)
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index c6afeda6a37dc..c92e0a4a7aa23 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t

 import numpy as np
 cimport numpy as cnp
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 6ec9a7e93bc55..c57dd66a33fe0 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -18,7 +18,7 @@ PyDateTime_IMPORT

 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM,
+from numpy cimport (ndarray, PyArray_GETITEM,
                     PyArray_ITER_DATA, PyArray_ITER_NEXT,
                     PyArray_IterNew, flatiter, NPY_OBJECT,
                     int64_t,
@@ -74,9 +74,9 @@ cdef bint PY2 = sys.version_info[0] == 2
 cdef double nan = np.NaN


-def values_from_object(object obj):
+def values_from_object(obj: object):
     """ return my values or the object if we are say an ndarray """
-    cdef func  # TODO: Does declaring this without a type accomplish anything?
+    func: object

     func = getattr(obj, 'get_values', None)
     if func is not None:
@@ -195,7 +195,7 @@ def fast_unique_multiple(list arrays):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple_list(list lists, bint sort=True):
+def fast_unique_multiple_list(lists: list, sort: bool=True) -> list:
     cdef:
         list buf
         Py_ssize_t k = len(lists)
@@ -263,7 +263,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def dicts_to_array(list dicts, list columns):
+def dicts_to_array(dicts: list, columns: list):
     cdef:
         Py_ssize_t i, j, k, n
         ndarray[object, ndim=2] result
@@ -356,7 +356,9 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length):
     return rev_indexer


-def has_infs_f4(ndarray[float32_t] arr) -> bint:
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def has_infs_f4(ndarray[float32_t] arr) -> bool:
     cdef:
         Py_ssize_t i, n = len(arr)
         float32_t inf, neginf, val
@@ -371,7 +373,9 @@ def has_infs_f4(ndarray[float32_t] arr) -> bint:
     return False


-def has_infs_f8(ndarray[float64_t] arr) -> bint:
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def has_infs_f8(ndarray[float64_t] arr) -> bool:
     cdef:
         Py_ssize_t i, n = len(arr)
         float64_t inf, neginf, val
@@ -423,6 +427,8 @@ def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len):
     return slice(vstart, vlast - 1, k)


+@cython.wraparound(False)
+@cython.boundscheck(False)
 def maybe_booleans_to_slice(ndarray[uint8_t] mask):
     cdef:
         Py_ssize_t i, n = len(mask)
@@ -454,7 +460,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask):

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def array_equivalent_object(left: object[:], right: object[:]) -> bint:
+def array_equivalent_object(left: object[:], right: object[:]) -> bool:
     """ perform an element by element comparion on 1-d object arrays
     taking into account nan positions """
     cdef:
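(On the ``bint`` to ``bool`` annotation changes above: a ``def`` function returns a Python ``bool`` to callers either way, so this is a typing cleanup rather than a behavior change. A hedged pure-Python sketch of the contract ``array_equivalent_object`` documents, with NaNs treated as equal when they sit in the same positions:)

    import numpy as np

    def array_equivalent_object_sketch(left, right):
        # rough stand-in for the Cython version: NaN == NaN positionally
        for x, y in zip(left, right):
            if x is y:
                continue
            if x != x and y != y:   # both NaN
                continue
            if x != y:
                return False
        return True

    a = np.array([1, np.nan, 'a'], dtype=object)
    assert array_equivalent_object_sketch(a, a.copy())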
@@ -478,7 +484,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
     cdef:
         Py_ssize_t i, n = len(arr)
-        object v
+        object val
         bint is_datelike
         ndarray result
@@ -487,19 +493,18 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
     result = np.empty(n, dtype=new_dtype)

     for i in range(n):
-        v = arr[i]
-        if is_datelike and checknull(v):
+        val = arr[i]
+        if is_datelike and checknull(val):
             result[i] = NPY_NAT
         else:
-            result[i] = v
+            result[i] = val

     return result


 @cython.wraparound(False)
 @cython.boundscheck(False)
-def astype_unicode(arr: ndarray,
-                   skipna: bool=False) -> ndarray[object]:
+def astype_unicode(arr: ndarray, skipna: bool=False) -> ndarray[object]:
     """
     Convert all elements in an array to unicode.
@@ -534,8 +539,7 @@ def astype_unicode(arr: ndarray,

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def astype_str(arr: ndarray,
-               skipna: bool=False) -> ndarray[object]:
+def astype_str(arr: ndarray, skipna: bool=False) -> ndarray[object]:
     """
     Convert all elements in an array to string.
@@ -570,19 +574,19 @@ def astype_str(arr: ndarray,

 @cython.wraparound(False)
 @cython.boundscheck(False)
-def clean_index_list(list obj):
+def clean_index_list(obj: list):
     """
     Utility used in pandas.core.index.ensure_index
     """
     cdef:
         Py_ssize_t i, n = len(obj)
-        object v
+        object val
         bint all_arrays = 1

     for i in range(n):
-        v = obj[i]
-        if not (isinstance(v, list) or
-                util.is_array(v) or hasattr(v, '_data')):
+        val = obj[i]
+        if not (isinstance(val, list) or
+                util.is_array(val) or hasattr(val, '_data')):
             all_arrays = 0
             break
@@ -594,7 +598,7 @@ def clean_index_list(list obj):
     if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']:
         return np.asarray(obj, dtype=object), 0
     elif inferred in ['integer']:
-        # TODO: we infer an integer but it *could* be a unint64
+        # TODO: we infer an integer but it *could* be a uint64
         try:
             return np.asarray(obj, dtype='int64'), 0
         except OverflowError:
@@ -680,7 +684,7 @@ def row_bool_subset(ndarray[float64_t, ndim=2] values,
         Py_ssize_t i, j, n, k, pos = 0
         ndarray[float64_t, ndim=2] out

-    n, k = (<object> values).shape
+    n, k = (<object>values).shape
     assert (n == len(mask))

     out = np.empty((mask.sum(), k), dtype=np.float64)
@@ -702,7 +706,7 @@ def row_bool_subset_object(ndarray[object, ndim=2] values,
         Py_ssize_t i, j, n, k, pos = 0
         ndarray[object, ndim=2] out

-    n, k = (<object> values).shape
+    n, k = (<object>values).shape
     assert (n == len(mask))

     out = np.empty((mask.sum(), k), dtype=object)
@@ -750,7 +754,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
         ndarray[int64_t, ndim=2] counts

     assert (axis == 0 or axis == 1)
-    n, k = (<object> mask).shape
+    n, k = (<object>mask).shape

     if axis == 0:
         counts = np.zeros((max_bin, k), dtype='i8')
@@ -841,19 +845,19 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys,

 # core.common import for fast inference checks


-def is_float(obj: object) -> bint:
+def is_float(obj: object) -> bool:
     return util.is_float_object(obj)


-def is_integer(obj: object) -> bint:
+def is_integer(obj: object) -> bool:
     return util.is_integer_object(obj)


-def is_bool(obj: object) -> bint:
+def is_bool(obj: object) -> bool:
     return util.is_bool_object(obj)


-def is_complex(obj: object) -> bint:
+def is_complex(obj: object) -> bool:
     return util.is_complex_object(obj)


@@ -865,7 +869,7 @@ cpdef bint is_interval(object obj):
     return getattr(obj, '_typ', '_typ') == 'interval'


-def is_period(val: object) -> bint:
+def is_period(val: object) -> bool:
     """ Return a boolean if this is a Period object """
     return util.is_period_object(val)
@@ -1046,7 +1050,7 @@ cdef _try_infer_map(v):
     return 
None -def infer_dtype(object value, bint skipna=False): +def infer_dtype(value: object, skipna: bool=False) -> str: """ Efficiently infer the type of a passed val, or list-like array of values. Return a string describing the type. @@ -1347,7 +1351,7 @@ def infer_datetimelike_array(arr: object) -> object: seen_datetime = 1 elif PyDate_Check(v): seen_date = 1 - elif is_timedelta(v) or util.is_timedelta64_object(v): + elif is_timedelta(v): # timedelta, or timedelta64 seen_timedelta = 1 else: @@ -1626,7 +1630,7 @@ cpdef bint is_datetime64_array(ndarray values): return validator.validate(values) -def is_datetime_with_singletz_array(values: ndarray) -> bint: +def is_datetime_with_singletz_array(values: ndarray) -> bool: """ Check values have the same tzinfo attribute. Doesn't check values are datetime-like types. @@ -2110,6 +2114,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return objects +@cython.boundscheck(False) +@cython.wraparound(False) def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, bint convert=1): """ @@ -2133,11 +2139,11 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, result = np.empty(n, dtype=object) for i in range(n): if mask[i]: - val = util.get_value_at(arr, i) + val = arr[i] else: - val = f(util.get_value_at(arr, i)) + val = f(arr[i]) - if util.is_array(val) and PyArray_NDIM(val) == 0: + if cnp.PyArray_IsZeroDim(val): # unbox 0-dim arrays, GH#690 # TODO: is there a faster way to unbox? # item_from_zerodim? @@ -2154,6 +2160,8 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, return result +@cython.boundscheck(False) +@cython.wraparound(False) def map_infer(ndarray arr, object f, bint convert=1): """ Substitute for np.vectorize with pandas-friendly dtype inference @@ -2175,9 +2183,9 @@ def map_infer(ndarray arr, object f, bint convert=1): n = len(arr) result = np.empty(n, dtype=object) for i in range(n): - val = f(util.get_value_at(arr, i)) + val = f(arr[i]) - if util.is_array(val) and PyArray_NDIM(val) == 0: + if cnp.PyArray_IsZeroDim(val): # unbox 0-dim arrays, GH#690 # TODO: is there a faster way to unbox? # item_from_zerodim? @@ -2194,7 +2202,7 @@ def map_infer(ndarray arr, object f, bint convert=1): return result -def to_object_array(list rows, int min_width=0): +def to_object_array(rows: list, min_width: int=0): """ Convert a list of lists into an object array. @@ -2253,7 +2261,7 @@ def tuples_to_object_array(ndarray[object] tuples): return result -def to_object_array_tuples(list rows): +def to_object_array_tuples(rows: list): cdef: Py_ssize_t i, j, n, k, tmp ndarray[object, ndim=2] result @@ -2284,6 +2292,8 @@ def to_object_array_tuples(list rows): return result +@cython.wraparound(False) +@cython.boundscheck(False) def fast_multiget(dict mapping, ndarray keys, default=np.nan): cdef: Py_ssize_t i, n = len(keys) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 7d58b43e5d460..07c146c06b510 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -54,7 +54,7 @@ weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} @cython.wraparound(False) @cython.boundscheck(False) -cpdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil: +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil: """Return the number of days in the given month of the given year. 
Parameters diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index d91283deb9fc7..d199997d2e9fe 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -75,7 +75,7 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1: @cython.boundscheck(False) @cython.wraparound(False) -def ensure_datetime64ns(ndarray arr, copy=True): +def ensure_datetime64ns(arr: ndarray, copy: bool=True): """ Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]' @@ -122,7 +122,7 @@ def ensure_datetime64ns(ndarray arr, copy=True): return result -def ensure_timedelta64ns(ndarray arr, copy=True): +def ensure_timedelta64ns(arr: ndarray, copy: bool=True): """ Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]' @@ -137,11 +137,12 @@ def ensure_timedelta64ns(ndarray arr, copy=True): """ return arr.astype(TD_DTYPE, copy=copy) + # TODO: check for overflows when going from a lower-resolution to nanos @cython.boundscheck(False) @cython.wraparound(False) -def datetime_to_datetime64(object[:] values): +def datetime_to_datetime64(values: object[:]): """ Convert ndarray of datetime-like objects to int64 array representing nanosecond timestamps. @@ -614,6 +615,8 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz): # ---------------------------------------------------------------------- # Timezone Conversion +@cython.boundscheck(False) +@cython.wraparound(False) cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, bint to_utc=True): """ @@ -1220,7 +1223,7 @@ cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil: @cython.wraparound(False) @cython.boundscheck(False) -def is_date_array_normalized(int64_t[:] stamps, tz=None): +def is_date_array_normalized(int64_t[:] stamps, object tz=None): """ Check if all of the given (nanosecond) timestamps are normalized to midnight, i.e. hour == minute == second == 0. If the optional timezone diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 216128cae2002..4c0af69d72517 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -114,7 +114,7 @@ def get_date_name_field(int64_t[:] dtindex, object field, object locale=None): dt64_to_dtstruct(dtindex[i], &dts) dow = dayofweek(dts.year, dts.month, dts.day) out[i] = names[dow].capitalize() - return out + elif field == 'month_name': if locale is None: names = np.array(MONTHS_FULL, dtype=np.object_) @@ -128,12 +128,15 @@ def get_date_name_field(int64_t[:] dtindex, object field, object locale=None): dt64_to_dtstruct(dtindex[i], &dts) out[i] = names[dts.month].capitalize() - return out - raise ValueError("Field %s not supported" % field) + else: + raise ValueError("Field {field} not supported".format(field=field)) + + return out @cython.wraparound(False) +@cython.boundscheck(False) def get_start_end_field(int64_t[:] dtindex, object field, object freqstr=None, int month_kw=12): """ @@ -163,8 +166,8 @@ def get_start_end_field(int64_t[:] dtindex, object field, if freqstr: if freqstr == 'C': - raise ValueError( - "Custom business days is not supported by %s" % field) + raise ValueError("Custom business days is not supported by {field}" + .format(field=field)) is_business = freqstr[0] == 'B' # YearBegin(), BYearBegin() use month = starting month of year. 
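(The restructuring in this function removes the per-branch ``return out.view(bool)`` calls in favor of a single exit at the end; ``view(bool)`` reinterprets the accumulated uint8 flags without copying, roughly:)

    import numpy as np

    out = np.zeros(4, dtype=np.uint8)  # flags accumulated as 0/1 bytes
    out[1] = 1
    result = out.view(bool)            # zero-copy reinterpretation
    assert result.tolist() == [False, True, False, False]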
@@ -196,7 +199,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): out[i] = 1 - return out.view(bool) + else: for i in range(count): if dtindex[i] == NPY_NAT: @@ -208,7 +211,6 @@ def get_start_end_field(int64_t[:] dtindex, object field, if dom == 1: out[i] = 1 - return out.view(bool) elif field == 'is_month_end': if is_business: @@ -228,7 +230,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, if (ldom == doy and dow < 5) or ( dow == 4 and (ldom - doy <= 2)): out[i] = 1 - return out.view(bool) + else: for i in range(count): if dtindex[i] == NPY_NAT: @@ -244,7 +246,6 @@ def get_start_end_field(int64_t[:] dtindex, object field, if ldom == doy: out[i] = 1 - return out.view(bool) elif field == 'is_quarter_start': if is_business: @@ -260,7 +261,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, if ((dts.month - start_month) % 3 == 0) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): out[i] = 1 - return out.view(bool) + else: for i in range(count): if dtindex[i] == NPY_NAT: @@ -272,7 +273,6 @@ def get_start_end_field(int64_t[:] dtindex, object field, if ((dts.month - start_month) % 3 == 0) and dom == 1: out[i] = 1 - return out.view(bool) elif field == 'is_quarter_end': if is_business: @@ -293,7 +293,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, (ldom == doy and dow < 5) or ( dow == 4 and (ldom - doy <= 2))): out[i] = 1 - return out.view(bool) + else: for i in range(count): if dtindex[i] == NPY_NAT: @@ -309,7 +309,6 @@ def get_start_end_field(int64_t[:] dtindex, object field, if ((dts.month - end_month) % 3 == 0) and (ldom == doy): out[i] = 1 - return out.view(bool) elif field == 'is_year_start': if is_business: @@ -325,7 +324,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, if (dts.month == start_month) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): out[i] = 1 - return out.view(bool) + else: for i in range(count): if dtindex[i] == NPY_NAT: @@ -337,7 +336,6 @@ def get_start_end_field(int64_t[:] dtindex, object field, if (dts.month == start_month) and dom == 1: out[i] = 1 - return out.view(bool) elif field == 'is_year_end': if is_business: @@ -358,7 +356,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, (ldom == doy and dow < 5) or ( dow == 4 and (ldom - doy <= 2))): out[i] = 1 - return out.view(bool) + else: for i in range(count): if dtindex[i] == NPY_NAT: @@ -374,9 +372,11 @@ def get_start_end_field(int64_t[:] dtindex, object field, if (dts.month == end_month) and (ldom == doy): out[i] = 1 - return out.view(bool) - raise ValueError("Field %s not supported" % field) + else: + raise ValueError("Field {field} not supported".format(field=field)) + + return out.view(bool) @cython.wraparound(False) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 528d30ddd7205..78e1269aa5363 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -816,7 +816,8 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): return np.asarray(out) -cpdef datetime shift_month(datetime stamp, int months, object day_opt=None): +def shift_month(stamp: datetime, months: int, + day_opt: object=None) -> datetime: """ Given a datetime (or Timestamp) `stamp`, an integer `months` and an option `day_opt`, return a new datetimelike that many months later, @@ -946,8 +947,8 @@ cpdef int roll_convention(int other, int n, int compare) nogil: return n -cpdef int roll_qtrday(datetime other, int n, int month, object 
day_opt,
-                      int modby=3) except? -1:
+def roll_qtrday(other: datetime, n: int, month: int,
+                day_opt: object, modby: int=3) -> int:
     """
     Possibly increment or decrement the number of periods to shift
     based on rollforward/rollbackward conventions.
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 74bbc64af25f2..fa965e2ca7c8c 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -6,6 +6,7 @@ import warnings
 import sys
 cdef bint PY3 = (sys.version_info[0] >= 3)

+import cython
 from cython import Py_ssize_t

 from cpython cimport Py_NE, Py_EQ, PyObject_RichCompare
@@ -82,6 +83,8 @@ _no_input = object()
 # ----------------------------------------------------------------------
 # API

+@cython.boundscheck(False)
+@cython.wraparound(False)
 def ints_to_pytimedelta(int64_t[:] arr, box=False):
     """
     convert an i8 repr to an ndarray of timedelta or Timedelta (if box ==
@@ -198,6 +201,8 @@ cpdef convert_to_timedelta64(object ts, object unit):
     return ts.astype('timedelta64[ns]')


+@cython.boundscheck(False)
+@cython.wraparound(False)
 def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
     """
     Convert an ndarray to an array of timedeltas. If errors == 'coerce',
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
index c7cfaab60b606..c4af4a6b35a37 100644
--- a/pandas/_libs/window.pyx
+++ b/pandas/_libs/window.pyx
@@ -1449,8 +1449,8 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win,
     try:
         interpolation_type = interpolation_types[interpolation]
     except KeyError:
-        raise ValueError("Interpolation '{}' is not supported"
-                         .format(interpolation))
+        raise ValueError("Interpolation '{interp}' is not supported"
+                         .format(interp=interpolation))

     # we use the Fixed/Variable Indexer here as the
     # actual skiplist ops outweigh any window computation costs

From 126edd9cb86f505b8bd418e7264bd5429b969ee2 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sat, 3 Nov 2018 06:56:52 -0700
Subject: [PATCH 010/122] ENH: Add FrozenList.union and .difference (#23394)

Re-attempt of gh-15506.
Closes gh-15475.
---
 doc/source/groupby.rst              | 10 +++++++
 doc/source/whatsnew/v0.24.0.txt     |  5 ++--
 pandas/core/indexes/frozen.py       | 42 +++++++++++++++++++++++++----
 pandas/tests/indexes/test_frozen.py | 23 +++++++++++++---
 4 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index fecc336049a40..0a896bac0f2d7 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -125,6 +125,16 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:
    grouped = df.groupby('A')
    grouped = df.groupby(['A', 'B'])

+.. versionadded:: 0.24
+
+If we instead set columns ``A`` and ``B`` as a MultiIndex, we can group by all
+levels except the ones we specify:
+
+.. ipython:: python
+
+   df2 = df.set_index(['A', 'B'])
+   grouped = df2.groupby(level=df2.index.names.difference(['B']))
+
 These will split the DataFrame on its index (rows).
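(A quick sketch of what the new ``FrozenList.difference`` call above evaluates to, assuming the two-level index built from columns ``A`` and ``B``:)

    df2 = df.set_index(['A', 'B'])
    df2.index.names                    # FrozenList(['A', 'B'])
    df2.index.names.difference(['B'])  # FrozenList(['A'])
    grouped = df2.groupby(level=df2.index.names.difference(['B']))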
We could also split by the columns:

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index a0d675e24ce89..05380ef2deb5f 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -13,10 +13,9 @@ v0.24.0 (Month XX, 2018)
 New features
 ~~~~~~~~~~~~
 - :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`)
-
-
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
-
+- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby operations that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups
+<groupby.split>` for more information (:issue:`15475`, :issue:`15506`)
 - :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing the user to override the engine's default behavior to include or omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py
index 4f782e22c2370..3ac4a2bf31a7e 100644
--- a/pandas/core/indexes/frozen.py
+++ b/pandas/core/indexes/frozen.py
@@ -23,15 +23,47 @@ class FrozenList(PandasObject, list):
     because it's technically non-hashable, will be used for lookups,
     appropriately, etc.
     """
-    # Sidenote: This has to be of type list, otherwise it messes up PyTables
-    # typechecks
+    # Side note: This has to be of type list. Otherwise,
+    # it messes up PyTables type checks.

-    def __add__(self, other):
+    def union(self, other):
+        """
+        Returns a FrozenList with other concatenated to the end of self.
+
+        Parameters
+        ----------
+        other : array-like
+            The array-like whose elements we are concatenating.
+
+        Returns
+        -------
+        union : FrozenList
+            The collection of self followed by the elements of other.
+        """
         if isinstance(other, tuple):
             other = list(other)
-        return self.__class__(super(FrozenList, self).__add__(other))
+        return type(self)(super(FrozenList, self).__add__(other))
+
+    def difference(self, other):
+        """
+        Returns a FrozenList with elements from other removed from self.
+
+        Parameters
+        ----------
+        other : array-like
+            The array-like whose elements we are removing from self.
+
+        Returns
+        -------
+        diff : FrozenList
+            The collection difference between self and other.
+ """ + other = set(other) + temp = [x for x in self if x not in other] + return type(self)(temp) - __iadd__ = __add__ + # TODO: Consider deprecating these in favor of `union` (xref gh-15506) + __add__ = __iadd__ = union # Python 2 compat def __getslice__(self, i, j): diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index e62329dec9846..db9f875b77b8a 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -11,7 +11,7 @@ class TestFrozenList(CheckImmutable, CheckStringMixin): mutable_methods = ('extend', 'pop', 'remove', 'insert') unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) - def setup_method(self, method): + def setup_method(self, _): self.lst = [1, 2, 3, 4, 5] self.container = FrozenList(self.lst) self.klass = FrozenList @@ -25,13 +25,30 @@ def test_add(self): expected = FrozenList([1, 2, 3] + self.lst) self.check_result(result, expected) - def test_inplace(self): + def test_iadd(self): q = r = self.container + q += [5] self.check_result(q, self.lst + [5]) - # other shouldn't be mutated + + # Other shouldn't be mutated. self.check_result(r, self.lst) + def test_union(self): + result = self.container.union((1, 2, 3)) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + def test_difference(self): + result = self.container.difference([2]) + expected = FrozenList([1, 3, 4, 5]) + self.check_result(result, expected) + + def test_difference_dupe(self): + result = FrozenList([1, 2, 3, 2]).difference([2]) + expected = FrozenList([1, 3]) + self.check_result(result, expected) + class TestFrozenNDArray(CheckImmutable, CheckStringMixin): mutable_methods = ('put', 'itemset', 'fill') From da08eeb403906cadc561178aeac6ccf9b38f0238 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Nov 2018 06:58:45 -0700 Subject: [PATCH 011/122] BUG: Allow freq conversion from dt64 to period (#23460) Closes #23438 --- pandas/core/arrays/period.py | 23 ++++++------------- pandas/tests/arrays/test_period.py | 9 +++++--- .../tests/indexes/period/test_construction.py | 15 ++++++++++++ 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 1bbad4b73953d..90e7beac63427 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -967,24 +967,15 @@ def dt64arr_to_periodarr(data, freq, tz=None): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) - if freq is not None: - freq = Period._maybe_convert_freq(freq) + if freq is None: + if isinstance(data, ABCIndexClass): + data, freq = data._values, data.freq + elif isinstance(data, ABCSeries): + data, freq = data._values, data.dt.freq - if isinstance(data, ABCIndexClass): - if freq is None: - freq = data.freq - elif freq != data.freq: - msg = DIFFERENT_FREQ_INDEX.format(freq.freqstr, data.freq.freqstr) - raise IncompatibleFrequency(msg) - data = data._values + freq = Period._maybe_convert_freq(freq) - elif isinstance(data, ABCSeries): - if freq is None: - freq = data.dt.freq - elif freq != data.dt.freq: - msg = DIFFERENT_FREQ_INDEX.format(freq.freqstr, - data.dt.freq.freqstr) - raise IncompatibleFrequency(msg) + if isinstance(data, (ABCIndexClass, ABCSeries)): data = data._values base, mult = frequencies.get_freq_code(freq) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 40bdd4f1aaa5c..0125729048cdd 100644 --- a/pandas/tests/arrays/test_period.py +++ 
b/pandas/tests/arrays/test_period.py @@ -34,10 +34,13 @@ def test_period_array_ok(data, freq, expected): tm.assert_numpy_array_equal(result, expected) -def test_from_datetime64_raises(): +def test_from_datetime64_freq_changes(): + # https://github.com/pandas-dev/pandas/issues/23438 arr = pd.date_range("2017", periods=3, freq="D") - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): - PeriodArray._from_datetime64(arr, freq="M") + result = PeriodArray._from_datetime64(arr, freq="M") + expected = period_array(['2017-01-01', '2017-01-01', '2017-01-01'], + freq="M") + tm.assert_period_array_equal(result, expected) @pytest.mark.parametrize("data, freq, msg", [ diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 9622f47697f8d..fb74244d815c2 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -158,6 +158,21 @@ def test_constructor_datetime64arr(self): pytest.raises(ValueError, PeriodIndex, vals, freq='D') + @pytest.mark.parametrize('box', [None, 'series', 'index']) + def test_constructor_datetime64arr_ok(self, box): + # https://github.com/pandas-dev/pandas/issues/23438 + data = pd.date_range('2017', periods=4, freq="M") + if box is None: + data = data._values + elif box == 'series': + data = pd.Series(data) + + result = PeriodIndex(data, freq='D') + expected = PeriodIndex([ + '2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30' + ], freq="D") + tm.assert_index_equal(result, expected) + def test_constructor_dtype(self): # passing a dtype with a tz should localize idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]') From 29239ad6d543da33197b9adceffa053ae065bae8 Mon Sep 17 00:00:00 2001 From: hongshaoyang Date: Sat, 3 Nov 2018 22:33:17 +0800 Subject: [PATCH 012/122] add number of Errors, Warnings to scripts/validate_docstrings.py (#23150) --- scripts/validate_docstrings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 29d485550be40..4b1834adcaf33 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -610,11 +610,11 @@ def header(title, width=80, char='#'): fd.write('{}\n'.format(doc_info['docstring'])) fd.write(header('Validation')) if doc_info['errors']: - fd.write('Errors found:\n') + fd.write('{} Errors found:\n'.format(len(doc_info['errors']))) for err in doc_info['errors']: fd.write('\t{}\n'.format(err)) if doc_info['warnings']: - fd.write('Warnings found:\n') + fd.write('{} Warnings found:\n'.format(len(doc_info['warnings']))) for wrn in doc_info['warnings']: fd.write('\t{}\n'.format(wrn)) From 90be7b3b7ebd95876b3ab01fc13611d8b70f6764 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 3 Nov 2018 09:36:24 -0500 Subject: [PATCH 013/122] DOC: Add cookbook entry for triangular correlation matrix (GH22840) (#23032) --- doc/source/cookbook.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index be8457fc14a4f..21d1f11ba49ba 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -1226,6 +1226,17 @@ Computation Correlation *********** +Often it's useful to obtain the lower (or upper) triangular form of a correlation matrix calculated from :func:`DataFrame.corr`. This can be achieved by passing a boolean mask to ``where`` as follows: + +.. 
ipython:: python
+
+    df = pd.DataFrame(np.random.random(size=(100, 5)))
+
+    corr_mat = df.corr()
+    mask = np.tril(np.ones_like(corr_mat, dtype=np.bool), k=-1)
+
+    corr_mat.where(mask)
+
 The `method` argument within `DataFrame.corr` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a `DataFrame` object.

 .. code-block:: python

From defff22f93c3c699f5823292d6495b4b1b1475e3 Mon Sep 17 00:00:00 2001
From: topper-123
Date: Sat, 3 Nov 2018 14:40:08 +0000
Subject: [PATCH 014/122] CLN: doc string (#23469)

---
 pandas/core/generic.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 6ca8f6731bbb8..d1e9f103477cc 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5279,7 +5279,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):

         Convert to ordered categorical type with custom ordering:

-        >>> ser.astype('category', ordered=True, categories=[2, 1])
+        >>> cat_dtype = pd.api.types.CategoricalDtype(
+        ...                     categories=[2, 1], ordered=True)
+        >>> ser.astype(cat_dtype)
         0    1
         1    2
         dtype: category
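(Usage note on the docstring fix above: passing ``categories``/``ordered`` straight into ``astype`` is the deprecated spelling, which is why the example now builds a ``CategoricalDtype`` first. Assuming a plain integer Series:)

    import pandas as pd

    ser = pd.Series([1, 2])
    cat_dtype = pd.api.types.CategoricalDtype(categories=[2, 1], ordered=True)
    ser.astype(cat_dtype)
    # 0    1
    # 1    2
    # dtype: category
    # Categories (2, int64): [2 < 1]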
---
 ci/code_checks.sh                          |  8 ++++++
 pandas/_libs/algos.pyx                     | 10 +++----
 pandas/_libs/algos_common_helper.pxi.in    | 10 +++----
 pandas/_libs/algos_rank_helper.pxi.in      |  2 +-
 pandas/_libs/algos_take_helper.pxi.in      |  2 +-
 pandas/_libs/groupby.pyx                   | 10 +++----
 pandas/_libs/groupby_helper.pxi.in         | 32 +++++++++++-----------
 pandas/_libs/hashtable_class_helper.pxi.in |  2 +-
 pandas/_libs/join.pyx                      |  2 +-
 pandas/_libs/lib.pyx                       |  8 +++---
 pandas/_libs/missing.pyx                   |  6 ++--
 pandas/_libs/parsers.pyx                   | 12 ++++----
 pandas/_libs/properties.pyx                |  2 +-
 pandas/_libs/reduction.pyx                 |  4 +--
 pandas/_libs/skiplist.pyx                  |  6 ++--
 pandas/_libs/sparse.pyx                    |  4 +--
 pandas/_libs/tslibs/conversion.pyx         |  2 +-
 pandas/_libs/tslibs/timedeltas.pyx         |  8 +++---
 pandas/_libs/window.pyx                    |  6 ++--
 19 files changed, 72 insertions(+), 64 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index b829cbefe8f7a..26b7eaca87a04 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -44,6 +44,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
         flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
         RET=$(($RET + $?)) ; echo $MSG "DONE"

+    # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
+    # it doesn't make a difference, but we want to be internally consistent.
+    # Note: this grep pattern is (intended to be) equivalent to the python
+    # regex r'(?<![ ->])> '
+    MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG
+    ! grep -r -E --include '*.pyx' --include '*.pxi.in' '> ' pandas/_libs | grep -v '[ ->]> '
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # readability/casting: Warnings about C casting instead of C++ casting
     # runtime/int: Warnings about using C number types instead of C++ ones
     # build/include_subdir: Warnings about prefacing included header files with directory
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 02815dce156fb..24828db64c392 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -32,7 +32,7 @@ import missing

 cdef float64_t FP_ERR = 1e-13

-cdef double NaN = <double> np.NaN
+cdef double NaN = <double>np.NaN
 cdef double nan = NaN

 cdef int64_t iNaT = get_nat()
@@ -242,7 +242,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
         int64_t nobs = 0
         float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor

-    N, K = (<object> mat).shape
+    N, K = (<object>mat).shape

     if minp is None:
         minpv = 1
@@ -307,7 +307,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
         int64_t nobs = 0
         float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor

-    N, K = (<object> mat).shape
+    N, K = (<object>mat).shape

     result = np.empty((K, K), dtype=np.float64)
     mask = np.isfinite(mat).view(np.uint8)
@@ -531,7 +531,7 @@ def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
         algos_t val
         int lim, fill_count = 0

-    K, N = (<object> values).shape
+    K, N = (<object>values).shape

     # GH#2778
     if N == 0:
@@ -730,7 +730,7 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
         algos_t val
         int lim, fill_count = 0

-    K, N = (<object> values).shape
+    K, N = (<object>values).shape

     # GH#2778
     if N == 0:
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
index 2835c95c96575..c2b0a4119e6e5 100644
--- a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -34,7 +34,7 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
     cdef:
         Py_ssize_t i, j, sx, sy

-    sx, sy = (<object> arr).shape
+    sx, sy = (<object>arr).shape
     if arr.flags.f_contiguous:
         if axis == 0:
             if periods >= 0:
@@ -88,14 +88,14 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
 # ensure_dtype
 #----------------------------------------------------------------------

-cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num
+cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num


 def ensure_platform_int(object arr):
     # GH3033, GH1392
     # platform int is the size of the int pointer, e.g. np.intp
     if util.is_array(arr):
-        if (<ndarray> arr).descr.type_num == PLATFORM_INT:
+        if (<ndarray>arr).descr.type_num == PLATFORM_INT:
             return arr
         else:
             return arr.astype(np.intp)
@@ -105,7 +105,7 @@ def ensure_platform_int(object arr):

 def ensure_object(object arr):
     if util.is_array(arr):
-        if (<ndarray> arr).descr.type_num == NPY_OBJECT:
+        if (<ndarray>arr).descr.type_num == NPY_OBJECT:
             return arr
         else:
             return arr.astype(np.object_)
@@ -142,7 +142,7 @@ def get_dispatch(dtypes):

 def ensure_{{name}}(object arr, copy=True):
     if util.is_array(arr):
-        if (<ndarray> arr).descr.type_num == NPY_{{c_type}}:
+        if (<ndarray>arr).descr.type_num == NPY_{{c_type}}:
             return arr
         else:
             return arr.astype(np.{{dtype}}, copy=copy)
diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in
index bb4aec75ed567..e13f87d15aace 100644
--- a/pandas/_libs/algos_rank_helper.pxi.in
+++ b/pandas/_libs/algos_rank_helper.pxi.in
@@ -263,7 +263,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
         np.putmask(values, mask, nan_value)
 {{endif}}

-    n, k = (<object> values).shape
+    n, k = (<object>values).shape
     ranks = np.empty((n, k), dtype='f8')

 {{if dtype == 'object'}}
diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
index 358479c837d05..bd5feef1ff2b0 100644
--- a/pandas/_libs/algos_take_helper.pxi.in
+++ b/pandas/_libs/algos_take_helper.pxi.in
@@ -278,7 +278,7 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
         ndarray[take_t, ndim=2] result
         object val

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     if take_t is object:
         # evaluated at compile-time
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 2894e014b84b8..c72b4001dcb79 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -22,7 +22,7 @@ from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers

 cdef int64_t iNaT = get_nat()

-cdef double NaN = <double> np.NaN
+cdef double NaN = <double>np.NaN
 cdef double nan = NaN

@@ -115,7 +115,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
     assert min_count == -1, "'min_count' only used in add and prod"

     ngroups = len(counts)
-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     indexer, _counts = groupsort_indexer(labels, ngroups)
     counts[:] = _counts[1:]
@@ -152,7 +152,7 @@ def group_cumprod_float64(float64_t[:, :] out,
         float64_t[:, :] accum
         int64_t lab

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape
     accum = np.ones_like(values)

     with nogil:
@@ -189,7 +189,7 @@ def group_cumsum(numeric[:, :] out,
         numeric[:, :] accum
         int64_t lab

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape
     accum = np.zeros_like(values)

     with nogil:
@@ -226,7 +226,7 @@ def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
         int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
         int64_t[:, :] label_indexer

-    N, = (<object> labels).shape
+    N, = (<object>labels).shape

     if periods < 0:
         periods = -periods
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 315cfea56896e..0917453e3f864 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -48,7 +48,7 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     nobs = np.zeros_like(out)
     sumx = np.zeros_like(out)

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     with nogil:

@@ -95,7 +95,7 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     nobs = np.zeros_like(out)
     prodx = np.ones_like(out)

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     with nogil:
         for i in range(N):
@@ -141,7 +141,7 @@ def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     nobs = np.zeros_like(out)
     mean = np.zeros_like(out)

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     out[:, :] = 0.0
@@ -193,7 +193,7 @@ def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     nobs = np.zeros_like(out)
     sumx = np.zeros_like(out)

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     with nogil:
         for i in range(N):
@@ -238,7 +238,7 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     if len(labels) == 0:
         return

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     if out.shape[1] != 4:
         raise ValueError('Output array must have 4 columns')
@@ -312,14 +312,14 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")

-    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
 {{if name == 'object'}}
-    resx = np.empty((<object> out).shape, dtype=object)
+    resx = np.empty((<object>out).shape, dtype=object)
 {{else}}
     resx = np.empty_like(out)
 {{endif}}

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

 {{if name == "object"}}
     if True:  # make templating happy
@@ -369,14 +369,14 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")

-    nobs = np.zeros((<object> out).shape, dtype=np.int64)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
 {{if name=='object'}}
-    resx = np.empty((<object> out).shape, dtype=object)
+    resx = np.empty((<object>out).shape, dtype=object)
 {{else}}
     resx = np.empty_like(out)
 {{endif}}

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

 {{if name == "object"}}
     if True:  # make templating happy
@@ -462,7 +462,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'
-    N, K = (<object> values).shape
+    N, K = (<object>values).shape
     grp_sizes = np.ones_like(out)

     # Copy values into new array in order to fill missing data
@@ -635,7 +635,7 @@ def group_max(ndarray[groupby_t, ndim=2] out,
         maxx.fill(-np.inf)
         nan_val = NAN

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     with nogil:
         for i in range(N):
@@ -697,7 +697,7 @@ def group_min(ndarray[groupby_t, ndim=2] out,
         minx.fill(np.inf)
         nan_val = NAN

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape

     with nogil:
         for i in range(N):
@@ -744,7 +744,7 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,
         ndarray[groupby_t, ndim=2] accum
         int64_t lab

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape
     accum = np.empty_like(values)
     if groupby_t is int64_t:
         accum.fill(_int64_max)
@@ -792,7 +792,7 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
         ndarray[groupby_t, ndim=2] accum
         int64_t lab

-    N, K = (<object> values).shape
+    N, K = (<object>values).shape
     accum = np.empty_like(values)
     if groupby_t is int64_t:
         accum.fill(-_int64_max)
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 1fdd8e3b1987f..affb6a038074a 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -318,7 +318,7 @@ cdef class {{name}}HashTable(HashTable):
         for i in range(n):
             key = keys[i]
             k = kh_put_{{dtype}}(self.table, key, &ret)
-            self.table.vals[k] = <Py_ssize_t> values[i]
+            self.table.vals[k] = <Py_ssize_t>values[i]

     @cython.boundscheck(False)
     def map_locations(self, ndarray[{{dtype}}_t, ndim=1] values):
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index c92e0a4a7aa23..04c2f222b14ad 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -11,7 +11,7 @@ from numpy cimport (ndarray,
 cnp.import_array()

-cdef double NaN = <double> np.NaN
+cdef double NaN = <double>np.NaN
 cdef double nan = NaN

 from pandas._libs.algos import groupsort_indexer, ensure_platform_int
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c57dd66a33fe0..ad8ae9c4bdb74 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -304,7 +304,7 @@ def fast_zip(list ndarrays):

     # initialize tuples on first pass
     arr = ndarrays[0]
-    it = <flatiter> PyArray_IterNew(arr)
+    it = <flatiter>PyArray_IterNew(arr)
     for i in range(n):
         val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))
         tup = PyTuple_New(k)
@@ -316,7 +316,7 @@ def fast_zip(list ndarrays):

     for j in range(1, k):
         arr = ndarrays[j]
-        it = <flatiter> PyArray_IterNew(arr)
+        it = <flatiter>PyArray_IterNew(arr)
         if len(arr) != n:
             raise ValueError('all arrays must be same length')

@@ -1994,8 +1994,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
                 break
             elif util.is_integer_object(val):
                 seen.int_ = 1
-                floats[i] = <float64_t> val
-                complexes[i] = <complex128_t> val
+                floats[i] = <float64_t>val
+                complexes[i] = <complex128_t>val
                 if not seen.null_:
                     seen.saw_int(int(val))
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index 2590a30c57f33..95ea103025b23 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -13,7 +13,7 @@ cimport util
 from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value
 from tslibs.nattype import NaT

-cdef double INF = <double> np.inf
+cdef double INF = <double>np.inf
 cdef double NEGINF = -INF

 cdef int64_t NPY_NAT = util.get_nat()
@@ -224,7 +224,7 @@ def isnaobj2d(ndarray arr):

     assert arr.ndim == 2, "'arr' must be 2-D."

-    n, m = (<object> arr).shape
+    n, m = (<object>arr).shape
     result = np.zeros((n, m), dtype=np.uint8)
     for i in range(n):
         for j in range(m):
@@ -268,7 +268,7 @@ def isnaobj2d_old(ndarray arr):

     assert arr.ndim == 2, "'arr' must be 2-D."

-    n, m = (<object> arr).shape
+    n, m = (<object>arr).shape
     result = np.zeros((n, m), dtype=np.uint8)
     for i in range(n):
         for j in range(m):
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index e3df391c5c45d..027a4e36204dc 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -65,7 +65,7 @@ CParserError = ParserError

 cdef bint PY3 = (sys.version_info[0] >= 3)

-cdef double INF = <double> np.inf
+cdef double INF = <double>np.inf
 cdef double NEGINF = -INF

@@ -1438,7 +1438,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col,
         # in the hash table
         if k != table.n_buckets:
             # this increments the refcount, but need to test
-            pyval = <object> table.vals[k]
+            pyval = <object>table.vals[k]
         else:
             # box it. new ref?
             pyval = PyBytes_FromString(word)
@@ -1492,7 +1492,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
         # in the hash table
         if k != table.n_buckets:
             # this increments the refcount, but need to test
-            pyval = <object> table.vals[k]
+            pyval = <object>table.vals[k]
         else:
             # box it. new ref?
             pyval = PyUnicode_FromString(word)
@@ -1549,7 +1549,7 @@ cdef _string_box_decode(parser_t *parser, int64_t col,
         # in the hash table
         if k != table.n_buckets:
             # this increments the refcount, but need to test
-            pyval = <object> table.vals[k]
+            pyval = <object>table.vals[k]
         else:
             # box it. new ref?
             size = strlen(word)
@@ -2087,14 +2087,14 @@ cdef raise_parser_error(object base, parser_t *parser):
             Py_XDECREF(traceback)

             if value != NULL:
-                old_exc = <object> value
+                old_exc = <object>value
                 Py_XDECREF(value)

                 # PyErr_Fetch only returned the error message in *value,
                 # so the Exception class must be extracted from *type.
                if isinstance(old_exc, compat.string_types):
                    if type != NULL:
-                        exc_type = <object> type
+                        exc_type = <object>type
                    else:
                        exc_type = ParserError

diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx
index 6e4c0c62b0dd8..d2fbf5aa66fbf 100644
--- a/pandas/_libs/properties.pyx
+++ b/pandas/_libs/properties.pyx
@@ -31,7 +31,7 @@ cdef class CachedProperty(object):

         if PyDict_Contains(cache, self.name):
             # not necessary to Py_INCREF
-            val = <object> PyDict_GetItem(cache, self.name)
+            val = <object>PyDict_GetItem(cache, self.name)
         else:
             val = self.func(obj)
             PyDict_SetItem(cache, self.name, val)
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 119060bd28a1c..951c163522401 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -153,7 +153,7 @@ cdef class Reducer:
                     result = _get_result_array(res,
                                                self.nresults,
                                                len(self.dummy))
-                    it = <flatiter> PyArray_IterNew(result)
+                    it = <flatiter>PyArray_IterNew(result)

                 PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
                 chunk.data = chunk.data + self.increment
@@ -574,7 +574,7 @@ cdef class BlockSlider:
         self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks))
         for i, block in enumerate(self.blocks):
-            self.base_ptrs[i] = (<ndarray> block).data
+            self.base_ptrs[i] = (<ndarray>block).data

     def __dealloc__(self):
         free(self.base_ptrs)
diff --git a/pandas/_libs/skiplist.pyx b/pandas/_libs/skiplist.pyx
index eec0457fc4caf..6698fcb767d7c 100644
--- a/pandas/_libs/skiplist.pyx
+++ b/pandas/_libs/skiplist.pyx
@@ -105,7 +105,7 @@ cdef class IndexableSkiplist:
                 steps += steps_at_level[level]

         for level in range(d, self.maxlevels):
-            (<Node> chain[level]).width[level] += 1
+            (<Node>chain[level]).width[level] += 1

         self.size += 1
@@ -126,11 +126,11 @@ cdef class IndexableSkiplist:

             chain[level] = node

-        if value != (<Node> (<list> (<Node> chain[0]).next)[0]).value:
+        if value != (<Node>(<list>(<Node>chain[0]).next)[0]).value:
             raise KeyError('Not Found')

         # remove one link at each level
-        d = len((<Node> (<list> (<Node> chain[0]).next)[0]).next)
+        d = len((<Node>(<list>(<Node>chain[0]).next)[0]).next)
         for level in range(d):
             prevnode = chain[level]
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index 705d93da10ba8..67698f1b4c2ca 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -19,8 +19,8 @@ _np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11')
 # -----------------------------------------------------------------------------
 # Preamble stuff

-cdef float64_t NaN = <float64_t> np.NaN
-cdef float64_t INF = <float64_t> np.inf
+cdef float64_t NaN = <float64_t>np.NaN
+cdef float64_t INF = <float64_t>np.inf

 cdef inline int int_max(int a, int b): return a if a >= b else b
 cdef inline int int_min(int a, int b): return a if a <= b else b
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index d199997d2e9fe..f55966fd053af 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -95,7 +95,7 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True):
         NPY_DATETIMEUNIT unit
         npy_datetimestruct dts

-    shape = (<object> arr).shape
+    shape = (<object>arr).shape

     ivalues = arr.view(np.int64).ravel()

diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index fa965e2ca7c8c..f0a57c49a98fc 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -373,7 +373,7 @@ cdef inline parse_timedelta_string(object ts):
             elif current_unit == 'm':
                 current_unit = 's'
                 m = 1000000000L
-            r = <int64_t> int(''.join(number)) * m
+            r = <int64_t>int(''.join(number)) * m
             result += timedelta_as_neg(r, neg)
             have_hhmmss = 1
         else:
@@ -393,7 +393,7 @@ cdef inline parse_timedelta_string(object ts):
             if current_unit != 'm':
                raise ValueError("expected hh:mm:ss format before .")
            m = 1000000000L
-            r = <int64_t> int(''.join(number)) * m
+            r = <int64_t>int(''.join(number)) * m
            result += timedelta_as_neg(r, neg)
            have_value = 1
            unit, number, frac = [], [], []
@@ -427,7 +427,7 @@ cdef inline parse_timedelta_string(object ts):
        else:
            m = 10**(9 -len(frac))

-        r = <int64_t> int(''.join(frac)) * m
+        r = <int64_t>int(''.join(frac)) * m
        result += timedelta_as_neg(r, neg)

    # we have a regular format
@@ -436,7 +436,7 @@ cdef inline parse_timedelta_string(object ts):
        if current_unit != 'm':
            raise ValueError("expected hh:mm:ss format")
        m = 1000000000L
-        r = <int64_t> int(''.join(number)) * m
+        r = <int64_t>int(''.join(number)) * m
        result += timedelta_as_neg(r, neg)

    # we have a last abbreviation
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
index c4af4a6b35a37..8de2852942865 100644
--- a/pandas/_libs/window.pyx
+++ b/pandas/_libs/window.pyx
@@ -32,7 +32,7 @@ cdef float64_t MINfloat64 = np.NINF
 cdef float32_t MAXfloat32 = np.inf
 cdef float64_t MAXfloat64 = np.inf

-cdef double NaN = <double> np.NaN
+cdef double NaN = <double>np.NaN

 cdef inline int int_max(int a, int b): return a if a >= b else b
 cdef inline int int_min(int a, int b): return a if a <= b else b
@@ -1498,7 +1498,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win,
                 output[i] = skiplist_get(skiplist, 0, &ret)
             else:
                 idx_with_fraction = quantile * (nobs - 1)
-                idx = <int> idx_with_fraction
+                idx = <int>idx_with_fraction

                 if idx_with_fraction == idx:
                     # no need to interpolate
@@ -1529,7 +1529,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win,
                 elif interpolation_type == MIDPOINT:
                     vlow = skiplist_get(skiplist, idx, &ret)
                     vhigh = skiplist_get(skiplist, idx + 1, &ret)
-                    output[i] = <float64_t> (vlow + vhigh) / 2
+                    output[i] = <float64_t>(vlow + vhigh) / 2
         else:
             output[i] = NaN

From f1768c72c2c914dcf04d9e4606f8445f447b6a8a Mon Sep 17 00:00:00 2001
From: araraonline
Date: Sun, 4 Nov 2018 07:56:13 -0200
Subject: [PATCH 016/122] DOC: Adding documentation for pandas.core.indexes.api internal functions (#22980)
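For background, the union/intersection semantics that these private helpers
build on can be seen through the public Index API. The snippet below is an
illustrative sketch only (it exercises public pandas behavior and is not part
of the change itself); exact reprs depend on the pandas version:

    import pandas as pd

    left = pd.Index([1, 2, 3])
    right = pd.Index([2, 3, 4])

    # union keeps every label, intersection keeps only the shared ones
    print(left.union(right))         # Int64Index([1, 2, 3, 4], dtype='int64')
    print(left.intersection(right))  # Int64Index([2, 3], dtype='int64')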
---
 pandas/core/indexes/api.py | 118 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 115 insertions(+), 3 deletions(-)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 6e0c5e1bc2fe0..fb090c0fd83ba 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -44,9 +44,28 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
-    # Extract combined index: return intersection or union (depending on the
-    # value of "intersect") of indexes on given axis, or None if all objects
-    # lack indexes (e.g. they are numpy arrays)
+    """
+    Extract combined index: return intersection or union (depending on the
+    value of "intersect") of indexes on given axis, or None if all objects
+    lack indexes (e.g. they are numpy arrays).
+
+    Parameters
+    ----------
+    objs : list of objects
+        Each object will only be considered if it has a _get_axis
+        attribute.
+    intersect : bool, default False
+        If True, calculate the intersection between indexes. Otherwise,
+        calculate the union.
+    axis : {0 or 'index', 1 or 'columns'}, default 0
+        The axis to extract indexes from.
+    sort : bool, default True
+        Whether the result index should come out sorted or not.
+
+    Returns
+    -------
+    Index
+    """
     obs_idxes = [obj._get_axis(axis) for obj in objs
                  if hasattr(obj, '_get_axis')]
     if obs_idxes:
@@ -68,6 +87,24 @@ def _get_distinct_objs(objs):


 def _get_combined_index(indexes, intersect=False, sort=False):
+    """
+    Return the union or intersection of indexes.
+
+    Parameters
+    ----------
+    indexes : list of Index or list objects
+        When intersect=True, do not accept list of lists.
+    intersect : bool, default False
+        If True, calculate the intersection between indexes. Otherwise,
+        calculate the union.
+    sort : bool, default False
+        Whether the result index should come out sorted or not.
+
+    Returns
+    -------
+    Index
+    """
+
     # TODO: handle index names!
     indexes = _get_distinct_objs(indexes)
     if len(indexes) == 0:
@@ -91,6 +128,21 @@ def _get_combined_index(indexes, intersect=False, sort=False):


 def _union_indexes(indexes, sort=True):
+    """
+    Return the union of indexes.
+
+    The behavior of sort and names is not consistent.
+
+    Parameters
+    ----------
+    indexes : list of Index or list objects
+    sort : bool, default True
+        Whether the result index should come out sorted or not.
+
+    Returns
+    -------
+    Index
+    """
     if len(indexes) == 0:
         raise AssertionError('Must have at least 1 Index to union')
     if len(indexes) == 1:
@@ -102,6 +154,19 @@ def _union_indexes(indexes, sort=True):
         indexes, kind = _sanitize_and_check(indexes)

     def _unique_indices(inds):
+        """
+        Convert indexes to lists and concatenate them, removing duplicates.
+
+        The final dtype is inferred.
+
+        Parameters
+        ----------
+        inds : list of Index or list objects
+
+        Returns
+        -------
+        Index
+        """
         def conv(i):
             if isinstance(i, Index):
                 i = i.tolist()
@@ -140,6 +205,26 @@ def conv(i):


 def _sanitize_and_check(indexes):
+    """
+    Verify the type of indexes and convert lists to Index.
+
+    Cases:
+
+    - [list, list, ...]: Return ([list, list, ...], 'list')
+    - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])
+      Lists are sorted and converted to Index.
+    - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
+      TYPE = 'special' if at least one special type, 'array' otherwise.
+
+    Parameters
+    ----------
+    indexes : list of Index or list objects
+
+    Returns
+    -------
+    sanitized_indexes : list of Index or list objects
+    type : {'list', 'array', 'special'}
+    """
     kinds = list({type(index) for index in indexes})

     if list in kinds:
@@ -158,6 +243,21 @@ def _sanitize_and_check(indexes):


 def _get_consensus_names(indexes):
+    """
+    Give a consensus 'names' to indexes.
+
+    If there's exactly one non-empty 'names', return this,
+    otherwise, return empty.
+
+    Parameters
+    ----------
+    indexes : list of Index objects
+
+    Returns
+    -------
+    list
+        A list representing the consensus 'names' found.
+    """
     # find the non-none names, need to tupleify to make
     # the set hashable, then reverse on return

@@ -169,6 +269,18 @@ def _get_consensus_names(indexes):


 def _all_indexes_same(indexes):
+    """
+    Determine if all indexes contain the same elements.
+
+    Parameters
+    ----------
+    indexes : list of Index objects
+
+    Returns
+    -------
+    bool
+        True if all indexes contain the same elements, False otherwise.
+    """
     first = indexes[0]
     for index in indexes[1:]:
         if not first.equals(index):

From 547c24031557e9d065c79f7baac1adc3b87dac4d Mon Sep 17 00:00:00 2001
From: Bart Aelterman
Date: Sun, 4 Nov 2018 14:56:58 +0100
Subject: [PATCH 017/122] DOC: Clarify documentation of 'ambiguous' parameter
 (#23408)

* Add documentation line with example for the ambiguous parameter of
  tz_localize

* Updating 'ambiguous'-param doc + update it on Timestamp, DatetimeIndex
  and NaT

This is following the discussion at
https://github.com/pandas-dev/pandas/pull/23408#discussion_r229109984
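A minimal sketch of the behavior being documented (illustrative only, not
part of the change; the reprs in the comments assume the usual pytz 'CET'
zone is available):

    import pandas as pd

    # 02:30 happens twice in Central European Time on 2018-10-28, so the
    # `ambiguous` flag picks which of the two UTC instants is meant.
    ts = pd.Timestamp('2018-10-28 02:30:00')
    print(ts.tz_localize('CET', ambiguous=True))   # 2018-10-28 02:30:00+02:00
    print(ts.tz_localize('CET', ambiguous=False))  # 2018-10-28 02:30:00+01:00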
---
 pandas/_libs/tslibs/conversion.pyx | 15 ++++++++-
 pandas/_libs/tslibs/nattype.pyx    |  7 ++++
 pandas/_libs/tslibs/timestamps.pyx |  7 ++++
 pandas/core/arrays/datetimes.py    | 40 ++++++++++++++++++++++
 pandas/core/generic.py             | 53 ++++++++++++++++++++++++++++++
 pandas/core/indexes/datetimes.py   |  7 ++++
 6 files changed, 128 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index f55966fd053af..8cf42bf93eb2c 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -844,7 +844,20 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
     vals : ndarray[int64_t]
     tz : tzinfo or None
     ambiguous : str, bool, or arraylike
-        If arraylike, must have the same length as vals
+        When clocks moved backward due to DST, ambiguous times may arise.
+        For example in Central European Time (UTC+01), when going from 03:00
+        DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
+        and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
+        dictates how ambiguous times should be handled.
+
+        - 'infer' will attempt to infer fall dst-transition hours based on
+          order
+        - bool-ndarray where True signifies a DST time, False signifies a
+          non-DST time (note that this flag is only applicable for ambiguous
+          times, but the array must have the same length as vals)
+        - bool if True, treat all vals as DST. If False, treat them as non-DST
+        - 'NaT' will return NaT where there are ambiguous times
+
     nonexistent : str
         If arraylike, must have the same length as vals

diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
index 3eaf624e10757..a010cbf76cf5d 100644
--- a/pandas/_libs/tslibs/nattype.pyx
+++ b/pandas/_libs/tslibs/nattype.pyx
@@ -593,6 +593,13 @@ class NaTType(_NaT):
             None will remove timezone holding local time.

         ambiguous : bool, 'NaT', default 'raise'
+            When clocks moved backward due to DST, ambiguous times may arise.
+            For example in Central European Time (UTC+01), when going from
+            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
+            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
+            `ambiguous` parameter dictates how ambiguous times should be
+            handled.
+
             - bool contains flags to determine if time is dst or not (note
               that this flag is only applicable for ambiguous fall dst dates)
             - 'NaT' will return NaT for an ambiguous time
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index 65da765bae739..094b48920fc46 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -1026,6 +1026,13 @@ class Timestamp(_Timestamp):
             None will remove timezone holding local time.

         ambiguous : bool, 'NaT', default 'raise'
+            When clocks moved backward due to DST, ambiguous times may arise.
+            For example in Central European Time (UTC+01), when going from
+            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
+            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
+            `ambiguous` parameter dictates how ambiguous times should be
+            handled.
+
             - bool contains flags to determine if time is dst or not (note
               that this flag is only applicable for ambiguous fall dst dates)
             - 'NaT' will return NaT for an ambiguous time
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 270e4757df30f..0258e1e6e5973 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -614,6 +614,12 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise',
             Time zone to convert timestamps to. Passing ``None`` will
             remove the time zone information preserving local time.
         ambiguous : 'infer', 'NaT', bool array, default 'raise'
+            When clocks moved backward due to DST, ambiguous times may arise.
+            For example in Central European Time (UTC+01), when going from
+            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
+            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
+            `ambiguous` parameter dictates how ambiguous times should be
+            handled.

             - 'infer' will attempt to infer fall dst-transition hours based on
               order
@@ -685,6 +691,40 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise',
         DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                        '2018-03-03 09:00:00'],
                       dtype='datetime64[ns]', freq='D')
+
+        Be careful with DST changes. When there is sequential data, pandas can
+        infer the DST time:
+        >>> s = pd.to_datetime(pd.Series([
+        ...     '2018-10-28 01:30:00',
+        ...     '2018-10-28 02:00:00',
+        ...     '2018-10-28 02:30:00',
+        ...     '2018-10-28 02:00:00',
+        ...     '2018-10-28 02:30:00',
+        ...     '2018-10-28 03:00:00',
+        ...     '2018-10-28 03:30:00']))
+        >>> s.dt.tz_localize('CET', ambiguous='infer')
+        2018-10-28 01:30:00+02:00    0
+        2018-10-28 02:00:00+02:00    1
+        2018-10-28 02:30:00+02:00    2
+        2018-10-28 02:00:00+01:00    3
+        2018-10-28 02:30:00+01:00    4
+        2018-10-28 03:00:00+01:00    5
+        2018-10-28 03:30:00+01:00    6
+        dtype: int64
+
+        In some cases, inferring the DST is impossible. In such cases, you can
+        pass an ndarray to the ambiguous parameter to set the DST explicitly
+
+        >>> s = pd.to_datetime(pd.Series([
+        ...     '2018-10-28 01:20:00',
+        ...     '2018-10-28 02:36:00',
+        ...     '2018-10-28 03:46:00']))
+        >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
+        0   2018-10-28 01:20:00+02:00
+        1   2018-10-28 02:36:00+02:00
+        2   2018-10-28 03:46:00+01:00
+        dtype: datetime64[ns, CET]
+
         """
         if errors is not None:
             warnings.warn("The errors argument is deprecated and will be "
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d1e9f103477cc..c0ff080826ac1 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8718,6 +8718,13 @@ def tz_localize(self, tz, axis=0, level=None, copy=True,
         copy : boolean, default True
             Also make a copy of the underlying data
         ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
+            When clocks moved backward due to DST, ambiguous times may arise.
+            For example in Central European Time (UTC+01), when going from
+            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
+            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
+            `ambiguous` parameter dictates how ambiguous times should be
+            handled.
+ - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False designates @@ -8745,6 +8752,52 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, ------ TypeError If the TimeSeries is tz-aware and tz is not None. + + Examples + -------- + + Localize local times: + + >>> s = pd.Series([1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00'])) + >>> s.tz_localize('CET') + 2018-09-15 01:30:00+02:00 1 + dtype: int64 + + Be careful with DST changes. When there is sequential data, pandas + can infer the DST time: + + >>> s = pd.Series(range(7), index=pd.DatetimeIndex([ + ... '2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.tz_localize('CET', ambiguous='infer') + 2018-10-28 01:30:00+02:00 0 + 2018-10-28 02:00:00+02:00 1 + 2018-10-28 02:30:00+02:00 2 + 2018-10-28 02:00:00+01:00 3 + 2018-10-28 02:30:00+01:00 4 + 2018-10-28 03:00:00+01:00 5 + 2018-10-28 03:30:00+01:00 6 + dtype: int64 + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.Series(range(3), index=pd.DatetimeIndex([ + ... '2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) + 2018-10-28 01:20:00+02:00 0 + 2018-10-28 02:36:00+02:00 1 + 2018-10-28 03:46:00+01:00 2 + dtype: int64 + """ if nonexistent not in ('raise', 'NaT', 'shift'): raise ValueError("The nonexistent argument must be one of 'raise'," diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f87059ba1f017..8178e042debda 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -99,6 +99,12 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, the 'left', 'right', or both sides (None) tz : pytz.timezone or dateutil.tz.tzfile ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. 
+ - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False signifies a @@ -173,6 +179,7 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, TimedeltaIndex : Index of timedelta64 data PeriodIndex : Index of Period data pandas.to_datetime : Convert argument to datetime + """ _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget) _shallow_copy = Index._shallow_copy From cb51a02fbae9d226b3cb35e54601ade33bddd2c5 Mon Sep 17 00:00:00 2001 From: Thein Oo Date: Sun, 4 Nov 2018 09:04:23 -0500 Subject: [PATCH 018/122] DOC: Validate in docstrings that numpy and pandas are not imported (#23161) --- scripts/tests/test_validate_docstrings.py | 17 +++++++++++++++++ scripts/validate_docstrings.py | 12 ++++++++++++ 2 files changed, 29 insertions(+) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 0e10265a7291d..aa8a1500d9d3d 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -218,6 +218,18 @@ def mode(self, axis, numeric_only): """ pass + def good_imports(self): + """ + Ensure import other than numpy and pandas are fine. + + Examples + -------- + This example does not import pandas or import numpy. + >>> import time + >>> import datetime + """ + pass + class BadGenericDocStrings(object): """Everything here has a bad docstring @@ -700,6 +712,11 @@ def test_bad_generic_functions(self, func): marks=pytest.mark.xfail), pytest.param('BadReturns', 'no_punctuation', ('foo',), marks=pytest.mark.xfail), + # Examples tests + ('BadGenericDocStrings', 'method', + ('numpy does not need to be imported in the examples,')), + ('BadGenericDocStrings', 'method', + ('pandas does not need to be imported in the examples,')), # See Also tests ('BadSeeAlso', 'prefix_pandas', ('pandas.Series.rename in `See Also` section ' diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 4b1834adcaf33..4c54762f6df31 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -402,6 +402,11 @@ def examples_errors(self): error_msgs += f.getvalue() return error_msgs + @property + def examples_source_code(self): + lines = doctest.DocTestParser().get_examples(self.raw_doc) + return [line.source for line in lines] + def validate_one(func_name): """ @@ -531,6 +536,13 @@ def validate_one(func_name): examples_errs = doc.examples_errors if examples_errs: errs.append('Examples do not pass tests') + examples_source_code = ''.join(doc.examples_source_code) + if 'import numpy' in examples_source_code: + errs.append("numpy does not need to be imported in the examples, " + "as it's assumed to be already imported as np") + if 'import pandas' in examples_source_code: + errs.append("pandas does not need to be imported in the examples, " + "as it's assumed to be already imported as pd") return {'type': doc.type, 'docstring': doc.clean_doc, From ae938fd8eed7589245d7daf1e72b640335447ffe Mon Sep 17 00:00:00 2001 From: Steve Cook Date: Mon, 5 Nov 2018 00:59:20 +1000 Subject: [PATCH 019/122] DOC: Updated docstrings related to DateTimeIndex. 
GH22459 (#22504) --- pandas/core/indexes/datetimes.py | 36 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 8178e042debda..210bdabbd9dd7 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1420,7 +1420,8 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', - '2018-04-27 00:00:00'], freq=None) + '2018-04-27 00:00:00'], + dtype='datetime64[ns]', freq=None) **Other Parameters** @@ -1491,37 +1492,39 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Parameters ---------- start : string or datetime-like, default None - Left bound for generating dates + Left bound for generating dates. end : string or datetime-like, default None - Right bound for generating dates + Right bound for generating dates. periods : integer, default None - Number of periods to generate + Number of periods to generate. freq : string or DateOffset, default 'B' (business daily) - Frequency strings can have multiples, e.g. '5H' + Frequency strings can have multiples, e.g. '5H'. tz : string or None Time zone name for returning localized DatetimeIndex, for example - Asia/Beijing + Asia/Beijing. normalize : bool, default False - Normalize start/end dates to midnight before generating date range + Normalize start/end dates to midnight before generating date range. name : string, default None - Name of the resulting DatetimeIndex + Name of the resulting DatetimeIndex. weekmask : string or None, default None Weekmask of valid business days, passed to ``numpy.busdaycalendar``, only used when custom frequency strings are passed. The default - value None is equivalent to 'Mon Tue Wed Thu Fri' + value None is equivalent to 'Mon Tue Wed Thu Fri'. .. versionadded:: 0.21.0 holidays : list-like or None, default None Dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar``, only used when custom frequency strings - are passed + are passed. .. versionadded:: 0.21.0 closed : string, default None Make the interval closed with respect to the given frequency to - the 'left', 'right', or both sides (None) + the 'left', 'right', or both sides (None). + **kwargs + For compatibility. Has no effect on the result. Notes ----- @@ -1535,7 +1538,16 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Returns ------- - rng : DatetimeIndex + DatetimeIndex + + Examples + -------- + Note how the two weekend days are skipped in the result. + + >>> pd.bdate_range(start='1/1/2018', end='1/08/2018') + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-08'], + dtype='datetime64[ns]', freq='B') """ if freq is None: msg = 'freq must be specified for bdate_range; use date_range instead' From 8c29ede7739fc9f93b57df86ca9ead6fe8b815be Mon Sep 17 00:00:00 2001 From: Alex Radu Date: Sun, 4 Nov 2018 15:01:15 +0000 Subject: [PATCH 020/122] DOC: Rephrased doc for Series.asof. 
Added examples (#21034) --- pandas/core/generic.py | 88 +++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 15 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c0ff080826ac1..6e5938e834774 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6497,40 +6497,98 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, def asof(self, where, subset=None): """ - The last row without any NaN is taken (or the last row without - NaN considering only the subset of columns in the case of a DataFrame) + Return the last row(s) without any `NaN`s before `where`. + + The last row (for each element in `where`, if list) without any + `NaN` is taken. + In case of a :class:`~pandas.DataFrame`, the last row without `NaN` + considering only the subset of columns (if not `None`) .. versionadded:: 0.19.0 For DataFrame - If there is no good value, NaN is returned for a Series + If there is no good value, `NaN` is returned for a Series or a Series of NaN values for a DataFrame Parameters ---------- - where : date or array of dates - subset : string or list of strings, default None - if not None use these columns for NaN propagation + where : date or array-like of dates + Date(s) before which the last row(s) are returned. + subset : str or array-like of str, default `None` + For DataFrame, if not `None`, only use these columns to + check for `NaN`s. Notes ----- - Dates are assumed to be sorted - Raises if this is not the case + Dates are assumed to be sorted. Raises if this is not the case. Returns ------- - where is scalar - - - value or NaN if input is Series - - Series if input is DataFrame + scalar, Series, or DataFrame - where is Index: same shape object as input + * scalar : when `self` is a Series and `where` is a scalar + * Series: when `self` is a Series and `where` is an array-like, + or when `self` is a DataFrame and `where` is a scalar + * DataFrame : when `self` is a DataFrame and `where` is an + array-like See Also -------- - merge_asof + merge_asof : Perform an asof merge. Similar to left join. - """ + Examples + -------- + A Series and a scalar `where`. + + >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) + >>> s + 10 1.0 + 20 2.0 + 30 NaN + 40 4.0 + dtype: float64 + + >>> s.asof(20) + 2.0 + For a sequence `where`, a Series is returned. The first value is + ``NaN``, because the first element of `where` is before the first + index value. + + >>> s.asof([5, 20]) + 5 NaN + 20 2.0 + dtype: float64 + + Missing values are not considered. The following is ``2.0``, not + ``NaN``, even though ``NaN`` is at the index location for ``30``. + + >>> s.asof(30) + 2.0 + + Take all columns into consideration + + >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], + ... 'b': [None, None, None, None, 500]}, + ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', + ... '2018-02-27 09:02:00', + ... '2018-02-27 09:03:00', + ... '2018-02-27 09:04:00', + ... '2018-02-27 09:05:00'])) + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30'])) + a b + 2018-02-27 09:03:30 NaN NaN + 2018-02-27 09:04:30 NaN NaN + + Take a single column into consideration + + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30']), + ... 
subset=['a']) + a b + 2018-02-27 09:03:30 30.0 NaN + 2018-02-27 09:04:30 40.0 NaN + """ if isinstance(where, compat.string_types): from pandas import to_datetime where = to_datetime(where) From b0c41565d7c29bab53cf76d644e84c51c12a7e4f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 4 Nov 2018 07:16:55 -0800 Subject: [PATCH 021/122] TST: drop tz-aware timestamp from DatetimIndex with DST transition (#23479) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/tests/test_multilevel.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 05380ef2deb5f..0d8cc601d4d2c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1150,6 +1150,7 @@ Timezones - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`) +- Bug in :meth:`DataFrame.drop` and :meth:`Series.drop` when specifying a tz-aware Timestamp key to drop from a :class:`DatetimeIndex` with a DST transition (:issue:`21761`) Offsets ^^^^^^^ diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 2022340926cca..9829c04ea108f 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1931,6 +1931,19 @@ def test_drop_level_nonunique_datetime(self): expected = df.loc[idx != 4] tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize('box', [Series, DataFrame]) + def test_drop_tz_aware_timestamp_across_dst(self, box): + # GH 21761 + start = Timestamp('2017-10-29', tz='Europe/Berlin') + end = Timestamp('2017-10-29 04:00:00', tz='Europe/Berlin') + index = pd.date_range(start, end, freq='15min') + data = box(data=[1] * len(index), index=index) + result = data.drop(start) + expected_start = Timestamp('2017-10-29 00:15:00', tz='Europe/Berlin') + expected_idx = pd.date_range(expected_start, end, freq='15min') + expected = box(data=[1] * len(expected_idx), index=expected_idx) + tm.assert_equal(result, expected) + def test_drop_preserve_names(self): index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], From 23fae32909126acfee5d1a78e537a0da8e91ee1f Mon Sep 17 00:00:00 2001 From: Yitzhak Andrade Date: Sun, 4 Nov 2018 13:24:10 -0200 Subject: [PATCH 022/122] DOC: update the pandas.Series.shift docstring (#20472) --- pandas/core/generic.py | 57 +++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6e5938e834774..71e4641d20c1b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8421,32 +8421,59 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, errors=errors) _shared_docs['shift'] = (""" - Shift index by desired number of periods with an optional time freq + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. + If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. 
         Parameters
         ----------
         periods : int
-            Number of periods to move, can be positive or negative.
-        freq : DateOffset, timedelta, or time rule string, optional
-            Increment to use from the tseries module or time rule (e.g. 'EOM').
-            See Notes.
-        axis : %(axes_single_arg)s
+            Number of periods to shift. Can be positive or negative.
+        freq : DateOffset, tseries.offsets, timedelta, or str, optional
+            Offset to use from the tseries module or time rule (e.g. 'EOM').
+            If `freq` is specified then the index values are shifted but the
+            data is not realigned. That is, use `freq` if you would like to
+            extend the index when shifting and preserve the original data.
+        axis : {0 or 'index', 1 or 'columns', None}, default None
+            Shift direction.
+
+        Returns
+        -------
+        %(klass)s
+            Copy of input object, shifted.

         See Also
         --------
         Index.shift : Shift values of Index.
         DatetimeIndex.shift : Shift values of DatetimeIndex.
         PeriodIndex.shift : Shift values of PeriodIndex.
+        tshift : Shift the time index, using the index's frequency if
+            available.

-        Notes
-        -----
-        If freq is specified then the index values are shifted but the data
-        is not realigned. That is, use freq if you would like to extend the
-        index when shifting and preserve the original data.
-
-        Returns
-        -------
-        shifted : %(klass)s
+        Examples
+        --------
+        >>> df = pd.DataFrame({'Col1': [10, 20, 15, 30, 45],
+        ...                    'Col2': [13, 23, 18, 33, 48],
+        ...                    'Col3': [17, 27, 22, 37, 52]})
+
+        >>> df.shift(periods=3)
+           Col1  Col2  Col3
+        0   NaN   NaN   NaN
+        1   NaN   NaN   NaN
+        2   NaN   NaN   NaN
+        3  10.0  13.0  17.0
+        4  20.0  23.0  27.0
+
+        >>> df.shift(periods=1, axis='columns')
+           Col1  Col2  Col3
+        0   NaN  10.0  13.0
+        1   NaN  20.0  23.0
+        2   NaN  15.0  18.0
+        3   NaN  30.0  33.0
+        4   NaN  45.0  48.0
     """)

     @Appender(_shared_docs['shift'] % _shared_doc_kwargs)

From 1f4bad5c2676ea568bf347b59f7af3b1e798aeff Mon Sep 17 00:00:00 2001
From: alimcmaster1
Date: Sun, 4 Nov 2018 15:30:43 +0000
Subject: [PATCH 023/122] Isort contributing guide (#23364)

---
 doc/source/contributing.rst | 52 +++++++++++++++++++++++++++++++++++++
 setup.cfg                   |  1 +
 2 files changed, 53 insertions(+)

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 66d545a0de6e9..3ec505998fde0 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -612,6 +612,54 @@ Alternatively, you can install the ``grep`` and ``xargs`` commands via the
 `MinGW <http://www.mingw.org/>`__ toolchain, and it will allow you to run the
 commands above.

+.. _contributing.import-formatting:
+
+Import Formatting
+~~~~~~~~~~~~~~~~~
+*pandas* uses `isort <https://pypi.org/project/isort/>`__ to standardise import
+formatting across the codebase.
+
+A guide to import layout as per pep8 can be found `here <https://www.python.org/dev/peps/pep-0008/#imports>`__.
+
+A summary of our current import sections (in order):
+
+* Future
+* Python Standard Library
+* Third Party
+* ``pandas._libs``, ``pandas.compat``, ``pandas.util._*``, ``pandas.errors`` (largely not dependent on ``pandas.core``)
+* ``pandas.core.dtypes`` (largely not dependent on the rest of ``pandas.core``)
+* Rest of ``pandas.core.*``
+* Non-core ``pandas.io``, ``pandas.plotting``, ``pandas.tseries``
+* Local application/library specific imports
+
+Imports are alphabetically sorted within these sections.
+
+
+As part of :ref:`Continuous Integration <contributing.ci>` checks we run::
+
+    isort --recursive --check-only pandas
+
+to check that imports are correctly formatted as per the `setup.cfg`.
+
+If you see output like the below in :ref:`Continuous Integration <contributing.ci>` checks:
+
+..
code-block:: shell + + Check import format using isort + ERROR: /home/travis/build/pandas-dev/pandas/pandas/io/pytables.py Imports are incorrectly sorted + Check import format using isort DONE + The command "ci/code_checks.sh" exited with 1 + +You should run:: + + isort pandas/io/pytables.py + +to automatically format imports correctly. This will modify your local copy of the files. + +The `--recursive` flag can be passed to sort all files in a directory. + +You can then verify the changes look ok, then git :ref:`commit ` and :ref:`push `. + Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ @@ -1078,6 +1126,8 @@ or a new keyword argument (`example Date: Sun, 4 Nov 2018 16:53:27 +0100 Subject: [PATCH 024/122] API: fix corner case of lib.infer_dtype (#23422) --- pandas/_libs/lib.pyx | 5 ++++- pandas/_libs/missing.pxd | 3 +++ pandas/_libs/missing.pyx | 2 +- pandas/tests/dtypes/test_inference.py | 16 ++++++++++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index ad8ae9c4bdb74..5907f76c20853 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -57,7 +57,7 @@ from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone, tz_compare -from missing cimport (checknull, +from missing cimport (checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period) @@ -1181,6 +1181,9 @@ def infer_dtype(value: object, skipna: bool=False) -> str: values = construct_1d_object_array_from_listlike(value) values = getattr(values, 'values', values) + if skipna: + values = values[~isnaobj(values)] + val = _try_infer_map(values) if val is not None: return val diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index 9f660cc6785c8..d0dd306680ae8 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -1,7 +1,10 @@ # -*- coding: utf-8 -*- +from numpy cimport ndarray, uint8_t + cpdef bint checknull(object val) cpdef bint checknull_old(object val) +cpdef ndarray[uint8_t] isnaobj(ndarray arr) cdef bint is_null_datetime64(v) cdef bint is_null_timedelta64(v) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 95ea103025b23..d6786a96871bd 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -124,7 +124,7 @@ cdef inline bint _check_none_nan_inf_neginf(object val): @cython.wraparound(False) @cython.boundscheck(False) -def isnaobj(ndarray arr): +cpdef ndarray[uint8_t] isnaobj(ndarray arr): """ Return boolean mask denoting which elements of a 1-D array are na-like, according to the criteria defined in `_check_all_nulls`: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d0dd03d6eb8df..c5911da1666d2 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -591,6 +591,22 @@ def test_unicode(self): expected = 'unicode' if PY2 else 'string' assert result == expected + @pytest.mark.parametrize('dtype, missing, skipna, expected', [ + (float, np.nan, False, 'floating'), + (float, np.nan, True, 'floating'), + (object, np.nan, False, 'floating'), + (object, np.nan, True, 'empty'), + (object, None, False, 'mixed'), + (object, None, True, 'empty') + ]) + @pytest.mark.parametrize('box', [pd.Series, np.array]) + def test_object_empty(self, box, missing, dtype, skipna, expected): + # GH 23421 + arr = box([missing, missing], dtype=dtype) + + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected + def 
test_datetime(self): dates = [datetime(2012, 1, x) for x in range(1, 20)] From 39a2e0a7295500789308b26c7fcb45fba5bac78b Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Sun, 4 Nov 2018 19:42:46 +0100 Subject: [PATCH 025/122] DOC: Use flake8 to check for PEP8 violations in doctests (#23399) --- scripts/tests/test_validate_docstrings.py | 60 ++++++++++++++++++++--- scripts/validate_docstrings.py | 33 ++++++++++++- 2 files changed, 86 insertions(+), 7 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index aa8a1500d9d3d..a3feee6552178 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -225,8 +225,9 @@ def good_imports(self): Examples -------- This example does not import pandas or import numpy. - >>> import time >>> import datetime + >>> datetime.MAXYEAR + 9999 """ pass @@ -596,6 +597,44 @@ def prefix_pandas(self): pass +class BadExamples(object): + + def unused_import(self): + """ + Examples + -------- + >>> import pandas as pdf + >>> df = pd.DataFrame(np.ones((3, 3)), columns=('a', 'b', 'c')) + """ + pass + + def missing_whitespace_around_arithmetic_operator(self): + """ + Examples + -------- + >>> 2+5 + 7 + """ + pass + + def indentation_is_not_a_multiple_of_four(self): + """ + Examples + -------- + >>> if 2 + 5: + ... pass + """ + pass + + def missing_whitespace_after_comma(self): + """ + Examples + -------- + >>> df = pd.DataFrame(np.ones((3,3)),columns=('a','b', 'c')) + """ + pass + + class TestValidator(object): def _import_path(self, klass=None, func=None): @@ -634,7 +673,7 @@ def test_good_class(self): @capture_stderr @pytest.mark.parametrize("func", [ 'plot', 'sample', 'random_letters', 'sample_values', 'head', 'head1', - 'contains', 'mode']) + 'contains', 'mode', 'good_imports']) def test_good_functions(self, func): errors = validate_one(self._import_path( klass='GoodDocStrings', func=func))['errors'] @@ -714,16 +753,25 @@ def test_bad_generic_functions(self, func): marks=pytest.mark.xfail), # Examples tests ('BadGenericDocStrings', 'method', - ('numpy does not need to be imported in the examples,')), + ('numpy does not need to be imported in the examples',)), ('BadGenericDocStrings', 'method', - ('pandas does not need to be imported in the examples,')), + ('pandas does not need to be imported in the examples',)), # See Also tests ('BadSeeAlso', 'prefix_pandas', ('pandas.Series.rename in `See Also` section ' - 'does not need `pandas` prefix',)) + 'does not need `pandas` prefix',)), + # Examples tests + ('BadExamples', 'unused_import', + ('1 F401 \'pandas as pdf\' imported but unused',)), + ('BadExamples', 'indentation_is_not_a_multiple_of_four', + ('1 E111 indentation is not a multiple of four',)), + ('BadExamples', 'missing_whitespace_around_arithmetic_operator', + ('1 E226 missing whitespace around arithmetic operator',)), + ('BadExamples', 'missing_whitespace_after_comma', + ('3 E231 missing whitespace after \',\'',)), ]) def test_bad_examples(self, capsys, klass, func, msgs): - result = validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 + result = validate_one(self._import_path(klass=klass, func=func)) for msg in msgs: assert msg in ' '.join(result['errors']) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 4c54762f6df31..ef6465c3e988d 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -24,6 +24,10 @@ import inspect import importlib import doctest +import tempfile + +import 
flake8.main.application + try: from io import StringIO except ImportError: @@ -168,7 +172,7 @@ def _load_obj(name): @staticmethod def _to_original_callable(obj): """ - Find the Python object that contains the source code ot the object. + Find the Python object that contains the source code of the object. This is useful to find the place in the source code (file and line number) where a docstring is defined. It does not currently work for @@ -407,6 +411,26 @@ def examples_source_code(self): lines = doctest.DocTestParser().get_examples(self.raw_doc) return [line.source for line in lines] + def validate_pep8(self): + if not self.examples: + return + + content = ''.join(('import numpy as np # noqa: F401\n', + 'import pandas as pd # noqa: F401\n', + *self.examples_source_code)) + + application = flake8.main.application.Application() + application.initialize(["--quiet"]) + + with tempfile.NamedTemporaryFile(mode='w') as file: + file.write(content) + file.flush() + application.run_checks([file.name]) + + application.report() + + yield from application.guide.stats.statistics_for('') + def validate_one(func_name): """ @@ -495,6 +519,13 @@ def validate_one(func_name): for param_err in param_errs: errs.append('\t{}'.format(param_err)) + pep8_errs = list(doc.validate_pep8()) + if pep8_errs: + errs.append('Linting issues in doctests:') + for err in pep8_errs: + errs.append('\t{} {} {}'.format(err.count, err.error_code, + err.message)) + if doc.is_function_or_method: if not doc.returns and "return" in doc.method_source: errs.append('No Returns section found') From 1b00ec51db87d0b2caade6e291a0a5abd20476b0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 4 Nov 2018 12:12:23 -0800 Subject: [PATCH 026/122] TST: Empty Series.reindex with tz-aware dtype (#23480) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/tests/series/indexing/test_alter_index.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 0d8cc601d4d2c..0fc38b87de7d8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1198,6 +1198,7 @@ Indexing - The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`) - When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index`, rather than falling back to positional indexing (:issue:`21593`) - Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`Series.reindex` when reindexing an empty series with a ``datetime64[ns, tz]`` dtype (:issue:`20869`) - Bug in :class:`DataFrame` when setting values with ``.loc`` and a timezone aware :class:`DatetimeIndex` (:issue:`11365`) - ``DataFrame.__getitem__`` now accepts dictionaries and dictionary keys as list-likes of labels, consistently with ``Series.__getitem__`` (:issue:`21294`) - Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 25c930e8cade6..57a087221f411 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -459,6 +459,13 @@ def test_reindex_datetimeindexes_tz_naive_and_aware(): s.reindex(newidx, method='ffill') +def test_reindex_empty_series_tz_dtype(): + 
# GH 20869 + result = Series(dtype='datetime64[ns, UTC]').reindex([0, 1]) + expected = Series([pd.NaT] * 2, dtype='datetime64[ns, UTC]') + tm.assert_equal(result, expected) + + def test_rename(): # GH 17407 s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex')) From 0d0b5617e4f506c9549a0251aa9433003b26e3f9 Mon Sep 17 00:00:00 2001 From: vkk800 Date: Sun, 4 Nov 2018 22:32:19 +0200 Subject: [PATCH 027/122] DOC: Fix creation of [source] links in the doc creation (#23129) --- doc/source/conf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index e8d87d4c8368c..b0501eaf54dc2 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -569,7 +569,11 @@ def linkcode_resolve(domain, info): return None try: - fn = inspect.getsourcefile(obj) + # inspect.unwrap() was added in Python version 3.4 + if sys.version_info >= (3, 5): + fn = inspect.getsourcefile(inspect.unwrap(obj)) + else: + fn = inspect.getsourcefile(obj) except: fn = None if not fn: From 6202eb08a16db0a38adeb3d576dfb37f951ec62d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 4 Nov 2018 21:19:00 -0800 Subject: [PATCH 028/122] DOC: Remove dead link and update links to https (#23476) * DOC: Remove dead links and update links to https * Add missing ~ --- doc/source/basics.rst | 4 +-- doc/source/comparison_with_sas.rst | 18 +++++----- doc/source/comparison_with_sql.rst | 4 +-- doc/source/contributing_docstring.rst | 8 ++--- doc/source/cookbook.rst | 34 +++++++++--------- doc/source/dsintro.rst | 12 +++---- doc/source/ecosystem.rst | 28 +++++++-------- doc/source/index.rst.template | 16 ++++----- doc/source/io.rst | 52 +++++++++++++-------------- doc/source/options.rst | 2 +- doc/source/overview.rst | 4 +-- doc/source/release.rst | 18 +++++----- doc/source/tutorials.rst | 6 ++-- 13 files changed, 104 insertions(+), 102 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 16ab345fd1744..81efbfd6d1403 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -767,7 +767,7 @@ We encourage you to view the source code of :meth:`~DataFrame.pipe`. .. _dplyr: https://github.com/hadley/dplyr .. _magrittr: https://github.com/smbache/magrittr -.. _R: http://www.r-project.org +.. _R: https://www.r-project.org Row or Column-wise Function Application @@ -2296,7 +2296,7 @@ For example, to select ``bool`` columns: df.select_dtypes(include=[bool]) You can also pass the name of a dtype in the `NumPy dtype hierarchy -`__: +`__: .. ipython:: python diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 0354ad473544b..4d7acdf9ab16c 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -365,8 +365,8 @@ Length ~~~~~~ SAS determines the length of a character string with the -`LENGTHN `__ -and `LENGTHC `__ +`LENGTHN `__ +and `LENGTHC `__ functions. ``LENGTHN`` excludes trailing blanks and ``LENGTHC`` includes trailing blanks. .. code-block:: sas @@ -391,7 +391,7 @@ Find ~~~~ SAS determines the position of a character in a string with the -`FINDW `__ function. +`FINDW `__ function. ``FINDW`` takes the string defined by the first argument and searches for the first position of the substring you supply as the second argument. @@ -417,7 +417,7 @@ Substring ~~~~~~~~~ SAS extracts a substring from a string based on its position with the -`SUBSTR `__ function. +`SUBSTR `__ function. .. code-block:: sas @@ -438,7 +438,7 @@ indexes are zero-based. 
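Both of the translations above funnel into pandas' vectorized ``.str`` accessor; the following is a minimal sketch of the equivalent calls, with an invented ``tips`` frame standing in for the dataset used on this page:

.. code-block:: python

   import pandas as pd

   tips = pd.DataFrame({'sex': ['Male', 'Female', 'Female']})

   # FINDW analogue: str.find is zero-based and returns -1 when the
   # substring is absent
   tips['sex'].str.find('ale')

   # SUBSTR analogue: positional slicing, with an exclusive end bound
   tips['sex'].str[0:1]

Both calls return a new ``Series``; nothing is modified in place.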
Scan ~~~~ -The SAS `SCAN `__ +The SAS `SCAN `__ function returns the nth word from a string. The first argument is the string you want to parse and the second argument specifies which word you want to extract. @@ -469,9 +469,9 @@ approaches, but this just shows a simple approach. Upcase, Lowcase, and Propcase ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The SAS `UPCASE `__ -`LOWCASE `__ and -`PROPCASE `__ +The SAS `UPCASE `__ +`LOWCASE `__ and +`PROPCASE `__ functions change the case of the argument. .. code-block:: sas @@ -709,7 +709,7 @@ This means that the size of data able to be loaded in pandas is limited by your machine's memory, but also that the operations on that data may be faster. If out of core processing is needed, one possibility is the -`dask.dataframe `_ +`dask.dataframe `_ library (currently in development) which provides a subset of pandas functionality for an on-disk ``DataFrame`` diff --git a/doc/source/comparison_with_sql.rst b/doc/source/comparison_with_sql.rst index ba069b5a44c72..db143cd586441 100644 --- a/doc/source/comparison_with_sql.rst +++ b/doc/source/comparison_with_sql.rst @@ -4,7 +4,7 @@ Comparison with SQL ******************** Since many potential pandas users have some familiarity with -`SQL `_, this page is meant to provide some examples of how +`SQL `_, this page is meant to provide some examples of how various SQL operations would be performed using pandas. If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` @@ -59,7 +59,7 @@ Filtering in SQL is done via a WHERE clause. LIMIT 5; DataFrames can be filtered in multiple ways; the most intuitive of which is using -`boolean indexing `_. +`boolean indexing `_. .. ipython:: python diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst index afb554aeffbc3..38e4baa66ef67 100644 --- a/doc/source/contributing_docstring.rst +++ b/doc/source/contributing_docstring.rst @@ -16,7 +16,7 @@ function or method, so programmers can understand what it does without having to read the details of the implementation. Also, it is a common practice to generate online (html) documentation -automatically from docstrings. `Sphinx `_ serves +automatically from docstrings. `Sphinx `_ serves this purpose. Next example gives an idea on how a docstring looks like: @@ -68,7 +68,7 @@ As PEP-257 is quite open, and some other standards exist on top of it. In the case of pandas, the numpy docstring convention is followed. The conventions is explained in this document: -* `numpydoc docstring guide `_ +* `numpydoc docstring guide `_ (which is based in the original `Guide to NumPy/SciPy documentation `_) @@ -78,7 +78,7 @@ The standard uses reStructuredText (reST). reStructuredText is a markup language that allows encoding styles in plain text files. Documentation about reStructuredText can be found in: -* `Sphinx reStructuredText primer `_ +* `Sphinx reStructuredText primer `_ * `Quick reStructuredText reference `_ * `Full reStructuredText specification `_ @@ -119,7 +119,7 @@ backticks. It is considered inline code: function, prefix it with ``~``. For example, ``:class:`~pandas.Series``` will link to ``pandas.Series`` but only display the last part, ``Series`` as the link text. See `Sphinx cross-referencing syntax - `_ + `_ for details. 
**Good:** diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 21d1f11ba49ba..3d26a9c7d3d54 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -52,7 +52,7 @@ Idioms These are some neat pandas ``idioms`` `if-then/if-then-else on one column, and assignment to another one or more columns: -`__ +`__ .. ipython:: python @@ -88,7 +88,7 @@ Or use pandas where after you've set up a mask df.where(df_mask,-1000) `if-then-else using numpy's where() -`__ +`__ .. ipython:: python @@ -101,7 +101,7 @@ Splitting ********* `Split a frame with a boolean criterion -`__ +`__ .. ipython:: python @@ -115,7 +115,7 @@ Building Criteria ***************** `Select with multi-column criteria -`__ +`__ .. ipython:: python @@ -141,7 +141,7 @@ Building Criteria df.loc[(df['BBB'] > 25) | (df['CCC'] >= 75), 'AAA'] = 0.1; df `Select rows with data closest to certain value using argsort -`__ +`__ .. ipython:: python @@ -152,7 +152,7 @@ Building Criteria df.loc[(df.CCC-aValue).abs().argsort()] `Dynamically reduce a list of criteria using a binary operators -`__ +`__ .. ipython:: python @@ -189,7 +189,7 @@ DataFrames The :ref:`indexing ` docs. `Using both row labels and value conditionals -`__ +`__ .. ipython:: python @@ -232,7 +232,7 @@ Ambiguity arises when an index consists of integers with a non-zero start or non df2.loc[1:3] #Label-oriented `Using inverse operator (~) to take the complement of a mask -`__ +`__ .. ipython:: python @@ -259,13 +259,13 @@ Panels pf.loc[:,:,'F'] = pd.DataFrame(data, rng, cols);pf `Mask a panel by using np.where and then reconstructing the panel with the new masked values -`__ +`__ New Columns *********** `Efficiently and dynamically creating new columns using applymap -`__ +`__ .. ipython:: python @@ -279,7 +279,7 @@ New Columns df[new_cols] = df[source_cols].applymap(categories.get);df `Keep other columns when using min() with groupby -`__ +`__ .. ipython:: python @@ -308,7 +308,7 @@ MultiIndexing The :ref:`multindexing ` docs. `Creating a MultiIndex from a labeled frame -`__ +`__ .. ipython:: python @@ -331,7 +331,7 @@ Arithmetic ********** `Performing arithmetic with a MultiIndex that needs broadcasting -`__ +`__ .. ipython:: python @@ -343,7 +343,7 @@ Slicing ******* `Slicing a MultiIndex with xs -`__ +`__ .. ipython:: python @@ -364,7 +364,7 @@ To take the cross section of the 1st level and 1st axis the index: df.xs('six',level=1,axis=0) `Slicing a MultiIndex with xs, method #2 -`__ +`__ .. ipython:: python @@ -387,13 +387,13 @@ To take the cross section of the 1st level and 1st axis the index: df.loc[(All,'Math'),(All,'II')] `Setting portions of a MultiIndex with xs -`__ +`__ Sorting ******* `Sort by specific column or an ordered list of columns, with a MultiIndex -`__ +`__ .. ipython:: python diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index efa52a6f7cfe2..d02912294060c 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -249,7 +249,7 @@ pandas object. Like Series, DataFrame accepts many different kinds of input: * Dict of 1D ndarrays, lists, dicts, or Series * 2-D numpy.ndarray * `Structured or record - `__ ndarray + `__ ndarray * A ``Series`` * Another ``DataFrame`` @@ -476,7 +476,7 @@ Assigning New Columns in Method Chains ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Inspired by `dplyr's -`__ +`__ ``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` method that allows you to easily create new columns that are potentially derived from existing columns. 
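Since the surrounding hunks lean on ``assign``, a compact sketch of the pattern may help; the frame below is made up for illustration and is not taken from the patch:

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'a': [1.0, 2.0, 3.0]})

   # assign returns a new DataFrame with the derived column;
   # the original df is left untouched
   out = df.assign(b=lambda d: d['a'] * 2)

Passing a callable rather than a precomputed array lets the new column be expressed in terms of the frame it is being added to, which is what makes method chains readable.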
@@ -815,7 +815,7 @@ accessed like an attribute: df df.foo1 -The columns are also connected to the `IPython `__ +The columns are also connected to the `IPython `__ completion mechanism so they can be tab-completed: .. code-block:: ipython @@ -834,7 +834,7 @@ Panel a future version. See the section :ref:`Deprecate Panel `. Panel is a somewhat less-used, but still important container for 3-dimensional -data. The term `panel data `__ is +data. The term `panel data `__ is derived from econometrics and is partially responsible for the name pandas: pan(el)-da(ta)-s. The names for the 3 axes are intended to give some semantic meaning to describing operations involving panel data and, in particular, @@ -1024,7 +1024,7 @@ Oftentimes, one can simply use a MultiIndex ``DataFrame`` for easily working wit In addition, the ``xarray`` package was built from the ground up, specifically in order to support the multi-dimensional analysis that is one of ``Panel`` s main use cases. -`Here is a link to the xarray panel-transition documentation `__. +`Here is a link to the xarray panel-transition documentation `__. .. ipython:: python :okwarning: @@ -1046,4 +1046,4 @@ Alternatively, one can convert to an xarray ``DataArray``. p.to_xarray() -You can see the full-documentation for the `xarray package `__. +You can see the full-documentation for the `xarray package `__. diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 7fffcadd8ee8c..edbd6629a617d 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -27,8 +27,8 @@ substantial projects that you feel should be on this list, please let us know. Statistics and Machine Learning ------------------------------- -`Statsmodels `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`Statsmodels `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Statsmodels is the prominent Python "statistics and econometrics library" and it has a long-standing special relationship with pandas. Statsmodels provides powerful statistics, @@ -38,7 +38,7 @@ Statsmodels leverages pandas objects as the underlying data container for comput `sklearn-pandas `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Use pandas DataFrames in your `scikit-learn `__ +Use pandas DataFrames in your `scikit-learn `__ ML pipeline. `Featuretools `__ @@ -62,8 +62,8 @@ simplicity produces beautiful and effective visualizations with a minimal amount of code. Altair works with Pandas DataFrames. -`Bokeh `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`Bokeh `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bokeh is a Python interactive visualization library for large datasets that natively uses the latest web technologies. Its goal is to provide elegant, concise construction of novel @@ -74,7 +74,7 @@ large data to thin clients. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Seaborn is a Python visualization library based on -`matplotlib `__. It provides a high-level, dataset-oriented +`matplotlib `__. It provides a high-level, dataset-oriented interface for creating attractive statistical graphics. The plotting functions in seaborn understand pandas objects and leverage pandas grouping operations internally to support concise specification of complex visualizations. Seaborn @@ -85,8 +85,8 @@ fit of statistical models to emphasize patterns in a dataset. `yhat/ggpy `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Hadley Wickham's `ggplot2 `__ is a foundational exploratory visualization package for the R language. 
-Based on `"The Grammar of Graphics" `__ it +Hadley Wickham's `ggplot2 `__ is a foundational exploratory visualization package for the R language. +Based on `"The Grammar of Graphics" `__ it provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data. It's really quite incredible. Various implementations to other languages are available, but a faithful implementation for Python users has long been missing. Although still young @@ -102,7 +102,7 @@ progressing quickly in that direction. `Plotly `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -`Plotly’s `__ `Python API `__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. +`Plotly’s `__ `Python API `__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. `QtPandas `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -116,8 +116,8 @@ library enables DataFrame visualization and manipulation in PyQt4 and PySide app IDE ------ -`IPython `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`IPython `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IPython is an interactive command shell and distributed computing environment. IPython tab completion works with Pandas methods and also @@ -221,7 +221,7 @@ This package requires valid credentials for this API (non free). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pandaSDMX is a library to retrieve and acquire statistical data and metadata disseminated in -`SDMX `_ 2.1, an ISO-standard +`SDMX `_ 2.1, an ISO-standard widely used by institutions such as statistics offices, central banks, and international organisations. pandaSDMX can expose datasets and related structural metadata including data flows, code-lists, @@ -230,7 +230,7 @@ or MultiIndexed DataFrames. `fredapi `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -fredapi is a Python interface to the `Federal Reserve Economic Data (FRED) `__ +fredapi is a Python interface to the `Federal Reserve Economic Data (FRED) `__ provided by the Federal Reserve Bank of St. Louis. It works with both the FRED database and ALFRED database that contains point-in-time data (i.e. historic data revisions). fredapi provides a wrapper in Python to the FRED HTTP API, and also provides several convenient methods for parsing and analyzing point-in-time data from ALFRED. 
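As a concrete illustration of the wrapper described above, a hedged sketch of fredapi's public ``Fred`` interface follows; the API key and series id are placeholders you would have to supply yourself:

.. code-block:: python

   from fredapi import Fred

   # placeholder credentials and series id, not values from this document
   fred = Fred(api_key='your-api-key')
   sp500 = fred.get_series('SP500')  # a pandas Series indexed by date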
@@ -316,7 +316,7 @@ Increasingly, packages are being built on top of pandas to address specific need Data validation --------------- -`Engarde `__ +`Engarde `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Engarde is a lightweight library used to explicitly state your assumptions about your datasets diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index f5ac7b77f4db1..d2b88e794e51e 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -14,15 +14,15 @@ pandas: powerful Python data analysis toolkit **Binary Installers:** https://pypi.org/project/pandas -**Source Repository:** http://github.com/pandas-dev/pandas +**Source Repository:** https://github.com/pandas-dev/pandas **Issues & Ideas:** https://github.com/pandas-dev/pandas/issues -**Q&A Support:** http://stackoverflow.com/questions/tagged/pandas +**Q&A Support:** https://stackoverflow.com/questions/tagged/pandas -**Developer Mailing List:** http://groups.google.com/group/pydata +**Developer Mailing List:** https://groups.google.com/forum/#!forum/pydata -**pandas** is a `Python `__ package providing fast, +**pandas** is a `Python `__ package providing fast, flexible, and expressive data structures designed to make working with "relational" or "labeled" data both easy and intuitive. It aims to be the fundamental high-level building block for doing practical, **real world** data @@ -45,7 +45,7 @@ and :class:`DataFrame` (2-dimensional), handle the vast majority of typical use cases in finance, statistics, social science, and many areas of engineering. For R users, :class:`DataFrame` provides everything that R's ``data.frame`` provides and much more. pandas is built on top of `NumPy -`__ and is intended to integrate well within a scientific +`__ and is intended to integrate well within a scientific computing environment with many other 3rd party libraries. Here are just a few of the things that pandas does well: @@ -86,13 +86,13 @@ is the ideal tool for all of these tasks. Some other notes - pandas is **fast**. Many of the low-level algorithmic bits have been - extensively tweaked in `Cython `__ code. However, as with + extensively tweaked in `Cython `__ code. However, as with anything else generalization usually sacrifices performance. So if you focus on one feature for your application you may be able to create a faster specialized tool. - pandas is a dependency of `statsmodels - `__, making it an important part of the + `__, making it an important part of the statistical computing ecosystem in Python. - pandas has been used extensively in production in financial applications. @@ -101,7 +101,7 @@ Some other notes This documentation assumes general familiarity with NumPy. If you haven't used NumPy much or at all, do invest some time in `learning about NumPy - `__ first. + `__ first. See the package overview for more detail about what's in the library. 
diff --git a/doc/source/io.rst b/doc/source/io.rst index 56da4dbea8706..68faefa872c88 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -40,14 +40,14 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like :delim: ; text;`CSV `__;:ref:`read_csv`;:ref:`to_csv` - text;`JSON `__;:ref:`read_json`;:ref:`to_json` + text;`JSON `__;:ref:`read_json`;:ref:`to_json` text;`HTML `__;:ref:`read_html`;:ref:`to_html` text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` - binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` + binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` binary;`SAS `__;:ref:`read_sas`; binary;`Python Pickle Format `__;:ref:`read_pickle`;:ref:`to_pickle` @@ -2273,7 +2273,7 @@ indicate missing values and the subsequent read cannot distinguish the intent. new_df = pd.read_json('test.json', orient='table') print(new_df.index.name) -.. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/ +.. _Table Schema: https://specs.frictionlessdata.io/json-table-schema/ HTML ---- @@ -2301,7 +2301,7 @@ Read a URL with no options: .. ipython:: python - url = 'http://www.fdic.gov/bank/individual/failed/banklist.html' + url = 'https://www.fdic.gov/bank/individual/failed/banklist.html' dfs = pd.read_html(url) dfs @@ -2341,7 +2341,7 @@ You can even pass in an instance of ``StringIO`` if you so desire: that having so many network-accessing functions slows down the documentation build. If you spot an error or an example that doesn't run, please do not hesitate to report it over on `pandas GitHub issues page - `__. + `__. Read a URL and match a table that contains specific text: @@ -2650,16 +2650,16 @@ parse HTML tables in the top-level pandas io function ``read_html``. .. |svm| replace:: **strictly valid markup** -.. _svm: http://validator.w3.org/docs/help.html#validation_basics +.. _svm: https://validator.w3.org/docs/help.html#validation_basics .. |html5lib| replace:: **html5lib** .. _html5lib: https://github.com/html5lib/html5lib-python .. |BeautifulSoup4| replace:: **BeautifulSoup4** -.. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup +.. _BeautifulSoup4: https://www.crummy.com/software/BeautifulSoup .. |lxml| replace:: **lxml** -.. _lxml: http://lxml.de +.. _lxml: https://lxml.de @@ -3143,10 +3143,10 @@ any pickled pandas object (or any other pickled object) from file: for such cases, pickled ``DataFrames``, ``Series`` etc, must be read with ``pd.read_pickle``, rather than ``pickle.load``. - See `here `__ - and `here `__ + See `here `__ + and `here `__ for some examples of compatibility-breaking changes. See - `this question `__ + `this question `__ for a detailed explanation. .. _io.pickle.compression: @@ -3294,7 +3294,7 @@ HDF5 (PyTables) ``HDFStore`` is a dict-like object which reads and writes pandas using the high performance HDF5 format using the excellent `PyTables -`__ library. See the :ref:`cookbook ` +`__ library. See the :ref:`cookbook ` for some advanced strategies .. warning:: @@ -3878,7 +3878,7 @@ Then create the index when finished appending. os.remove('appends.h5') -See `here `__ for how to create a completely-sorted-index (CSI) on an existing store. 
+See `here `__ for how to create a completely-sorted-index (CSI) on an existing store. .. _io.hdf5-query-data-columns: @@ -4151,8 +4151,8 @@ control compression: ``complevel`` and ``complib``. compression to choose depends on your specific needs and data. The list of supported compression libraries: - - `zlib `_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow. - - `lzo `_: Fast compression and decompression. + - `zlib `_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow. + - `lzo `_: Fast compression and decompression. - `bzip2 `_: Good compression rates. - `blosc `_: Fast compression and decompression. @@ -4171,7 +4171,7 @@ control compression: ``complevel`` and ``complib``. compression ratios at the expense of speed. - `blosc:snappy `_: A popular compressor used in many places. - - `blosc:zlib `_: A classic; + - `blosc:zlib `_: A classic; somewhat slower than the previous ones, but achieving better compression ratios. - `blosc:zstd `_: An @@ -4372,7 +4372,7 @@ tables. It is possible to write an ``HDFStore`` object that can easily be imported into ``R`` using the ``rhdf5`` library (`Package website`_). Create a table format store like this: -.. _package website: http://www.bioconductor.org/packages/release/bioc/html/rhdf5.html +.. _package website: https://www.bioconductor.org/packages/release/bioc/html/rhdf5.html .. ipython:: python @@ -4471,7 +4471,7 @@ Performance * A ``PerformanceWarning`` will be raised if you are attempting to store types that will be pickled by PyTables (rather than stored as endemic types). See - `Here `__ + `Here `__ for more information and some solutions. @@ -4579,7 +4579,7 @@ You can specify an ``engine`` to direct the serialization. This can be one of `` If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, then ``pyarrow`` is tried, and falling back to ``fastparquet``. -See the documentation for `pyarrow `__ and `fastparquet `__. +See the documentation for `pyarrow `__ and `fastparquet `__. .. note:: @@ -4681,13 +4681,13 @@ for PostgreSQL or `pymysql `__ for MySQL. For `SQLite `__ this is included in Python's standard library by default. You can find an overview of supported drivers for each SQL dialect in the -`SQLAlchemy docs `__. +`SQLAlchemy docs `__. If SQLAlchemy is not installed, a fallback is only provided for sqlite (and for mysql for backwards compatibility, but this is deprecated and will be removed in a future version). This mode requires a Python database adapter which respect the `Python -DB-API `__. +DB-API `__. See also some :ref:`cookbook examples ` for some advanced strategies. @@ -4709,7 +4709,7 @@ The key functions are: the provided input (database table name or sql query). Table names do not need to be quoted if they have special characters. -In the following example, we use the `SQlite `__ SQL database +In the following example, we use the `SQlite `__ SQL database engine. You can use a temporary SQLite database where data are stored in "memory". @@ -4717,7 +4717,7 @@ To connect with SQLAlchemy you use the :func:`create_engine` function to create object from database URI. You only need to create the engine once per database you are connecting to. For more information on :func:`create_engine` and the URI formatting, see the examples -below and the SQLAlchemy `documentation `__ +below and the SQLAlchemy `documentation `__ .. 
ipython:: python @@ -4930,7 +4930,7 @@ connecting to. # or absolute, starting with a slash: engine = create_engine('sqlite:////absolute/path/to/foo.db') -For more information see the examples the SQLAlchemy `documentation `__ +For more information see the examples the SQLAlchemy `documentation `__ Advanced SQLAlchemy queries @@ -4975,7 +4975,7 @@ Sqlite fallback The use of sqlite is supported without using SQLAlchemy. This mode requires a Python database adapter which respect the `Python -DB-API `__. +DB-API `__. You can create connections like so: @@ -5233,7 +5233,7 @@ xarray_ provides data structures inspired by the pandas ``DataFrame`` for workin with multi-dimensional datasets, with a focus on the netCDF file format and easy conversion to and from pandas. -.. _xarray: http://xarray.pydata.org/ +.. _xarray: https://xarray.pydata.org/ .. _io.perf: diff --git a/doc/source/options.rst b/doc/source/options.rst index cbe0264f442bc..dc4d0da32008c 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -137,7 +137,7 @@ Using startup scripts for the python/ipython environment to import pandas and se $IPYTHONDIR/profile_default/startup More information can be found in the `ipython documentation -`__. An example startup script for pandas is displayed below: +`__. An example startup script for pandas is displayed below: .. code-block:: python diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 6ba9501ba0b5e..b71f4bfa2f3be 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -82,7 +82,7 @@ Getting Support The first stop for pandas issues and ideas is the `Github Issue Tracker `__. If you have a general question, pandas community experts can answer through `Stack Overflow -`__. +`__. Community --------- @@ -92,7 +92,7 @@ the world who contribute their valuable time and energy to help make open source pandas possible. Thanks to `all of our contributors `__. If you're interested in contributing, please -visit `Contributing to pandas webpage `__. +visit `Contributing to pandas webpage `__. pandas is a `NumFOCUS `__ sponsored project. This will help ensure the success of development of pandas as a world-class open-source diff --git a/doc/source/release.rst b/doc/source/release.rst index cd04288dce2c2..af6fc23e12b78 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -20,7 +20,7 @@ Release Notes ************* This is the list of changes to pandas between each release. For full details, -see the commit logs at http://github.com/pandas-dev/pandas +see the commit logs at https://github.com/pandas-dev/pandas **What is it** @@ -33,9 +33,9 @@ analysis / manipulation tool available in any language. **Where to get it** -* Source code: http://github.com/pandas-dev/pandas +* Source code: https://github.com/pandas-dev/pandas * Binary installers on PyPI: https://pypi.org/project/pandas -* Documentation: http://pandas.pydata.org +* Documentation: https://pandas.pydata.org pandas 0.23.2 ------------- @@ -586,7 +586,7 @@ Highlights include: - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `. - New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying categoricals independent of the data, see :ref:`here `. 
-- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `. - Compatibility fixes for pypy, see :ref:`here `. - Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here `. - Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here `) and ``GroupBy.pipe`` (see :ref:`here `). @@ -1171,7 +1171,7 @@ Highlights include: - Sparse data structures gained enhanced support of ``int`` and ``bool`` dtypes, see :ref:`here ` - Comparison operations with ``Series`` no longer ignores the index, see :ref:`here ` for an overview of the API changes. - Introduction of a pandas development API for utility functions, see :ref:`here `. -- Deprecation of ``Panel4D`` and ``PanelND``. We recommend to represent these types of n-dimensional data with the `xarray package `__. +- Deprecation of ``Panel4D`` and ``PanelND``. We recommend to represent these types of n-dimensional data with the `xarray package `__. - Removal of the previously deprecated modules ``pandas.io.data``, ``pandas.io.wb``, ``pandas.tools.rplot``. See the :ref:`v0.19.0 Whatsnew ` overview for an extensive list @@ -1402,7 +1402,7 @@ Highlights include: - Removal of support for positional indexing with floats, which was deprecated since 0.14.0. This will now raise a ``TypeError``, see :ref:`here `. - The ``.to_xarray()`` function has been added for compatibility with the - `xarray package `__, see :ref:`here `. + `xarray package `__, see :ref:`here `. - The ``read_sas`` function has been enhanced to read ``sas7bdat`` files, see :ref:`here `. - Addition of the :ref:`.str.extractall() method `, and API changes to the :ref:`.str.extract() method ` @@ -1757,7 +1757,7 @@ along with several new features, enhancements, and performance improvements. Highlights include: - A new ``pipe`` method, see :ref:`here ` -- Documentation on how to use `numba `_ with *pandas*, see :ref:`here ` +- Documentation on how to use `numba `_ with *pandas*, see :ref:`here ` See the :ref:`v0.16.2 Whatsnew ` overview for an extensive list of all enhancements and bugs that have been fixed in 0.16.2. @@ -1889,9 +1889,9 @@ Highlights include: - Changes to the default for ordering in the ``Categorical`` constructor, see :ref:`here ` - The ``pandas.tools.rplot``, ``pandas.sandbox.qtpandas`` and ``pandas.rpy`` modules are deprecated. We refer users to external packages like - `seaborn `_, + `seaborn `_, `pandas-qt `_ and - `rpy2 `_ for similar or equivalent + `rpy2 `_ for similar or equivalent functionality, see :ref:`here ` See the :ref:`v0.16.0 Whatsnew ` overview or the issue tracker on GitHub for an extensive list diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 381031fa128e6..83c891c0c0e40 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -7,7 +7,7 @@ Tutorials This is a guide to many pandas tutorials, geared mainly for new users. Internal Guides ---------------- +=============== pandas' own :ref:`10 Minutes to pandas<10min>`. @@ -15,6 +15,9 @@ More complex recipes are in the :ref:`Cookbook`. A handy pandas `cheat sheet `_. 
+Community Guides +================ + pandas Cookbook --------------- @@ -200,6 +203,5 @@ Various Tutorials * `Financial analysis in Python, by Thomas Wiecki `_ * `Intro to pandas data structures, by Greg Reda `_ * `Pandas and Python: Top 10, by Manish Amde `_ -* `Pandas Tutorial, by Mikhail Semeniuk `_ * `Pandas DataFrames Tutorial, by Karlijn Willems `_ * `A concise tutorial with real life examples `_ From 58dc180eb339f712fd16740b570454b483a1c86f Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 5 Nov 2018 03:15:05 -0500 Subject: [PATCH 029/122] DOC: Fix DataFrame.nlargest and DataFrame.nsmallest doctests (#23202) --- ci/code_checks.sh | 2 +- pandas/core/frame.py | 189 ++++++++++++++++++++++++------------------- 2 files changed, 105 insertions(+), 86 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 26b7eaca87a04..2e42912965f97 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -131,7 +131,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests frame.py' ; echo $MSG pytest -q --doctest-modules pandas/core/frame.py \ - -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_stata" + -k"-axes -combine -itertuples -join -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_stata" RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests series.py' ; echo $MSG diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f3fd924ee7e6e..7aadf7e735f38 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4694,60 +4694,63 @@ def nlargest(self, n, columns, keep='first'): Examples -------- - >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2], - ... 'b': list('abdcef'), - ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]}) + >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000, + ... 434000, 434000, 337000, 11300, + ... 11300, 11300], + ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128, + ... 17036, 182, 38, 311], + ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN", + ... "IS", "NR", "TV", "AI"]}, + ... index=["Italy", "France", "Malta", + ... "Maldives", "Brunei", "Iceland", + ... "Nauru", "Tuvalu", "Anguilla"]) >>> df - a b c - 0 1 a 1.0 - 1 10 b 2.0 - 2 8 d NaN - 3 11 c 3.0 - 4 8 e 4.0 - 5 2 f 9.0 + population GDP alpha-2 + Italy 59000000 1937894 IT + France 65000000 2583560 FR + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + Iceland 337000 17036 IS + Nauru 11300 182 NR + Tuvalu 11300 38 TV + Anguilla 11300 311 AI In the following example, we will use ``nlargest`` to select the three - rows having the largest values in column "a". + rows having the largest values in column "population". 
- >>> df.nlargest(3, 'a') - a b c - 3 11 c 3.0 - 1 10 b 2.0 - 2 8 d NaN + >>> df.nlargest(3, 'population') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Malta 434000 12011 MT When using ``keep='last'``, ties are resolved in reverse order: - >>> df.nlargest(3, 'a', keep='last') - a b c - 3 11 c 3.0 - 1 10 b 2.0 - 4 8 e 4.0 + >>> df.nlargest(3, 'population', keep='last') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Brunei 434000 12128 BN When using ``keep='all'``, all duplicate items are maintained: - >>> df.nlargest(3, 'a', keep='all') - a b c - 3 11 c 3.0 - 1 10 b 2.0 - 2 8 d NaN - 4 8 e 4.0 + >>> df.nlargest(3, 'population', keep='all') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN - To order by the largest values in column "a" and then "c", we can - specify multiple columns like in the next example. - - >>> df.nlargest(3, ['a', 'c']) - a b c - 4 8 e 4.0 - 3 11 c 3.0 - 1 10 b 2.0 - - Attempting to use ``nlargest`` on non-numeric dtypes will raise a - ``TypeError``: - - >>> df.nlargest(3, 'b') + To order by the largest values in column "population" and then "GDP", + we can specify multiple columns like in the next example. - Traceback (most recent call last): - TypeError: Column 'b' has dtype object, cannot use method 'nlargest' + >>> df.nlargest(3, ['population', 'GDP']) + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Brunei 434000 12128 BN """ return algorithms.SelectNFrame(self, n=n, @@ -4755,15 +4758,23 @@ def nlargest(self, n, columns, keep='first'): columns=columns).nlargest() def nsmallest(self, n, columns, keep='first'): - """Get the rows of a DataFrame sorted by the `n` smallest - values of `columns`. + """ + Return the first `n` rows ordered by `columns` in ascending order. + + Return the first `n` rows with the smallest values in `columns`, in + ascending order. The columns that are not specified are returned as + well, but not used for ordering. + + This method is equivalent to + ``df.sort_values(columns, ascending=True).head(n)``, but more + performant. Parameters ---------- n : int - Number of items to retrieve + Number of items to retrieve. columns : list or str - Column name or names to order by + Column name or names to order by. keep : {'first', 'last', 'all'}, default 'first' Where there are duplicate values: @@ -4778,62 +4789,70 @@ def nsmallest(self, n, columns, keep='first'): ------- DataFrame + See Also + -------- + DataFrame.nlargest : Return the first `n` rows ordered by `columns` in + descending order. + DataFrame.sort_values : Sort DataFrame by the values. + DataFrame.head : Return the first `n` rows without re-ordering. + Examples -------- - >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2], - ... 'b': list('abdcef'), - ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]}) + >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000, + ... 434000, 434000, 337000, 11300, + ... 11300, 11300], + ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128, + ... 17036, 182, 38, 311], + ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN", + ... "IS", "NR", "TV", "AI"]}, + ... index=["Italy", "France", "Malta", + ... "Maldives", "Brunei", "Iceland", + ... 
"Nauru", "Tuvalu", "Anguilla"]) >>> df - a b c - 0 1 a 1.0 - 1 10 b 2.0 - 2 8 d NaN - 3 11 c 3.0 - 4 8 e 4.0 - 5 2 f 9.0 + population GDP alpha-2 + Italy 59000000 1937894 IT + France 65000000 2583560 FR + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + Iceland 337000 17036 IS + Nauru 11300 182 NR + Tuvalu 11300 38 TV + Anguilla 11300 311 AI In the following example, we will use ``nsmallest`` to select the three rows having the smallest values in column "a". - >>> df.nsmallest(3, 'a') - a b c - 0 1 a 1.0 - 5 2 f 9.0 - 2 8 d NaN + >>> df.nsmallest(3, 'population') + population GDP alpha-2 + Nauru 11300 182 NR + Tuvalu 11300 38 TV + Anguilla 11300 311 AI When using ``keep='last'``, ties are resolved in reverse order: - >>> df.nsmallest(3, 'a', keep='last') - a b c - 0 1 a 1.0 - 5 2 f 9.0 - 4 8 e 4.0 + >>> df.nsmallest(3, 'population', keep='last') + population GDP alpha-2 + Anguilla 11300 311 AI + Tuvalu 11300 38 TV + Nauru 11300 182 NR When using ``keep='all'``, all duplicate items are maintained: - >>> df.nsmallest(3, 'a', keep='all') - a b c - 0 1 a 1.0 - 5 2 f 9.0 - 2 8 d NaN - 4 8 e 4.0 + >>> df.nsmallest(3, 'population', keep='all') + population GDP alpha-2 + Nauru 11300 182 NR + Tuvalu 11300 38 TV + Anguilla 11300 311 AI To order by the largest values in column "a" and then "c", we can specify multiple columns like in the next example. - >>> df.nsmallest(3, ['a', 'c']) - a b c - 0 1 a 1.0 - 5 2 f 9.0 - 4 8 e 4.0 - - Attempting to use ``nsmallest`` on non-numeric dtypes will raise a - ``TypeError``: - - >>> df.nsmallest(3, 'b') - - Traceback (most recent call last): - TypeError: Column 'b' has dtype object, cannot use method 'nsmallest' + >>> df.nsmallest(3, ['population', 'GDP']) + population GDP alpha-2 + Tuvalu 11300 38 TV + Nauru 11300 182 NR + Anguilla 11300 311 AI """ return algorithms.SelectNFrame(self, n=n, From 55e2f3d5ef10cb7482ab79e1a6ffc272afe0cfad Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 5 Nov 2018 02:19:28 -0800 Subject: [PATCH 030/122] DOC: Fix syntax error in groupby docs (#23498) Follow-up to gh-23394. --- doc/source/groupby.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 0a896bac0f2d7..17a723e2a2f42 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -133,7 +133,8 @@ but the specified columns .. ipython:: python df2 = df.set_index(['A', 'B']) - grouped = df2.groupby(level=df2.index.names.difference(['B']) + grouped = df2.groupby(level=df2.index.names.difference(['B'])) + grouped.sum() These will split the DataFrame on its index (rows). 
We could also split by the columns: From 5e0969850b49f9e32f8e3a54cf50eb295a570bdf Mon Sep 17 00:00:00 2001 From: Thein Oo Date: Mon, 5 Nov 2018 13:31:33 -0500 Subject: [PATCH 031/122] Run Isort on tests-> util,sereis,arrays (#23501) --- pandas/tests/arrays/categorical/test_missing.py | 6 ++++-- pandas/tests/arrays/categorical/test_sorting.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 5 ++--- pandas/tests/arrays/test_integer.py | 8 ++++---- pandas/tests/series/test_duplicates.py | 5 ++--- pandas/tests/util/test_hashing.py | 2 +- pandas/tests/util/test_testing.py | 8 ++++---- pandas/tests/util/test_util.py | 12 ++++++------ setup.cfg | 11 ----------- 9 files changed, 24 insertions(+), 35 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index c78f02245a5b4..32698d190d93c 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -4,11 +4,13 @@ import numpy as np import pytest -import pandas.util.testing as tm -from pandas import Categorical, Index, isna from pandas.compat import lrange + from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas import Categorical, Index, isna +import pandas.util.testing as tm + class TestCategoricalMissing(object): diff --git a/pandas/tests/arrays/categorical/test_sorting.py b/pandas/tests/arrays/categorical/test_sorting.py index 88edb6c8f1348..922d9fdb788b1 100644 --- a/pandas/tests/arrays/categorical/test_sorting.py +++ b/pandas/tests/arrays/categorical/test_sorting.py @@ -2,8 +2,8 @@ import numpy as np -import pandas.util.testing as tm from pandas import Categorical, Index +import pandas.util.testing as tm class TestCategoricalSort(object): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3d5c810402fba..3fd03a351de7c 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -3,10 +3,9 @@ import pytest import pandas as pd -import pandas.util.testing as tm from pandas.core.arrays import ( - DatetimeArrayMixin, PeriodArray, TimedeltaArrayMixin -) + DatetimeArrayMixin, PeriodArray, TimedeltaArrayMixin) +import pandas.util.testing as tm # TODO: more freq variants diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 41ec2d3026499..ec627c2789d8f 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -2,16 +2,16 @@ import numpy as np import pytest +from pandas.core.dtypes.generic import ABCIndexClass + import pandas as pd -import pandas.util.testing as tm from pandas.api.types import is_float, is_float_dtype, is_integer, is_scalar from pandas.core.arrays import IntegerArray, integer_array from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, - UInt32Dtype, UInt64Dtype -) -from pandas.core.dtypes.generic import ABCIndexClass + UInt32Dtype, UInt64Dtype) from pandas.tests.extension.base import BaseOpsUtil +import pandas.util.testing as tm def make_data(): diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py index 735ecd3917a1b..f41483405f6cc 100644 --- a/pandas/tests/series/test_duplicates.py +++ b/pandas/tests/series/test_duplicates.py @@ -1,10 +1,9 @@ # coding=utf-8 -import pytest - import numpy as np +import pytest -from pandas import Series, Categorical +from pandas import Categorical, Series import pandas.util.testing as tm diff --git 
a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 49f8fa30ecb6a..11dd2e98adda2 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -4,10 +4,10 @@ import pytest import pandas as pd -import pandas.util.testing as tm from pandas import DataFrame, Index, MultiIndex, Series from pandas.core.util.hashing import _hash_scalar, hash_tuple, hash_tuples from pandas.util import hash_array, hash_pandas_object +import pandas.util.testing as tm class TestHashing(object): diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index db2fc5ec868c6..d1dc91f94e3c4 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -6,14 +6,14 @@ import numpy as np import pytest -import pandas as pd import pandas.util._test_decorators as td -import pandas.util.testing as tm + +import pandas as pd from pandas import DataFrame, Series, compat +import pandas.util.testing as tm from pandas.util.testing import ( RNGContext, assert_almost_equal, assert_frame_equal, assert_index_equal, - assert_numpy_array_equal, assert_series_equal, raise_with_traceback -) + assert_numpy_array_equal, assert_series_equal, raise_with_traceback) class TestAssertAlmostEqual(object): diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index 61b3cc526d6d9..032ee5eb22aaa 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -1,23 +1,23 @@ # -*- coding: utf-8 -*- import codecs +from collections import OrderedDict import locale import os import sys -from collections import OrderedDict from uuid import uuid4 import pytest -import pandas.core.common as com -import pandas.util._test_decorators as td -import pandas.util.testing as tm from pandas.compat import PY3, intern from pandas.util._decorators import deprecate_kwarg, make_signature from pandas.util._move import BadMove, move_into_mutable_buffer, stolenbuf +import pandas.util._test_decorators as td from pandas.util._validators import ( validate_args, validate_args_and_kwargs, validate_bool_kwarg, - validate_kwargs -) + validate_kwargs) + +import pandas.core.common as com +import pandas.util.testing as tm class TestDecorators(object): diff --git a/setup.cfg b/setup.cfg index 8a3cfd1551a46..0608585872775 100644 --- a/setup.cfg +++ b/setup.cfg @@ -196,7 +196,6 @@ skip= pandas/tests/indexes/multi/conftest.py, pandas/tests/indexes/multi/test_join.py, pandas/tests/indexes/multi/test_conversion.py, - pandas/tests/indexes/interval/test_construction.py, pandas/tests/indexes/interval/test_interval_new.py, pandas/tests/indexes/interval/test_interval.py, @@ -214,16 +213,6 @@ skip= pandas/tests/indexes/timedeltas/test_partial_slicing.py, pandas/tests/indexes/timedeltas/test_timedelta_range.py, pandas/tests/indexes/timedeltas/test_ops.py, - pandas/tests/series/test_duplicates.py, - pandas/tests/series/indexing/test_callable.py, - pandas/tests/arrays/test_datetimelike.py, - pandas/tests/arrays/test_integer.py, - pandas/tests/arrays/test_interval.py, - pandas/tests/arrays/categorical/test_missing.py, - pandas/tests/arrays/categorical/test_sorting.py, - pandas/tests/util/test_testing.py, - pandas/tests/util/test_util.py, - pandas/tests/util/test_hashing.py, pandas/tests/io/test_clipboard.py, pandas/tests/io/test_compression.py, pandas/tests/io/test_pytables.py, From ed46d6d95f86da987005c1a89b6393c512e84bc8 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 5 Nov 2018 11:10:37 -0800 Subject: [PATCH 032/122] TST: Add test of assignment 
chaining and dupe cols (#23487)

xref gh-13017.
---
 .../indexing/test_chaining_and_caching.py | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index 71fec75f9a7d3..8bc8cb3fb1535 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -337,13 +337,24 @@ def f():
             df2['y'] = ['g', 'h', 'i']
 
     def test_detect_chained_assignment_warnings(self):
+        with option_context("chained_assignment", "warn"):
+            df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
 
-        # warnings
-        with option_context('chained_assignment', 'warn'):
-            df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
-            with tm.assert_produces_warning(
-                    expected_warning=com.SettingWithCopyWarning):
-                df.loc[0]['A'] = 111
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                df.loc[0]["A"] = 111
+
+    def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
+        # xref gh-13017.
+        with option_context("chained_assignment", "warn"):
+            df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
+                              columns=["a", "a", "c"])
+
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                df.c.loc[df.c > 0] = None
+
+            expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
+                                    columns=["a", "a", "c"])
+            tm.assert_frame_equal(df, expected)
 
     def test_chained_getitem_with_lists(self):

From c3f6b8bdf3ba926eac261badbb0048c841550f2b Mon Sep 17 00:00:00 2001
From: Li Jin
Date: Mon, 5 Nov 2018 14:15:00 -0500
Subject: [PATCH 033/122] BUG: Avoid casting to double type unnecessarily when setting values i… (#23462)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 doc/source/whatsnew/v0.24.0.txt         |  1 +
 pandas/core/internals/blocks.py         |  4 ++--
 pandas/tests/indexing/test_timedelta.py | 15 +++++++++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 0fc38b87de7d8..86c3e5a4dc14b 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1210,6 +1210,7 @@ Indexing
 - :class:`Index` no longer mangles ``None``, ``NaN`` and ``NaT``, i.e. they are treated as three different keys.
However, for numeric Index all three are still coerced to a ``NaN`` (:issue:`22332`) - Bug in `scalar in Index` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`) - Bug in `MultiIndex.set_levels` when levels value is not subscriptable (:issue:`23273`) +- Bug where setting a timedelta column by ``Index`` causes it to be casted to double, and therefore lose precision (:issue:`23511`) Missing ^^^^^^^ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5ce8a9103f008..e84953f3dab56 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2104,9 +2104,9 @@ def _box_func(self): def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: - return issubclass(tipo.type, np.timedelta64) + return issubclass(tipo.type, (np.timedelta64, np.int64)) return is_integer(element) or isinstance( - element, (timedelta, np.timedelta64)) + element, (timedelta, np.timedelta64, np.int64)) def fillna(self, value, **kwargs): diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index 29031c908bda4..acd8bee3e5663 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -80,3 +80,18 @@ def test_numpy_timedelta_scalar_indexing(self, start, stop, result = s.loc[slice(start, stop)] expected = s.iloc[expected_slice] tm.assert_series_equal(result, expected) + + def test_roundtrip_thru_setitem(self): + # PR 23462 + dt1 = pd.Timedelta(0) + dt2 = pd.Timedelta(28767471428571405) + df = pd.DataFrame({'dt': pd.Series([dt1, dt2])}) + df_copy = df.copy() + s = pd.Series([dt1]) + + expected = df['dt'].iloc[1].value + df.loc[[True, False]] = s + result = df['dt'].iloc[1].value + + assert expected == result + tm.assert_frame_equal(df, df_copy) From 9a77c2f7c924613caf104a029083189abb4be641 Mon Sep 17 00:00:00 2001 From: Anjana S Date: Tue, 6 Nov 2018 03:16:10 +0530 Subject: [PATCH 034/122] Bumping up min version for pyarrow and fastparquet (#23482) * Bumping up min version for pyarrow --- ci/requirements-optional-conda.txt | 4 +- ci/requirements-optional-pip.txt | 8 +-- ci/travis-27.yaml | 2 +- doc/source/install.rst | 4 +- doc/source/whatsnew/v0.24.0.txt | 6 ++- pandas/io/parquet.py | 78 +++++------------------------- pandas/tests/io/test_parquet.py | 34 ++----------- 7 files changed, 32 insertions(+), 104 deletions(-) diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt index c9dc385b87986..8758c8154abca 100644 --- a/ci/requirements-optional-conda.txt +++ b/ci/requirements-optional-conda.txt @@ -1,7 +1,7 @@ beautifulsoup4>=4.2.1 blosc bottleneck>=1.2.0 -fastparquet +fastparquet>=0.1.2 gcsfs html5lib ipython>=5.6.0 @@ -12,7 +12,7 @@ matplotlib>=2.0.0 nbsphinx numexpr>=2.6.1 openpyxl -pyarrow>=0.4.1 +pyarrow>=0.7.0 pymysql pytables>=3.4.2 pytest-cov diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt index 347ea0d9832b0..62f1c555d8544 100644 --- a/ci/requirements-optional-pip.txt +++ b/ci/requirements-optional-pip.txt @@ -3,7 +3,7 @@ beautifulsoup4>=4.2.1 blosc bottleneck>=1.2.0 -fastparquet +fastparquet>=0.1.2 gcsfs html5lib ipython>=5.6.0 @@ -14,9 +14,9 @@ matplotlib>=2.0.0 nbsphinx numexpr>=2.6.1 openpyxl -pyarrow>=0.4.1 +pyarrow>=0.7.0 pymysql -tables +pytables>=3.4.2 pytest-cov pytest-xdist s3fs @@ -27,4 +27,4 @@ statsmodels xarray xlrd xlsxwriter -xlwt +xlwt \ No newline at end of file diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index 
9641a76152d7b..28bee387a4f4a 100644
--- a/ci/travis-27.yaml
+++ b/ci/travis-27.yaml
@@ -22,7 +22,7 @@ dependencies:
   - patsy
   - psycopg2
   - py
-  - pyarrow=0.4.1
+  - pyarrow=0.7.0
  - PyCrypto
   - pymysql=0.6.3
   - pytables
diff --git a/doc/source/install.rst b/doc/source/install.rst
index b32c5b1145e85..89f7b580303f5 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -258,8 +258,8 @@ Optional Dependencies
 * `SciPy `__: miscellaneous statistical functions, Version 0.18.1 or higher
 * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
 * `PyTables `__: necessary for HDF5-based storage, Version 3.4.2 or higher
-* `pyarrow `__ (>= 0.4.1): necessary for feather-based storage.
-* `Apache Parquet `__, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support.
+* `pyarrow `__ (>= 0.7.0): necessary for feather-based storage.
+* `Apache Parquet `__, either `pyarrow `__ (>= 0.7.0) or `fastparquet `__ (>= 0.1.2) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support.
 * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are:

   * `psycopg2 `__: for PostgreSQL
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 86c3e5a4dc14b..d4046540cb8bb 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -250,7 +250,7 @@ Backwards incompatible API changes
 Dependencies have increased minimum versions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-We have updated our minimum supported versions of dependencies (:issue:`21242`).
+We have updated our minimum supported versions of dependencies (:issue:`21242`, :issue:`18742`).
 If installed, we now require:

 +-----------------+-----------------+----------+
@@ -268,6 +268,10 @@ If installed, we now require:
 +-----------------+-----------------+----------+
 | scipy           | 0.18.1          |          |
 +-----------------+-----------------+----------+
+| pyarrow         | 0.7.0           |          |
++-----------------+-----------------+----------+
+| fastparquet     | 0.1.2           |          |
++-----------------+-----------------+----------+

 Additionally we no longer depend on `feather-format` for feather based storage
 and replaced it with references to `pyarrow` (:issue:`21639` and :issue:`23053`).
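The engine checks tightened in the ``pandas/io/parquet.py`` diff that follows all use the same import-time gate; this stripped-down sketch shows the shape of that check rather than the exact pandas code path:

.. code-block:: python

   from distutils.version import LooseVersion

   try:
       import pyarrow
   except ImportError:
       raise ImportError("pyarrow is required for parquet support")

   # refuse engines older than the new minimum introduced by this patch
   if LooseVersion(pyarrow.__version__) < '0.7.0':
       raise ImportError("pyarrow >= 0.7.0 is required for parquet support")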
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 2c75f46385e86..160a26533fb89 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -5,7 +5,7 @@ from pandas.compat import string_types -from pandas import DataFrame, Int64Index, RangeIndex, get_option +from pandas import DataFrame, get_option import pandas.core.common as com from pandas.io.common import get_filepath_or_buffer, is_s3_url @@ -89,29 +89,20 @@ def __init__(self): "\nor via pip\n" "pip install -U pyarrow\n" ) - if LooseVersion(pyarrow.__version__) < '0.4.1': + if LooseVersion(pyarrow.__version__) < '0.7.0': raise ImportError( - "pyarrow >= 0.4.1 is required for parquet support\n\n" + "pyarrow >= 0.7.0 is required for parquet support\n\n" "you can install via conda\n" "conda install pyarrow -c conda-forge\n" "\nor via pip\n" "pip install -U pyarrow\n" ) - self._pyarrow_lt_060 = ( - LooseVersion(pyarrow.__version__) < LooseVersion('0.6.0')) - self._pyarrow_lt_070 = ( - LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0')) - self.api = pyarrow def write(self, df, path, compression='snappy', coerce_timestamps='ms', index=None, **kwargs): self.validate_dataframe(df) - - # Only validate the index if we're writing it. - if self._pyarrow_lt_070 and index is not False: - self._validate_write_lt_070(df) path, _, _, _ = get_filepath_or_buffer(path, mode='wb') if index is None: @@ -119,27 +110,17 @@ def write(self, df, path, compression='snappy', else: from_pandas_kwargs = {'preserve_index': index} - if self._pyarrow_lt_060: - table = self.api.Table.from_pandas(df, timestamps_to_ms=True, - **from_pandas_kwargs) - self.api.parquet.write_table( - table, path, compression=compression, **kwargs) - - else: - table = self.api.Table.from_pandas(df, **from_pandas_kwargs) - self.api.parquet.write_table( - table, path, compression=compression, - coerce_timestamps=coerce_timestamps, **kwargs) + table = self.api.Table.from_pandas(df, **from_pandas_kwargs) + self.api.parquet.write_table( + table, path, compression=compression, + coerce_timestamps=coerce_timestamps, **kwargs) def read(self, path, columns=None, **kwargs): path, _, _, should_close = get_filepath_or_buffer(path) - if self._pyarrow_lt_070: - result = self.api.parquet.read_pandas(path, columns=columns, - **kwargs).to_pandas() - else: - kwargs['use_pandas_metadata'] = True - result = self.api.parquet.read_table(path, columns=columns, - **kwargs).to_pandas() + + kwargs['use_pandas_metadata'] = True + result = self.api.parquet.read_table(path, columns=columns, + **kwargs).to_pandas() if should_close: try: path.close() @@ -148,39 +129,6 @@ def read(self, path, columns=None, **kwargs): return result - def _validate_write_lt_070(self, df): - # Compatibility shim for pyarrow < 0.7.0 - # TODO: Remove in pandas 0.23.0 - from pandas.core.indexes.multi import MultiIndex - if isinstance(df.index, MultiIndex): - msg = ( - "Multi-index DataFrames are only supported " - "with pyarrow >= 0.7.0" - ) - raise ValueError(msg) - # Validate index - if not isinstance(df.index, Int64Index): - msg = ( - "pyarrow < 0.7.0 does not support serializing {} for the " - "index; you can .reset_index() to make the index into " - "column(s), or install the latest version of pyarrow or " - "fastparquet." 
- ) - raise ValueError(msg.format(type(df.index))) - if not df.index.equals(RangeIndex(len(df))): - raise ValueError( - "pyarrow < 0.7.0 does not support serializing a non-default " - "index; you can .reset_index() to make the index into " - "column(s), or install the latest version of pyarrow or " - "fastparquet." - ) - if df.index.name is not None: - raise ValueError( - "pyarrow < 0.7.0 does not serialize indexes with a name; you " - "can set the index.name to None or install the latest version " - "of pyarrow or fastparquet." - ) - class FastParquetImpl(BaseImpl): @@ -197,9 +145,9 @@ def __init__(self): "\nor via pip\n" "pip install -U fastparquet" ) - if LooseVersion(fastparquet.__version__) < '0.1.0': + if LooseVersion(fastparquet.__version__) < '0.1.2': raise ImportError( - "fastparquet >= 0.1.0 is required for parquet " + "fastparquet >= 0.1.2 is required for parquet " "support\n\n" "you can install via conda\n" "conda install fastparquet -c conda-forge\n" diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 4c58d8ce29d8b..3b3e7f757bf60 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -41,22 +41,6 @@ def engine(request): @pytest.fixture def pa(): - if not _HAVE_PYARROW: - pytest.skip("pyarrow is not installed") - return 'pyarrow' - - -@pytest.fixture -def pa_lt_070(): - if not _HAVE_PYARROW: - pytest.skip("pyarrow is not installed") - if LooseVersion(pyarrow.__version__) >= LooseVersion('0.7.0'): - pytest.skip("pyarrow is >= 0.7.0") - return 'pyarrow' - - -@pytest.fixture -def pa_ge_070(): if not _HAVE_PYARROW: pytest.skip("pyarrow is not installed") if LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'): @@ -337,9 +321,9 @@ def test_write_index(self, engine): df.index.name = 'foo' check_round_trip(df, engine) - def test_write_multiindex(self, pa_ge_070): + def test_write_multiindex(self, pa): # Not supported in fastparquet as of 0.1.3 or older pyarrow version - engine = pa_ge_070 + engine = pa df = pd.DataFrame({'A': [1, 2, 3]}) index = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]) @@ -352,8 +336,8 @@ def test_write_column_multiindex(self, engine): df = pd.DataFrame(np.random.randn(4, 3), columns=mi_columns) self.check_error_on_write(df, engine, ValueError) - def test_multiindex_with_columns(self, pa_ge_070): - engine = pa_ge_070 + def test_multiindex_with_columns(self, pa): + engine = pa dates = pd.date_range('01-Jan-2018', '01-Dec-2018', freq='MS') df = pd.DataFrame(np.random.randn(2 * len(dates), 3), columns=list('ABC')) @@ -456,8 +440,7 @@ def test_unsupported(self, pa): # older pyarrows raise ArrowInvalid self.check_error_on_write(df, pa, Exception) - def test_categorical(self, pa_ge_070): - pa = pa_ge_070 + def test_categorical(self, pa): # supported in >= 0.7.0 df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) @@ -466,13 +449,6 @@ def test_categorical(self, pa): expected = df.assign(a=df.a.astype(object)) check_round_trip(df, pa, expected=expected) - def test_categorical_unsupported(self, pa_lt_070): - pa = pa_lt_070 - - # supported in >= 0.7.0 - df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) - self.check_error_on_write(df, pa, NotImplementedError) - def test_s3_roundtrip(self, df_compat, s3_resource, pa): # GH #19134 check_round_trip(df_compat, pa, From feede9993ee041439333d2dd4929b25477a4cb9a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Nov 2018 16:23:46 -0800 Subject: [PATCH 035/122] CI: Fixed pytest minversion (#23520) --- setup.cfg | 1 - 1 file
changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 0608585872775..2362ef05733e0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,6 @@ split_penalty_after_opening_bracket = 1000000 split_penalty_logical_operator = 30 [tool:pytest] -minversion = 3.6 testpaths = pandas markers = single: mark a test as single cpu only From 32c22d49a5f707ef1f82e6522b037da45c397d03 Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Mon, 5 Nov 2018 17:49:38 -0800 Subject: [PATCH 036/122] CI: Exclude asv benchmark envs from flake8 checks (#23497) --- setup.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 2362ef05733e0..4068935d9970f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,7 +27,8 @@ exclude = doc/build/*.py, doc/temp/*.py, .eggs/*.py, - versioneer.py + versioneer.py, + env # exclude asv benchmark environments from linting [yapf] based_on_style = pep8 From b34cfff8280e4545998f4e5cd2ad2d6486a41619 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Nov 2018 19:11:52 -0800 Subject: [PATCH 037/122] Extraneous parts broken off from other PRS (#23518) --- pandas/core/arrays/datetimelike.py | 9 ++++----- pandas/core/arrays/datetimes.py | 2 ++ pandas/core/arrays/period.py | 5 ++++- pandas/core/indexes/datetimes.py | 31 +++++++++++++++++++++--------- pandas/core/indexes/timedeltas.py | 7 +++++-- 5 files changed, 37 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 95b997fae6b6c..58044aeb7d84c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -12,7 +12,8 @@ from pandas._libs.tslibs.period import ( Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) -from pandas.errors import NullFrequencyError, PerformanceWarning +from pandas.errors import ( + AbstractMethodError, NullFrequencyError, PerformanceWarning) from pandas import compat from pandas.tseries import frequencies @@ -78,12 +79,10 @@ class AttributesMixin(object): @property def _attributes(self): # Inheriting subclass should implement _attributes as a list of strings - from pandas.errors import AbstractMethodError raise AbstractMethodError(self) @classmethod def _simple_new(cls, values, **kwargs): - from pandas.errors import AbstractMethodError raise AbstractMethodError(cls) def _get_attributes_dict(self): @@ -108,7 +107,7 @@ def _box_func(self): """ box function to get object from internal representation """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _box_values(self, values): """ @@ -337,7 +336,7 @@ def _sub_period(self, other): .format(cls=type(self).__name__)) def _add_offset(self, offset): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _add_delta(self, other): """ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 0258e1e6e5973..e7edd54c4177b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -170,6 +170,8 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): # Constructors _attributes = ["freq", "tz"] + _tz = None + _freq = None @classmethod def _simple_new(cls, values, freq=None, tz=None, **kwargs): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 90e7beac63427..ea7eeb7fc9f8e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -165,7 +165,10 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # 
Constructors - def __init__(self, values, freq=None, copy=False): + + def __init__(self, values, freq=None, dtype=None, copy=False): + freq = dtl.validate_dtype_freq(dtype, freq) + if freq is not None: freq = Period._maybe_convert_freq(freq) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 210bdabbd9dd7..bd6f0c68a9aa5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -182,7 +182,6 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, """ _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget) - _shallow_copy = Index._shallow_copy _typ = 'datetimeindex' _join_precedence = 10 @@ -199,11 +198,15 @@ def _join_i8_wrapper(joinf, **kwargs): _engine_type = libindex.DatetimeEngine - tz = None + _tz = None _freq = None _comparables = ['name', 'freqstr', 'tz'] _attributes = ['name', 'freq', 'tz'] + # dummy attribute so that datetime.__eq__(DatetimeArray) defers + # by returning NotImplemented + timetuple = None + # define my properties & methods for delegation _bool_ops = ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', @@ -226,6 +229,9 @@ def _join_i8_wrapper(joinf, **kwargs): _timezone = cache_readonly(DatetimeArrayMixin._timezone.fget) is_normalized = cache_readonly(DatetimeArrayMixin.is_normalized.fget) + # -------------------------------------------------------------------- + # Constructors + def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, tz=None, normalize=False, closed=None, ambiguous='raise', @@ -280,7 +286,7 @@ def __new__(cls, data=None, data = data.tz_localize(tz, ambiguous=ambiguous) else: # the tz's must match - if str(tz) != str(data.tz): + if not timezones.tz_compare(tz, data.tz): msg = ('data is already tz-aware {0}, unable to ' 'set specified tz: {1}') raise TypeError(msg.format(data.tz, tz)) @@ -327,12 +333,6 @@ def __new__(cls, data=None, return subarr._deepcopy_if_needed(ref_to_data, copy) - def _convert_for_op(self, value): - """ Convert value to be insertable to ndarray """ - if self._has_same_tz(value): - return _to_m8(value) - raise ValueError('Passed item and index have different timezone') - @classmethod def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None, **kwargs): @@ -349,6 +349,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, result._reset_identity() return result + # -------------------------------------------------------------------- + @property def _values(self): # tz-naive -> ndarray @@ -448,6 +450,12 @@ def __setstate__(self, state): raise Exception("invalid pickle state") _unpickle_compat = __setstate__ + def _convert_for_op(self, value): + """ Convert value to be insertable to ndarray """ + if self._has_same_tz(value): + return _to_m8(value) + raise ValueError('Passed item and index have different timezone') + def _maybe_update_attributes(self, attrs): """ Update Index attributes (e.g. 
freq) depending on op """ freq = attrs.get('freq', None) @@ -1104,6 +1112,9 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): else: raise + # -------------------------------------------------------------------- + # Wrapping DatetimeArray + year = wrap_field_accessor(DatetimeArrayMixin.year) month = wrap_field_accessor(DatetimeArrayMixin.month) day = wrap_field_accessor(DatetimeArrayMixin.day) @@ -1142,6 +1153,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): month_name = wrap_array_method(DatetimeArrayMixin.month_name, True) day_name = wrap_array_method(DatetimeArrayMixin.day_name, True) + # -------------------------------------------------------------------- + @Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) def searchsorted(self, value, side='left', sorter=None): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 22ecefae8cbe2..33361c851a4c5 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -209,8 +209,6 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): result._reset_identity() return result - _shallow_copy = Index._shallow_copy @property def _formatter_func(self): from pandas.io.formats.format import _get_format_timedelta64 @@ -243,6 +241,9 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): nat_rep=na_rep, justify='all').get_result() + # ------------------------------------------------------------------- + # Wrapping TimedeltaArray + days = wrap_field_accessor(TimedeltaArrayMixin.days) seconds = wrap_field_accessor(TimedeltaArrayMixin.seconds) microseconds = wrap_field_accessor(TimedeltaArrayMixin.microseconds) @@ -250,6 +251,8 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): total_seconds = wrap_array_method(TimedeltaArrayMixin.total_seconds, True) + # ------------------------------------------------------------------- + @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) From 53dad83c33cdb17dbb460d75275c3fbb5ca730de Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 6 Nov 2018 03:17:50 +0000 Subject: [PATCH 038/122] Fixing typo in cython casting lint, and making it azure friendly (#23486) --- ci/code_checks.sh | 2 +- pandas/_libs/algos.pyx | 4 ++-- pandas/_libs/groupby.pyx | 4 ++-- pandas/_libs/hashing.pyx | 4 ++-- pandas/_libs/hashtable_class_helper.pxi.in | 10 +++++----- pandas/_libs/parsers.pyx | 22 +++++++++++----------- pandas/_libs/reduction.pyx | 4 ++-- pandas/_libs/sparse.pyx | 6 +++--- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 2 +- pandas/_libs/window.pyx | 8 ++++---- 11 files changed, 34 insertions(+), 34 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2e42912965f97..330901ba56fbd 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -49,7 +49,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then # Note: this grep pattern is (intended to be) equivalent to the python # regex r'(?<![ ->])> ' MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG - !
grep -r -E --include '*.pyx' --include '*.pxi.in' '[a-zA-Z0-9*]> ' pandas/_libs RET=$(($RET + $?)) ; echo $MSG "DONE" # readability/casting: Warnings about C casting instead of C++ casting diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 24828db64c392..d675ceab13667 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -128,11 +128,11 @@ def is_lexsorted(list_of_arrays: list) -> bint: nlevels = len(list_of_arrays) n = len(list_of_arrays[0]) - cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) + cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) for i in range(nlevels): arr = list_of_arrays[i] assert arr.dtype.name == 'int64' - vecs[i] = cnp.PyArray_DATA(arr) + vecs[i] = cnp.PyArray_DATA(arr) # Assume uniqueness?? with nogil: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index c72b4001dcb79..9e758700811a8 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -44,7 +44,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: if na_count == n: return NaN - tmp = malloc((n - na_count) * sizeof(float64_t)) + tmp = malloc((n - na_count) * sizeof(float64_t)) j = 0 for i in range(n): @@ -121,7 +121,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, counts[:] = _counts[1:] data = np.empty((K, N), dtype=np.float64) - ptr = cnp.PyArray_DATA(data) + ptr = cnp.PyArray_DATA(data) take_2d_axis1_float64_float64(values.T, indexer, out=data) diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index c2305c8f3ff00..6e66693decc01 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -54,8 +54,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): n = len(arr) # create an array of bytes - vecs = malloc(n * sizeof(char *)) - lens = malloc(n * sizeof(uint64_t)) + vecs = malloc(n * sizeof(char *)) + lens = malloc(n * sizeof(uint64_t)) for i in range(n): val = arr[i] diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index affb6a038074a..36ed8a88aa78b 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -590,13 +590,13 @@ cdef class StringHashTable(HashTable): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] labels = np.empty(n, dtype=np.int64) - int64_t *resbuf = labels.data + int64_t *resbuf = labels.data khiter_t k kh_str_t *table = self.table const char *v const char **vecs - vecs = malloc(n * sizeof(char *)) + vecs = malloc(n * sizeof(char *)) for i in range(n): val = values[i] v = util.get_c_string(val) @@ -639,7 +639,7 @@ cdef class StringHashTable(HashTable): const char *v const char **vecs - vecs = malloc(n * sizeof(char *)) + vecs = malloc(n * sizeof(char *)) uindexer = np.empty(n, dtype=np.int64) for i in range(n): val = values[i] @@ -674,7 +674,7 @@ cdef class StringHashTable(HashTable): int64_t[:] locs = np.empty(n, dtype=np.int64) # these by-definition *must* be strings - vecs = malloc(n * sizeof(char *)) + vecs = malloc(n * sizeof(char *)) for i in range(n): val = values[i] @@ -707,7 +707,7 @@ cdef class StringHashTable(HashTable): khiter_t k # these by-definition *must* be strings - vecs = malloc(n * sizeof(char *)) + vecs = malloc(n * sizeof(char *)) for i in range(n): val = values[i] diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 027a4e36204dc..a2a718aa8b591 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -361,7 +361,7 @@ cdef class TextReader: if not isinstance(encoding, bytes): 
encoding = encoding.encode('utf-8') encoding = encoding.lower() - self.c_encoding = encoding + self.c_encoding = encoding else: self.c_encoding = NULL @@ -611,7 +611,7 @@ cdef class TextReader: for i in self.skiprows: parser_add_skiprow(self.parser, i) else: - self.parser.skipfunc = self.skiprows + self.parser.skipfunc = self.skiprows cdef _setup_parser_source(self, source): cdef: @@ -668,7 +668,7 @@ cdef class TextReader: source = icom.UTF8Recoder(source, self.encoding.decode('utf-8')) self.encoding = b'utf-8' - self.c_encoding = self.encoding + self.c_encoding = self.encoding self.handle = source @@ -1444,7 +1444,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col, pyval = PyBytes_FromString(word) k = kh_put_strbox(table, word, &ret) - table.vals[k] = pyval + table.vals[k] = pyval result[i] = pyval @@ -1498,7 +1498,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col, pyval = PyUnicode_FromString(word) k = kh_put_strbox(table, word, &ret) - table.vals[k] = pyval + table.vals[k] = pyval result[i] = pyval @@ -1556,7 +1556,7 @@ cdef _string_box_decode(parser_t *parser, int64_t col, pyval = PyUnicode_Decode(word, size, encoding, errors) k = kh_put_strbox(table, word, &ret) - table.vals[k] = pyval + table.vals[k] = pyval result[i] = pyval @@ -1648,7 +1648,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, ndarray result result = np.empty(line_end - line_start, dtype='|S%d' % width) - data = result.data + data = result.data with nogil: _to_fw_string_nogil(parser, col, line_start, line_end, width, data) @@ -1695,7 +1695,7 @@ cdef _try_double(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.float64) - data = result.data + data = result.data na_fset = kset_float64_from_list(na_flist) if parser.double_converter_nogil != NULL: # if it can run without the GIL with nogil: @@ -1803,7 +1803,7 @@ cdef _try_uint64(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.uint64) - data = result.data + data = result.data uint_state_init(&state) coliter_setup(&it, parser, col, line_start) @@ -1879,7 +1879,7 @@ cdef _try_int64(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.int64) - data = result.data + data = result.data coliter_setup(&it, parser, col, line_start) with nogil: error = _try_int64_nogil(parser, col, line_start, line_end, @@ -1951,7 +1951,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.uint8) - data = result.data + data = result.data with nogil: error = _try_bool_flex_nogil(parser, col, line_start, line_end, na_filter, na_hashset, true_hashset, diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 951c163522401..6f892c928805e 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -467,7 +467,7 @@ cdef class Slider: self.buf.strides[0] = self.stride cpdef advance(self, Py_ssize_t k): - self.buf.data = self.buf.data + self.stride * k + self.buf.data = self.buf.data + self.stride * k cdef move(self, int start, int end): """ @@ -572,7 +572,7 @@ cdef class BlockSlider: self.idx_slider = Slider( self.frame.index.values, self.dummy.index.values) - self.base_ptrs = malloc(sizeof(char*) * len(self.blocks)) + self.base_ptrs = malloc(sizeof(char*) * len(self.blocks)) for i, block in enumerate(self.blocks): self.base_ptrs[i] = (block).data diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 
67698f1b4c2ca..bfb03ef307355 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -342,8 +342,8 @@ cdef class BlockIndex(SparseIndex): self.blengths = np.ascontiguousarray(blengths, dtype=np.int32) # in case we need - self.locbuf = self.blocs.data - self.lenbuf = self.blengths.data + self.locbuf = self.blocs.data + self.lenbuf = self.blengths.data self.length = length self.nblocks = np.int32(len(self.blocs)) @@ -853,7 +853,7 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map): # SparseIndex index): # self.index = index -# self.buf = values.data +# self.buf = values.data def reindex_integer(ndarray[float64_t, ndim=1] values, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 8cf42bf93eb2c..5945a32a0e228 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -920,7 +920,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, trans, deltas, typ = get_dst_info(tz) - tdata = cnp.PyArray_DATA(trans) + tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) # Determine whether each date lies left of the DST transition (store in diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a769bbb081398..ebcbea0ee30b3 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1260,7 +1260,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): fmt = fmt.replace(pat, repl) found_pat[i] = True - formatted = c_strftime(&dts, fmt) + formatted = c_strftime(&dts, fmt) result = util.char_to_string(formatted) free(formatted) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 8de2852942865..bb7af67d14585 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1611,17 +1611,17 @@ def roll_generic(object obj, output[i] = NaN # remaining full-length windows - buf = arr.data + buf = arr.data bufarr = np.empty(win, dtype=float) - oldbuf = bufarr.data + oldbuf = bufarr.data for i from (win - offset) <= i < (N - offset): buf = buf + 1 - bufarr.data = buf + bufarr.data = buf if counts[i] >= minp: output[i] = func(bufarr, *args, **kwargs) else: output[i] = NaN - bufarr.data = oldbuf + bufarr.data = oldbuf # truncated windows at the end for i from int_max(N - offset, 0) <= i < N: From 510ba4e2af407b2fa38b622c7c720d018281d41c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Nov 2018 19:18:46 -0800 Subject: [PATCH 039/122] CI: Unpin NumPy (#23465) --- ci/travis-37-numpydev.yaml | 2 +- pandas/core/groupby/generic.py | 7 +++++-- pandas/util/_test_decorators.py | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ci/travis-37-numpydev.yaml b/ci/travis-37-numpydev.yaml index 957941b7379aa..82c75b7c91b1f 100644 --- a/ci/travis-37-numpydev.yaml +++ b/ci/travis-37-numpydev.yaml @@ -13,5 +13,5 @@ dependencies: - "git+git://github.com/dateutil/dateutil.git" - "-f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com" - "--pre" - - "numpy<=1.16.0.dev0+20181015190246" + - "numpy" - "scipy" diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3ed80d266ce4d..5d9a5616e133b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -410,7 +410,9 @@ def first_not_none(values): if (isinstance(v.index, MultiIndex) or key_index is None or isinstance(key_index, MultiIndex)): - stacked_values = np.vstack(map(np.asarray, values)) + stacked_values = np.vstack([ + np.asarray(v) for v in 
values + ]) result = DataFrame(stacked_values, index=key_index, columns=index) else: @@ -422,7 +424,8 @@ def first_not_none(values): axis=self.axis).unstack() result.columns = index else: - stacked_values = np.vstack(map(np.asarray, values)) + stacked_values = np.vstack([np.asarray(v) + for v in values]) result = DataFrame(stacked_values.T, index=v.index, columns=key_index) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 52a6740f119b7..3f8332ade4487 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -104,8 +104,10 @@ def _skip_if_not_us_locale(): def _skip_if_no_scipy(): - return not (safe_import('scipy.stats') and safe_import('scipy.sparse') and - safe_import('scipy.interpolate')) + return not (safe_import('scipy.stats') and + safe_import('scipy.sparse') and + safe_import('scipy.interpolate') and + safe_import('scipy.signal')) def _skip_if_no_lzma(): From 6a88f0ebb186a90863ceba3cda35037688ee9221 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 5 Nov 2018 19:19:50 -0800 Subject: [PATCH 040/122] TST: Add test for mangling of unnamed columns (#23485) xref gh-13017. --- pandas/tests/io/parser/mangle_dupes.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/io/parser/mangle_dupes.py b/pandas/tests/io/parser/mangle_dupes.py index 1ebfa9cb0f645..56d59060cc17d 100644 --- a/pandas/tests/io/parser/mangle_dupes.py +++ b/pandas/tests/io/parser/mangle_dupes.py @@ -86,3 +86,22 @@ def test_thorough_mangle_names(self): mangle_dupe_cols=True) assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"] + + def test_mangled_unnamed_placeholders(self): + # xref gh-13017 + orig_key = "0" + orig_value = [1, 2, 3] + + df = DataFrame({orig_key: orig_value}) + + # This test recursively updates `df`. 
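+        # Spelled out (a sketch that simply restates the expected frame
+        # built below): each to_csv() call writes the RangeIndex as a
+        # first column with an empty header, and read_csv() turns that
+        # header into a placeholder, mangling collisions with ".1":
+        #   pass 0 -> columns ["Unnamed: 0", "0"]
+        #   pass 1 -> columns ["Unnamed: 0", "Unnamed: 0.1", "0"]
+        #   pass 2 -> columns ["Unnamed: 0", "Unnamed: 0.1",
+        #                      "Unnamed: 0.1.1", "0"]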
+ for i in range(3): + expected = DataFrame() + + for j in range(i + 1): + expected["Unnamed: 0" + ".1" * j] = [0, 1, 2] + + expected[orig_key] = orig_value + df = self.read_csv(StringIO(df.to_csv())) + + tm.assert_frame_equal(df, expected) From 37dd36f632a3af806d329b9fb9bd71b603b11605 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Nov 2018 19:23:46 -0800 Subject: [PATCH 041/122] PERF: cython optimizations (#23477) --- pandas/_libs/algos.pyx | 4 ++-- pandas/_libs/algos_rank_helper.pxi.in | 4 ++-- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/groupby_helper.pxi.in | 22 +++++++++++----------- pandas/_libs/join.pyx | 2 +- pandas/_libs/lib.pyx | 6 +++--- pandas/_libs/missing.pyx | 4 ++-- pandas/_libs/sparse.pyx | 8 ++++---- pandas/_libs/tslibs/conversion.pyx | 16 +++++++++------- pandas/_libs/tslibs/frequencies.pyx | 12 ++++++++---- pandas/_libs/tslibs/offsets.pyx | 3 ++- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/_libs/writers.pyx | 16 ++++++++-------- 13 files changed, 55 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index d675ceab13667..075e2c5129579 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -409,7 +409,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): nleft = len(old) nright = len(new) indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) + indexer[:] = -1 if limit is None: lim = nright @@ -607,7 +607,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): nleft = len(old) nright = len(new) indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) + indexer[:] = -1 if limit is None: lim = nright diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index e13f87d15aace..fcb052e8be63b 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for rank WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # rank_1d, rank_2d -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 9e758700811a8..83ded64b742ed 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -370,7 +370,7 @@ def group_any_all(ndarray[uint8_t] out, else: raise ValueError("'bool_func' must be either 'any' or 'all'!") - out.fill(1 - flag_val) + out[:] = 1 - flag_val with nogil: for i in range(N): diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 0917453e3f864..484a4b069305f 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -8,9 +8,9 @@ cdef extern from "numpy/npy_math.h": double NAN "NPY_NAN" _int64_max = np.iinfo(np.int64).max -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group_add, group_prod, group_var, group_mean, group_ohlc -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: @@ -246,7 +246,7 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, if K > 1: raise NotImplementedError("Argument 'values' must have only " "one 
dimension") - out.fill(np.nan) + out[:] = np.nan with nogil: for i in range(N): @@ -629,10 +629,10 @@ def group_max(ndarray[groupby_t, ndim=2] out, maxx = np.empty_like(out) if groupby_t is int64_t: # Note: evaluated at compile-time - maxx.fill(-_int64_max) + maxx[:] = -_int64_max nan_val = iNaT else: - maxx.fill(-np.inf) + maxx[:] = -np.inf nan_val = NAN N, K = (values).shape @@ -691,10 +691,10 @@ def group_min(ndarray[groupby_t, ndim=2] out, minx = np.empty_like(out) if groupby_t is int64_t: - minx.fill(_int64_max) + minx[:] = _int64_max nan_val = iNaT else: - minx.fill(np.inf) + minx[:] = np.inf nan_val = NAN N, K = (values).shape @@ -747,9 +747,9 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, N, K = (values).shape accum = np.empty_like(values) if groupby_t is int64_t: - accum.fill(_int64_max) + accum[:] = _int64_max else: - accum.fill(np.inf) + accum[:] = np.inf with nogil: for i in range(N): @@ -795,9 +795,9 @@ def group_cummax(ndarray[groupby_t, ndim=2] out, N, K = (values).shape accum = np.empty_like(values) if groupby_t is int64_t: - accum.fill(-_int64_max) + accum[:] = -_int64_max else: - accum.fill(-np.inf) + accum[:] = -np.inf with nogil: for i in range(N): diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 04c2f222b14ad..748f3f265dd34 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -212,7 +212,7 @@ def _get_result_indexer(sorter, indexer): else: # length-0 case res = np.empty(len(indexer), dtype=np.int64) - res.fill(-1) + res[:] = -1 return res diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5907f76c20853..a9e0fcbc4a826 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -347,7 +347,7 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): int64_t idx rev_indexer = np.empty(length, dtype=np.int64) - rev_indexer.fill(-1) + rev_indexer[:] = -1 for i in range(n): idx = indexer[i] if idx != -1: @@ -1670,7 +1670,7 @@ cdef class TimedeltaValidator(TemporalValidator): # TODO: Not used outside of tests; remove? -def is_timedelta_array(values: ndarray) -> bint: +def is_timedelta_array(values: ndarray) -> bool: cdef: TimedeltaValidator validator = TimedeltaValidator(len(values), skipna=True) @@ -1683,7 +1683,7 @@ cdef class Timedelta64Validator(TimedeltaValidator): # TODO: Not used outside of tests; remove? 
-def is_timedelta64_array(values: ndarray) -> bint: +def is_timedelta64_array(values: ndarray) -> bool: cdef: Timedelta64Validator validator = Timedelta64Validator(len(values), skipna=True) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index d6786a96871bd..b8791359241ad 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -278,14 +278,14 @@ def isnaobj2d_old(ndarray arr): return result.view(np.bool_) -cpdef bint isposinf_scalar(object val): +def isposinf_scalar(val: object) -> bool: if util.is_float_object(val) and val == INF: return True else: return False -cpdef bint isneginf_scalar(object val): +def isneginf_scalar(val: object) -> bool: if util.is_float_object(val) and val == NEGINF: return True else: diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index bfb03ef307355..b8ca744ac88c4 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -221,7 +221,7 @@ cdef class IntIndex(SparseIndex): n = len(indexer) results = np.empty(n, dtype=np.int32) - results.fill(-1) + results[:] = -1 if self.npoints == 0: return results @@ -250,9 +250,9 @@ cdef class IntIndex(SparseIndex): sinds = self.indices result = np.empty(other.npoints, dtype=np.float64) - result.fill(fill_value) + result[:] = fill_value - for 0 <= i < other.npoints: + for i in range(other.npoints): while oinds[i] > sinds[j] and j < self.npoints: j += 1 @@ -582,7 +582,7 @@ cdef class BlockIndex(SparseIndex): n = len(indexer) results = np.empty(n, dtype=np.int32) - results.fill(-1) + results[:] = -1 if self.npoints == 0: return results diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5945a32a0e228..f88671b41a16a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -869,10 +869,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, """ cdef: ndarray[int64_t] trans - int64_t[:] deltas, idx_shifted + int64_t[:] deltas, idx_shifted, idx_shifted_left, idx_shifted_right ndarray ambiguous_array Py_ssize_t i, idx, pos, ntrans, n = len(vals) - Py_ssize_t delta_idx_offset, delta_idx + Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t *tdata int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins ndarray[int64_t] result, result_a, result_b, dst_hours @@ -927,8 +927,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, # result_a) or right of the DST transition (store in result_b) result_a = np.empty(n, dtype=np.int64) result_b = np.empty(n, dtype=np.int64) - result_a.fill(NPY_NAT) - result_b.fill(NPY_NAT) + result_a[:] = NPY_NAT + result_b[:] = NPY_NAT idx_shifted_left = (np.maximum(0, trans.searchsorted( vals - DAY_NS, side='right') - 1)).astype(np.int64) @@ -952,7 +952,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if infer_dst: dst_hours = np.empty(n, dtype=np.int64) - dst_hours.fill(NPY_NAT) + dst_hours[:] = NPY_NAT # Get the ambiguous hours (given the above, these are the hours # where result_a != result_b and neither of them are NAT) @@ -1045,8 +1045,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, return result -cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): - cdef Py_ssize_t pivot, left = 0, right = n +cdef inline Py_ssize_t bisect_right_i8(int64_t *data, + int64_t val, Py_ssize_t n): + cdef: + Py_ssize_t pivot, left = 0, right = n assert n >= 1 diff --git a/pandas/_libs/tslibs/frequencies.pyx 
b/pandas/_libs/tslibs/frequencies.pyx index c555fce9dd007..fff4d04399481 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -154,8 +154,7 @@ cpdef get_freq_code(freqstr): freqstr = (freqstr.rule_code, freqstr.n) if isinstance(freqstr, tuple): - if (is_integer_object(freqstr[0]) and - is_integer_object(freqstr[1])): + if is_integer_object(freqstr[0]) and is_integer_object(freqstr[1]): # e.g., freqstr = (2000, 1) return freqstr else: @@ -171,7 +170,7 @@ cpdef get_freq_code(freqstr): return code, stride if is_integer_object(freqstr): - return (freqstr, 1) + return freqstr, 1 base, stride = _base_and_stride(freqstr) code = _period_str_to_code(base) @@ -183,6 +182,11 @@ cpdef _base_and_stride(freqstr): """ Return base freq and stride info from string representation + Returns + ------- + base : str + stride : int + Examples -------- _freq_and_stride('5Min') -> 'Min', 5 @@ -201,7 +205,7 @@ cpdef _base_and_stride(freqstr): base = groups.group(2) - return (base, stride) + return base, stride cpdef _period_str_to_code(freqstr): diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 78e1269aa5363..8f5887754e40d 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -532,7 +532,8 @@ cdef inline int month_add_months(npy_datetimestruct dts, int months) nogil: New month number after shifting npy_datetimestruct number of months. """ - cdef int new_month = (dts.month + months) % 12 + cdef: + int new_month = (dts.month + months) % 12 return 12 if new_month == 0 else new_month diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 094b48920fc46..d5bd2e90af3a7 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -177,7 +177,8 @@ def round_nsint64(values, mode, freq): # if/elif above should catch all rounding modes defined in enum 'RoundTo': # if flow of control arrives here, it is a bug - assert False, "round_nsint64 called with an unrecognized rounding mode" + raise AssertionError("round_nsint64 called with an unrecognized " + "rounding mode") # This is PITA. 
Because we inherit from datetime, which has very specific diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 9af12cbec1e9c..4a0d1a7620fc5 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -128,16 +128,16 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: """ return the maximum size of elements in a 1-dim string array """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] - pandas_string v + pandas_string val for i in range(length): - v = arr[i] - if isinstance(v, str): - l = PyString_GET_SIZE(v) - elif isinstance(v, bytes): - l = PyBytes_GET_SIZE(v) - elif isinstance(v, unicode): - l = PyUnicode_GET_SIZE(v) + val = arr[i] + if isinstance(val, str): + l = PyString_GET_SIZE(val) + elif isinstance(val, bytes): + l = PyBytes_GET_SIZE(val) + elif isinstance(val, unicode): + l = PyUnicode_GET_SIZE(val) if l > m: m = l From 2b626d59907f46743d3097549d004dba6b547302 Mon Sep 17 00:00:00 2001 From: Sinhrks Date: Tue, 6 Nov 2018 12:27:29 +0900 Subject: [PATCH 042/122] BUG: Cleanup timedelta offset (#23439) --- doc/source/whatsnew/v0.24.0.txt | 2 + pandas/_libs/tslibs/timedeltas.pyx | 66 ++++++++++++++--- pandas/core/tools/timedeltas.py | 54 +++----------- .../tests/scalar/timedelta/test_timedelta.py | 73 +++++++++++++------ 4 files changed, 120 insertions(+), 75 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d4046540cb8bb..f89623805d8dd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1131,6 +1131,8 @@ Timedelta - Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) - Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`) - Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`) +- Bug in :class:`Timedelta` and :func:`to_timedelta()` have inconsistencies in supported unit string (:issue:`21762`) + Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f0a57c49a98fc..c09a8e5b395ee 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -45,10 +45,16 @@ Components = collections.namedtuple('Components', [ 'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds', 'nanoseconds']) -cdef dict timedelta_abbrevs = { 'D': 'd', - 'd': 'd', - 'days': 'd', - 'day': 'd', + +cdef dict timedelta_abbrevs = { 'Y': 'Y', + 'y': 'Y', + 'M': 'M', + 'W': 'W', + 'w': 'W', + 'D': 'D', + 'd': 'D', + 'days': 'D', + 'day': 'D', 'hours': 'h', 'hour': 'h', 'hr': 'h', @@ -57,6 +63,7 @@ cdef dict timedelta_abbrevs = { 'D': 'd', 'minute': 'm', 'min': 'm', 'minutes': 'm', + 't': 'm', 's': 's', 'seconds': 's', 'sec': 's', @@ -66,16 +73,19 @@ cdef dict timedelta_abbrevs = { 'D': 'd', 'millisecond': 'ms', 'milli': 'ms', 'millis': 'ms', + 'l': 'ms', 'us': 'us', 'microseconds': 'us', 'microsecond': 'us', 'micro': 'us', 'micros': 'us', + 'u': 'us', 'ns': 'ns', 'nanoseconds': 'ns', 'nano': 'ns', 'nanos': 'ns', - 'nanosecond': 'ns'} + 'nanosecond': 'ns', + 'n': 'ns'} _no_input = object() @@ -140,7 +150,8 @@ cpdef int64_t delta_to_nanoseconds(delta) except? 
-1: cpdef convert_to_timedelta64(object ts, object unit): """ - Convert an incoming object to a timedelta64 if possible + Convert an incoming object to a timedelta64 if possible. + Before calling, unit must be standardized to avoid repeated unit conversion. Handle these types of objects: - timedelta/Timedelta - timedelta64 @@ -228,6 +239,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): for i in range(n): result[i] = parse_timedelta_string(values[i]) except: + unit = parse_timedelta_unit(unit) for i in range(n): try: result[i] = convert_to_timedelta64(values[i], unit) @@ -247,7 +259,16 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: int64_t m int p - if unit == 'D' or unit == 'd': + if unit == 'Y': + m = 1000000000L * 31556952 + p = 9 + elif unit == 'M': + m = 1000000000L * 2629746 + p = 9 + elif unit == 'W': + m = 1000000000L * 86400 * 7 + p = 9 + elif unit == 'D' or unit == 'd': m = 1000000000L * 86400 p = 9 elif unit == 'h': @@ -485,7 +506,11 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): try: unit = ''.join(unit) - unit = timedelta_abbrevs[unit.lower()] + if unit == 'M': + # To parse ISO 8601 string, 'M' should be treated as minute, + # not month + unit = 'm' + unit = parse_timedelta_unit(unit) except KeyError: raise ValueError("invalid abbreviation: {unit}".format(unit=unit)) @@ -493,6 +518,22 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): return cast_from_unit(float(n), unit) +cpdef inline object parse_timedelta_unit(object unit): + """ + Parameters + ---------- + unit : a unit string + """ + if unit is None: + return 'ns' + elif unit == 'M': + return unit + try: + return timedelta_abbrevs[unit.lower()] + except (KeyError, AttributeError): + raise ValueError("invalid unit abbreviation: {unit}" + .format(unit=unit)) + # ---------------------------------------------------------------------- # Timedelta ops utilities @@ -1070,7 +1111,13 @@ class Timedelta(_Timedelta): Parameters ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, {'ns', 'us', 'ms', 's', 'm', 'h', 'D'}, optional + unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', + 'hours', 'hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', + 'T', 'S', 'seconds', 'sec', 'second', 'ms', + 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', + 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', + 'N'}, optional Denote the unit of the input, if input is an integer. Default 'ns'.
days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional @@ -1121,6 +1168,7 @@ class Timedelta(_Timedelta): value = np.timedelta64(delta_to_nanoseconds(value.delta), 'ns') elif is_integer_object(value) or is_float_object(value): # unit=None is de-facto 'ns' + unit = parse_timedelta_unit(unit) value = convert_to_timedelta64(value, unit) elif checknull_with_nat(value): return NaT diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 4dc4fcb00d84d..220b14a9cb7c6 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -6,7 +6,8 @@ import pandas as pd from pandas._libs import tslibs from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64, - array_to_timedelta64) + array_to_timedelta64, + parse_timedelta_unit) from pandas.core.dtypes.common import ( ensure_object, @@ -23,8 +24,14 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): Parameters ---------- arg : string, timedelta, list, tuple, 1-d array, or Series - unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an - integer/float number + unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', + 'hours', 'hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', + 'T', 'S', 'seconds', 'sec', 'second', 'ms', + 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', + 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', + 'N'}, optional + Denote the unit of the input, if input is an integer. Default 'ns'. box : boolean, default True - If True returns a Timedelta/TimedeltaIndex of the results - if False returns a np.timedelta64 or ndarray of values of dtype @@ -69,7 +76,7 @@ pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime.
""" - unit = _validate_timedelta_unit(unit) + unit = parse_timedelta_unit(unit) if errors not in ('ignore', 'raise', 'coerce'): raise ValueError("errors must be one of 'ignore', " @@ -99,45 +106,6 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): box=box, errors=errors) -_unit_map = { - 'Y': 'Y', - 'y': 'Y', - 'W': 'W', - 'w': 'W', - 'D': 'D', - 'd': 'D', - 'days': 'D', - 'Days': 'D', - 'day': 'D', - 'Day': 'D', - 'M': 'M', - 'H': 'h', - 'h': 'h', - 'm': 'm', - 'T': 'm', - 'S': 's', - 's': 's', - 'L': 'ms', - 'MS': 'ms', - 'ms': 'ms', - 'US': 'us', - 'us': 'us', - 'NS': 'ns', - 'ns': 'ns', -} - - -def _validate_timedelta_unit(arg): - """ provide validation / translation for timedelta short units """ - try: - return _unit_map[arg] - except (KeyError, TypeError): - if arg is None: - return 'ns' - raise ValueError("invalid timedelta unit {arg} provided" - .format(arg=arg)) - - def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): """Convert string 'r' to a timedelta object.""" diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 0cac1119f76b5..58064213d9b3b 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -293,37 +293,64 @@ def test_nat_converters(self): assert to_timedelta('nat', box=False).astype('int64') == iNaT assert to_timedelta('nan', box=False).astype('int64') == iNaT - def testit(unit, transform): - - # array - result = to_timedelta(np.arange(5), unit=unit) - expected = TimedeltaIndex([np.timedelta64(i, transform(unit)) + @pytest.mark.parametrize('units, np_unit', + [(['Y', 'y'], 'Y'), + (['M'], 'M'), + (['W', 'w'], 'W'), + (['D', 'd', 'days', 'day', 'Days', 'Day'], 'D'), + (['m', 'minute', 'min', 'minutes', 't', + 'Minute', 'Min', 'Minutes', 'T'], 'm'), + (['s', 'seconds', 'sec', 'second', + 'S', 'Seconds', 'Sec', 'Second'], 's'), + (['ms', 'milliseconds', 'millisecond', 'milli', + 'millis', 'l', 'MS', 'Milliseconds', + 'Millisecond', 'Milli', 'Millis', 'L'], 'ms'), + (['us', 'microseconds', 'microsecond', 'micro', + 'micros', 'u', 'US', 'Microseconds', + 'Microsecond', 'Micro', 'Micros', 'U'], 'us'), + (['ns', 'nanoseconds', 'nanosecond', 'nano', + 'nanos', 'n', 'NS', 'Nanoseconds', + 'Nanosecond', 'Nano', 'Nanos', 'N'], 'ns')]) + @pytest.mark.parametrize('wrapper', [np.array, list, pd.Index]) + def test_unit_parser(self, units, np_unit, wrapper): + # validate all units, GH 6855, GH 21762 + for unit in units: + # array-likes + expected = TimedeltaIndex([np.timedelta64(i, np_unit) for i in np.arange(5).tolist()]) + result = to_timedelta(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + + if unit == 'M': + # M is treated as minutes in string repr + expected = TimedeltaIndex([np.timedelta64(i, 'm') + for i in np.arange(5).tolist()]) + + str_repr = ['{}{}'.format(x, unit) for x in np.arange(5)] + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(str_repr)) tm.assert_index_equal(result, expected) # scalar - result = to_timedelta(2, unit=unit) - expected = Timedelta(np.timedelta64(2, transform(unit)).astype( + expected = Timedelta(np.timedelta64(2, np_unit).astype( 'timedelta64[ns]')) - assert result == expected - - # validate all units - # GH 6855 - for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']: - testit(unit, lambda x: x.upper()) - 
for unit in ['days', 'day', 'Day', 'Days']: - testit(unit, lambda x: 'D') - for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US', - 'NS']: - testit(unit, lambda x: x.lower()) - # offsets + result = to_timedelta(2, unit=unit) + assert result == expected + result = Timedelta(2, unit=unit) + assert result == expected - # m - testit('T', lambda x: 'm') + if unit == 'M': + expected = Timedelta(np.timedelta64(2, 'm').astype( + 'timedelta64[ns]')) - # ms - testit('L', lambda x: 'ms') + result = to_timedelta('2{}'.format(unit)) + assert result == expected + result = Timedelta('2{}'.format(unit)) + assert result == expected def test_numeric_conversions(self): assert ct(0) == np.timedelta64(0, 'ns') From 9436e219f2815dfc8bf128b4ffe6756e95d3a12d Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 6 Nov 2018 04:55:30 +0100 Subject: [PATCH 043/122] API: change default for sep in str.cat (in docstring) (#23443) --- pandas/core/strings.py | 7 ++++--- pandas/tests/test_strings.py | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c824ad1712a5a..18a83269a2f0f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2074,9 +2074,10 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): If others is None, the method returns the concatenation of all strings in the calling Series/Index. - sep : string or None, default None - If None, concatenates without any separator. - na_rep : string or None, default None + sep : str, default '' + The separator between the different elements/columns. By default + the empty string `''` is used. + na_rep : str or None, default None Representation that is inserted for all missing values: - If `na_rep` is None, and `others` is None, missing values in the diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 87bf89229a64e..f0873eb7683e9 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -162,10 +162,11 @@ def test_str_cat_raises_intuitive_error(self, box): with tm.assert_raises_regex(ValueError, message): s.str.cat(' ') + @pytest.mark.parametrize('sep', ['', None]) @pytest.mark.parametrize('dtype_target', ['object', 'category']) @pytest.mark.parametrize('dtype_caller', ['object', 'category']) @pytest.mark.parametrize('box', [Series, Index]) - def test_str_cat_categorical(self, box, dtype_caller, dtype_target): + def test_str_cat_categorical(self, box, dtype_caller, dtype_target, sep): s = Index(['a', 'a', 'b', 'a'], dtype=dtype_caller) s = s if box == Index else Series(s, index=s) t = Index(['b', 'a', 'b', 'c'], dtype=dtype_target) @@ -176,23 +177,23 @@ def test_str_cat_categorical(self, box, dtype_caller, dtype_target): # Series/Index with unaligned Index with tm.assert_produces_warning(expected_warning=FutureWarning): # FutureWarning to switch to alignment by default - result = s.str.cat(t) + result = s.str.cat(t, sep=sep) assert_series_or_index_equal(result, expected) # Series/Index with Series having matching Index t = Series(t, index=s) - result = s.str.cat(t) + result = s.str.cat(t, sep=sep) assert_series_or_index_equal(result, expected) # Series/Index with Series.values - result = s.str.cat(t.values) + result = s.str.cat(t.values, sep=sep) assert_series_or_index_equal(result, expected) # Series/Index with Series having different Index t = Series(t.values, index=t) with tm.assert_produces_warning(expected_warning=FutureWarning): # FutureWarning to switch 
to alignment by default - result = s.str.cat(t) + result = s.str.cat(t, sep=sep) assert_series_or_index_equal(result, expected) @pytest.mark.parametrize('box', [Series, Index]) From 1071c2847cf17a6120f0efc5cadd193186845a50 Mon Sep 17 00:00:00 2001 From: pajachiet Date: Tue, 6 Nov 2018 14:06:18 +0100 Subject: [PATCH 044/122] BUG: fix df.where(cond) when cond is empty (#21947) --- doc/source/whatsnew/v0.24.0.txt | 5 +++-- pandas/core/generic.py | 2 +- pandas/tests/frame/test_indexing.py | 8 ++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f89623805d8dd..f547601476a14 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1047,7 +1047,7 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Slicing Series and Dataframes with an monotonically increasing :class:`CategoricalIndex` +- Slicing Series and DataFrames with a monotonically increasing :class:`CategoricalIndex` is now very fast and has speed comparable to slicing with an ``Int64Index``. The speed increase is both when indexing by label (using .loc) and position(.iloc) (:issue:`20395`) Slicing a monotonically increasing :class:`CategoricalIndex` itself (i.e. ``ci[1000:2000]``) @@ -1150,7 +1150,7 @@ Timezones - Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) - Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) - Bug in ``DataFrame`` assignment with a timezone-aware scalar (:issue:`19843`) -- Bug in :func:`Dataframe.asof` that raised a ``TypeError`` when attempting to compare tz-naive and tz-aware timestamps (:issue:`21194`) +- Bug in :func:`DataFrame.asof` that raised a ``TypeError`` when attempting to compare tz-naive and tz-aware timestamps (:issue:`21194`) - Bug when constructing a :class:`DatetimeIndex` with :class:`Timestamp`s constructed with the ``replace`` method across DST (:issue:`18785`) - Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) @@ -1321,6 +1321,7 @@ Reshaping - Bug in :func:`pandas.concat` when joining resampled DataFrames with timezone aware index (:issue:`13783`) - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) +- Bug in :meth:`DataFrame.where` with an empty DataFrame and empty ``cond`` having non-bool dtype (:issue:`21947`) - Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`) - Bug in :meth:`DataFrame.replace` raises RecursionError when converting OutOfBounds ``datetime64[ns, tz]`` (:issue:`20380`) - :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 71e4641d20c1b..396b092a286c1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8142,7 +8142,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # This is a
single-dimensional object.
                 if not is_bool_dtype(cond):
                     raise ValueError(msg.format(dtype=cond.dtype))
-            else:
+            elif not cond.empty:
                 for dt in cond.dtypes:
                     if not is_bool_dtype(dt):
                         raise ValueError(msg.format(dtype=dt))
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index ae04ffff37419..2467b2a89472b 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -2877,6 +2877,14 @@ def test_where_none(self):
                                    'on mixed-type'):
             df.where(~isna(df), None, inplace=True)
 
+    def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self):
+        # see gh-21947
+        df = pd.DataFrame(columns=["a"])
+        cond = df.applymap(lambda x: x > 0)
+
+        result = df.where(cond)
+        tm.assert_frame_equal(result, df)
+
     def test_where_align(self):
 
         def create():

From 079f6325754e29402d68ce28b147d482c2d6776d Mon Sep 17 00:00:00 2001
From: gfyoung 
Date: Tue, 6 Nov 2018 05:08:13 -0800
Subject: [PATCH 045/122] BUG: Fix handling of missing CSV MI column names (#23484)

---
 doc/source/whatsnew/v0.24.0.txt     |  7 +---
 pandas/_libs/parsers.pyx            | 19 +++++++---
 pandas/io/parsers.py                | 54 +++++++++++++++++++----------
 pandas/tests/io/parser/index_col.py | 28 +++++++++++++++
 4 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index f547601476a14..31b147354f662 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1281,16 +1281,11 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
 - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
 - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
-<<<<<<< HEAD
-<<<<<<< HEAD
 - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
 - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
-=======
 - Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
->>>>>>> a3ace8012... BUG-22984 Fix whatsnew and add test
-=======
 - Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
->>>>>>> 6271148c6... 
BUG-22984 Fix whatsnew and add test +- Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a2a718aa8b591..391de339ad60e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -302,6 +302,7 @@ cdef class TextReader: object tupleize_cols object usecols list dtype_cast_order + set unnamed_cols set noconvert def __cinit__(self, source, @@ -536,7 +537,7 @@ cdef class TextReader: self.header = [ header ] self.names = names - self.header, self.table_width = self._get_header() + self.header, self.table_width, self.unnamed_cols = self._get_header() if not self.table_width: raise EmptyDataError("No columns to parse from file") @@ -720,13 +721,15 @@ cdef class TextReader: cdef: Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa char *word - object name + object name, old_name int status int64_t hr, data_line char *errors = "strict" cdef StringPath path = _string_path(self.c_encoding) header = [] + unnamed_cols = set() + if self.parser.header_start >= 0: # Header is in the file @@ -759,6 +762,7 @@ cdef class TextReader: counts = {} unnamed_count = 0 + for i in range(field_count): word = self.parser.words[start + i] @@ -770,6 +774,9 @@ cdef class TextReader: name = PyUnicode_Decode(word, strlen(word), self.c_encoding, errors) + # We use this later when collecting placeholder names. + old_name = name + if name == '': if self.has_mi_columns: name = ('Unnamed: {i}_level_{lvl}' @@ -786,6 +793,9 @@ cdef class TextReader: name = '%s.%d' % (name, count) count = counts.get(name, 0) + if old_name == '': + unnamed_cols.add(name) + this_header.append(name) counts[name] = count + 1 @@ -798,6 +808,7 @@ cdef class TextReader: lc = len(this_header) ic = (len(self.index_col) if self.index_col is not None else 0) + if lc != unnamed_count and lc - ic > unnamed_count: hr -= 1 self.parser_start -= 1 @@ -830,7 +841,7 @@ cdef class TextReader: if self.parser.lines < 1: self._tokenize_rows(1) - return None, self.parser.line_fields[0] + return None, self.parser.line_fields[0], unnamed_cols # Corner case, not enough lines in the file if self.parser.lines < data_line + 1: @@ -864,7 +875,7 @@ cdef class TextReader: elif self.allow_leading_cols and passed_count < field_count: self.leading_cols = field_count - passed_count - return header, field_count + return header, field_count, unnamed_cols def read(self, rows=None): """ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index cd9d3ccb79af8..12914c10e0655 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1265,6 +1265,7 @@ def __init__(self, kwds): self.prefix = kwds.pop('prefix', None) self.index_col = kwds.get('index_col', None) + self.unnamed_cols = set() self.index_names = None self.col_names = None @@ -1374,7 +1375,8 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, # clean the index_names index_names = header.pop(-1) index_names, names, index_col = _clean_index_names(index_names, - self.index_col) + self.index_col, + self.unnamed_cols) # extract the columns field_count = len(header[0]) @@ -1454,7 +1456,8 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): if not self._name_processed: (self.index_names, _, self.index_col) = _clean_index_names(list(columns), - self.index_col) + self.index_col, + self.unnamed_cols) self._name_processed = True index = self._get_complex_date_index(data, 
columns) index = self._agg_index(index, try_parse_dates=False) @@ -1732,6 +1735,7 @@ def __init__(self, src, **kwds): kwds['usecols'] = self.usecols self._reader = parsers.TextReader(src, **kwds) + self.unnamed_cols = self._reader.unnamed_cols passed_names = self.names is None @@ -1792,7 +1796,8 @@ def __init__(self, src, **kwds): self._name_processed = True (index_names, self.names, self.index_col) = _clean_index_names(self.names, - self.index_col) + self.index_col, + self.unnamed_cols) if self.index_names is None: self.index_names = index_names @@ -1966,7 +1971,8 @@ def _get_index_names(self): if self._reader.leading_cols == 0 and self.index_col is not None: (idx_names, names, - self.index_col) = _clean_index_names(names, self.index_col) + self.index_col) = _clean_index_names(names, self.index_col, + self.unnamed_cols) return names, idx_names @@ -2112,7 +2118,8 @@ def __init__(self, f, **kwds): # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. self._col_indices = None - self.columns, self.num_original_columns = self._infer_columns() + (self.columns, self.num_original_columns, + self.unnamed_cols) = self._infer_columns() # Now self.columns has the set of columns that we will process. # The original set is stored in self.original_columns. @@ -2367,6 +2374,8 @@ def _infer_columns(self): names = self.names num_original_columns = 0 clear_buffer = True + unnamed_cols = set() + if self.header is not None: header = self.header @@ -2400,7 +2409,7 @@ def _infer_columns(self): if clear_buffer: self._clear_buffer() columns.append([None] * len(columns[-1])) - return columns, num_original_columns + return columns, num_original_columns, unnamed_cols if not self.names: raise EmptyDataError( @@ -2408,16 +2417,19 @@ def _infer_columns(self): line = self.names[:] - unnamed_count = 0 this_columns = [] + this_unnamed_cols = [] + for i, c in enumerate(line): if c == '': if have_mi_columns: - this_columns.append('Unnamed: %d_level_%d' - % (i, level)) + col_name = ("Unnamed: {i}_level_{level}" + .format(i=i, level=level)) else: - this_columns.append('Unnamed: %d' % i) - unnamed_count += 1 + col_name = "Unnamed: {i}".format(i=i) + + this_unnamed_cols.append(i) + this_columns.append(col_name) else: this_columns.append(c) @@ -2443,12 +2455,17 @@ def _infer_columns(self): lc = len(this_columns) ic = (len(self.index_col) if self.index_col is not None else 0) + unnamed_count = len(this_unnamed_cols) + if lc != unnamed_count and lc - ic > unnamed_count: clear_buffer = False this_columns = [None] * lc self.buf = [self.buf[-1]] columns.append(this_columns) + unnamed_cols.update({this_columns[i] + for i in this_unnamed_cols}) + if len(columns) == 1: num_original_columns = len(this_columns) @@ -2513,7 +2530,7 @@ def _infer_columns(self): columns = [names] num_original_columns = ncols - return columns, num_original_columns + return columns, num_original_columns, unnamed_cols def _handle_usecols(self, columns, usecols_key): """ @@ -2879,7 +2896,8 @@ def _get_index_name(self, columns): else: # Case 2 (index_name, columns_, - self.index_col) = _clean_index_names(columns, self.index_col) + self.index_col) = _clean_index_names(columns, self.index_col, + self.unnamed_cols) return index_name, orig_names, columns @@ -3178,7 +3196,7 @@ def _clean_na_values(na_values, keep_default_na=True): return na_values, na_fvalues -def _clean_index_names(columns, index_col): +def _clean_index_names(columns, index_col, unnamed_cols): if not _is_index_col(index_col): return None, 
columns, index_col @@ -3203,10 +3221,10 @@ def _clean_index_names(columns, index_col): columns.remove(name) index_names.append(name) - # hack - if (isinstance(index_names[0], compat.string_types) and - 'Unnamed' in index_names[0]): - index_names[0] = None + # Only clean index names that were placeholders. + for i, name in enumerate(index_names): + if isinstance(name, compat.string_types) and name in unnamed_cols: + index_names[i] = None return index_names, columns, index_col diff --git a/pandas/tests/io/parser/index_col.py b/pandas/tests/io/parser/index_col.py index 2909ef6214e62..ba54ed4620199 100644 --- a/pandas/tests/io/parser/index_col.py +++ b/pandas/tests/io/parser/index_col.py @@ -141,3 +141,31 @@ def test_empty_with_index_col_false(self): result = self.read_csv(StringIO(data), index_col=False) expected = DataFrame([], columns=['x', 'y']) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("index_names", [ + ["", ""], + ["foo", ""], + ["", "bar"], + ["foo", "bar"], + ["NotReallyUnnamed", "Unnamed: 0"], + ]) + def test_multi_index_naming(self, index_names): + # We don't want empty index names being replaced with "Unnamed: 0" + data = ",".join(index_names + ["col\na,c,1\na,d,2\nb,c,3\nb,d,4"]) + result = self.read_csv(StringIO(data), index_col=[0, 1]) + + expected = DataFrame({"col": [1, 2, 3, 4]}, + index=MultiIndex.from_product([["a", "b"], + ["c", "d"]])) + expected.index.names = [name if name else None for name in index_names] + tm.assert_frame_equal(result, expected) + + def test_multi_index_naming_not_all_at_beginning(self): + data = ",Unnamed: 2,\na,c,1\na,d,2\nb,c,3\nb,d,4" + result = self.read_csv(StringIO(data), index_col=[0, 2]) + + expected = DataFrame({"Unnamed: 2": ["c", "d", "c", "d"]}, + index=MultiIndex( + levels=[['a', 'b'], [1, 2, 3, 4]], + labels=[[0, 0, 1, 1], [0, 1, 2, 3]])) + tm.assert_frame_equal(result, expected) From 013315ad422936db5c98d7462d747f7c387a793b Mon Sep 17 00:00:00 2001 From: Justin Zheng Date: Tue, 6 Nov 2018 05:08:46 -0800 Subject: [PATCH 046/122] BUG GH23451 Allow setting date in string index for Series (#23495) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/series.py | 4 +++- pandas/tests/series/test_datetime_values.py | 7 +++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 31b147354f662..077f018b23e1f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1118,6 +1118,7 @@ Datetimelike - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) - Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) +- Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/series.py b/pandas/core/series.py index cb8371ba086ba..6971b0b0c78e0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -947,7 +947,9 @@ def _set_with(self, key, value): except Exception: pass - if not isinstance(key, (list, Series, np.ndarray, Series)): + if is_scalar(key): + key = [key] + elif not isinstance(key, (list, Series, np.ndarray)): try: key = list(key) except Exception: 
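
A minimal sketch of the behaviour this fix restores, mirroring the test added
below (every name and value here comes from that test; before the change, per
the whatsnew entry above, a string key could be interpreted as a list of
characters when a datetimelike value was assigned):

    import pandas as pd
    from datetime import date

    s = pd.Series([1, 2, 3], index=['Date', 'b', 'other'])
    s['Date'] = date.today()          # sets the single 'Date' entry,
    assert s['Date'] == date.today()  # not the characters 'D', 'a', 't', 'e'
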
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index 2f6efc112819c..f3ae2b1e6ad15 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -548,3 +548,10 @@ def test_minmax_nat_series(self, nat):
     def test_minmax_nat_dataframe(self, nat):
         assert nat.min()[0] is pd.NaT
         assert nat.max()[0] is pd.NaT
+
+    def test_setitem_with_string_index(self):
+        # GH 23451
+        x = pd.Series([1, 2, 3], index=['Date', 'b', 'other'])
+        x['Date'] = date.today()
+        assert x.Date == date.today()
+        assert x['Date'] == date.today()

From 662759a91fbad4b88febfccc1d803aa9059d6ec9 Mon Sep 17 00:00:00 2001
From: Diego Torres 
Date: Tue, 6 Nov 2018 08:23:04 -0500
Subject: [PATCH 047/122] BUG: fix groupby.transform rename bug (#23461) (#23463)

---
 doc/source/whatsnew/v0.24.0.txt        |  1 +
 pandas/core/groupby/generic.py         | 11 +++++++----
 pandas/tests/groupby/test_transform.py | 23 +++++++++++++++++++++++
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 077f018b23e1f..215c0f7438e71 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1310,6 +1310,7 @@ Groupby/Resample/Rolling
 - :func:`RollingGroupby.agg` and :func:`ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`)
 - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`)
 - Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`)
+- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` which caused missing values when the input function accepts a :class:`DataFrame` but renames it (:issue:`23455`).
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5d9a5616e133b..451f1199ac8e6 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -586,14 +586,17 @@ def _choose_path(self, fast_path, slow_path, group):
         try:
             res_fast = fast_path(group)
 
-            # compare that we get the same results
+            # verify fast path does not change columns (and names), otherwise
+            # its results cannot be joined with those of the slow path
+            if res_fast.columns != group.columns:
+                return path, res
+            # verify numerical equality with the slow path
             if res.shape == res_fast.shape:
                 res_r = res.values.ravel()
                 res_fast_r = res_fast.values.ravel()
                 mask = notna(res_r)
-                if (res_r[mask] == res_fast_r[mask]).all():
-                    path = fast_path
-
+                if (res_r[mask] == res_fast_r[mask]).all():
+                    path = fast_path
         except Exception:
             pass
         return path, res
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index aec51afb99ef0..4cf63a321a47a 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -808,3 +808,26 @@ def test_any_all_np_func(func):
 
     res = df.groupby('key')['val'].transform(func)
     tm.assert_series_equal(res, exp)
+
+
+def test_groupby_transform_rename():
+    # https://github.com/pandas-dev/pandas/issues/23461
+    def demean_rename(x):
+        result = x - x.mean()
+
+        if isinstance(x, pd.Series):
+            return result
+
+        result = result.rename(
+            columns={c: '{}_demeaned'.format(c) for c in result.columns})
+
+        return result
+
+    df = pd.DataFrame({'group': list('ababa'),
+                       'value': [1, 1, 1, 2, 2]})
+    expected = pd.DataFrame({'value': [-1. / 3, -0.5, -1. / 3, 0.5, 2. / 3]})
+
+    result = df.groupby('group').transform(demean_rename)
+    tm.assert_frame_equal(result, expected)
+    result_single = df.groupby('group').value.transform(demean_rename)
+    tm.assert_series_equal(result_single, expected['value'])

From 6efd3314a240302e3980ee5a575b414103a0faf9 Mon Sep 17 00:00:00 2001
From: Shirish Kadam 
Date: Tue, 6 Nov 2018 20:18:44 +0530
Subject: [PATCH 048/122] BUG: GroupBy return EA dtype (#23318)

---
 doc/source/whatsnew/v0.24.0.txt     |  2 ++
 pandas/core/groupby/groupby.py      | 16 +++++++++++--
 pandas/tests/arrays/test_integer.py |  6 +++--
 pandas/tests/sparse/test_groupby.py | 37 +++++++++++++++++++----------
 pandas/tests/test_resample.py       |  1 +
 5 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 215c0f7438e71..574e0dc696864 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -854,6 +854,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
 - :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`).
+- Bug where grouping with :meth:`DataFrame.groupby()` and aggregating on an ``ExtensionArray`` did not return the actual ``ExtensionArray`` dtype (:issue:`23227`).
 
 .. _whatsnew_0240.api.incompatibilities:
 
@@ -1089,6 +1090,7 @@ Categorical
 - Bug when indexing with a boolean-valued ``Categorical``.
Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`)
 - Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`).
 - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`).
+- Bug when resampling with :meth:`DataFrame.resample()` and aggregating on categorical data, the categorical dtype was lost (:issue:`23227`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e31929434b5d6..ea7507799fa9a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -24,7 +24,8 @@ class providing the base-class of operations.
 from pandas.util._validators import validate_kwargs
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
-from pandas.core.dtypes.common import ensure_float, is_numeric_dtype, is_scalar
+from pandas.core.dtypes.common import (
+    ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar)
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.algorithms as algorithms
@@ -754,7 +755,18 @@ def _try_cast(self, result, obj, numeric_only=False):
             dtype = obj.dtype
 
         if not is_scalar(result):
-            if numeric_only and is_numeric_dtype(dtype) or not numeric_only:
+            if is_extension_array_dtype(dtype):
+                # The function can return something of any type, so check
+                # if the type is compatible with the calling EA.
+                try:
+                    result = obj.values._from_sequence(result)
+                except Exception:
+                    # https://github.com/pandas-dev/pandas/issues/22850
+                    # pandas has no control over what 3rd-party ExtensionArrays
+                    # do in _values_from_sequence. We still want ops to work
+                    # though, so we catch any regular Exception.
+ pass + elif numeric_only and is_numeric_dtype(dtype) or not numeric_only: result = maybe_downcast_to_dtype(result, dtype) return result diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index ec627c2789d8f..0fe07caed5b85 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -650,9 +650,10 @@ def test_preserve_dtypes(op): # groupby result = getattr(df.groupby("A"), op)() + expected = pd.DataFrame({ "B": np.array([1.0, 3.0]), - "C": np.array([1, 3], dtype="int64") + "C": integer_array([1, 3], dtype="Int64") }, index=pd.Index(['a', 'b'], name='A')) tm.assert_frame_equal(result, expected) @@ -673,9 +674,10 @@ def test_reduce_to_float(op): # groupby result = getattr(df.groupby("A"), op)() + expected = pd.DataFrame({ "B": np.array([1.0, 3.0]), - "C": np.array([1, 3], dtype="float64") + "C": integer_array([1, 3], dtype="Int64") }, index=pd.Index(['a', 'b'], name='A')) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index 1d2129312fb1b..d0ff2a02c4046 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -24,27 +24,39 @@ def test_first_last_nth(self): sparse_grouped = self.sparse.groupby('A') dense_grouped = self.dense.groupby('A') + sparse_grouped_first = sparse_grouped.first() + sparse_grouped_last = sparse_grouped.last() + sparse_grouped_nth = sparse_grouped.nth(1) + + dense_grouped_first = dense_grouped.first().to_sparse() + dense_grouped_last = dense_grouped.last().to_sparse() + dense_grouped_nth = dense_grouped.nth(1).to_sparse() + # TODO: shouldn't these all be spares or not? - tm.assert_frame_equal(sparse_grouped.first(), - dense_grouped.first()) - tm.assert_frame_equal(sparse_grouped.last(), - dense_grouped.last()) - tm.assert_frame_equal(sparse_grouped.nth(1), - dense_grouped.nth(1).to_sparse()) + tm.assert_frame_equal(sparse_grouped_first, + dense_grouped_first) + tm.assert_frame_equal(sparse_grouped_last, + dense_grouped_last) + tm.assert_frame_equal(sparse_grouped_nth, + dense_grouped_nth) def test_aggfuncs(self): sparse_grouped = self.sparse.groupby('A') dense_grouped = self.dense.groupby('A') - tm.assert_frame_equal(sparse_grouped.mean(), - dense_grouped.mean()) + result = sparse_grouped.mean().to_sparse() + expected = dense_grouped.mean().to_sparse() + + tm.assert_frame_equal(result, expected) # ToDo: sparse sum includes str column # tm.assert_frame_equal(sparse_grouped.sum(), # dense_grouped.sum()) - tm.assert_frame_equal(sparse_grouped.count(), - dense_grouped.count()) + result = sparse_grouped.count().to_sparse() + expected = dense_grouped.count().to_sparse() + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("fill_value", [0, np.nan]) @@ -54,6 +66,5 @@ def test_groupby_includes_fill_value(fill_value): 'b': [fill_value, 1, fill_value, fill_value]}) sdf = df.to_sparse(fill_value=fill_value) result = sdf.groupby('a').sum() - expected = df.groupby('a').sum() - tm.assert_frame_equal(result, expected, - check_index_type=False) + expected = df.groupby('a').sum().to_sparse(fill_value=fill_value) + tm.assert_frame_equal(result, expected, check_index_type=False) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 69a0613c95475..ed29e20fd5ca5 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1576,6 +1576,7 @@ def test_resample_categorical_data_with_timedeltaindex(self): 'Group': ['A', 'A']}, index=pd.to_timedelta([0, 
10], unit='s'))
         expected = expected.reindex(['Group_obj', 'Group'], axis=1)
+        expected['Group'] = expected['Group_obj'].astype('category')
         tm.assert_frame_equal(result, expected)
 
     def test_resample_daily_anchored(self):

From 1da1d63ece35026b2e8fc6af2783ae3086fe4ac1 Mon Sep 17 00:00:00 2001
From: ArtinSarraf 
Date: Tue, 6 Nov 2018 12:58:02 -0500
Subject: [PATCH 049/122] BUG - pd.concat with all Series on axis=1 ignores the `names` argument (#23499)

Closes gh-23490.

---
 doc/source/whatsnew/v0.24.0.txt     |  1 +
 pandas/core/reshape/concat.py       |  2 +-
 pandas/tests/reshape/test_concat.py | 17 +++++++++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 574e0dc696864..96e1510efa784 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1318,6 +1318,7 @@ Reshaping
 ^^^^^^^^^
 
 - Bug in :func:`pandas.concat` when joining resampled DataFrames with timezone aware index (:issue:`13783`)
+- Bug in :func:`pandas.concat` where the `names` argument was ignored when joining only `Series` objects (:issue:`23490`)
 - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`)
 - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`)
 - Bug in :meth:`DataFrame.where` with an empty DataFrame and empty ``cond`` having non-bool dtype (:issue:`21947`)
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 0e60068732447..0443f3246c05f 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -502,7 +502,7 @@ def _get_concat_axis(self):
                 else:
                     return ibase.default_index(len(self.objs))
             else:
-                return ensure_index(self.keys)
+                return ensure_index(self.keys).set_names(self.names)
         else:
             indexes = [x._data.axes[self.axis] for x in self.objs]
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 23bf8896409c9..65e2b4f6c3f31 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1618,6 +1618,23 @@ def test_concat_series_axis1(self, sort=sort):
         expected = DataFrame({'A': s, 'B': s2})
         assert_frame_equal(result, expected)
 
+    def test_concat_series_axis1_names_applied(self):
+        # ensure names argument is not ignored on axis=1, #23490
+        s = Series([1, 2, 3])
+        s2 = Series([4, 5, 6])
+        result = concat([s, s2], axis=1, keys=['a', 'b'], names=['A'])
+        expected = DataFrame([[1, 4], [2, 5], [3, 6]],
+                             columns=pd.Index(['a', 'b'], name='A'))
+        assert_frame_equal(result, expected)
+
+        result = concat([s, s2], axis=1, keys=[('a', 1), ('b', 2)],
+                        names=['A', 'B'])
+        expected = DataFrame([[1, 4], [2, 5], [3, 6]],
+                             columns=MultiIndex.from_tuples([('a', 1),
+                                                             ('b', 2)],
+                                                            names=['A', 'B']))
+        assert_frame_equal(result, expected)
+
     def test_concat_single_with_key(self):
         df = DataFrame(np.random.randn(10, 4))

From 2acb22c7f230988509f9cc430662fa3e537cd29c Mon Sep 17 00:00:00 2001
From: "Dr. Irv" 
Date: Tue, 6 Nov 2018 13:21:47 -0500
Subject: [PATCH 050/122] BUG: names on union and intersection for Index were inconsistent (#19849)

Closes gh-9862. xref gh-9943.
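
A minimal sketch of the naming rule this change settles on, with illustrative
index values and names (the rule itself is the one the updated tests encode:
a union or intersection keeps a name only when both operands carry the same
name; any mismatch, including a missing name, yields None):

    import pandas as pd

    a = pd.Index([1, 2, 3], name='x')
    b = pd.Index([2, 3, 4], name='x')
    assert a.union(b).name == 'x'          # matching names are kept

    c = pd.Index([2, 3, 4], name='y')
    assert a.union(c).name is None         # mismatched names are dropped

    d = pd.Index([2, 3, 4])                # unnamed index
    assert a.union(d).name is None         # a missing name drops it too
    assert a.intersection(c).name is None  # intersection follows the same rule
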
--- doc/source/whatsnew/v0.24.0.txt | 2 + pandas/core/indexes/base.py | 39 ++++++++---------- pandas/core/indexes/category.py | 5 +++ pandas/core/indexes/datetimes.py | 17 ++++++-- pandas/core/indexes/interval.py | 3 +- pandas/core/indexes/numeric.py | 6 +-- pandas/core/indexes/period.py | 8 ++-- pandas/core/indexes/range.py | 26 ++++++++---- pandas/core/indexes/timedeltas.py | 15 ++++--- pandas/tests/indexes/common.py | 4 +- pandas/tests/indexes/conftest.py | 1 + pandas/tests/indexes/test_base.py | 63 ++++++++++++++++++++++++++++- pandas/tests/reshape/test_concat.py | 8 ++-- 13 files changed, 143 insertions(+), 54 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 96e1510efa784..72c11900f7fa3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1220,6 +1220,8 @@ Indexing - Bug in `scalar in Index` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`) - Bug in `MultiIndex.set_levels` when levels value is not subscriptable (:issue:`23273`) - Bug where setting a timedelta column by ``Index`` causes it to be casted to double, and therefore lose precision (:issue:`23511`) +- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) + Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ae64179b36485..7434a02043d65 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -61,7 +61,7 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import ( pprint_thing, default_pprint, format_object_summary, format_object_attrs) -from pandas.core.ops import make_invalid_op +from pandas.core.ops import make_invalid_op, get_op_result_name from pandas.core.strings import StringMethods __all__ = ['Index'] @@ -1253,7 +1253,7 @@ def _convert_can_do_setop(self, other): other = Index(other, name=self.name) result_name = self.name else: - result_name = self.name if self.name == other.name else None + result_name = get_op_result_name(self, other) return other, result_name def _convert_for_op(self, value): @@ -2745,19 +2745,15 @@ def __or__(self, other): def __xor__(self, other): return self.symmetric_difference(other) - def _get_consensus_name(self, other): + def _get_reconciled_name_object(self, other): """ - Given 2 indexes, give a consensus name meaning - we take the not None one, or None if the names differ. - Return a new object if we are resetting the name + If the result of a set operation will be self, + return self, unless the name changes, in which + case make a shallow copy of self. """ - if self.name != other.name: - if self.name is None or other.name is None: - name = self.name or other.name - else: - name = None - if self.name != name: - return self._shallow_copy(name=name) + name = get_op_result_name(self, other) + if self.name != name: + return self._shallow_copy(name=name) return self def union(self, other): @@ -2785,10 +2781,10 @@ def union(self, other): other = ensure_index(other) if len(other) == 0 or self.equals(other): - return self._get_consensus_name(other) + return self._get_reconciled_name_object(other) if len(self) == 0: - return other._get_consensus_name(self) + return other._get_reconciled_name_object(self) # TODO: is_dtype_union_equal is a hack around # 1. 
buggy set ops with duplicates (GH #13432) @@ -2851,11 +2847,10 @@ def union(self, other): stacklevel=3) # for subclasses - return self._wrap_union_result(other, result) + return self._wrap_setop_result(other, result) - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None - return self.__class__(result, name=name) + def _wrap_setop_result(self, other, result): + return self._constructor(result, name=get_op_result_name(self, other)) def intersection(self, other): """ @@ -2885,7 +2880,7 @@ def intersection(self, other): other = ensure_index(other) if self.equals(other): - return self._get_consensus_name(other) + return self._get_reconciled_name_object(other) if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') @@ -2905,7 +2900,7 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: result = self._inner_indexer(lvals, rvals)[0] - return self._wrap_union_result(other, result) + return self._wrap_setop_result(other, result) except TypeError: pass @@ -4175,7 +4170,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False): return join_index def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) return Index(joined, name=name) def _get_string_slice(self, key, use_lhs=True, use_rhs=True): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 278e395d65014..6e2f0b00fcd6e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -25,6 +25,7 @@ import pandas.core.common as com import pandas.core.missing as missing import pandas.core.indexes.base as ibase +from pandas.core.ops import get_op_result_name from pandas.core.arrays.categorical import Categorical, contains _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -324,6 +325,10 @@ def itemsize(self): # Size of the items in categories, not codes. 
return self.values.itemsize + def _wrap_setop_result(self, other, result): + name = get_op_result_name(self, other) + return self._shallow_copy(result, name=name) + def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index bd6f0c68a9aa5..3a2f9986760d3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -34,6 +34,7 @@ from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.numeric import Int64Index +from pandas.core.ops import get_op_result_name import pandas.compat as compat from pandas.tseries.frequencies import to_offset, Resolution from pandas.core.indexes.datetimelike import ( @@ -592,6 +593,10 @@ def union(self, other): y : Index or DatetimeIndex """ self._assert_can_do_setop(other) + + if len(other) == 0 or self.equals(other) or len(self) == 0: + return super(DatetimeIndex, self).union(other) + if not isinstance(other, DatetimeIndex): try: other = DatetimeIndex(other) @@ -674,7 +679,7 @@ def _maybe_utc_convert(self, other): return this, other def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) if (isinstance(other, DatetimeIndex) and self.freq == other.freq and self._can_fast_union(other)): @@ -745,11 +750,11 @@ def _fast_union(self, other): else: return left - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None + def _wrap_setop_result(self, other, result): + name = get_op_result_name(self, other) if not timezones.tz_compare(self.tz, other.tz): raise ValueError('Passed item and index have different timezone') - return self._simple_new(result, name=name, freq=None, tz=self.tz) + return self._shallow_copy(result, name=name, freq=None, tz=self.tz) def intersection(self, other): """ @@ -765,6 +770,10 @@ def intersection(self, other): y : Index or DatetimeIndex """ self._assert_can_do_setop(other) + + if self.equals(other): + return self._get_reconciled_name_object(other) + if not isinstance(other, DatetimeIndex): try: other = DatetimeIndex(other) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b86921b5579ed..79239ec90ac80 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -25,6 +25,7 @@ from pandas.core.indexes.base import ( Index, ensure_index, default_pprint, _index_shared_docs) +from pandas.core.ops import get_op_result_name from pandas._libs import Timestamp, Timedelta from pandas._libs.interval import ( @@ -1048,7 +1049,7 @@ def func(self, other): raise TypeError(msg.format(op=op_name)) result = getattr(self._multiindex, op_name)(other._multiindex) - result_name = self.name if self.name == other.name else None + result_name = get_op_result_name(self, other) # GH 19101: ensure empty results have correct dtype if result.empty: diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 420b862ae16a4..795ffeefa1794 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -21,7 +21,7 @@ from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat import pandas.core.indexes.base as ibase - +from pandas.core.ops import get_op_result_name _num_index_shared_docs = dict() @@ -215,7 +215,7 @@ def _convert_scalar_indexer(self, key, kind=None): ._convert_scalar_indexer(key, kind=kind)) def 
_wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) return Int64Index(joined, name=name) @classmethod @@ -288,7 +288,7 @@ def _convert_index_indexer(self, keyarr): return keyarr def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) return UInt64Index(joined, name=name) @classmethod diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 7e11ca5dbfcef..92ffaea521d7f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -12,9 +12,9 @@ is_integer_dtype, is_datetime64_any_dtype, is_bool_dtype, - pandas_dtype, + pandas_dtype ) - +from pandas.core.ops import get_op_result_name from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index from pandas.core.indexes.datetimelike import ( @@ -848,8 +848,8 @@ def _assert_can_do_setop(self, other): msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None + def _wrap_setop_result(self, other, result): + name = get_op_result_name(self, other) result = self._apply_meta(result) result.name = name return result diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 673ab9f2118a4..d1b5645928921 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -5,7 +5,7 @@ import numpy as np -from pandas._libs import index as libindex +from pandas._libs import index as libindex, lib import pandas.compat as compat from pandas.compat import get_range_parameters, lrange, range from pandas.compat.numpy import function as nv @@ -263,8 +263,9 @@ def tolist(self): @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, **kwargs): if values is None: + name = kwargs.get("name", self.name) return RangeIndex._simple_new( - name=self.name, **dict(self._get_data_as_items())) + name=name, **dict(self._get_data_as_items())) else: kwargs.setdefault('name', self.name) return self._int64index._shallow_copy(values, **kwargs) @@ -344,6 +345,10 @@ def intersection(self, other): ------- intersection : Index """ + + if self.equals(other): + return self._get_reconciled_name_object(other) + if not isinstance(other, RangeIndex): return super(RangeIndex, self).intersection(other) @@ -424,10 +429,9 @@ def union(self, other): union : Index """ self._assert_can_do_setop(other) - if len(other) == 0 or self.equals(other): - return self - if len(self) == 0: - return other + if len(other) == 0 or self.equals(other) or len(self) == 0: + return super(RangeIndex, self).union(other) + if isinstance(other, RangeIndex): start_s, step_s = self._start, self._step end_s = self._start + self._step * (len(self) - 1) @@ -498,7 +502,12 @@ def __getitem__(self, key): super_getitem = super(RangeIndex, self).__getitem__ if is_scalar(key): - n = int(key) + if not lib.is_integer(key): + raise IndexError("only integers, slices (`:`), " + "ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean " + "arrays are valid indices") + n = com.cast_scalar_indexer(key) if n != key: return super_getitem(key) if n < 0: @@ -649,7 +658,8 @@ def _evaluate_numeric_binop(self, other): return op(self._int64index, other) # TODO: Do attrs get handled reliably? 
- return _evaluate_numeric_binop + name = '__{name}__'.format(name=op.__name__) + return compat.set_function_name(_evaluate_numeric_binop, name, cls) cls.__add__ = _make_evaluate_binop(operator.add) cls.__radd__ = _make_evaluate_binop(ops.radd) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 33361c851a4c5..5b077a6984114 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -26,6 +26,7 @@ from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com +from pandas.core.ops import get_op_result_name import pandas.core.dtypes.concat as _concat from pandas.util._decorators import Appender, Substitution from pandas.core.indexes.datetimelike import ( @@ -281,6 +282,10 @@ def union(self, other): y : Index or TimedeltaIndex """ self._assert_can_do_setop(other) + + if len(other) == 0 or self.equals(other) or len(self) == 0: + return super(TimedeltaIndex, self).union(other) + if not isinstance(other, TimedeltaIndex): try: other = TimedeltaIndex(other) @@ -313,7 +318,7 @@ def join(self, other, how='left', level=None, return_indexers=False, sort=sort) def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None + name = get_op_result_name(self, other) if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and self._can_fast_union(other)): joined = self._shallow_copy(joined, name=name) @@ -373,10 +378,6 @@ def _fast_union(self, other): else: return left - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None - return self._simple_new(result, name=name, freq=None) - def intersection(self, other): """ Specialized intersection for TimedeltaIndex objects. 
May be much faster @@ -391,6 +392,10 @@ def intersection(self, other): y : Index or TimedeltaIndex """ self._assert_can_do_setop(other) + + if self.equals(other): + return self._get_reconciled_name_object(other) + if not isinstance(other, TimedeltaIndex): try: other = TimedeltaIndex(other) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 468b1610a9142..c5cbaea23df76 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -360,10 +360,10 @@ def test_has_duplicates(self, indices): def test_duplicated(self, indices, keep): if type(indices) is not self._holder: pytest.skip('Can only check if we know the index type') - if not len(indices) or isinstance(indices, MultiIndex): + if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)): # MultiIndex tested separately in: # tests/indexes/multi/test_unique_and_duplicates - pytest.skip('Skip check for empty Index and MultiIndex') + pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex') idx = self._holder(indices) if idx.has_duplicates: diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 8cfed33a96ac5..e82cce873e75c 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -15,6 +15,7 @@ tm.makeTimedeltaIndex(100), tm.makeIntIndex(100), tm.makeUIntIndex(100), + tm.makeRangeIndex(100), tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index daebc6e95aac4..724dffc49dd3b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -786,6 +786,67 @@ def test_intersect_str_dates(self): assert len(result) == 0 + @pytest.mark.parametrize( + 'fname, sname, expected_name', + [ + ('A', 'A', 'A'), + ('A', 'B', None), + ('A', None, None), + (None, 'B', None), + (None, None, None), + ]) + def test_corner_union(self, indices, fname, sname, expected_name): + # GH 9943 9862 + # Test unions with various name combinations + # Do not test MultiIndex or repeats + + if isinstance(indices, MultiIndex) or not indices.is_unique: + pytest.skip("Not for MultiIndex or repeated indices") + + # Test copy.union(copy) + first = indices.copy().set_names(fname) + second = indices.copy().set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test copy.union(empty) + first = indices.copy().set_names(fname) + second = indices.drop(indices).set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(copy) + first = indices.drop(indices).set_names(fname) + second = indices.copy().set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(empty) + first = indices.drop(indices).set_names(fname) + second = indices.drop(indices).set_names(sname) + union = first.union(second) + expected = indices.drop(indices).set_names(expected_name) + tm.assert_index_equal(union, expected) + + def test_chained_union(self): + # Chained unions handles names correctly + i1 = Index([1, 2], name='i1') + i2 = Index([3, 4], name='i2') + i3 = Index([5, 6], name='i3') + union = i1.union(i2.union(i3)) + expected = i1.union(i2).union(i3) + tm.assert_index_equal(union, expected) + + j1 = Index([1, 2], name='j1') + j2 = Index([], name='j2') + 
j3 = Index([], name='j3') + union = j1.union(j2.union(j3)) + expected = j1.union(j2).union(j3) + tm.assert_index_equal(union, expected) + def test_union(self): # TODO: Replace with fixturesult first = self.strIndex[5:20] @@ -824,7 +885,7 @@ def test_union_identity(self): @pytest.mark.parametrize("first_list", [list('ab'), list()]) @pytest.mark.parametrize("second_list", [list('ab'), list()]) @pytest.mark.parametrize("first_name, second_name, expected_name", [ - ('A', 'B', None), (None, 'B', 'B'), ('A', None, 'A')]) + ('A', 'B', None), (None, 'B', None), ('A', None, None)]) def test_union_name_preservation(self, first_list, second_list, first_name, second_name, expected_name): first = Index(first_list, name=first_name) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 65e2b4f6c3f31..673658c29fe75 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2307,10 +2307,10 @@ def test_concat_categoricalindex(self): result = pd.concat([a, b, c], axis=1) - exp_idx = pd.CategoricalIndex([0, 1, 2, 9]) - exp = pd.DataFrame({0: [1, np.nan, np.nan, 1], - 1: [2, 2, np.nan, np.nan], - 2: [np.nan, 3, 3, np.nan]}, + exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) + exp = pd.DataFrame({0: [1, 1, np.nan, np.nan], + 1: [np.nan, 2, 2, np.nan], + 2: [np.nan, np.nan, 3, 3]}, columns=[0, 1, 2], index=exp_idx) tm.assert_frame_equal(result, exp) From 898c2a2f6609f6eba74929e3f35ec735fe8dbc76 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 6 Nov 2018 13:21:24 -0600 Subject: [PATCH 051/122] DOC/TST: Fix warning in concat docstring (#23529) xref https://github.com/pandas-dev/pandas/issues/23149 --- pandas/core/reshape/concat.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 0443f3246c05f..f01c9d29fd457 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -177,12 +177,12 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, letter number animal 0 c 3 cat 1 d 4 dog - >>> pd.concat([df1, df3]) - animal letter number - 0 NaN a 1 - 1 NaN b 2 - 0 cat c 3 - 1 dog d 4 + >>> pd.concat([df1, df3], sort=False) + letter number animal + 0 a 1 NaN + 1 b 2 NaN + 0 c 3 cat + 1 d 4 dog Combine ``DataFrame`` objects with overlapping columns and return only those that are shared by passing ``inner`` to From fa8e1304e16ca802d68f8895d5acfa7eda999b5a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 6 Nov 2018 19:48:58 +0000 Subject: [PATCH 052/122] CI: Moving CI dependencies to "ci/deps" (#23526) --- .circleci/config.yml | 2 +- .travis.yml | 18 +++++++++--------- ci/azure/linux.yml | 8 ++++---- ci/azure/macos.yml | 2 +- ci/azure/windows-py27.yml | 2 +- ci/azure/windows.yml | 2 +- ci/{ => deps}/azure-27-compat.yaml | 0 ci/{ => deps}/azure-36-locale_slow.yaml | 0 ci/{ => deps}/azure-37-locale.yaml | 0 ci/{ => deps}/azure-macos-35.yaml | 0 ci/{ => deps}/azure-windows-27.yaml | 0 ci/{ => deps}/azure-windows-36.yaml | 0 ci/{ => deps}/circle-36-locale.yaml | 0 ci/{ => deps}/travis-27-locale.yaml | 0 ci/{ => deps}/travis-27.yaml | 0 ci/{ => deps}/travis-36-doc.yaml | 0 ci/{ => deps}/travis-36-slow.yaml | 0 ci/{ => deps}/travis-36.yaml | 0 ci/{ => deps}/travis-37-numpydev.yaml | 0 ci/{ => deps}/travis-37.yaml | 0 ci/incremental/setup_conda_environment.cmd | 2 +- 21 files changed, 18 insertions(+), 18 deletions(-) rename ci/{ => deps}/azure-27-compat.yaml (100%) rename ci/{ => 
deps}/azure-36-locale_slow.yaml (100%) rename ci/{ => deps}/azure-37-locale.yaml (100%) rename ci/{ => deps}/azure-macos-35.yaml (100%) rename ci/{ => deps}/azure-windows-27.yaml (100%) rename ci/{ => deps}/azure-windows-36.yaml (100%) rename ci/{ => deps}/circle-36-locale.yaml (100%) rename ci/{ => deps}/travis-27-locale.yaml (100%) rename ci/{ => deps}/travis-27.yaml (100%) rename ci/{ => deps}/travis-36-doc.yaml (100%) rename ci/{ => deps}/travis-36-slow.yaml (100%) rename ci/{ => deps}/travis-36.yaml (100%) rename ci/{ => deps}/travis-37-numpydev.yaml (100%) rename ci/{ => deps}/travis-37.yaml (100%) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5b10036818901..cdfe93613fbdd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -21,7 +21,7 @@ jobs: environment: JOB: "3.6_LOCALE" - ENV_FILE: "ci/circle-36-locale.yaml" + ENV_FILE: "ci/deps/circle-36-locale.yaml" LOCALE_OVERRIDE: "zh_CN.UTF-8" MINICONDA_DIR: /home/ubuntu/miniconda3 steps: diff --git a/.travis.yml b/.travis.yml index 8ac4d827b0820..9fac09e1fa788 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,28 +34,28 @@ matrix: include: - dist: trusty env: - - JOB="3.7" ENV_FILE="ci/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network" + - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - - JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true + - JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/deps/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true addons: apt: packages: - language-pack-zh-hans - dist: trusty env: - - JOB="2.7" ENV_FILE="ci/travis-27.yaml" TEST_ARGS="--skip-slow" + - JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" TEST_ARGS="--skip-slow" addons: apt: packages: - python-gtk2 - dist: trusty env: - - JOB="3.6, lint, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true + - JOB="3.6, lint, coverage" ENV_FILE="ci/deps/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true - dist: trusty env: - - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" + - JOB="3.7, NumPy dev" ENV_FILE="ci/deps/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" addons: apt: packages: @@ -64,19 +64,19 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true + - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true # In allow_failures - dist: trusty env: - - JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true + - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true allow_failures: - dist: trusty env: - - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true + - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true - dist: trusty env: - - JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true + - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true before_install: - echo "before_install" diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index f34cba69a6195..1f62c30f6dfef 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -10,20 +10,20 @@ jobs: maxParallel: 11 matrix: py27_np_19: - ENV_FILE: ci/azure-27-compat.yaml + ENV_FILE: ci/deps/azure-27-compat.yaml CONDA_PY: "27" CONDA_ENV: pandas TEST_ARGS: "--skip-slow 
--skip-network" py36_locale: - ENV_FILE: ci/azure-37-locale.yaml + ENV_FILE: ci/deps/azure-37-locale.yaml CONDA_PY: "37" CONDA_ENV: pandas TEST_ARGS: "--skip-slow --skip-network" LOCALE_OVERRIDE: "zh_CN.UTF-8" py36_locale_slow: - ENV_FILE: ci/azure-36-locale_slow.yaml + ENV_FILE: ci/deps/azure-36-locale_slow.yaml CONDA_PY: "36" CONDA_ENV: pandas TEST_ARGS: "--only-slow --skip-network" @@ -53,4 +53,4 @@ jobs: - task: PublishTestResults@2 inputs: testResultsFiles: 'test-data-*.xml' - testRunTitle: 'Linux' \ No newline at end of file + testRunTitle: 'Linux' diff --git a/ci/azure/macos.yml b/ci/azure/macos.yml index 53ce51c76683c..1a44933b75853 100644 --- a/ci/azure/macos.yml +++ b/ci/azure/macos.yml @@ -10,7 +10,7 @@ jobs: maxParallel: 11 matrix: py35_np_120: - ENV_FILE: ci/azure-macos-35.yaml + ENV_FILE: ci/deps/azure-macos-35.yaml CONDA_PY: "35" CONDA_ENV: pandas TEST_ARGS: "--skip-slow --skip-network" diff --git a/ci/azure/windows-py27.yml b/ci/azure/windows-py27.yml index 991e7adc4e9d6..1f82bda5988dc 100644 --- a/ci/azure/windows-py27.yml +++ b/ci/azure/windows-py27.yml @@ -10,7 +10,7 @@ jobs: maxParallel: 11 matrix: py36_np121: - ENV_FILE: ci/azure-windows-27.yaml + ENV_FILE: ci/deps/azure-windows-27.yaml CONDA_PY: "27" CONDA_ENV: pandas diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index e2e545978e09d..74235fc64d634 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -10,7 +10,7 @@ jobs: maxParallel: 11 matrix: py36_np14: - ENV_FILE: ci/azure-windows-36.yaml + ENV_FILE: ci/deps/azure-windows-36.yaml CONDA_PY: "36" CONDA_ENV: pandas diff --git a/ci/azure-27-compat.yaml b/ci/deps/azure-27-compat.yaml similarity index 100% rename from ci/azure-27-compat.yaml rename to ci/deps/azure-27-compat.yaml diff --git a/ci/azure-36-locale_slow.yaml b/ci/deps/azure-36-locale_slow.yaml similarity index 100% rename from ci/azure-36-locale_slow.yaml rename to ci/deps/azure-36-locale_slow.yaml diff --git a/ci/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml similarity index 100% rename from ci/azure-37-locale.yaml rename to ci/deps/azure-37-locale.yaml diff --git a/ci/azure-macos-35.yaml b/ci/deps/azure-macos-35.yaml similarity index 100% rename from ci/azure-macos-35.yaml rename to ci/deps/azure-macos-35.yaml diff --git a/ci/azure-windows-27.yaml b/ci/deps/azure-windows-27.yaml similarity index 100% rename from ci/azure-windows-27.yaml rename to ci/deps/azure-windows-27.yaml diff --git a/ci/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml similarity index 100% rename from ci/azure-windows-36.yaml rename to ci/deps/azure-windows-36.yaml diff --git a/ci/circle-36-locale.yaml b/ci/deps/circle-36-locale.yaml similarity index 100% rename from ci/circle-36-locale.yaml rename to ci/deps/circle-36-locale.yaml diff --git a/ci/travis-27-locale.yaml b/ci/deps/travis-27-locale.yaml similarity index 100% rename from ci/travis-27-locale.yaml rename to ci/deps/travis-27-locale.yaml diff --git a/ci/travis-27.yaml b/ci/deps/travis-27.yaml similarity index 100% rename from ci/travis-27.yaml rename to ci/deps/travis-27.yaml diff --git a/ci/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml similarity index 100% rename from ci/travis-36-doc.yaml rename to ci/deps/travis-36-doc.yaml diff --git a/ci/travis-36-slow.yaml b/ci/deps/travis-36-slow.yaml similarity index 100% rename from ci/travis-36-slow.yaml rename to ci/deps/travis-36-slow.yaml diff --git a/ci/travis-36.yaml b/ci/deps/travis-36.yaml similarity index 100% rename from ci/travis-36.yaml rename to ci/deps/travis-36.yaml diff --git 
a/ci/travis-37-numpydev.yaml b/ci/deps/travis-37-numpydev.yaml similarity index 100% rename from ci/travis-37-numpydev.yaml rename to ci/deps/travis-37-numpydev.yaml diff --git a/ci/travis-37.yaml b/ci/deps/travis-37.yaml similarity index 100% rename from ci/travis-37.yaml rename to ci/deps/travis-37.yaml diff --git a/ci/incremental/setup_conda_environment.cmd b/ci/incremental/setup_conda_environment.cmd index b4446c49fabd3..35595ffb03695 100644 --- a/ci/incremental/setup_conda_environment.cmd +++ b/ci/incremental/setup_conda_environment.cmd @@ -13,7 +13,7 @@ conda list @rem Clean up any left-over from a previous build conda remove --all -q -y -n %CONDA_ENV% @rem Scipy, CFFI, jinja2 and IPython are optional dependencies, but exercised in the test suite -conda env create -n %CONDA_ENV% --file=ci\azure-windows-%CONDA_PY%.yaml +conda env create -n %CONDA_ENV% --file=ci\deps\azure-windows-%CONDA_PY%.yaml call activate %CONDA_ENV% conda list From 278124e47bc2ad2455d5481e6b527e786086f6f7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 6 Nov 2018 12:49:42 -0800 Subject: [PATCH 053/122] Fix+test timedelta64(nat) ops (#23425) Follow-up to gh-23320. --- pandas/core/arrays/datetimelike.py | 14 +++++++++++++- pandas/core/arrays/period.py | 11 ++++++++--- pandas/core/indexes/base.py | 2 +- pandas/tests/arithmetic/test_datetime64.py | 19 +++++++++++++++++++ pandas/tests/arithmetic/test_period.py | 20 +++++++++++++++++++- pandas/tests/arithmetic/test_timedelta64.py | 18 ++++++++++++++++++ 6 files changed, 78 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 58044aeb7d84c..92de1fe2e0679 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -36,6 +36,7 @@ is_object_dtype) from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame, ABCIndexClass from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr @@ -370,6 +371,12 @@ def _add_timedeltalike_scalar(self, other): Add a delta of a timedeltalike return the i8 result view """ + if isna(other): + # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds + new_values = np.empty(len(self), dtype='i8') + new_values[:] = iNaT + return new_values + inc = delta_to_nanoseconds(other) new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view('i8') @@ -442,7 +449,7 @@ def _sub_period_array(self, other): Array of DateOffset objects; nulls represented by NaT """ if not is_period_dtype(self): - raise TypeError("cannot subtract {dtype}-dtype to {cls}" + raise TypeError("cannot subtract {dtype}-dtype from {cls}" .format(dtype=other.dtype, cls=type(self).__name__)) @@ -741,6 +748,11 @@ def __rsub__(self, other): raise TypeError("cannot subtract {cls} from {typ}" .format(cls=type(self).__name__, typ=type(other).__name__)) + elif is_period_dtype(self) and is_timedelta64_dtype(other): + # TODO: Can we simplify/generalize these cases at all? 
+ raise TypeError("cannot subtract {cls} from {dtype}" + .format(cls=type(self).__name__, + dtype=other.dtype)) return -(self - other) cls.__rsub__ = __rsub__ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ea7eeb7fc9f8e..5a75f2706b218 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -35,7 +35,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCIndexClass, ABCPeriodIndex ) -from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.missing import isna, notna from pandas.core.missing import pad_1d, backfill_1d import pandas.core.common as com @@ -149,6 +149,8 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): period_array : Create a new PeriodArray pandas.PeriodIndex : Immutable Index for period data """ + # array priority higher than numpy scalars + __array_priority__ = 1000 _attributes = ["freq"] _typ = "periodarray" # ABCPeriodArray @@ -761,12 +763,15 @@ def _add_timedeltalike_scalar(self, other): assert isinstance(self.freq, Tick) # checked by calling function assert isinstance(other, (timedelta, np.timedelta64, Tick)) - delta = self._check_timedeltalike_freq_compat(other) + if notna(other): + # special handling for np.timedelta64("NaT"), avoid calling + # _check_timedeltalike_freq_compat as that would raise TypeError + other = self._check_timedeltalike_freq_compat(other) # Note: when calling parent class's _add_timedeltalike_scalar, # it will call delta_to_nanoseconds(delta). Because delta here # is an integer, delta_to_nanoseconds will return it unchanged. - ordinals = super(PeriodArray, self)._add_timedeltalike_scalar(delta) + ordinals = super(PeriodArray, self)._add_timedeltalike_scalar(other) return ordinals def _add_delta_tdi(self, other): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7434a02043d65..6e65d6899787f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4707,7 +4707,7 @@ def _evaluate_with_timedelta_like(self, other, op): 'radd', 'rsub']: raise TypeError("Operation {opname} between {cls} and {other} " "is invalid".format(opname=op.__name__, - cls=type(self).__name__, + cls=self.dtype, other=type(other).__name__)) other = Timedelta(other) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b71ad08cb523e..4f1a26ae50c3b 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1188,6 +1188,25 @@ def test_dti_isub_timedeltalike(self, tz_naive_fixture, two_hours): rng -= two_hours tm.assert_index_equal(rng, expected) + def test_dt64arr_add_sub_td64_nat(self, box, tz_naive_fixture): + # GH#23320 special handling for timedelta64("NaT") + tz = tz_naive_fixture + dti = pd.date_range("1994-04-01", periods=9, tz=tz, freq="QS") + other = np.timedelta64("NaT") + expected = pd.DatetimeIndex(["NaT"] * 9, tz=tz) + + obj = tm.box_expected(dti, box) + expected = tm.box_expected(expected, box) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + with pytest.raises(TypeError): + other - obj + # ------------------------------------------------------------- # Binary operations DatetimeIndex and TimedeltaIndex/array def test_dti_add_tdi(self, tz_naive_fixture): diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index d2d725b6dc595..c52112a4fa147 100644 --- 
a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -419,7 +419,7 @@ def test_pi_add_sub_td64_array_non_tick_raises(self): with pytest.raises(period.IncompatibleFrequency): rng - tdarr - with pytest.raises(period.IncompatibleFrequency): + with pytest.raises(TypeError): tdarr - rng def test_pi_add_sub_td64_array_tick(self): @@ -801,6 +801,24 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng -= other + def test_parr_add_sub_td64_nat(self, box): + # GH#23320 special handling for timedelta64("NaT") + pi = pd.period_range("1994-04-01", periods=9, freq="19D") + other = np.timedelta64("NaT") + expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") + + obj = tm.box_expected(pi, box) + expected = tm.box_expected(expected, box) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + with pytest.raises(TypeError): + other - obj + class TestPeriodSeriesArithmetic(object): def test_ops_series_timedelta(self): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index d1ea51a46889f..902d0716aed8d 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -735,6 +735,24 @@ def test_td64arr_add_sub_tdi(self, box_df_broadcast_failure, names): else: assert result.dtypes[0] == 'timedelta64[ns]' + def test_td64arr_add_sub_td64_nat(self, box): + # GH#23320 special handling for timedelta64("NaT") + tdi = pd.TimedeltaIndex([NaT, Timedelta('1s')]) + other = np.timedelta64("NaT") + expected = pd.TimedeltaIndex(["NaT"] * 2) + + obj = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + result = other - obj + tm.assert_equal(result, expected) + def test_td64arr_sub_NaT(self, box): # GH#18808 ser = Series([NaT, Timedelta('1s')]) From 863e628f15413d97ad49fe6c8026f5b02352663a Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Wed, 7 Nov 2018 01:17:36 +0000 Subject: [PATCH 054/122] CLN: Isort io and compat dirs (#23534) --- pandas/compat/numpy/function.py | 10 +++++---- pandas/compat/pickle_compat.py | 10 +++++---- pandas/io/clipboard/clipboards.py | 4 +++- pandas/io/clipboard/windows.py | 5 +++-- pandas/io/formats/console.py | 3 ++- pandas/io/formats/csvs.py | 19 ++++++---------- pandas/io/formats/excel.py | 9 ++++---- pandas/io/formats/format.py | 36 ++++++++++++------------------- pandas/io/formats/html.py | 15 ++++++------- pandas/io/formats/latex.py | 5 +++-- pandas/io/formats/printing.py | 5 ++++- pandas/io/formats/style.py | 33 +++++++++++++++------------- pandas/io/formats/terminal.py | 2 +- pandas/io/json/json.py | 20 ++++++++++------- pandas/io/json/normalize.py | 6 ++++-- pandas/io/json/table_schema.py | 11 +++++----- pandas/io/sas/sas7bdat.py | 10 ++++----- setup.cfg | 17 --------------- 18 files changed, 106 insertions(+), 114 deletions(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index a4232ca2ebf78..30fdeca35faf3 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -19,11 +19,13 @@ """ from numpy import ndarray -from pandas.util._validators import (validate_args, validate_kwargs, - validate_args_and_kwargs) 
-from pandas.errors import UnsupportedFunctionCall -from pandas.core.dtypes.common import is_integer, is_bool + from pandas.compat import OrderedDict +from pandas.errors import UnsupportedFunctionCall +from pandas.util._validators import ( + validate_args, validate_args_and_kwargs, validate_kwargs) + +from pandas.core.dtypes.common import is_bool, is_integer class CompatValidator(object): diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index a8fd54e39091b..5ac8753c3a2d5 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -2,12 +2,14 @@ Support pre-0.12 series pickle compatibility. """ -import sys -import pandas # noqa import copy import pickle as pkl -from pandas import compat, Index -from pandas.compat import u, string_types # noqa +import sys + +from pandas.compat import string_types, u # noqa + +import pandas # noqa +from pandas import Index, compat def load_reduce(self): diff --git a/pandas/io/clipboard/clipboards.py b/pandas/io/clipboard/clipboards.py index 0793ca6877cdb..d6d0ba0a560bb 100644 --- a/pandas/io/clipboard/clipboards.py +++ b/pandas/io/clipboard/clipboards.py @@ -1,7 +1,9 @@ import subprocess -from .exceptions import PyperclipException + from pandas.compat import PY2, text_type +from .exceptions import PyperclipException + EXCEPT_MSG = """ Pyperclip could not find a copy/paste mechanism for your system. For more information, please visit https://pyperclip.readthedocs.org """ diff --git a/pandas/io/clipboard/windows.py b/pandas/io/clipboard/windows.py index 5fc23f7102f41..3d979a61b5f2d 100644 --- a/pandas/io/clipboard/windows.py +++ b/pandas/io/clipboard/windows.py @@ -1,10 +1,11 @@ """ This module implements clipboard handling on Windows using ctypes. """ -import time import contextlib import ctypes -from ctypes import c_size_t, sizeof, c_wchar_p, get_errno, c_wchar +from ctypes import c_size_t, c_wchar, c_wchar_p, get_errno, sizeof +import time + from .exceptions import PyperclipWindowsException diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index e347f6bce0168..64168dd7db1b8 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -2,8 +2,9 @@ Internal module for console introspection """ -import sys import locale +import sys + from pandas.io.formats.terminal import get_terminal_size # ----------------------------------------------------------------------------- diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 115e885a23b96..46c843af043e7 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -5,29 +5,24 @@ from __future__ import print_function -import warnings - import csv as csvlib +import os +import warnings from zipfile import ZipFile import numpy as np -import os from pandas._libs import writers as libwriters - -from pandas import compat from pandas.compat import StringIO, range, zip -from pandas.core.dtypes.missing import notna from pandas.core.dtypes.generic import ( - ABCMultiIndex, ABCPeriodIndex, ABCDatetimeIndex, ABCIndexClass) + ABCDatetimeIndex, ABCIndexClass, ABCMultiIndex, ABCPeriodIndex) +from pandas.core.dtypes.missing import notna + +from pandas import compat from pandas.io.common import ( - _get_handle, - _infer_compression, - get_filepath_or_buffer, - UnicodeWriter, -) + UnicodeWriter, _get_handle, _infer_compression, get_filepath_or_buffer) class CSVFormatter(object): diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index d6fcfb2207cf9..c2ea3715b9f3b 100644 --- 
a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -1,23 +1,24 @@ """Utilities for conversion to writer-agnostic Excel representation """ +import itertools import re import warnings -import itertools import numpy as np from pandas.compat import reduce -import pandas.core.common as com -from pandas.core.dtypes.common import is_float, is_scalar from pandas.core.dtypes import missing +from pandas.core.dtypes.common import is_float, is_scalar from pandas.core.dtypes.generic import ABCMultiIndex, ABCPeriodIndex + from pandas import Index +import pandas.core.common as com from pandas.io.formats.css import CSSResolver, CSSWarning -from pandas.io.formats.printing import pprint_thing from pandas.io.formats.format import get_level_lengths +from pandas.io.formats.printing import pprint_thing class ExcelCell(object): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ee5a1733623fc..9857129f56b0c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -5,45 +5,37 @@ """ from __future__ import print_function -# pylint: disable=W0141 from functools import partial import numpy as np from pandas._libs import lib -from pandas._libs.tslibs import NaT, iNaT, Timestamp, Timedelta from pandas._libs.tslib import format_array_from_datetime +from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT +from pandas.compat import StringIO, lzip, map, u, zip -from pandas import compat -from pandas.compat import StringIO, lzip, map, zip, u - -from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_float_dtype, - is_period_arraylike, - is_integer_dtype, - is_interval_dtype, - is_datetimetz, - is_integer, - is_float, - is_scalar, - is_numeric_dtype, - is_datetime64_dtype, - is_timedelta64_dtype, - is_list_like) -from pandas.core.dtypes.generic import ABCSparseArray, ABCMultiIndex + is_categorical_dtype, is_datetime64_dtype, is_datetimetz, is_float, + is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype, + is_list_like, is_numeric_dtype, is_period_arraylike, is_scalar, + is_timedelta64_dtype) +from pandas.core.dtypes.generic import ABCMultiIndex, ABCSparseArray +from pandas.core.dtypes.missing import isna, notna + +from pandas import compat from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.core.index import Index, ensure_index from pandas.core.config import get_option, set_option +from pandas.core.index import Index, ensure_index from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex -from pandas.io.formats.terminal import get_terminal_size from pandas.io.common import _expand_user, _stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing +from pandas.io.formats.terminal import get_terminal_size + +# pylint: disable=W0141 common_docstring = """ diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index a6b03c9c6dd23..2a2a3e57729ec 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -4,22 +4,21 @@ """ from __future__ import print_function -from distutils.version import LooseVersion +from distutils.version import LooseVersion from textwrap import dedent -from pandas import compat -from pandas.compat import (lzip, range, map, zip, u, - OrderedDict, unichr) +from pandas.compat import OrderedDict, lzip, map, range, u, unichr, zip -import pandas.core.common as com from pandas.core.dtypes.generic import ABCMultiIndex + +from pandas import 
compat +import pandas.core.common as com from pandas.core.config import get_option +from pandas.io.formats.format import ( + TableFormatter, buffer_put_lines, get_level_lengths) from pandas.io.formats.printing import pprint_thing -from pandas.io.formats.format import (get_level_lengths, - buffer_put_lines) -from pandas.io.formats.format import TableFormatter class HTMLFormatter(TableFormatter): diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index fbbad763dd97b..90be3364932a2 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -6,11 +6,12 @@ import numpy as np -from pandas import compat -from pandas.compat import range, map, zip, u +from pandas.compat import map, range, u, zip from pandas.core.dtypes.generic import ABCMultiIndex +from pandas import compat + from pandas.io.formats.format import TableFormatter diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index e22d7bce42841..f814bf965a1e9 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -3,9 +3,12 @@ """ import sys + +from pandas.compat import u + from pandas.core.dtypes.inference import is_sequence + from pandas import compat -from pandas.compat import u from pandas.core.config import get_option diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3b3238586b310..8291e0ac98cd1 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2,12 +2,27 @@ Module for applying conditional formatting to DataFrames and Series. """ +from collections import MutableMapping, defaultdict +from contextlib import contextmanager +import copy from functools import partial from itertools import product -from contextlib import contextmanager from uuid import uuid1 -import copy -from collections import defaultdict, MutableMapping + +import numpy as np + +from pandas.compat import range +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import is_float, is_string_like +from pandas.core.dtypes.generic import ABCSeries + +import pandas as pd +from pandas.api.types import is_list_like +import pandas.core.common as com +from pandas.core.config import get_option +from pandas.core.generic import _shared_docs +from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice try: from jinja2 import ( @@ -18,18 +33,6 @@ "Please install with `conda install Jinja2`\n" "or `pip install Jinja2`") -from pandas.core.dtypes.common import is_float, is_string_like - -import numpy as np -import pandas as pd -from pandas.api.types import is_list_like -from pandas.compat import range -from pandas.core.config import get_option -from pandas.core.generic import _shared_docs -import pandas.core.common as com -from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice -from pandas.util._decorators import Appender -from pandas.core.dtypes.generic import ABCSeries try: import matplotlib.pyplot as plt diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py index ac73363b92b1e..bb34259d710c7 100644 --- a/pandas/io/formats/terminal.py +++ b/pandas/io/formats/terminal.py @@ -15,8 +15,8 @@ import os import shutil -from pandas.compat import PY3 +from pandas.compat import PY3 __all__ = ['get_terminal_size', 'is_terminal'] diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index c5f8872f93d94..af7b390de213d 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -1,23 +1,27 @@ # pylint: disable-msg=E1101,W0613,W0603 from itertools import islice import os + 
import numpy as np import pandas._libs.json as json from pandas._libs.tslibs import iNaT -from pandas.compat import StringIO, long, u, to_str -from pandas import compat, isna -from pandas import Series, DataFrame, to_datetime, MultiIndex -from pandas.io.common import (get_filepath_or_buffer, _get_handle, - _infer_compression, _stringify_path, - BaseIterator) -from pandas.io.parsers import _validate_integer +from pandas.compat import StringIO, long, to_str, u + +from pandas.core.dtypes.common import is_period_dtype + +from pandas import DataFrame, MultiIndex, Series, compat, isna, to_datetime import pandas.core.common as com from pandas.core.reshape.concat import concat + +from pandas.io.common import ( + BaseIterator, _get_handle, _infer_compression, _stringify_path, + get_filepath_or_buffer) from pandas.io.formats.printing import pprint_thing +from pandas.io.parsers import _validate_integer + from .normalize import _convert_to_line_delimits from .table_schema import build_table_schema, parse_table_schema -from pandas.core.dtypes.common import is_period_dtype loads = json.loads dumps = json.dumps diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 5c7b964cf69d1..ce07a795017e5 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -1,12 +1,14 @@ # --------------------------------------------------------------------- # JSON normalization routines -import copy from collections import defaultdict +import copy + import numpy as np from pandas._libs.writers import convert_json_to_lines -from pandas import compat, DataFrame + +from pandas import DataFrame, compat def _convert_to_line_delimits(s): diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 2dc176648fb31..3b4ebb638412e 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -6,14 +6,15 @@ import warnings import pandas._libs.json as json + +from pandas.core.dtypes.common import ( + is_bool_dtype, is_categorical_dtype, is_datetime64_dtype, + is_datetime64tz_dtype, is_integer_dtype, is_numeric_dtype, is_period_dtype, + is_string_dtype, is_timedelta64_dtype) + from pandas import DataFrame from pandas.api.types import CategoricalDtype import pandas.core.common as com -from pandas.core.dtypes.common import ( - is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype, - is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype, - is_categorical_dtype, is_period_dtype, is_string_dtype -) loads = json.loads diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 3582f538c16bf..d634b5ec4f8f9 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -18,14 +18,14 @@ import numpy as np -from pandas import compat from pandas.errors import EmptyDataError -from pandas.io.common import get_filepath_or_buffer, BaseIterator -import pandas.io.sas.sas_constants as const -from pandas.io.sas._sas import Parser - import pandas as pd +from pandas import compat + +from pandas.io.common import BaseIterator, get_filepath_or_buffer +from pandas.io.sas._sas import Parser +import pandas.io.sas.sas_constants as const class _subheader_pointer(object): diff --git a/setup.cfg b/setup.cfg index 4068935d9970f..17b88d084ebf6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,23 +90,6 @@ force_grid_wrap=0 combine_as_imports=True force_sort_within_sections=True skip= - pandas/io/sas/sas7bdat.py, - pandas/io/formats/console.py, - pandas/io/formats/excel.py, - pandas/io/formats/style.py, - pandas/io/formats/printing.py, - 
pandas/io/formats/latex.py, - pandas/io/formats/csvs.py, - pandas/io/formats/html.py, - pandas/io/formats/terminal.py, - pandas/io/formats/format.py, - pandas/io/json/normalize.py, - pandas/io/json/json.py, - pandas/io/json/table_schema.py, - pandas/io/clipboard/windows.py, - pandas/io/clipboard/clipboards.py, - pandas/compat/pickle_compat.py, - pandas/compat/numpy/function.py, pandas/core/ops.py, pandas/core/categorical.py, pandas/core/api.py, From 52446476820a27cfb2281d0af1b524f3e46ef4f2 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Wed, 7 Nov 2018 02:02:58 +0000 Subject: [PATCH 055/122] CI: Fail Azure CI for failing tests (#23454) --- ci/azure/linux.yml | 25 +++++++++++++++++++++++++ ci/azure/macos.yml | 25 +++++++++++++++++++++++++ ci/azure/windows-py27.yml | 13 +++++++++++++ ci/azure/windows.yml | 13 +++++++++++++ ci/deps/azure-macos-35.yaml | 2 +- 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index 1f62c30f6dfef..b5a8e36d5097d 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -54,3 +54,28 @@ jobs: inputs: testResultsFiles: 'test-data-*.xml' testRunTitle: 'Linux' + - powershell: | + $junitXml = "test-data-single.xml" + $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"' + if ($matches[1] -eq 0) + { + Write-Host "No test failures in test-data-single" + } + else + { + # note that this will produce $LASTEXITCODE=1 + Write-Error "$($matches[1]) tests failed" + } + + $junitXmlMulti = "test-data-multiple.xml" + $(Get-Content $junitXmlMulti | Out-String) -match 'failures="(.*?)"' + if ($matches[1] -eq 0) + { + Write-Host "No test failures in test-data-multi" + } + else + { + # note that this will produce $LASTEXITCODE=1 + Write-Error "$($matches[1]) tests failed" + } + displayName: Check for test failures diff --git a/ci/azure/macos.yml b/ci/azure/macos.yml index 1a44933b75853..16f2fa2d4890f 100644 --- a/ci/azure/macos.yml +++ b/ci/azure/macos.yml @@ -41,3 +41,28 @@ jobs: inputs: testResultsFiles: 'test-data-*.xml' testRunTitle: 'MacOS-35' + - powershell: | + $junitXml = "test-data-single.xml" + $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"' + if ($matches[1] -eq 0) + { + Write-Host "No test failures in test-data-single" + } + else + { + # note that this will produce $LASTEXITCODE=1 + Write-Error "$($matches[1]) tests failed" + } + + $junitXmlMulti = "test-data-multiple.xml" + $(Get-Content $junitXmlMulti | Out-String) -match 'failures="(.*?)"' + if ($matches[1] -eq 0) + { + Write-Host "No test failures in test-data-multi" + } + else + { + # note that this will produce $LASTEXITCODE=1 + Write-Error "$($matches[1]) tests failed" + } + displayName: Check for test failures \ No newline at end of file diff --git a/ci/azure/windows-py27.yml b/ci/azure/windows-py27.yml index 1f82bda5988dc..fd72b7080e84d 100644 --- a/ci/azure/windows-py27.yml +++ b/ci/azure/windows-py27.yml @@ -43,3 +43,16 @@ jobs: inputs: testResultsFiles: 'test-data.xml' testRunTitle: 'Windows 27' + - powershell: | + $junitXml = "test-data.xml" + $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"' + if ($matches[1] -eq 0) + { + Write-Host "No test failures in test-data" + } + else + { + # note that this will produce $LASTEXITCODE=1 + Write-Error "$($matches[1]) tests failed" + } + displayName: Check for test failures diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 74235fc64d634..9b87ac7711f40 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -34,3 +34,16 @@ jobs: inputs: testResultsFiles: 
'test-data.xml' testRunTitle: 'Windows 36' + - powershell: | + $junitXml = "test-data.xml" + $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"' + if ($matches[1] -eq 0) + { + Write-Host "No test failures in test-data" + } + else + { + # note that this will produce $LASTEXITCODE=1 + Write-Error "$($matches[1]) tests failed" + } + displayName: Check for test failures \ No newline at end of file diff --git a/ci/deps/azure-macos-35.yaml b/ci/deps/azure-macos-35.yaml index 065deb914dae6..6ccdc79d11b27 100644 --- a/ci/deps/azure-macos-35.yaml +++ b/ci/deps/azure-macos-35.yaml @@ -12,7 +12,7 @@ dependencies: - nomkl - numexpr - numpy=1.12.0 - - openpyxl + - openpyxl=2.5.5 - pytables - python=3.5* - pytz From c6112726441955174f028fe20e9f1baa3fb7632d Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 7 Nov 2018 08:51:16 +0000 Subject: [PATCH 056/122] Make validate_docstrings.py ready for the CI (#23514) * validate_docstrings.py to exit with status code as the number of errors (so, 0 for no errors) * Implemented different output types for the validate_all, and a prefix to filter which docstrings are validated * Codifying errors * Adding --errors parameter to be able to validate only specific errors --- scripts/tests/test_validate_docstrings.py | 164 +++++++--- scripts/validate_docstrings.py | 368 +++++++++++++++------- 2 files changed, 387 insertions(+), 145 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index a3feee6552178..cf8abd1680341 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -1,14 +1,13 @@ -import string -import random import io +import random +import string +import textwrap import pytest import numpy as np - +from pandas.util.testing import capture_stderr import validate_docstrings validate_one = validate_docstrings.validate_one -from pandas.util.testing import capture_stderr - class GoodDocStrings(object): """ @@ -712,9 +711,9 @@ def test_bad_generic_functions(self, func): ('BadSummaries', 'no_capitalization', ('Summary must start with infinitive verb',)), ('BadSummaries', 'multi_line', - ('Summary should fit in a single line.',)), + ('Summary should fit in a single line',)), ('BadSummaries', 'two_paragraph_multi_line', - ('Summary should fit in a single line.',)), + ('Summary should fit in a single line',)), # Parameters tests ('BadParameters', 'missing_params', ('Parameters {**kwargs} not documented',)), @@ -753,66 +752,67 @@ def test_bad_generic_functions(self, func): marks=pytest.mark.xfail), # Examples tests ('BadGenericDocStrings', 'method', - ('numpy does not need to be imported in the examples',)), + ('Do not import numpy, as it is imported automatically',)), ('BadGenericDocStrings', 'method', - ('pandas does not need to be imported in the examples',)), + ('Do not import pandas, as it is imported automatically',)), # See Also tests ('BadSeeAlso', 'prefix_pandas', ('pandas.Series.rename in `See Also` section ' 'does not need `pandas` prefix',)), # Examples tests ('BadExamples', 'unused_import', - ('1 F401 \'pandas as pdf\' imported but unused',)), + ("flake8 error: F401 'pandas as pdf' imported but unused",)), ('BadExamples', 'indentation_is_not_a_multiple_of_four', - ('1 E111 indentation is not a multiple of four',)), + ('flake8 error: E111 indentation is not a multiple of four',)), ('BadExamples', 'missing_whitespace_around_arithmetic_operator', - ('1 E226 missing whitespace around arithmetic operator',)), + ('flake8 error: ' + 'E226 missing 
whitespace around arithmetic operator',)), ('BadExamples', 'missing_whitespace_after_comma', - ('3 E231 missing whitespace after \',\'',)), + ("flake8 error: E231 missing whitespace after ',' (3 times)",)), ]) def test_bad_examples(self, capsys, klass, func, msgs): result = validate_one(self._import_path(klass=klass, func=func)) for msg in msgs: - assert msg in ' '.join(result['errors']) + assert msg in ' '.join(err[1] for err in result['errors']) class ApiItems(object): @property def api_doc(self): - return io.StringIO(''' -.. currentmodule:: itertools + return io.StringIO(textwrap.dedent(''' + .. currentmodule:: itertools -Itertools ---------- + Itertools + --------- -Infinite -~~~~~~~~ + Infinite + ~~~~~~~~ -.. autosummary:: + .. autosummary:: - cycle - count + cycle + count -Finite -~~~~~~ + Finite + ~~~~~~ -.. autosummary:: + .. autosummary:: - chain + chain -.. currentmodule:: random + .. currentmodule:: random -Random ------- + Random + ------ -All -~~~ + All + ~~~ -.. autosummary:: + .. autosummary:: - seed - randint -''') + seed + randint + ''')) @pytest.mark.parametrize('idx,name', [(0, 'itertools.cycle'), (1, 'itertools.count'), @@ -850,3 +850,95 @@ def test_item_section(self, idx, section): def test_item_subsection(self, idx, subsection): result = list(validate_docstrings.get_api_items(self.api_doc)) assert result[idx][3] == subsection + + +class MainFunction(object): + def test_num_errors_for_validate_one(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_one', + lambda func_name: {'docstring': 'docstring1', + 'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')], + 'warnings': [], + 'examples_errors': ''}) + num_errors = validate_docstrings.main(func_name='docstring1', + prefix=None, + errors=[], + output_format='default') + assert num_errors == 3 + + def test_no_num_errors_for_validate_one(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_one', + lambda func_name: {'docstring': 'docstring1', + 'errors': [], + 'warnings': [('WN01', 'warn desc')], + 'examples_errors': ''}) + num_errors = validate_docstrings.main(func_name='docstring1', + prefix=None, + errors=[], + output_format='default') + assert num_errors == 0 + + def test_num_errors_for_validate_all(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'docstring1': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'docstring2': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')]}}) + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert num_errors == 5 + + def test_no_num_errors_for_validate_all(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'docstring1': {'errors': [], + 'warnings': [('WN01', 'warn desc')]}, + 'docstring2': {'errors': []}}) + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert num_errors == 0 + + def test_prefix_param_filters_docstrings(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'DataFrame.bar': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')]}, + 'Series.foobar': {'errors': [('ER06', 'err desc')]}}) + num_errors = validate_docstrings.main(func_name=None, + prefix='Series.', + errors=[], + output_format='default') +
assert num_errors == 4 + + def test_errors_param_filters_errors(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, 'validate_all', + lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'DataFrame.bar': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc')]}, + 'Series.foobar': {'errors': [('ER01', 'err desc')]}}) + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=['ER01'], + output_format='default') + assert num_errors == 3 + + num_errors = validate_docstrings.main(func_name=None, + prefix=None, + errors=['ER03'], + output_format='default') + assert num_errors == 1 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index ef6465c3e988d..08fd3a4ce54d4 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -47,6 +47,83 @@ PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] DIRECTIVES = ['versionadded', 'versionchanged', 'deprecated'] +ERROR_MSGS = { + 'GL01': 'Docstring text (summary) should start in the line immediately ' + 'after the opening quotes (not in the same line, or leaving a ' + 'blank line in between)', + 'GL02': 'Closing quotes should be placed in the line after the last text ' + 'in the docstring (do not close the quotes in the same line as ' + 'the text, or leave a blank line between the last text and the ' + 'quotes)', + 'GL03': 'Use only one blank line to separate sections or paragraphs', + 'GL04': 'Private classes ({mentioned_private_classes}) should not be ' + 'mentioned in public docstring', + 'GL05': 'Tabs found at the start of line "{line_with_tabs}", please use ' + 'whitespace only', + 'SS01': 'No summary found (a short summary in a single line should be ' + 'present at the beginning of the docstring)', + 'SS02': 'Summary does not start with a capital letter', + 'SS03': 'Summary does not end with a period', + 'SS04': 'Summary contains heading whitespaces', + 'SS05': 'Summary must start with infinitive verb, not third person ' + '(e.g. use "Generate" instead of "Generates")', + 'SS06': 'Summary should fit in a single line', + 'ES01': 'No extended summary found', + 'PR01': 'Parameters {missing_params} not documented', + 'PR02': 'Unknown parameters {unknown_params}', + 'PR03': 'Wrong parameters order. Actual: {actual_params}. 
' + 'Documented: {documented_params}', + 'PR04': 'Parameter "{param_name}" has no type', + 'PR05': 'Parameter "{param_name}" type should not finish with "."', + 'PR06': 'Parameter "{param_name}" type should use "{right_type}" instead ' + 'of "{wrong_type}"', + 'PR07': 'Parameter "{param_name}" has no description', + 'PR08': 'Parameter "{param_name}" description should start with a ' + 'capital letter', + 'PR09': 'Parameter "{param_name}" description should finish with "."', + 'RT01': 'No Returns section found', + 'YD01': 'No Yields section found', + 'SA01': 'See Also section not found', + 'SA02': 'Missing period at end of description for See Also ' + '"{reference_name}" reference', + 'SA03': 'Description should be capitalized for See Also ' + '"{reference_name}" reference', + 'SA04': 'Missing description for See Also "{reference_name}" reference', + 'SA05': '{reference_name} in `See Also` section does not need `pandas` ' + 'prefix, use {right_reference} instead.', + 'EX01': 'No examples section found', + 'EX02': 'Examples do not pass tests:\n{doctest_log}', + 'EX03': 'flake8 error: {error_code} {error_message}{times_happening}', + 'EX04': 'Do not import {imported_library}, as it is imported ' + 'automatically for the examples (numpy as np, pandas as pd)', +} + + +def error(code, **kwargs): + """ + Return a tuple with the error code and the message with variables replaced. + + This is syntactic sugar so instead of: + - `('EX02', ERROR_MSGS['EX02'].format(doctest_log=log))` + + We can simply use: + - `error('EX02', doctest_log=log)` + + Parameters + ---------- + code : str + Error code. + **kwargs + Values for the variables in the error messages + + Returns + ------- + code : str + Error code. + message : str + Error message with variables replaced. + """ + return (code, ERROR_MSGS[code].format(**kwargs)) def get_api_items(api_doc_fd): @@ -322,16 +399,15 @@ def parameter_mismatches(self): doc_params = tuple(self.doc_parameters) missing = set(signature_params) - set(doc_params) if missing: - errs.append( - 'Parameters {} not documented'.format(pprint_thing(missing))) + errs.append(error('PR01', missing_params=pprint_thing(missing))) extra = set(doc_params) - set(signature_params) if extra: - errs.append('Unknown parameters {}'.format(pprint_thing(extra))) + errs.append(error('PR02', unknown_params=pprint_thing(extra))) if (not missing and not extra and signature_params != doc_params and not (not signature_params and not doc_params)): - errs.append('Wrong parameters order. ' + - 'Actual: {!r}. '.format(signature_params) + - 'Documented: {!r}'.format(doc_params)) + errs.append(error('PR03', + actual_params=signature_params, + documented_params=doc_params)) return errs @@ -415,6 +491,8 @@ def validate_pep8(self): if not self.examples: return + # F401 is needed to not generate flake8 errors in examples + # that do not use numpy or pandas content = ''.join(('import numpy as np  # noqa: F401\n', 'import pandas as pd  # noqa: F401\n', *self.examples_source_code)) @@ -446,134 +524,135 @@ def validate_one(func_name): dict A dictionary containing all the information obtained from validating the docstring.
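As a usage sketch of the codified errors, built from the ERROR_MSGS table and the error() helper above (argument values illustrative):

    >>> error('PR06', param_name='copy', right_type='bool', wrong_type='boolean')
    ('PR06', 'Parameter "copy" type should use "bool" instead of "boolean"')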
+ + Notes + ----- + The errors codes are defined as: + - First two characters: Section where the error happens: + * GL: Global (no section, like section ordering errors) + * SS: Short summary + * ES: Extended summary + * PR: Parameters + * RT: Returns + * YD: Yields + * RS: Raises + * WN: Warns + * SA: See Also + * NT: Notes + * RF: References + * EX: Examples + - Last two characters: Numeric error code inside the section + + For example, EX02 is the second codified error in the Examples section + (which in this case is assigned to examples that do not pass the tests). + + The error codes, their corresponding error messages, and the details on how + they are validated, are not documented more than in the source code of this + function. """ doc = Docstring(func_name) errs = [] wrns = [] if doc.start_blank_lines != 1: - errs.append('Docstring text (summary) should start in the line ' - 'immediately after the opening quotes (not in the same ' - 'line, or leaving a blank line in between)') + errs.append(error('GL01')) if doc.end_blank_lines != 1: - errs.append('Closing quotes should be placed in the line after ' - 'the last text in the docstring (do not close the ' - 'quotes in the same line as the text, or leave a ' - 'blank line between the last text and the quotes)') + errs.append(error('GL02')) if doc.double_blank_lines: - errs.append('Use only one blank line to separate sections or ' - 'paragraphs') + errs.append(error('GL03')) + mentioned_errs = doc.mentioned_private_classes + if mentioned_errs: + errs.append(error('GL04'), mentioned_private_classes=mentioned_errs) + for line in doc.raw_doc.splitlines(): + if re.match("^ *\t", line): + errs.append(error('GL05', line_with_tabs=line.lstrip())) if not doc.summary: - errs.append('No summary found (a short summary in a single line ' - 'should be present at the beginning of the docstring)') + errs.append(error('SS01')) else: if not doc.summary[0].isupper(): - errs.append('Summary does not start with a capital letter') + errs.append(error('SS02')) if doc.summary[-1] != '.': - errs.append('Summary does not end with a period') + errs.append(error('SS03')) if doc.summary != doc.summary.lstrip(): - errs.append('Summary contains heading whitespaces.') + errs.append(error('SS04')) elif (doc.is_function_or_method and doc.summary.split(' ')[0][-1] == 's'): - errs.append('Summary must start with infinitive verb, ' - 'not third person (e.g. 
use "Generate" instead of ' - '"Generates")') + errs.append(error('SS05')) if doc.num_summary_lines > 1: - errs.append("Summary should fit in a single line.") + errs.append(error('SS06')) + if not doc.extended_summary: - wrns.append('No extended summary found') + wrns.append(('ES01', 'No extended summary found')) + + # PR01: Parameters not documented + # PR02: Unknown parameters + # PR03: Wrong parameters order + errs += doc.parameter_mismatches - param_errs = doc.parameter_mismatches for param in doc.doc_parameters: if not param.startswith("*"): # Check can ignore var / kwargs if not doc.parameter_type(param): - param_errs.append('Parameter "{}" has no type'.format(param)) + errs.append(error('PR04', param_name=param)) else: if doc.parameter_type(param)[-1] == '.': - param_errs.append('Parameter "{}" type should ' - 'not finish with "."'.format(param)) + errs.append(error('PR05', param_name=param)) common_type_errors = [('integer', 'int'), ('boolean', 'bool'), ('string', 'str')] - for incorrect_type, correct_type in common_type_errors: - if incorrect_type in doc.parameter_type(param): - param_errs.append('Parameter "{}" type should use ' - '"{}" instead of "{}"' - .format(param, - correct_type, - incorrect_type)) + for wrong_type, right_type in common_type_errors: + if wrong_type in doc.parameter_type(param): + errs.append(error('PR06', + param_name=param, + right_type=right_type, + wrong_type=wrong_type)) if not doc.parameter_desc(param): - param_errs.append('Parameter "{}" ' - 'has no description'.format(param)) + errs.append(error('PR07', param_name=param)) else: if not doc.parameter_desc(param)[0].isupper(): - param_errs.append('Parameter "{}" description ' - 'should start with a ' - 'capital letter'.format(param)) + errs.append(error('PR08', param_name=param)) if doc.parameter_desc(param)[-1] != '.': - param_errs.append('Parameter "{}" description ' - 'should finish with "."'.format(param)) - if param_errs: - errs.append('Errors in parameters section') - for param_err in param_errs: - errs.append('\t{}'.format(param_err)) - - pep8_errs = list(doc.validate_pep8()) - if pep8_errs: - errs.append('Linting issues in doctests:') - for err in pep8_errs: - errs.append('\t{} {} {}'.format(err.count, err.error_code, - err.message)) + errs.append(error('PR09', param_name=param)) if doc.is_function_or_method: - if not doc.returns and "return" in doc.method_source: - errs.append('No Returns section found') - if not doc.yields and "yield" in doc.method_source: - errs.append('No Yields section found') - - mentioned_errs = doc.mentioned_private_classes - if mentioned_errs: - errs.append('Private classes ({}) should not be mentioned in public ' - 'docstring.'.format(mentioned_errs)) + if not doc.returns and 'return' in doc.method_source: + errs.append(error('RT01')) + if not doc.yields and 'yield' in doc.method_source: + errs.append(error('YD01')) if not doc.see_also: - wrns.append('See Also section not found') + wrns.append(error('SA01')) else: for rel_name, rel_desc in doc.see_also.items(): if rel_desc: if not rel_desc.endswith('.'): - errs.append('Missing period at end of description for ' - 'See Also "{}" reference'.format(rel_name)) + errs.append(error('SA02', reference_name=rel_name)) if not rel_desc[0].isupper(): - errs.append('Description should be capitalized for ' - 'See Also "{}" reference'.format(rel_name)) + errs.append(error('SA03', reference_name=rel_name)) else: - errs.append('Missing description for ' - 'See Also "{}" reference'.format(rel_name)) + errs.append(error('SA04', 
reference_name=rel_name)) if rel_name.startswith('pandas.'): - errs.append('{} in `See Also` section does not ' - 'need `pandas` prefix, use {} instead.' - .format(rel_name, rel_name[len('pandas.'):])) - for line in doc.raw_doc.splitlines(): - if re.match("^ *\t", line): - errs.append('Tabs found at the start of line "{}", ' - 'please use whitespace only'.format(line.lstrip())) + errs.append(error('SA05', + reference_name=rel_name, + right_reference=rel_name[len('pandas.'):])) examples_errs = '' if not doc.examples: - wrns.append('No examples section found') + wrns.append(error('EX01')) else: examples_errs = doc.examples_errors if examples_errs: - errs.append('Examples do not pass tests') + errs.append(error('EX02', doctest_log=examples_errs)) + for err in doc.validate_pep8(): + errs.append(error('EX03', + error_code=err.error_code, + error_message=err.message, + times_happening=' ({} times)'.format(err.count) + if err.count > 1 else '')) examples_source_code = ''.join(doc.examples_source_code) - if 'import numpy' in examples_source_code: - errs.append("numpy does not need to be imported in the examples, " - "as it's assumed to be already imported as np") - if 'import pandas' in examples_source_code: - errs.append("pandas does not need to be imported in the examples, " - "as it's assumed to be already imported as pd") + for wrong_import in ('numpy', 'pandas'): + if 'import {}'.format(wrong_import) in examples_source_code: + errs.append(error('EX04', imported_library=wrong_import)) return {'type': doc.type, 'docstring': doc.clean_doc, @@ -586,11 +665,17 @@ def validate_one(func_name): 'examples_errors': examples_errs} -def validate_all(): +def validate_all(prefix): """ Execute the validation of all docstrings, and return a dict with the results. + Parameters + ---------- + prefix : str or None + If provided, only the docstrings that start with this pattern will be + validated. If None, all docstrings will be validated. 
+ Returns ------- dict @@ -605,6 +690,8 @@ def validate_all(): with open(api_doc_fname) as f: api_items = list(get_api_items(f)) for func_name, func_obj, section, subsection in api_items: + if prefix and not func_name.startswith(prefix): + continue doc_info = validate_one(func_name) result[func_name] = doc_info @@ -624,6 +711,8 @@ def validate_all(): func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) if (not member[0].startswith('_') and func_name not in api_item_names): + if prefix and not func_name.startswith(prefix): + continue doc_info = validate_one(func_name) result[func_name] = doc_info result[func_name]['in_api'] = False @@ -631,7 +720,7 @@ def validate_all(): return result -def main(func_name, fd): +def main(func_name, prefix, errors, output_format): def header(title, width=80, char='#'): full_line = char * width side_len = (width - len(title) - 2) // 2 @@ -644,32 +733,76 @@ def header(title, width=80, char='#'): full_line=full_line, title_line=title_line) if func_name is None: - json_doc = validate_all() - fd.write(json.dumps(json_doc)) - else: - doc_info = validate_one(func_name) - - fd.write(header('Docstring ({})'.format(func_name))) - fd.write('{}\n'.format(doc_info['docstring'])) - fd.write(header('Validation')) - if doc_info['errors']: - fd.write('{} Errors found:\n'.format(len(doc_info['errors']))) - for err in doc_info['errors']: - fd.write('\t{}\n'.format(err)) - if doc_info['warnings']: - fd.write('{} Warnings found:\n'.format(len(doc_info['warnings']))) - for wrn in doc_info['warnings']: - fd.write('\t{}\n'.format(wrn)) + result = validate_all(prefix) - if not doc_info['errors']: - fd.write('Docstring for "{}" correct. :)\n'.format(func_name)) + if output_format == 'json': + output = json.dumps(result) + else: + if output_format == 'default': + output_format = '{text}\n' + elif output_format == 'azure': + output_format = ('##vso[task.logissue type=error;' + 'sourcepath={path};' + 'linenumber={row};' + 'code={code};' + ']{text}\n') + else: + raise ValueError('Unknown output_format "{}"'.format( + output_format)) + + num_errors, output = 0, '' + for name, res in result.items(): + for err_code, err_desc in res['errors']: + # The script would be faster if instead of filtering the + # errors after validating them, it didn't validate them + # initially. But that would complicate the code too much + if errors and err_code not in errors: + continue + num_errors += 1 + output += output_format.format( + name=name, + path=res['file'], + row=res['file_line'], + code=err_code, + text='{}: {}'.format(name, err_desc)) + + sys.stderr.write(output) - if doc_info['examples_errors']: - fd.write(header('Doctests')) - fd.write(doc_info['examples_errors']) + else: + result = validate_one(func_name) + num_errors = len(result['errors']) + + sys.stderr.write(header('Docstring ({})'.format(func_name))) + sys.stderr.write('{}\n'.format(result['docstring'])) + sys.stderr.write(header('Validation')) + if result['errors']: + sys.stderr.write('{} Errors found:\n'.format( + len(result['errors']))) + for err_code, err_desc in result['errors']: + # Failing examples are printed at the end + if err_code == 'EX02': + sys.stderr.write('\tExamples do not pass tests\n') + continue + sys.stderr.write('\t{}\n'.format(err_desc)) + if result['warnings']: + sys.stderr.write('{} Warnings found:\n'.format( + len(result['warnings']))) + for wrn_code, wrn_desc in result['warnings']: + sys.stderr.write('\t{}\n'.format(wrn_desc)) + + if not result['errors']: + sys.stderr.write('Docstring for "{}" correct. 
:)\n'.format( + func_name)) + + if result['examples_errors']: + sys.stderr.write(header('Doctests')) + sys.stderr.write(result['examples_errors']) + + return num_errors if __name__ == '__main__': + format_opts = 'default', 'json', 'azure' func_help = ('function or method to validate (e.g. pandas.DataFrame.head) ' 'if not provided, all docstrings are validated and returned ' 'as JSON') @@ -679,5 +812,22 @@ nargs='?', default=None, help=func_help) + argparser.add_argument('--format', default='default', choices=format_opts, + help='format of the output when validating ' + 'multiple docstrings (ignored when validating one). ' + 'It can be {}'.format(str(format_opts)[1:-1])) + argparser.add_argument('--prefix', default=None, help='pattern for the ' + 'docstring names, in order to decide which ones ' + 'will be validated. A prefix "pandas.Series.str." ' + 'will make the script validate all the docstrings ' + 'of methods starting with this pattern. It is ' + 'ignored if parameter function is provided') + argparser.add_argument('--errors', default=None, help='comma separated ' + 'list of error codes to validate. By default it ' + 'validates all errors (ignored when validating ' + 'a single docstring)') + args = argparser.parse_args() - sys.exit(main(args.function, sys.stdout)) + sys.exit(main(args.function, args.prefix, + args.errors.split(',') if args.errors else None, + args.format)) From 4d4b5838e62bfc923cb598f6b199469529872a05 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 7 Nov 2018 13:38:58 +0000 Subject: [PATCH 057/122] Fixing bug in validate_docstrings.py where a bracket closed in the wrong place raised a KeyError when a private class was found in a docstring (and adding a test for that case) (#23543) --- scripts/tests/test_validate_docstrings.py | 11 ++++++++++- scripts/validate_docstrings.py | 5 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index cf8abd1680341..271c7c3021905 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -345,6 +345,11 @@ def method(self, foo=None, bar=None): """ pass + def private_classes(self): + """ + This mentions NDFrame, which is not correct.
+ """ + class BadSummaries(object): @@ -688,7 +693,8 @@ def test_bad_class(self): @capture_stderr @pytest.mark.parametrize("func", [ - 'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method']) + 'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method', + 'private_classes']) def test_bad_generic_functions(self, func): errors = validate_one(self._import_path( # noqa:F821 klass='BadGenericDocStrings', func=func))['errors'] @@ -697,6 +703,9 @@ def test_bad_generic_functions(self, func): @pytest.mark.parametrize("klass,func,msgs", [ # See Also tests + ('BadGenericDocStrings', 'private_classes', + ("Private classes (NDFrame) should not be mentioned in public " + 'docstrings',)), ('BadSeeAlso', 'desc_no_period', ('Missing period at end of description for See Also "Series.iloc"',)), ('BadSeeAlso', 'desc_first_letter_lowercase', diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 08fd3a4ce54d4..67ad21ab80b97 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -57,7 +57,7 @@ 'quotes)', 'GL03': 'Use only one blank line to separate sections or paragraphs', 'GL04': 'Private classes ({mentioned_private_classes}) should not be ' - 'mentioned in public docstring', + 'mentioned in public docstrings', 'GL05': 'Tabs found at the start of line "{line_with_tabs}", please use ' 'whitespace only', 'SS01': 'No summary found (a short summary in a single line should be ' @@ -562,7 +562,8 @@ def validate_one(func_name): errs.append(error('GL03')) mentioned_errs = doc.mentioned_private_classes if mentioned_errs: - errs.append(error('GL04'), mentioned_private_classes=mentioned_errs) + errs.append(error('GL04', + mentioned_private_classes=', '.join(mentioned_errs))) for line in doc.raw_doc.splitlines(): if re.match("^ *\t", line): errs.append(error('GL05', line_with_tabs=line.lstrip())) From 8c280ca85bf872a1a4b8133088774b0d090c9ed0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 7 Nov 2018 06:05:45 -0800 Subject: [PATCH 058/122] BUG: Timestamp retains frequency of input Timestamps (#23503) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/tslibs/timestamps.pyx | 6 +++++- pandas/tests/scalar/timestamp/test_timestamp.py | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 72c11900f7fa3..c4c39fee0ec12 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1121,6 +1121,7 @@ Datetimelike - Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) - Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`) +- Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d5bd2e90af3a7..44133a1a63597 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -16,7 +16,8 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT from util cimport (is_datetime64_object, is_timedelta64_object, - is_integer_object, is_string_object, is_array) + 
is_integer_object, is_string_object, is_array, + is_offset_object) cimport ccalendar from conversion import tz_localize_to_utc, normalize_i8_timestamps @@ -734,6 +735,9 @@ class Timestamp(_Timestamp): if is_string_object(freq): freq = to_offset(freq) + elif not is_offset_object(freq): + # GH 22311: Try to extract the frequency of a given Timestamp input + freq = getattr(ts_input, 'freq', None) return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 872c510094a4f..9a77a9ccc96c3 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -569,6 +569,12 @@ def test_construct_with_different_string_format(self, arg): expected = Timestamp(datetime(2013, 1, 1), tz=pytz.FixedOffset(540)) assert result == expected + def test_construct_timestamp_preserve_original_frequency(self): + # GH 22311 + result = Timestamp(Timestamp('2010-08-08', freq='D')).freq + expected = offsets.Day() + assert result == expected + class TestTimestamp(object): From 574a03a41aaca200e9293f06ec6caa76dca1353b Mon Sep 17 00:00:00 2001 From: Sinhrks Date: Wed, 7 Nov 2018 23:08:14 +0900 Subject: [PATCH 059/122] BUG: DatetimeIndex slicing with boolean Index raises TypeError (#22852) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/multi/test_indexing.py | 27 ++++++++++++++++ pandas/tests/test_base.py | 34 ++++++++++++++++++--- 6 files changed, 61 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c4c39fee0ec12..b0cc0a5c89ad4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1222,7 +1222,7 @@ Indexing - Bug in `MultiIndex.set_levels` when levels value is not subscriptable (:issue:`23273`) - Bug where setting a timedelta column by ``Index`` causes it to be casted to double, and therefore lose precision (:issue:`23511`) - Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) - +- Bug in :class:`Index` slicing with boolean :class:`Index` may raise ``TypeError`` (:issue:`22533`) Missing ^^^^^^^ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 92de1fe2e0679..7f1c86938a354 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -161,7 +161,7 @@ def __getitem__(self, key): return self._box_func(val) if com.is_bool_indexer(key): - key = np.asarray(key) + key = np.asarray(key, dtype=bool) if key.all(): key = slice(0, None, None) else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6e65d6899787f..fcced091b3794 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2078,7 +2078,7 @@ def __getitem__(self, key): return promote(getitem(key)) if com.is_bool_indexer(key): - key = np.asarray(key) + key = np.asarray(key, dtype=bool) key = com.values_from_object(key) result = getitem(key) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c694289efc493..9c981c24190a4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1614,7 +1614,7 @@ def __getitem__(self, key): return tuple(retval) else: if com.is_bool_indexer(key): - key = 
np.asarray(key) + key = np.asarray(key, dtype=bool) sortorder = self.sortorder else: # cannot be sure whether the result will be sorted diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 2b5f16b0ea0c8..563027364134d 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -226,6 +226,33 @@ def test_get_indexer_consistency(idx): assert indexer.dtype == np.intp +@pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) +@pytest.mark.parametrize('ind2', [[True, False, True, False, False], + pd.Index([True, False, True, False, + False])]) +def test_getitem_bool_index_all(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), + (40, 4), (50, 5)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = MultiIndex.from_tuples([(10, 1), (30, 3)]) + tm.assert_index_equal(idx[ind2], expected) + + +@pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) +@pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) +def test_getitem_bool_index_single(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.MultiIndex(levels=[np.array([], dtype=np.int64), + np.array([], dtype=np.int64)], + labels=[[], []]) + tm.assert_index_equal(idx[ind2], expected) + + def test_get_loc(idx): assert idx.get_loc(('foo', 'two')) == 1 assert idx.get_loc(('baz', 'two')) == 3 diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index fe2956adc35af..07d357b70f94b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -178,19 +178,20 @@ def setup_method(self, method): self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) + self.bool_series = Series(arr, index=self.bool_index, name='a') self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') + self.unicode_series = Series(arr, index=self.unicode_index, name='a') types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', 'unicode'] - fmts = ["{0}_{1}".format(t, f) - for t in types for f in ['index', 'series']] - self.objs = [getattr(self, f) - for f in fmts if getattr(self, f, None) is not None] + self.indexes = [getattr(self, '{}_index'.format(t)) for t in types] + self.series = [getattr(self, '{}_series'.format(t)) for t in types] + self.objs = self.indexes + self.series def check_ops_properties(self, props, filter=None, ignore_failures=False): for op in props: @@ -997,6 +998,31 @@ def test_validate_bool_args(self): with pytest.raises(ValueError): self.int_series.drop_duplicates(inplace=value) + def test_getitem(self): + for i in self.indexes: + s = pd.Series(i) + + assert i[0] == s.iloc[0] + assert i[5] == s.iloc[5] + assert i[-1] == s.iloc[-1] + + assert i[-1] == i[9] + + pytest.raises(IndexError, i.__getitem__, 20) + pytest.raises(IndexError, s.iloc.__getitem__, 20) + + @pytest.mark.parametrize('indexer_klass', [list, pd.Index]) + @pytest.mark.parametrize('indexer', [[True] * 10, [False] * 10, + [True, False, True, True, False, + False, True, True, False, True]]) + def test_bool_indexing(self, indexer_klass, indexer): + # GH 22533 + for idx in self.indexes: + 
exp_idx = [i for i in range(len(indexer)) if indexer[i]] + tm.assert_index_equal(idx[indexer_klass(indexer)], idx[exp_idx]) + s = pd.Series(idx) + tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx]) + class TestTranspose(Ops): errmsg = "the 'axes' parameter is not supported" From 73cc01b7dd69bb19fa24eaac505eddde945c94f5 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 7 Nov 2018 15:09:12 +0100 Subject: [PATCH 060/122] BUG: Return KeyError for invalid string key (#23540) closes: #22803 --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/period.py | 10 ++++++++-- pandas/tests/indexes/period/test_indexing.py | 9 ++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b0cc0a5c89ad4..61da03a9f3538 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1206,6 +1206,7 @@ Indexing ^^^^^^^^ - The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`) +- :class:`PeriodIndex` now emits a ``KeyError`` when a malformed string is looked up, which is consistent with the behavior of :class:`DateTimeIndex` (:issue:`22803`) - When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index`, rather than falling back to positional indexing (:issue:`21593`) - Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) - Bug in :meth:`Series.reindex` when reindexing an empty series with a ``datetime64[ns, tz]`` dtype (:issue:`20869`) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 92ffaea521d7f..21e84629b4d3b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -20,7 +20,7 @@ from pandas.core.indexes.datetimelike import ( DatelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op ) -from pandas.core.tools.datetimes import parse_time_string +from pandas.core.tools.datetimes import parse_time_string, DateParseError from pandas._libs import tslib, index as libindex from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, @@ -580,7 +580,10 @@ def searchsorted(self, value, side='left', sorter=None): raise IncompatibleFrequency(msg) value = value.ordinal elif isinstance(value, compat.string_types): - value = Period(value, freq=self.freq).ordinal + try: + value = Period(value, freq=self.freq).ordinal + except DateParseError: + raise KeyError("Cannot interpret '{}' as period".format(value)) return self._ndarray_values.searchsorted(value, side=side, sorter=sorter) @@ -711,6 +714,9 @@ def get_loc(self, key, method=None, tolerance=None): key = asdt except TypeError: pass + except DateParseError: + # A string with invalid format + raise KeyError("Cannot interpret '{}' as period".format(key)) try: key = Period(key, freq=self.freq) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index fafba144bb148..880e37c59c9c4 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -3,7 +3,6 @@ import numpy as np import pytest -from pandas._libs import tslibs from pandas._libs.tslibs import period as libperiod from pandas.compat import lrange @@ -363,7 +362,9 @@ def test_get_loc(self): assert idx0.get_loc(p2) == expected_idx1_p2 assert 
idx0.get_loc(str(p2)) == expected_idx1_p2 - pytest.raises(tslibs.parsing.DateParseError, idx0.get_loc, 'foo') + tm.assert_raises_regex(KeyError, + "Cannot interpret 'foo' as period", + idx0.get_loc, 'foo') pytest.raises(KeyError, idx0.get_loc, 1.1) pytest.raises(TypeError, idx0.get_loc, idx0) @@ -378,7 +379,9 @@ def test_get_loc(self): assert idx1.get_loc(p2) == expected_idx1_p2 assert idx1.get_loc(str(p2)) == expected_idx1_p2 - pytest.raises(tslibs.parsing.DateParseError, idx1.get_loc, 'foo') + tm.assert_raises_regex(KeyError, + "Cannot interpret 'foo' as period", + idx1.get_loc, 'foo') pytest.raises(KeyError, idx1.get_loc, 1.1) pytest.raises(TypeError, idx1.get_loc, idx1) From 43b135fd6cef148e9cfe607499e6e6e21bc4011a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 7 Nov 2018 14:14:26 +0000 Subject: [PATCH 061/122] TST: coverage for skipped tests in io/formats/test_to_html.py (#22888) --- pandas/tests/io/formats/test_to_html.py | 38 +++++-------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 845fb1ee3dc3a..035b2d4c3347c 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1066,14 +1066,10 @@ def test_to_html_regression_GH6098(self): df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() def test_to_html_truncate(self): - pytest.skip("unreliable on travis") index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) df = DataFrame(index=index, columns=range(20)) - fmt.set_option('display.max_rows', 8) - fmt.set_option('display.max_columns', 4) - result = df._repr_html_() + result = df.to_html(max_rows=8, max_cols=4) expected = '''\ - @@ -1159,23 +1155,15 @@ def test_to_html_truncate(self): -
       <td>NaN</td>
     </tr>
   </tbody>
 </table>
-<p>20 rows × 20 columns</p>
-</div>'''.format(div_style)
-        if compat.PY2:
-            expected = expected.decode('utf-8')
+'''
         assert result == expected

     def test_to_html_truncate_multi_index(self):
-        pytest.skip("unreliable on travis")
         arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                   ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
         df = DataFrame(index=arrays, columns=arrays)
-        fmt.set_option('display.max_rows', 7)
-        fmt.set_option('display.max_columns', 7)
-        result = df._repr_html_()
+        result = df.to_html(max_rows=7, max_cols=7)
         expected = '''\
-<div{0}>
@@ -1276,24 +1264,16 @@ def test_to_html_truncate_multi_index(self):
       <td>NaN</td>
     </tr>
   </tbody>
 </table>
-<p>8 rows × 8 columns</p>
-</div>'''.format(div_style)
-        if compat.PY2:
-            expected = expected.decode('utf-8')
+'''
         assert result == expected

+    @pytest.mark.xfail(reason='GH22887 TypeError', strict=True)
     def test_to_html_truncate_multi_index_sparse_off(self):
-        pytest.skip("unreliable on travis")
         arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                   ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
         df = DataFrame(index=arrays, columns=arrays)
-        fmt.set_option('display.max_rows', 7)
-        fmt.set_option('display.max_columns', 7)
-        fmt.set_option('display.multi_sparse', False)
-        result = df._repr_html_()
+        result = df.to_html(max_rows=7, max_cols=7, sparsify=False)
         expected = '''\
-<div{0}>
@@ -1387,11 +1367,7 @@ def test_to_html_truncate_multi_index_sparse_off(self):
       <td>NaN</td>
     </tr>
   </tbody>
 </table>
-<p>8 rows × 8 columns</p>
-</div>
-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') +''' assert result == expected def test_to_html_border(self): From 31eee47510e3586d8c8623e66351b28bb356f49c Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 7 Nov 2018 15:18:14 +0000 Subject: [PATCH 062/122] DOC: Updating DataFrame.join docstring (#23471) --- pandas/core/frame.py | 156 +++++++++++++++++++++---------------------- 1 file changed, 77 insertions(+), 79 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7aadf7e735f38..eff75938b1181 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6494,123 +6494,121 @@ def append(self, other, ignore_index=False, def join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): """ - Join columns with other DataFrame either on index or on a key - column. Efficiently Join multiple DataFrame objects by index at once by + Join columns of another DataFrame. + + Join columns with `other` DataFrame either on index or on a key + column. Efficiently join multiple DataFrame objects by index at once by passing a list. Parameters ---------- - other : DataFrame, Series with name field set, or list of DataFrame + other : DataFrame, Series, or list of DataFrame Index should be similar to one of the columns in this one. If a Series is passed, its name attribute must be set, and that will be - used as the column name in the resulting joined DataFrame - on : name, tuple/list of names, or array-like + used as the column name in the resulting joined DataFrame. + on : str, list of str, or array-like, optional Column or index level name(s) in the caller to join on the index in `other`, otherwise joins index-on-index. If multiple values given, the `other` DataFrame must have a MultiIndex. Can pass an array as the join key if it is not already contained in - the calling DataFrame. Like an Excel VLOOKUP operation - how : {'left', 'right', 'outer', 'inner'}, default: 'left' + the calling DataFrame. Like an Excel VLOOKUP operation. + how : {'left', 'right', 'outer', 'inner'}, default 'left' How to handle the operation of the two objects. * left: use calling frame's index (or column if on is specified) - * right: use other frame's index + * right: use `other`'s index. * outer: form union of calling frame's index (or column if on is - specified) with other frame's index, and sort it - lexicographically + specified) with `other`'s index, and sort it. + lexicographically. * inner: form intersection of calling frame's index (or column if - on is specified) with other frame's index, preserving the order - of the calling's one - lsuffix : string - Suffix to use from left frame's overlapping columns - rsuffix : string - Suffix to use from right frame's overlapping columns - sort : boolean, default False + on is specified) with `other`'s index, preserving the order + of the calling's one. + lsuffix : str, default '' + Suffix to use from left frame's overlapping columns. + rsuffix : str, default '' + Suffix to use from right frame's overlapping columns. + sort : bool, default False Order result DataFrame lexicographically by the join key. If False, - the order of the join key depends on the join type (how keyword) + the order of the join key depends on the join type (how keyword). + + Returns + ------- + DataFrame + A dataframe containing columns from both the caller and `other`. 
Notes ----- - on, lsuffix, and rsuffix options are not supported when passing a list - of DataFrame objects + Parameters `on`, `lsuffix`, and `rsuffix` are not supported when + passing a list of `DataFrame` objects. Support for specifying index levels as the `on` parameter was added - in version 0.23.0 + in version 0.23.0. + + See Also + -------- + DataFrame.merge : For column(s)-on-columns(s) operations. Examples -------- - >>> caller = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], - ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) - - >>> caller - A key - 0 A0 K0 - 1 A1 K1 - 2 A2 K2 - 3 A3 K3 - 4 A4 K4 - 5 A5 K5 + >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], + ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + + >>> df + key A + 0 K0 A0 + 1 K1 A1 + 2 K2 A2 + 3 K3 A3 + 4 K4 A4 + 5 K5 A5 >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'], ... 'B': ['B0', 'B1', 'B2']}) >>> other - B key - 0 B0 K0 - 1 B1 K1 - 2 B2 K2 + key B + 0 K0 B0 + 1 K1 B1 + 2 K2 B2 Join DataFrames using their indexes. - >>> caller.join(other, lsuffix='_caller', rsuffix='_other') - - >>> A key_caller B key_other - 0 A0 K0 B0 K0 - 1 A1 K1 B1 K1 - 2 A2 K2 B2 K2 - 3 A3 K3 NaN NaN - 4 A4 K4 NaN NaN - 5 A5 K5 NaN NaN - + >>> df.join(other, lsuffix='_caller', rsuffix='_other') + key_caller A key_other B + 0 K0 A0 K0 B0 + 1 K1 A1 K1 B1 + 2 K2 A2 K2 B2 + 3 K3 A3 NaN NaN + 4 K4 A4 NaN NaN + 5 K5 A5 NaN NaN If we want to join using the key columns, we need to set key to be - the index in both caller and other. The joined DataFrame will have + the index in both `df` and `other`. The joined DataFrame will have key as its index. - >>> caller.set_index('key').join(other.set_index('key')) - - >>> A B - key - K0 A0 B0 - K1 A1 B1 - K2 A2 B2 - K3 A3 NaN - K4 A4 NaN - K5 A5 NaN - - Another option to join using the key columns is to use the on - parameter. DataFrame.join always uses other's index but we can use any - column in the caller. This method preserves the original caller's + >>> df.set_index('key').join(other.set_index('key')) + A B + key + K0 A0 B0 + K1 A1 B1 + K2 A2 B2 + K3 A3 NaN + K4 A4 NaN + K5 A5 NaN + + Another option to join using the key columns is to use the `on` + parameter. DataFrame.join always uses `other`'s index but we can use + any column in `df`. This method preserves the original DataFrame's index in the result. 
- >>> caller.join(other.set_index('key'), on='key') - - >>> A key B - 0 A0 K0 B0 - 1 A1 K1 B1 - 2 A2 K2 B2 - 3 A3 K3 NaN - 4 A4 K4 NaN - 5 A5 K5 NaN - - - See also - -------- - DataFrame.merge : For column(s)-on-columns(s) operations - - Returns - ------- - joined : DataFrame + >>> df.join(other.set_index('key'), on='key') + key A B + 0 K0 A0 B0 + 1 K1 A1 B1 + 2 K2 A2 B2 + 3 K3 A3 NaN + 4 K4 A4 NaN + 5 K5 A5 NaN """ # For SparseDataFrame's benefit return self._join_compat(other, on=on, how=how, lsuffix=lsuffix, From 737cb7d2e7880db6eae51cbb076fe85f67f5a202 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Nov 2018 09:34:57 -0600 Subject: [PATCH 063/122] ENH: Support EAs in Series.unstack (#23284) --- asv_bench/benchmarks/reshape.py | 20 ++++- doc/source/whatsnew/v0.24.0.txt | 3 +- pandas/core/internals/blocks.py | 83 +++++++++++++++++-- pandas/core/internals/managers.py | 10 ++- pandas/core/reshape/reshape.py | 81 ++++++++++++------ pandas/tests/extension/base/reshaping.py | 45 ++++++++++ pandas/tests/extension/decimal/array.py | 2 +- .../tests/extension/decimal/test_decimal.py | 21 ++++- pandas/tests/extension/json/test_json.py | 6 +- pandas/tests/frame/test_reshape.py | 27 ++++-- pandas/tests/sparse/test_pivot.py | 1 + 11 files changed, 248 insertions(+), 51 deletions(-) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index bda486dba3b0f..67fdfb82e72c0 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -49,21 +49,33 @@ def time_unstack(self): class Unstack(object): - def setup(self): + params = ['int', 'category'] + + def setup(self, dtype): m = 100 n = 1000 levels = np.arange(m) index = MultiIndex.from_product([levels] * 2) columns = np.arange(n) - values = np.arange(m * m * n).reshape(m * m, n) + if dtype == 'int': + values = np.arange(m * m * n).reshape(m * m, n) + else: + # the category branch is ~20x slower than int. So we + # cut down the size a bit. Now it's only ~3x slower. + n = 50 + columns = columns[:n] + indices = np.random.randint(0, 52, size=(m * m, n)) + values = np.take(list(string.ascii_letters), indices) + values = [pd.Categorical(v) for v in values.T] + self.df = DataFrame(values, index, columns) self.df2 = self.df.iloc[:-1] - def time_full_product(self): + def time_full_product(self, dtype): self.df.unstack() - def time_without_last_row(self): + def time_without_last_row(self, dtype): self.df2.unstack() diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 61da03a9f3538..71d3b923305ea 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -853,7 +853,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) -- :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`). +- :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. 
Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`). - Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`). .. _whatsnew_0240.api.incompatibilities: @@ -1090,6 +1090,7 @@ Categorical - Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`) - Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`). - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). +- In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) Datetimelike diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e84953f3dab56..7a55b652054ed 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import functools import warnings import inspect import re @@ -34,6 +35,7 @@ is_numeric_v_string_like, is_extension_type, is_extension_array_dtype, is_list_like, + is_sparse, is_re, is_re_compilable, pandas_dtype) @@ -632,7 +634,10 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, return self if klass is None: - if dtype == np.object_: + if is_sparse(self.values): + # special case sparse, Series[Sparse].astype(object) is sparse + klass = ExtensionBlock + elif is_object_dtype(dtype): klass = ObjectBlock elif is_extension_array_dtype(dtype): klass = ExtensionBlock @@ -1429,7 +1434,7 @@ def equals(self, other): return False return array_equivalent(self.values, other.values) - def _unstack(self, unstacker_func, new_columns): + def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): """Return a list of unstacked blocks of self Parameters @@ -1438,6 +1443,10 @@ def _unstack(self, unstacker_func, new_columns): Partially applied unstacker. new_columns : Index All columns of the unstacked BlockManager. + n_rows : int + Only used in ExtensionBlock.unstack + fill_value : int + Only used in ExtensionBlock.unstack Returns ------- @@ -1731,7 +1740,7 @@ def _slice(self, slicer): def _try_cast_result(self, result, dtype=None): return result - def _unstack(self, unstacker_func, new_columns): + def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): """Return a list of unstacked blocks of self Parameters @@ -1740,6 +1749,10 @@ def _unstack(self, unstacker_func, new_columns): Partially applied unstacker. new_columns : Index All columns of the unstacked BlockManager. 
+ n_rows : int + Only used in ExtensionBlock.unstack + fill_value : int + Only used in ExtensionBlock.unstack Returns ------- @@ -1751,11 +1764,11 @@ def _unstack(self, unstacker_func, new_columns): # NonConsolidatable blocks can have a single item only, so we return # one block per item unstacker = unstacker_func(self.values.T) - new_items = unstacker.get_new_columns() - new_placement = new_columns.get_indexer(new_items) - new_values, mask = unstacker.get_new_values() - mask = mask.any(0) + new_placement, new_values, mask = self._get_unstack_items( + unstacker, new_columns + ) + new_values = new_values.T[mask] new_placement = new_placement[mask] @@ -1763,6 +1776,38 @@ def _unstack(self, unstacker_func, new_columns): for vals, place in zip(new_values, new_placement)] return blocks, mask + def _get_unstack_items(self, unstacker, new_columns): + """ + Get the placement, values, and mask for a Block unstack. + + This is shared between ObjectBlock and ExtensionBlock. They + differ in that ObjectBlock passes the values, while ExtensionBlock + passes the dummy ndarray of positions to be used by a take + later. + + Parameters + ---------- + unstacker : pandas.core.reshape.reshape._Unstacker + new_columns : Index + All columns of the unstacked BlockManager. + + Returns + ------- + new_placement : ndarray[int] + The placement of the new columns in `new_columns`. + new_values : Union[ndarray, ExtensionArray] + The first return value from _Unstacker.get_new_values. + mask : ndarray[bool] + The second return value from _Unstacker.get_new_values. + """ + # shared with ExtensionBlock + new_items = unstacker.get_new_columns() + new_placement = new_columns.get_indexer(new_items) + new_values, mask = unstacker.get_new_values() + + mask = mask.any(0) + return new_placement, new_values, mask + class ExtensionBlock(NonConsolidatableMixIn, Block): """Block for holding extension types. @@ -1950,6 +1995,30 @@ def shift(self, periods, axis=0): def _ftype(self): return getattr(self.values, '_pandas_ftype', Block._ftype) + def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): + # ExtensionArray-safe unstack. + # We override ObjectBlock._unstack, which unstacks directly on the + # values of the array. For EA-backed blocks, this would require + # converting to a 2-D ndarray of objects. + # Instead, we unstack an ndarray of integer positions, followed by + # a `take` on the actual values. + dummy_arr = np.arange(n_rows) + dummy_unstacker = functools.partial(unstacker_func, fill_value=-1) + unstacker = dummy_unstacker(dummy_arr) + + new_placement, new_values, mask = self._get_unstack_items( + unstacker, new_columns + ) + + blocks = [ + self.make_block_same_class( + self.values.take(indices, allow_fill=True, + fill_value=fill_value), + [place]) + for indices, place in zip(new_values.T, new_placement) + ] + return blocks, mask + class NumericBlock(Block): __slots__ = () diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fc3a12a9da82a..0519c5e5abe33 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1405,18 +1405,21 @@ def canonicalize(block): return all(block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)) - def unstack(self, unstacker_func): + def unstack(self, unstacker_func, fill_value): """Return a blockmanager with all blocks unstacked. Parameters ---------- unstacker_func : callable A (partially-applied) ``pd.core.reshape._Unstacker`` class. 
+ fill_value : Any + fill_value for newly introduced missing values. Returns ------- unstacked : BlockManager """ + n_rows = self.shape[-1] dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items) new_columns = dummy.get_new_columns() new_index = dummy.get_new_index() @@ -1427,7 +1430,10 @@ def unstack(self, unstacker_func): blocks, mask = blk._unstack( partial(unstacker_func, value_columns=self.items[blk.mgr_locs.indexer]), - new_columns) + new_columns, + n_rows, + fill_value + ) new_blocks.extend(blocks) columns_mask.extend(mask) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d3b677a1df2a3..2dca7cf0e6aa3 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -12,12 +12,12 @@ from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( ensure_platform_int, is_bool_dtype, is_extension_array_dtype, is_list_like, - is_object_dtype, is_sparse, needs_i8_conversion) + is_object_dtype, needs_i8_conversion) from pandas.core.dtypes.missing import notna from pandas import compat import pandas.core.algorithms as algos -from pandas.core.arrays import Categorical, SparseArray +from pandas.core.arrays import SparseArray from pandas.core.arrays.categorical import _factorize_from_iterable from pandas.core.frame import DataFrame from pandas.core.index import Index, MultiIndex @@ -82,28 +82,15 @@ class _Unstacker(object): def __init__(self, values, index, level=-1, value_columns=None, fill_value=None, constructor=None): - self.is_categorical = None - self.is_sparse = is_sparse(values) if values.ndim == 1: - if isinstance(values, Categorical): - self.is_categorical = values - values = np.array(values) - elif self.is_sparse: - # XXX: Makes SparseArray *dense*, but it's supposedly - # a single column at a time, so it's "doable" - values = values.values values = values[:, np.newaxis] self.values = values self.value_columns = value_columns self.fill_value = fill_value if constructor is None: - if self.is_sparse: - self.constructor = SparseDataFrame - else: - self.constructor = DataFrame - else: - self.constructor = constructor + constructor = DataFrame + self.constructor = constructor if value_columns is None and values.shape[1] != 1: # pragma: no cover raise ValueError('must pass column labels for multi-column data') @@ -174,14 +161,6 @@ def get_result(self): columns = self.get_new_columns() index = self.get_new_index() - # may need to coerce categoricals here - if self.is_categorical is not None: - categories = self.is_categorical.categories - ordered = self.is_categorical.ordered - values = [Categorical(values[:, i], categories=categories, - ordered=ordered) - for i in range(values.shape[-1])] - return self.constructor(values, index=index, columns=columns) def get_new_values(self): @@ -339,6 +318,7 @@ def _unstack_multiple(data, clocs, fill_value=None): if isinstance(data, Series): dummy = data.copy() dummy.index = dummy_index + unstacked = dummy.unstack('__placeholder__', fill_value=fill_value) new_levels = clevels new_names = cnames @@ -394,6 +374,8 @@ def unstack(obj, level, fill_value=None): else: return obj.T.stack(dropna=False) else: + if is_extension_array_dtype(obj.dtype): + return _unstack_extension_series(obj, level, fill_value) unstacker = _Unstacker(obj.values, obj.index, level=level, fill_value=fill_value, constructor=obj._constructor_expanddim) @@ -404,7 +386,8 @@ def _unstack_frame(obj, level, fill_value=None): if obj._is_mixed_type: unstacker = partial(_Unstacker, index=obj.index, 
level=level, fill_value=fill_value) - blocks = obj._data.unstack(unstacker) + blocks = obj._data.unstack(unstacker, + fill_value=fill_value) return obj._constructor(blocks) else: unstacker = _Unstacker(obj.values, obj.index, level=level, @@ -414,6 +397,52 @@ def _unstack_frame(obj, level, fill_value=None): return unstacker.get_result() +def _unstack_extension_series(series, level, fill_value): + """ + Unstack an ExtensionArray-backed Series. + + The ExtensionDtype is preserved. + + Parameters + ---------- + series : Series + A Series with an ExtensionArray for values + level : Any + The level name or number. + fill_value : Any + The user-level (not physical storage) fill value to use for + missing values introduced by the reshape. Passed to + ``series.values.take``. + + Returns + ------- + DataFrame + Each column of the DataFrame will have the same dtype as + the input Series. + """ + # Implementation note: the basic idea is to + # 1. Do a regular unstack on a dummy array of integers + # 2. Followup with a columnwise take. + # We use the dummy take to discover newly-created missing values + # introduced by the reshape. + from pandas.core.reshape.concat import concat + + dummy_arr = np.arange(len(series)) + # fill_value=-1, since we will do a series.values.take later + result = _Unstacker(dummy_arr, series.index, + level=level, fill_value=-1).get_result() + + out = [] + values = series.values + + for col, indices in result.iteritems(): + out.append(Series(values.take(indices.values, + allow_fill=True, + fill_value=fill_value), + name=col, index=result.index)) + return concat(out, axis='columns', copy=False, keys=result.columns) + + def stack(frame, level=-1, dropna=True): """ Convert DataFrame to Series with multi-level Index. Columns become the diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 446912b66bf33..d0e42e69e300f 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -1,3 +1,5 @@ +import itertools + import numpy as np import pytest @@ -170,3 +172,46 @@ def test_merge(self, data, na_value): [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype)}) self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']]) + + @pytest.mark.parametrize("index", [ + # Two levels, uniform. 
+ pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]), + names=['a', 'b']), + + # non-uniform + pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b'), ('B', 'b')]), + + # three levels, non-uniform + pd.MultiIndex.from_product([('A', 'B'), ('a', 'b', 'c'), (0, 1, 2)]), + pd.MultiIndex.from_tuples([ + ('A', 'a', 1), + ('A', 'b', 0), + ('A', 'a', 0), + ('B', 'a', 0), + ('B', 'c', 1), + ]), + ]) + @pytest.mark.parametrize("obj", ["series", "frame"]) + def test_unstack(self, data, index, obj): + data = data[:len(index)] + if obj == "series": + ser = pd.Series(data, index=index) + else: + ser = pd.DataFrame({"A": data, "B": data}, index=index) + + n = index.nlevels + levels = list(range(n)) + # [0, 1, 2] + # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + combinations = itertools.chain.from_iterable( + itertools.permutations(levels, i) for i in range(1, n) + ) + + for level in combinations: + result = ser.unstack(level=level) + assert all(isinstance(result[col].values, type(data)) + for col in result.columns) + expected = ser.astype(object).unstack(level=level) + result = result.astype(object) + + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index c14bfa359bc64..3c8905c578c4f 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -102,7 +102,7 @@ def copy(self, deep=False): def astype(self, dtype, copy=True): if isinstance(dtype, type(self.dtype)): return type(self)(self._data, context=dtype.context) - return super(DecimalArray, self).astype(dtype, copy) + return np.asarray(self, dtype=dtype) def __setitem__(self, key, value): if pd.api.types.is_list_like(value): diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 1c9beefe9e542..af5f6bf0a2f65 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -1,4 +1,5 @@ import decimal +import math import operator import numpy as np @@ -63,9 +64,23 @@ def data_for_grouping(): class BaseDecimal(object): def assert_series_equal(self, left, right, *args, **kwargs): - - left_na = left.isna() - right_na = right.isna() + def convert(x): + # need to convert array([Decimal(NaN)], dtype='object') to np.NaN + # because Series[object].isnan doesn't recognize decimal(NaN) as + # NA. + try: + return math.isnan(x) + except TypeError: + return False + + if left.dtype == 'object': + left_na = left.apply(convert) + else: + left_na = left.isna() + if right.dtype == 'object': + right_na = right.apply(convert) + else: + right_na = right.isna() tm.assert_series_equal(left_na, right_na) return tm.assert_series_equal(left[~left_na], diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 778432376e092..2b1bfecdf8f28 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -139,7 +139,11 @@ def test_from_dtype(self, data): class TestReshaping(BaseJSON, base.BaseReshapingTests): - pass + @pytest.mark.xfail(reason="dict for NA", strict=True) + def test_unstack(self, data, index): + # The base test has NaN for the expected NA value. 
+ # this matches otherwise + return super().test_unstack(data, index) class TestGetitem(BaseJSON, base.BaseGetitemTests): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index ed3cc39052183..54511df4effad 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -277,8 +277,6 @@ def test_unstack_fill_frame_timedelta(self): index=['x', 'y', 'z']) assert_frame_equal(result, expected) - @pytest.mark.xfail(reason="GH-23077", - strict=True) def test_unstack_fill_frame_period(self): # Test unstacking with period @@ -305,7 +303,8 @@ def test_unstack_fill_frame_categorical(self): # Test unstacking with categorical data = pd.Series(['a', 'b', 'c', 'a'], dtype='category') data.index = pd.MultiIndex.from_tuples( - [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')]) + [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')], + ) # By default missing values will be NaN result = data.unstack() @@ -316,9 +315,10 @@ def test_unstack_fill_frame_categorical(self): index=list('xyz')) assert_frame_equal(result, expected) - # Fill with non-category results in NaN entries similar to above - result = data.unstack(fill_value='d') - assert_frame_equal(result, expected) + # Fill with non-category results in a TypeError + msg = r"'fill_value' \('d'\) is not in" + with tm.assert_raises_regex(TypeError, msg): + data.unstack(fill_value='d') # Fill with category value replaces missing values as expected result = data.unstack(fill_value='c') @@ -874,6 +874,21 @@ def test_stack_preserve_categorical_dtype(self, ordered, labels): tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('level', [0, 1]) + def test_unstack_mixed_extension_types(self, level): + index = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 1)], + names=['a', 'b']) + df = pd.DataFrame({"A": pd.core.arrays.integer_array([0, 1, None]), + "B": pd.Categorical(['a', 'a', 'b'])}, index=index) + + result = df.unstack(level=level) + expected = df.astype(object).unstack(level=level) + + expected_dtypes = pd.Series([df.A.dtype] * 2 + [df.B.dtype] * 2, + index=result.columns) + tm.assert_series_equal(result.dtypes, expected_dtypes) + tm.assert_frame_equal(result.astype(object), expected) + @pytest.mark.parametrize("level", [0, 'baz']) def test_unstack_swaplevel_sortlevel(self, level): # GH 20994 diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py index e7eba63e4e0b3..0e71048f51177 100644 --- a/pandas/tests/sparse/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -47,4 +47,5 @@ def test_pivot_table_multi(self): values=['D', 'E']) res_dense = pd.pivot_table(self.dense, index='A', columns='B', values=['D', 'E']) + res_dense = res_dense.apply(lambda x: x.astype("Sparse[float64]")) tm.assert_frame_equal(res_sparse, res_dense) From 13f41ae2c8c2db4e3f2d0640ae6d4967663a54a8 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 7 Nov 2018 16:08:12 -0800 Subject: [PATCH 064/122] BUG: Fix error message for invalid HTML flavor (#23550) The flavors were not rendering properly in the string formatting. Closes gh-23549. Follow-up to gh-17660. 
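
For illustration, a minimal sketch of the escaping bug (not part of the
original commit message): doubled braces in ``str.format`` are escapes, so
the old ``_print_as_set`` template rendered the literal string ``{arg}``
instead of interpolating the joined flavors. The fix concatenates literal
braces around an interpolated core:

    >>> '{{arg}}'.format(arg='lxml, bs4')   # '{{' and '}}' escape to literal braces
    '{arg}'
    >>> '{' + '{arg}'.format(arg='lxml, bs4') + '}'   # the fixed construction
    '{lxml, bs4}'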
--- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/html.py | 3 ++- pandas/tests/io/test_html.py | 9 ++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 71d3b923305ea..7314d23bfaa3e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1294,6 +1294,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) - Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) +- Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`) Plotting ^^^^^^^^ diff --git a/pandas/io/html.py b/pandas/io/html.py index 4f887b69646ee..bcbb07c6dddfb 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -854,7 +854,8 @@ def _parser_dispatch(flavor): def _print_as_set(s): - return '{{arg}}'.format(arg=', '.join(pprint_thing(el) for el in s)) + return ('{' + '{arg}'.format(arg=', '.join( + pprint_thing(el) for el in s)) + '}') def _validate_flavor(flavor): diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index b748e9aa5ef5b..fea3c23121ab2 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -61,9 +61,12 @@ def test_bs4_version_fails(monkeypatch, datapath): def test_invalid_flavor(): - url = 'google.com' - with pytest.raises(ValueError): - read_html(url, 'google', flavor='not a* valid**++ flaver') + url = "google.com" + flavor = "invalid flavor" + msg = r"\{" + flavor + r"\} is not a valid set of flavors" + + with tm.assert_raises_regex(ValueError, msg): + read_html(url, "google", flavor=flavor) @td.skip_if_no('bs4') From 6d4178ad372f3a9dfb60e1c588c8a600341fa6e3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 8 Nov 2018 04:38:09 -0800 Subject: [PATCH 065/122] remove uses of (ts)?lib.(NaT|iNaT|Timestamp) (#23562) --- pandas/core/generic.py | 10 +- pandas/core/indexes/period.py | 8 +- pandas/tests/dtypes/test_inference.py | 4 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 18 +-- pandas/tests/indexes/period/test_ops.py | 46 +++--- pandas/tests/scalar/period/test_period.py | 140 +++++++++--------- pandas/tests/series/indexing/test_datetime.py | 4 +- pandas/tests/series/test_dtypes.py | 7 +- pandas/tests/series/test_internals.py | 34 ++--- pandas/tests/series/test_replace.py | 4 +- pandas/tests/tslibs/test_array_to_datetime.py | 10 +- 12 files changed, 138 insertions(+), 149 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 396b092a286c1..a6224478070ec 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from pandas._libs import tslib, properties +from pandas._libs import properties, Timestamp, iNaT from pandas.core.dtypes.common import ( ensure_int64, ensure_object, @@ -9273,9 +9273,9 @@ def describe_categorical_1d(data): tz = data.dt.tz asint = data.dropna().values.view('i8') names += ['top', 'freq', 'first', 'last'] - result += [tslib.Timestamp(top, tz=tz), freq, - tslib.Timestamp(asint.min(), tz=tz), - tslib.Timestamp(asint.max(), 
tz=tz)] + result += [Timestamp(top, tz=tz), freq, + Timestamp(asint.min(), tz=tz), + Timestamp(asint.max(), tz=tz)] else: names += ['top', 'freq'] result += [top, freq] @@ -10613,7 +10613,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): issubclass(y.dtype.type, (np.datetime64, np.timedelta64))): result = accum_func(y, axis) mask = isna(self) - np.putmask(result, mask, tslib.iNaT) + np.putmask(result, mask, iNaT) elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)): mask = isna(self) np.putmask(y, mask, mask_a) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 21e84629b4d3b..128068959ebd3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -22,11 +22,11 @@ ) from pandas.core.tools.datetimes import parse_time_string, DateParseError -from pandas._libs import tslib, index as libindex +from pandas._libs import index as libindex from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX) -from pandas._libs.tslibs import resolution +from pandas._libs.tslibs import resolution, NaT, iNaT from pandas.core.algorithms import unique1d import pandas.core.arrays.datetimelike as dtl @@ -336,7 +336,7 @@ def _box_func(self): # places outside of indexes/period.py are calling this _box_func, # but passing data that's already boxed. def func(x): - if isinstance(x, Period) or x is tslib.NaT: + if isinstance(x, Period) or x is NaT: return x else: return Period._from_ordinal(ordinal=x, freq=self.freq) @@ -726,7 +726,7 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) try: - ordinal = tslib.iNaT if key is tslib.NaT else key.ordinal + ordinal = iNaT if key is NaT else key.ordinal if tolerance is not None: tolerance = self._convert_tolerance(tolerance, np.asarray(key)) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index c5911da1666d2..e37efce901cbd 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -15,7 +15,7 @@ import pytest import pandas as pd -from pandas._libs import tslib, lib, missing as libmissing +from pandas._libs import lib, iNaT, missing as libmissing from pandas import (Series, Index, DataFrame, Timedelta, DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical, isna, Interval, @@ -1263,7 +1263,7 @@ def test_nan_to_nat_conversions(): })) df.iloc[3:6, :] = np.nan result = df.loc[4, 'B'].value - assert (result == tslib.iNaT) + assert (result == iNaT) s = df['B'].copy() s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index dbce4c88aefd7..442ce27a730a6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -711,7 +711,7 @@ def test_constructor_maskedarray_nonfloat(self): assert 1 == frame['A'][1] assert 2 == frame['C'][2] - # masked np.datetime64 stays (use lib.NaT as null) + # masked np.datetime64 stays (use NaT as null) mat = ma.masked_all((2, 3), dtype='M8[ns]') # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index ba18f9b34574d..8c66b68c94946 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -13,7 +13,7 @@ import pytz from pandas._libs import tslib -from pandas._libs.tslibs import parsing +from 
pandas._libs.tslibs import iNaT, parsing from pandas.compat import PY3, lmap from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -652,7 +652,7 @@ def test_unit(self, cache): with pytest.raises(ValueError): to_datetime([1], unit='D', format='%Y%m%d', cache=cache) - values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan, + values = [11111111, 1, 1.0, iNaT, NaT, np.nan, 'NaT', ''] result = to_datetime(values, unit='D', errors='ignore', cache=cache) expected = Index([11111111, Timestamp('1970-01-02'), @@ -669,7 +669,7 @@ def test_unit(self, cache): with pytest.raises(tslib.OutOfBoundsDatetime): to_datetime(values, unit='D', errors='raise', cache=cache) - values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT'] + values = [1420043460000, iNaT, NaT, np.nan, 'NaT'] result = to_datetime(values, errors='ignore', unit='s', cache=cache) expected = Index([1420043460000, NaT, NaT, @@ -1104,7 +1104,7 @@ def test_string_na_nat_conversion(self, cache): expected = np.empty(4, dtype='M8[ns]') for i, val in enumerate(strings): if isna(val): - expected[i] = tslib.iNaT + expected[i] = iNaT else: expected[i] = parse_date(val) @@ -1145,7 +1145,7 @@ def test_string_na_nat_conversion(self, cache): for i in range(5): x = series[i] if isna(x): - expected[i] = tslib.iNaT + expected[i] = iNaT else: expected[i] = to_datetime(x, cache=cache) @@ -1420,10 +1420,10 @@ def test_parsers_nat(self): result2 = to_datetime('NaT') result3 = Timestamp('NaT') result4 = DatetimeIndex(['NaT'])[0] - assert result1 is tslib.NaT - assert result2 is tslib.NaT - assert result3 is tslib.NaT - assert result4 is tslib.NaT + assert result1 is NaT + assert result2 is NaT + assert result3 is NaT + assert result4 is NaT @pytest.mark.parametrize('cache', [True, False]) def test_parsers_dayfirst_yearfirst(self, cache): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 516eb8971abaf..ede5256db2f1d 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -2,10 +2,8 @@ import numpy as np import pytest -import pandas._libs.tslib as tslib - import pandas as pd -from pandas import DatetimeIndex, Index, Period, PeriodIndex, Series +from pandas import DatetimeIndex, Index, NaT, Period, PeriodIndex, Series from pandas.core.arrays import PeriodArray from pandas.tests.test_base import Ops import pandas.util.testing as tm @@ -29,13 +27,13 @@ def test_ops_properties(self): def test_minmax(self): # monotonic - idx1 = pd.PeriodIndex([pd.NaT, '2011-01-01', '2011-01-02', + idx1 = pd.PeriodIndex([NaT, '2011-01-01', '2011-01-02', '2011-01-03'], freq='D') assert idx1.is_monotonic # non-monotonic - idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03', - '2011-01-02', pd.NaT], freq='D') + idx2 = pd.PeriodIndex(['2011-01-01', NaT, '2011-01-03', + '2011-01-02', NaT], freq='D') assert not idx2.is_monotonic for idx in [idx1, idx2]: @@ -50,15 +48,15 @@ def test_minmax(self): # Return NaT obj = PeriodIndex([], freq='M') result = getattr(obj, op)() - assert result is tslib.NaT + assert result is NaT - obj = PeriodIndex([pd.NaT], freq='M') + obj = PeriodIndex([NaT], freq='M') result = getattr(obj, op)() - assert result is tslib.NaT + assert result is NaT - obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M') + obj = PeriodIndex([NaT, NaT, NaT], freq='M') result = getattr(obj, op)() - assert result is tslib.NaT + assert result is NaT def test_numpy_minmax(self): pr = pd.period_range(start='2016-01-15', end='2016-01-20') @@ -113,7 +111,7 @@ 
def test_value_counts_unique(self): idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 08:00', - '2013-01-01 08:00', pd.NaT], freq='H') + '2013-01-01 08:00', NaT], freq='H') exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'], freq='H') @@ -123,7 +121,7 @@ def test_value_counts_unique(self): tm.assert_series_equal(obj.value_counts(), expected) exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00', - pd.NaT], freq='H') + NaT], freq='H') expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: @@ -284,9 +282,9 @@ def test_order(self): '2011-01-03', '2011-01-05'], freq='D', name='idx2') - idx3 = PeriodIndex([pd.NaT, '2011-01-03', '2011-01-05', - '2011-01-02', pd.NaT], freq='D', name='idx3') - exp3 = PeriodIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', + idx3 = PeriodIndex([NaT, '2011-01-03', '2011-01-05', + '2011-01-02', NaT], freq='D', name='idx3') + exp3 = PeriodIndex([NaT, NaT, '2011-01-02', '2011-01-03', '2011-01-05'], freq='D', name='idx3') for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: @@ -338,8 +336,8 @@ def test_repeat(self): tm.assert_index_equal(res, exp) def test_nat(self): - assert pd.PeriodIndex._na_value is pd.NaT - assert pd.PeriodIndex([], freq='M')._na_value is pd.NaT + assert pd.PeriodIndex._na_value is NaT + assert pd.PeriodIndex([], freq='M')._na_value is NaT idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') assert idx._can_hold_na @@ -460,10 +458,10 @@ def test_pi_comp_period_nat(self): f = lambda x: pd.Period('2011-03', freq='M') == x self._check(idx, f, exp) - f = lambda x: x == tslib.NaT + f = lambda x: x == NaT exp = np.array([False, False, False, False], dtype=np.bool) self._check(idx, f, exp) - f = lambda x: tslib.NaT == x + f = lambda x: NaT == x self._check(idx, f, exp) f = lambda x: x != pd.Period('2011-03', freq='M') @@ -472,10 +470,10 @@ def test_pi_comp_period_nat(self): f = lambda x: pd.Period('2011-03', freq='M') != x self._check(idx, f, exp) - f = lambda x: x != tslib.NaT + f = lambda x: x != NaT exp = np.array([True, True, True, True], dtype=np.bool) self._check(idx, f, exp) - f = lambda x: tslib.NaT != x + f = lambda x: NaT != x self._check(idx, f, exp) f = lambda x: pd.Period('2011-03', freq='M') >= x @@ -486,11 +484,11 @@ def test_pi_comp_period_nat(self): exp = np.array([True, False, False, False], dtype=np.bool) self._check(idx, f, exp) - f = lambda x: x > tslib.NaT + f = lambda x: x > NaT exp = np.array([False, False, False, False], dtype=np.bool) self._check(idx, f, exp) - f = lambda x: tslib.NaT >= x + f = lambda x: NaT >= x exp = np.array([False, False, False, False], dtype=np.bool) self._check(idx, f, exp) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 7171b15acbfa1..66e8541d2c911 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -5,17 +5,15 @@ from datetime import datetime, date, timedelta import pandas as pd -from pandas import Timedelta +from pandas import Timedelta, NaT, Period, Timestamp, offsets import pandas.util.testing as tm import pandas.core.indexes.period as period from pandas.compat import text_type, iteritems from pandas.compat.numpy import np_datetime64_compat -from pandas._libs import tslib -from pandas._libs.tslibs import period as libperiod +from pandas._libs.tslibs import iNaT, period as libperiod from pandas._libs.tslibs.ccalendar import DAYS, MONTHS from pandas._libs.tslibs.parsing import DateParseError 
-from pandas import Period, Timestamp, offsets class TestPeriodProperties(object): @@ -91,32 +89,32 @@ def test_period_cons_weekly(self, num, day): assert isinstance(result, Period) def test_period_from_ordinal(self): - p = pd.Period('2011-01', freq='M') - res = pd.Period._from_ordinal(p.ordinal, freq='M') + p = Period('2011-01', freq='M') + res = Period._from_ordinal(p.ordinal, freq='M') assert p == res assert isinstance(res, Period) def test_period_cons_nat(self): p = Period('NaT', freq='M') - assert p is pd.NaT + assert p is NaT p = Period('nat', freq='W-SUN') - assert p is pd.NaT + assert p is NaT - p = Period(tslib.iNaT, freq='D') - assert p is pd.NaT + p = Period(iNaT, freq='D') + assert p is NaT - p = Period(tslib.iNaT, freq='3D') - assert p is pd.NaT + p = Period(iNaT, freq='3D') + assert p is NaT - p = Period(tslib.iNaT, freq='1D1H') - assert p is pd.NaT + p = Period(iNaT, freq='1D1H') + assert p is NaT p = Period('NaT') - assert p is pd.NaT + assert p is NaT - p = Period(tslib.iNaT) - assert p is pd.NaT + p = Period(iNaT) + assert p is NaT def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') @@ -283,12 +281,12 @@ def test_timestamp_tz_arg_dateutil_from_string(self): assert p.tz == dateutil_gettz('Europe/Brussels') def test_timestamp_mult(self): - p = pd.Period('2011-01', freq='M') + p = Period('2011-01', freq='M') assert p.to_timestamp(how='S') == Timestamp('2011-01-01') expected = Timestamp('2011-02-01') - Timedelta(1, 'ns') assert p.to_timestamp(how='E') == expected - p = pd.Period('2011-01', freq='3M') + p = Period('2011-01', freq='3M') assert p.to_timestamp(how='S') == Timestamp('2011-01-01') expected = Timestamp('2011-04-01') - Timedelta(1, 'ns') assert p.to_timestamp(how='E') == expected @@ -563,7 +561,7 @@ def test_repr(self): def test_repr_nat(self): p = Period('nat', freq='M') - assert repr(tslib.NaT) in repr(p) + assert repr(NaT) in repr(p) def test_millisecond_repr(self): p = Period('2000-01-01 12:15:02.123') @@ -865,7 +863,7 @@ def test_constructor_corner(self): pytest.raises(ValueError, Period, 1.6, freq='D') pytest.raises(ValueError, Period, ordinal=1.6, freq='D') pytest.raises(ValueError, Period, ordinal=2, value=1, freq='D') - assert Period(None) is pd.NaT + assert Period(None) is NaT pytest.raises(ValueError, Period, month=1) p = Period('2007-01-01', freq='D') @@ -1002,8 +1000,8 @@ def test_period_nat_comp(self): p_nat = Period('NaT', freq='D') p = Period('2011-01-01', freq='D') - nat = pd.Timestamp('NaT') - t = pd.Timestamp('2011-01-01') + nat = Timestamp('NaT') + t = Timestamp('2011-01-01') # confirm Period('NaT') work identical with Timestamp('NaT') for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), (t, nat), (nat, nat)]: @@ -1025,13 +1023,13 @@ def test_add(self): assert 1 + dt1 == dt2 def test_add_pdnat(self): - p = pd.Period('2011-01', freq='M') - assert p + pd.NaT is pd.NaT - assert pd.NaT + p is pd.NaT + p = Period('2011-01', freq='M') + assert p + NaT is NaT + assert NaT + p is NaT - p = pd.Period('NaT', freq='M') - assert p + pd.NaT is pd.NaT - assert pd.NaT + p is pd.NaT + p = Period('NaT', freq='M') + assert p + NaT is NaT + assert NaT + p is NaT def test_add_raises(self): # GH 4731 @@ -1054,9 +1052,9 @@ def test_add_raises(self): @pytest.mark.parametrize('lbox', boxes, ids=ids) @pytest.mark.parametrize('rbox', boxes, ids=ids) def test_add_timestamp_raises(self, rbox, lbox): - # GH # 17983 - ts = pd.Timestamp('2017') - per = pd.Period('2017', freq='M') + # GH#17983 + ts = Timestamp('2017') + per = Period('2017', 
freq='M') # We may get a different message depending on which class raises # the error. @@ -1082,7 +1080,7 @@ def test_sub(self): msg = r"Input has different freq=M from Period\(freq=D\)" with tm.assert_raises_regex(period.IncompatibleFrequency, msg): - dt1 - pd.Period('2011-02', freq='M') + dt1 - Period('2011-02', freq='M') def test_add_offset(self): # freq is DateOffset @@ -1218,41 +1216,41 @@ def test_add_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - assert p + o is tslib.NaT - assert o + p is tslib.NaT + assert p + o is NaT + assert o + p is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - assert p + o is tslib.NaT + assert p + o is NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - assert o + p is tslib.NaT + assert o + p is NaT for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - assert p + o is tslib.NaT + assert p + o is NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - assert o + p is tslib.NaT + assert o + p is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - assert p + o is tslib.NaT + assert p + o is NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - assert o + p is tslib.NaT + assert o + p is NaT # freq is Tick for freq in ['D', '2D', '3D']: @@ -1260,55 +1258,55 @@ def test_add_offset_nat(self): for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - assert p + o is tslib.NaT + assert p + o is NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - assert o + p is tslib.NaT + assert o + p is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - assert p + o is tslib.NaT + assert p + o is NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - assert o + p is tslib.NaT + assert o + p is NaT for freq in ['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - assert p + o is tslib.NaT + assert p + o is NaT if not isinstance(o, np.timedelta64): - assert o + p is tslib.NaT + assert o + p is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - assert p + o is tslib.NaT + assert p + o is NaT if isinstance(o, np.timedelta64): with pytest.raises(TypeError): o + p else: - assert o + p is tslib.NaT + assert o + p is NaT def test_sub_pdnat(self): # GH 13071 - p = pd.Period('2011-01', freq='M') - assert p - pd.NaT is pd.NaT - assert pd.NaT - p is pd.NaT + p = Period('2011-01', freq='M') + assert p - NaT is NaT + assert NaT - p is NaT - p = pd.Period('NaT', freq='M') - assert p - pd.NaT is pd.NaT - assert pd.NaT - p is pd.NaT + p = Period('NaT', freq='M') + assert p - NaT is NaT + assert NaT - p is NaT def test_sub_offset(self): # freq is DateOffset @@ -1375,22 +1373,22 @@ def test_sub_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - assert p - o is tslib.NaT + assert p - o is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), 
offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - assert p - o is tslib.NaT + assert p - o is NaT for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - assert p - o is tslib.NaT + assert p - o is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - assert p - o is tslib.NaT + assert p - o is NaT # freq is Tick for freq in ['D', '2D', '3D']: @@ -1398,42 +1396,42 @@ def test_sub_offset_nat(self): for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - assert p - o is tslib.NaT + assert p - o is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - assert p - o is tslib.NaT + assert p - o is NaT for freq in ['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - assert p - o is tslib.NaT + assert p - o is NaT for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - assert p - o is tslib.NaT + assert p - o is NaT @pytest.mark.parametrize('freq', ['M', '2M', '3M']) def test_nat_ops(self, freq): p = Period('NaT', freq=freq) - assert p + 1 is tslib.NaT - assert 1 + p is tslib.NaT - assert p - 1 is tslib.NaT - assert p - Period('2011-01', freq=freq) is tslib.NaT - assert Period('2011-01', freq=freq) - p is tslib.NaT + assert p + 1 is NaT + assert 1 + p is NaT + assert p - 1 is NaT + assert p - Period('2011-01', freq=freq) is NaT + assert Period('2011-01', freq=freq) - p is NaT def test_period_ops_offset(self): p = Period('2011-04-01', freq='D') result = p + offsets.Day() - exp = pd.Period('2011-04-02', freq='D') + exp = Period('2011-04-02', freq='D') assert result == exp result = p - offsets.Day(2) - exp = pd.Period('2011-03-30', freq='D') + exp = Period('2011-03-30', freq='D') assert result == exp msg = r"Input cannot be converted to Period\(freq=D\)" @@ -1446,7 +1444,7 @@ def test_period_ops_offset(self): def test_period_immutable(): # see gh-17116 - per = pd.Period('2014Q1') + per = Period('2014Q1') with pytest.raises(AttributeError): per.ordinal = 14 diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index a0da25c96caa6..cdcc423e3410c 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from pandas._libs import tslib +from pandas._libs import iNaT import pandas._libs.index as _index from pandas.compat import lrange, range @@ -459,7 +459,7 @@ def test_index_unique(dups): tm.assert_index_equal(result, expected) # NaT, note this is excluded - arr = [1370745748 + t for t in range(20)] + [tslib.iNaT] + arr = [1370745748 + t for t in range(20)] + [iNaT] idx = DatetimeIndex(arr * 3) tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) assert idx.nunique() == 20 diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index a3aaabb70ae8c..75017f2d22794 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -6,10 +6,9 @@ import sys import numpy as np -from numpy import nan import pytest -import pandas._libs.tslib as tslib +from pandas._libs.tslibs import iNaT 
import pandas.compat as compat from pandas.compat import lrange, range, u @@ -85,7 +84,7 @@ def test_astype_cast_object_int(self): tm.assert_series_equal(result, Series(np.arange(1, 5))) def test_astype_datetime(self): - s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) + s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) s = s.astype('O') assert s.dtype == np.object_ @@ -137,7 +136,7 @@ def test_astype_datetime64tz(self): tm.rands(1000)]), Series([string.digits * 10, tm.rands(63), - tm.rands(64), nan, 1.0])]) + tm.rands(64), np.nan, 1.0])]) def test_astype_str_map(self, dtype, series): # see gh-4405 result = series.astype(dtype) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 8e3b0d19447a1..21094c0079d41 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -4,14 +4,10 @@ from datetime import datetime import numpy as np -from numpy import nan import pytest -import pandas._libs.lib as lib - import pandas as pd -from pandas import Series -from pandas.core.indexes.datetimes import Timestamp +from pandas import NaT, Series, Timestamp import pandas.util.testing as tm from pandas.util.testing import assert_series_equal @@ -87,7 +83,7 @@ def test_convert_objects(self): expected = Series([Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103'), - lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), + NaT, NaT, NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') with tm.assert_produces_warning(FutureWarning): result = s2.convert_objects(convert_dates='coerce', @@ -103,7 +99,7 @@ def test_convert_objects(self): with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce', convert_numeric=False) - expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2) + expected = Series([NaT] * 2 + [Timestamp(1)] * 2) assert_series_equal(result, expected) # preserver if non-object @@ -149,14 +145,14 @@ def test_convert(self): # Test coercion returns correct type s = Series(['a', 'b', 'c']) results = s._convert(datetime=True, coerce=True) - expected = Series([lib.NaT] * 3) + expected = Series([NaT] * 3) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) expected = Series([np.nan] * 3) assert_series_equal(results, expected) - expected = Series([lib.NaT] * 3, dtype=np.dtype('m8[ns]')) + expected = Series([NaT] * 3, dtype=np.dtype('m8[ns]')) results = s._convert(timedelta=True, coerce=True) assert_series_equal(results, expected) @@ -166,15 +162,15 @@ def test_convert(self): # Test coercion with mixed types s = Series(['a', '3.1415', dt, td]) results = s._convert(datetime=True, coerce=True) - expected = Series([lib.NaT, lib.NaT, dt, lib.NaT]) + expected = Series([NaT, NaT, dt, NaT]) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) - expected = Series([nan, 3.1415, nan, nan]) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True, coerce=True) - expected = Series([lib.NaT, lib.NaT, lib.NaT, td], + expected = Series([NaT, NaT, NaT, td], dtype=np.dtype('m8[ns]')) assert_series_equal(results, expected) @@ -182,7 +178,7 @@ def test_convert(self): results = s._convert(datetime=True) assert_series_equal(results, s) results = s._convert(numeric=True) - expected = Series([nan, 3.1415, nan, nan]) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True) 
assert_series_equal(results, s) @@ -231,13 +227,13 @@ def test_convert(self): r['a'] = 'garbled' result = r._convert(numeric=True) expected = s.copy() - expected['a'] = nan + expected['a'] = np.nan assert_series_equal(result, expected) # GH 4119, not converting a mixed type (e.g.floats and object) s = Series([1, 'na', 3, 4]) result = s._convert(datetime=True, numeric=True) - expected = Series([1, nan, 3, 4]) + expected = Series([1, np.nan, 3, 4]) assert_series_equal(result, expected) s = Series([1, '', 3, 4]) @@ -260,7 +256,7 @@ def test_convert(self): assert_series_equal(result, expected) expected = Series([Timestamp('20010101'), Timestamp('20010102'), - Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT, + Timestamp('20010103'), NaT, NaT, NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') result = s2._convert(datetime=True, numeric=False, timedelta=False, @@ -271,7 +267,7 @@ def test_convert(self): s = Series(['foo', 'bar', 1, 1.0], dtype='O') result = s._convert(datetime=True, coerce=True) - expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2) + expected = Series([NaT] * 2 + [Timestamp(1)] * 2) assert_series_equal(result, expected) # preserver if non-object @@ -285,7 +281,7 @@ def test_convert(self): # assert result.dtype == 'M8[ns]' # dateutil parses some single letters into today's value as a date - expected = Series([lib.NaT]) + expected = Series([NaT]) for x in 'abcdefghijklmnopqrstuvwxyz': s = Series([x]) result = s._convert(datetime=True, coerce=True) @@ -321,4 +317,4 @@ def test_hasnans_unchached_for_series(): assert not hasattr(ser, '_cache') ser.iloc[-1] = np.nan assert ser.hasnans is True - assert pd.Series.hasnans.__doc__ == pd.Index.hasnans.__doc__ + assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__ diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 67c75f43e030c..7efde1fbdd1f5 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -4,8 +4,6 @@ import numpy as np import pytest -import pandas._libs.lib as lib - import pandas as pd import pandas.util.testing as tm @@ -65,7 +63,7 @@ def test_replace(self): ser = pd.Series([np.nan, 0, np.inf]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT]) + ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, pd.NaT]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) filled = ser.copy() filled[4] = 0 diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 915687304bfe2..f2d9f35256a10 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -6,7 +6,7 @@ import pytz from dateutil.tz.tz import tzoffset -from pandas._libs import tslib +from pandas._libs import iNaT, tslib from pandas.compat.numpy import np_array_datetime64_compat import pandas.util.testing as tm @@ -130,13 +130,13 @@ def test_coerce_outside_ns_bounds(self, invalid_date): tslib.array_to_datetime(arr, errors='raise') result, _ = tslib.array_to_datetime(arr, errors='coerce') - expected = np.array([tslib.iNaT], dtype='M8[ns]') + expected = np.array([iNaT], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) def test_coerce_outside_ns_bounds_one_valid(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) result, _ = tslib.array_to_datetime(arr, errors='coerce') - expected = [tslib.iNaT, + expected = [iNaT, '2000-01-01T00:00:00.000000000-0000'] 
tm.assert_numpy_array_equal( result, @@ -153,8 +153,8 @@ def test_coerce_of_invalid_datetimes(self): # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = ['2013-01-01T00:00:00.000000000-0000', - tslib.iNaT, - tslib.iNaT] + iNaT, + iNaT] tm.assert_numpy_array_equal( result, From 66a95bc0d7114e3cab43f4ef919d7eb8a2075941 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 8 Nov 2018 13:47:42 +0100 Subject: [PATCH 066/122] BUG: raise if invalid freq is passed (#23546) --- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- pandas/tests/plotting/test_converter.py | 4 ++-- pandas/tests/scalar/timestamp/test_timestamp.py | 5 +++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 44133a1a63597..e2914957d01cd 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -733,11 +733,11 @@ class Timestamp(_Timestamp): if ts.value == NPY_NAT: return NaT - if is_string_object(freq): - freq = to_offset(freq) - elif not is_offset_object(freq): + if freq is None: # GH 22311: Try to extract the frequency of a given Timestamp input freq = getattr(ts_input, 'freq', None) + elif not is_offset_object(freq): + freq = to_offset(freq) return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index bb976a1e3e81c..eed3679c5bc8c 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -285,11 +285,11 @@ def _assert_less(ts1, ts2): _assert_less(ts, ts + Micro(50)) def test_convert_nested(self): - inner = [Timestamp('2017-01-01', Timestamp('2017-01-02'))] + inner = [Timestamp('2017-01-01'), Timestamp('2017-01-02')] data = [inner, inner] result = self.dtc.convert(data, None, None) expected = [self.dtc.convert(x, None, None) for x in data] - assert result == expected + assert (np.array(result) == expected).all() class TestPeriodConverter(object): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 9a77a9ccc96c3..7af0b281aeaa5 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -575,6 +575,11 @@ def test_construct_timestamp_preserve_original_frequency(self): expected = offsets.Day() assert result == expected + def test_constructor_invalid_frequency(self): + # GH 22311 + with tm.assert_raises_regex(ValueError, "Invalid frequency:"): + Timestamp('2012-01-01', freq=[]) + class TestTimestamp(object): From 3bb2f75e5c41f889fd1ad2cc66993951f07ad791 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Thu, 8 Nov 2018 06:10:43 -0700 Subject: [PATCH 067/122] TST: Fix dtype mismatch on 32bit in IntervalTree get_indexer test (#23468) --- pandas/_libs/intervaltree.pxi.in | 7 ++- .../indexes/interval/test_interval_tree.py | 63 ++++++++++++------- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 875848c00311f..f9427fbbcd900 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -105,7 +105,7 @@ cdef class IntervalTree(IntervalMixin): self.root.query(result, key) if not result.data.n: raise KeyError(key) - return result.to_array() + return result.to_array().astype('intp') def _get_partial_overlap(self, key_left, key_right, side): """Return all positions 
corresponding to intervals with the given side @@ -155,7 +155,7 @@ cdef class IntervalTree(IntervalMixin): raise KeyError( 'indexer does not intersect a unique set of intervals') old_len = result.data.n - return result.to_array() + return result.to_array().astype('intp') def get_indexer_non_unique(self, scalar_t[:] target): """Return the positions corresponding to intervals that overlap with @@ -175,7 +175,8 @@ cdef class IntervalTree(IntervalMixin): result.append(-1) missing.append(i) old_len = result.data.n - return result.to_array(), missing.to_array() + return (result.to_array().astype('intp'), + missing.to_array().astype('intp')) def __repr__(self): return (' Date: Thu, 8 Nov 2018 07:17:19 -0600 Subject: [PATCH 068/122] Preserve EA dtype in DataFrame.stack (#23285) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/internals/blocks.py | 2 +- pandas/core/reshape/reshape.py | 62 ++++++++++++++++++++++-- pandas/tests/extension/base/reshaping.py | 22 +++++++++ pandas/tests/extension/json/test_json.py | 9 ++++ pandas/tests/frame/test_reshape.py | 11 +++++ pandas/tests/sparse/frame/test_frame.py | 10 ++++ 7 files changed, 111 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 7314d23bfaa3e..2e609461cd964 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -853,6 +853,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) +- :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`). - :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`). - Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`). 
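For orientation, the user-visible effect of the whatsnew entries above can be sketched in a few lines. This is an illustrative example, not part of the patch; it assumes a pandas build that includes this commit, and it mirrors the test_stack_preserve_categorical_dtype_values test added further down in this patch:

    import pandas as pd

    # Two columns sharing the same extension dtype (category).
    cat = pd.Categorical(['a', 'a', 'b', 'c'])
    df = pd.DataFrame({'A': cat, 'B': cat})

    # DataFrame.stack on a homogeneous extension-dtype frame now keeps
    # the column dtype instead of casting the result to object.
    stacked = df.stack()
    print(stacked.dtype)  # category

Internally, the homogeneous extension-dtype case is routed through dtype.construct_array_type()._concat_same_type plus the _reorder_for_extension_array_stack helper introduced in the reshape.py diff below.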
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 7a55b652054ed..1f2a1ee52159e 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -35,9 +35,9 @@
     is_numeric_v_string_like, is_extension_type,
     is_extension_array_dtype,
     is_list_like,
-    is_sparse,
     is_re,
     is_re_compilable,
+    is_sparse,
     pandas_dtype)
 from pandas.core.dtypes.cast import (
     maybe_downcast_to_dtype,
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 2dca7cf0e6aa3..065728fb239ae 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -494,8 +494,9 @@ def factorize(index):
     if is_extension_array_dtype(dtype):
         arr = dtype.construct_array_type()
         new_values = arr._concat_same_type([
-            col for _, col in frame.iteritems()
+            col._values for _, col in frame.iteritems()
         ])
+        new_values = _reorder_for_extension_array_stack(new_values, N, K)
     else:
         # homogeneous, non-EA
         new_values = frame.values.ravel()
@@ -624,16 +625,32 @@ def _convert_level_number(level_num, columns):
         slice_len = loc.stop - loc.start

         if slice_len != levsize:
-            chunk = this.loc[:, this.columns[loc]]
+            chunk = this[this.columns[loc]]
             chunk.columns = level_vals.take(chunk.columns.labels[-1])
             value_slice = chunk.reindex(columns=level_vals_used).values
         else:
-            if frame._is_mixed_type:
-                value_slice = this.loc[:, this.columns[loc]].values
+            if (frame._is_homogeneous_type and
+                    is_extension_array_dtype(frame.dtypes.iloc[0])):
+                dtype = this[this.columns[loc]].dtypes.iloc[0]
+                subset = this[this.columns[loc]]
+
+                value_slice = dtype.construct_array_type()._concat_same_type(
+                    [x._values for _, x in subset.iteritems()]
+                )
+                N, K = this.shape
+                idx = np.arange(N * K).reshape(K, N).T.ravel()
+                value_slice = value_slice.take(idx)
+
+            elif frame._is_mixed_type:
+                value_slice = this[this.columns[loc]].values
             else:
                 value_slice = this.values[:, loc]

-        new_data[key] = value_slice.ravel()
+        if value_slice.ndim > 1:
+            # i.e. not extension
+            value_slice = value_slice.ravel()
+
+        new_data[key] = value_slice

     if len(drop_cols) > 0:
         new_columns = new_columns.difference(drop_cols)
@@ -971,3 +988,38 @@ def make_axis_dummies(frame, axis='minor', transform=None):
         values = values.take(labels, axis=0)

     return DataFrame(values, columns=items, index=frame.index)
+
+
+def _reorder_for_extension_array_stack(arr, n_rows, n_columns):
+    """
+    Re-orders the values when stacking multiple extension-arrays.
+
+    The indirect stacking method used for EAs requires a followup
+    take to get the order correct.
+
+    Parameters
+    ----------
+    arr : ExtensionArray
+    n_rows, n_columns : int
+        The number of rows and columns in the original DataFrame.
+
+    Returns
+    -------
+    taken : ExtensionArray
+        The original `arr` with elements re-ordered appropriately
+
+    Examples
+    --------
+    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
+    >>> _reorder_for_extension_array_stack(arr, 2, 3)
+    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')
+
+    >>> _reorder_for_extension_array_stack(arr, 3, 2)
+    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
+    """
+    # final take to get the order correct.
+    # idx is an indexer like
+    # [c0r0, c1r0, c2r0, ...,
+    #  c0r1, c1r1, c2r1, ...]
+    idx = np.arange(n_rows * n_columns).reshape(n_columns, n_rows).T.ravel()
+    return arr.take(idx)
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
+    @pytest.mark.parametrize("columns", [
+        ["A", "B"],
+        pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b')],
+                                  names=['outer', 'inner']),
+    ])
+    def test_stack(self, data, columns):
+        df = pd.DataFrame({"A": data[:5], "B": data[:5]})
+        df.columns = columns
+        result = df.stack()
+        expected = df.astype(object).stack()
+        # we need a second astype(object), in case the constructor inferred
+        # object -> specialized, as is done for period.
+        expected = expected.astype(object)
+
+        if isinstance(expected, pd.Series):
+            assert result.dtype == df.iloc[:, 0].dtype
+        else:
+            assert all(result.dtypes == df.iloc[:, 0].dtype)
+
+        result = result.astype(object)
+        self.assert_equal(result, expected)
+
     @pytest.mark.parametrize("index", [
         # Two levels, uniform.
pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]), diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 2b1bfecdf8f28..b7c61496f0bf0 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -139,6 +139,15 @@ def test_from_dtype(self, data): class TestReshaping(BaseJSON, base.BaseReshapingTests): + + @pytest.mark.skip(reason="Different definitions of NA") + def test_stack(self): + """ + The test does .astype(object).stack(). If we happen to have + any missing values in `data`, then we'll end up with different + rows since we consider `{}` NA, but `.astype(object)` doesn't. + """ + @pytest.mark.xfail(reason="dict for NA", strict=True) def test_unstack(self, data, index): # The base test has NaN for the expected NA value. diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 54511df4effad..ab3d6ca3b19f7 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -874,6 +874,17 @@ def test_stack_preserve_categorical_dtype(self, ordered, labels): tm.assert_series_equal(result, expected) + def test_stack_preserve_categorical_dtype_values(self): + # GH-23077 + cat = pd.Categorical(['a', 'a', 'b', 'c']) + df = pd.DataFrame({"A": cat, "B": cat}) + result = df.stack() + index = pd.MultiIndex.from_product([[0, 1, 2, 3], ['A', 'B']]) + expected = pd.Series(pd.Categorical(['a', 'a', 'a', 'a', + 'b', 'b', 'c', 'c']), + index=index) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('level', [0, 1]) def test_unstack_mixed_extension_types(self, level): index = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 1)], diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 03143488c3874..10074a2e5ad99 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -736,6 +736,16 @@ def test_astype_bool(self): assert res['A'].dtype == SparseDtype(np.bool) assert res['B'].dtype == SparseDtype(np.bool) + def test_astype_object(self): + # This may change in GH-23125 + df = pd.DataFrame({"A": SparseArray([0, 1]), + "B": SparseArray([0, 1])}) + result = df.astype(object) + dtype = SparseDtype(object, 0) + expected = pd.DataFrame({"A": SparseArray([0, 1], dtype=dtype), + "B": SparseArray([0, 1], dtype=dtype)}) + tm.assert_frame_equal(result, expected) + def test_fillna(self, float_frame_fill0, float_frame_fill0_dense): df = float_frame_fill0.reindex(lrange(5)) dense = float_frame_fill0_dense.reindex(lrange(5)) From cf5e3851c512976997068857933da6655897036d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Nov 2018 07:29:17 -0600 Subject: [PATCH 069/122] CI: Auto-cancel redundant builds (#23523) --- .circleci/config.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cdfe93613fbdd..6e789d0aafdb4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,10 +1,6 @@ version: 2 jobs: - - # -------------------------------------------------------------------------- - # 1. 
py36_locale - # -------------------------------------------------------------------------- - py36_locale: + build: docker: - image: continuumio/miniconda:latest # databases configuration @@ -34,9 +30,3 @@ jobs: - run: name: test command: ./ci/circle/run_circle.sh --skip-slow --skip-network - -workflows: - version: 2 - build_and_test: - jobs: - - py36_locale From 7c0b5d0129f2fa42dc9318bee5e293b678f1308f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 8 Nov 2018 06:55:49 -0800 Subject: [PATCH 070/122] ENH: Support writing timestamps with timezones with to_sql (#22654) --- doc/source/io.rst | 30 ++++++++++++++++++++++ doc/source/whatsnew/v0.24.0.txt | 4 +++ pandas/core/generic.py | 9 +++++++ pandas/io/sql.py | 40 +++++++++++++++++++---------- pandas/tests/io/test_sql.py | 45 ++++++++++++++++++++++++++++++++- 5 files changed, 113 insertions(+), 15 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 68faefa872c88..9f458b58717d6 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4806,6 +4806,36 @@ default ``Text`` type for string columns: Because of this, reading the database table back in does **not** generate a categorical. +.. _io.sql_datetime_data: + +Datetime data types +''''''''''''''''''' + +Using SQLAlchemy, :func:`~pandas.DataFrame.to_sql` is capable of writing +datetime data that is timezone naive or timezone aware. However, the resulting +data stored in the database ultimately depends on the supported data type +for datetime data of the database system being used. + +The following table lists supported data types for datetime data for some +common databases. Other database dialects may have different data types for +datetime data. + +=========== ============================================= =================== +Database SQL Datetime Types Timezone Support +=========== ============================================= =================== +SQLite ``TEXT`` No +MySQL ``TIMESTAMP`` or ``DATETIME`` No +PostgreSQL ``TIMESTAMP`` or ``TIMESTAMP WITH TIME ZONE`` Yes +=========== ============================================= =================== + +When writing timezone aware data to databases that do not support timezones, +the data will be written as timezone naive timestamps that are in local time +with respect to the timezone. + +:func:`~pandas.read_sql_table` is also capable of reading datetime data that is +timezone aware or naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas +will convert the data to UTC. + Reading Tables '''''''''''''' diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2e609461cd964..fcbdb391ba83c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -222,6 +222,7 @@ Other Enhancements - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). +- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). 
- :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) @@ -1246,6 +1247,9 @@ MultiIndex I/O ^^^ +- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) + .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a6224478070ec..4d292e956e96b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2397,6 +2397,15 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, -------- pandas.read_sql : read a DataFrame from a table + Notes + ----- + Timezone aware datetime columns will be written as + ``Timestamp with timezone`` type with SQLAlchemy if supported by the + database. Otherwise, the datetimes will be stored as timezone unaware + timestamps local to the original timezone. + + .. versionadded:: 0.24.0 + References ---------- .. [1] http://docs.sqlalchemy.org diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 00fbc35ed1e7d..2f411a956dfb8 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -592,12 +592,17 @@ def insert_data(self): data_list = [None] * ncols blocks = temp._data.blocks - for i in range(len(blocks)): - b = blocks[i] + for b in blocks: if b.is_datetime: - # convert to microsecond resolution so this yields - # datetime.datetime - d = b.values.astype('M8[us]').astype(object) + # return datetime.datetime objects + if b.is_datetimetz: + # GH 9086: Ensure we return datetimes with timezone info + # Need to return 2-D data; DatetimeIndex is 1D + d = b.values.to_pydatetime() + d = np.expand_dims(d, axis=0) + else: + # convert to microsecond resolution for datetime.datetime + d = b.values.astype('M8[us]').astype(object) else: d = np.array(b.get_values(), dtype=object) @@ -612,7 +617,7 @@ def insert_data(self): return column_names, data_list def _execute_insert(self, conn, keys, data_iter): - data = [{k: v for k, v in zip(keys, row)} for row in data_iter] + data = [dict(zip(keys, row)) for row in data_iter] conn.execute(self.insert_statement(), data) def insert(self, chunksize=None): @@ -741,8 +746,9 @@ def _get_column_names_and_types(self, dtype_mapper): def _create_table_setup(self): from sqlalchemy import Table, Column, PrimaryKeyConstraint - column_names_and_types = \ - self._get_column_names_and_types(self._sqlalchemy_type) + column_names_and_types = self._get_column_names_and_types( + self._sqlalchemy_type + ) columns = [Column(name, typ, index=is_index) for name, typ, is_index in column_names_and_types] @@ -841,14 +847,19 @@ def _sqlalchemy_type(self, col): from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean, - DateTime, Date, Time) + DateTime, Date, Time, TIMESTAMP) if col_type == 'datetime64' or col_type == 'datetime': + # GH 9086: TIMESTAMP is the suggested type if the column contains + # timezone information try: - tz = col.tzinfo # noqa - return DateTime(timezone=True) + if col.dt.tz is not None: + return TIMESTAMP(timezone=True) except AttributeError: - return DateTime + # The column 
is actually a DatetimeIndex + if col.tz is not None: + return TIMESTAMP(timezone=True) + return DateTime if col_type == 'timedelta64': warnings.warn("the 'timedelta' type is not supported, and will be " "written as integer values (ns frequency) to the " @@ -1275,8 +1286,9 @@ def _create_table_setup(self): structure of a DataFrame. The first entry will be a CREATE TABLE statement while the rest will be CREATE INDEX statements. """ - column_names_and_types = \ - self._get_column_names_and_types(self._sql_type_name) + column_names_and_types = self._get_column_names_and_types( + self._sql_type_name + ) pat = re.compile(r'\s+') column_names = [col_name for col_name, _, _ in column_names_and_types] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 237cc2936919e..777b04bbae97d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -961,7 +961,8 @@ def test_sqlalchemy_type_mapping(self): utc=True)}) db = sql.SQLDatabase(self.conn) table = sql.SQLTable("test_type", db, frame=df) - assert isinstance(table.table.c['time'].type, sqltypes.DateTime) + # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones + assert isinstance(table.table.c['time'].type, sqltypes.TIMESTAMP) def test_database_uri_string(self): @@ -1361,9 +1362,51 @@ def check(col): df = sql.read_sql_table("types_test_data", self.conn) check(df.DateColWithTz) + def test_datetime_with_timezone_roundtrip(self): + # GH 9086 + # Write datetimetz data to a db and read it back + # For dbs that support timestamps with timezones, should get back UTC + # otherwise naive data should be returned + expected = DataFrame({'A': date_range( + '2013-01-01 09:00:00', periods=3, tz='US/Pacific' + )}) + expected.to_sql('test_datetime_tz', self.conn, index=False) + + if self.flavor == 'postgresql': + # SQLAlchemy "timezones" (i.e. 
offsets) are coerced to UTC
+            expected['A'] = expected['A'].dt.tz_convert('UTC')
+        else:
+            # Otherwise, timestamps are returned as local, naive
+            expected['A'] = expected['A'].dt.tz_localize(None)
+
+        result = sql.read_sql_table('test_datetime_tz', self.conn)
+        tm.assert_frame_equal(result, expected)
+
+        result = sql.read_sql_query(
+            'SELECT * FROM test_datetime_tz', self.conn
+        )
+        if self.flavor == 'sqlite':
+            # read_sql_query does not return datetime type like read_sql_table
+            assert isinstance(result.loc[0, 'A'], string_types)
+            result['A'] = to_datetime(result['A'])
+        tm.assert_frame_equal(result, expected)
+
+    def test_naive_datetimeindex_roundtrip(self):
+        # GH 23510
+        # Ensure that a naive DatetimeIndex isn't converted to UTC
+        dates = date_range('2018-01-01', periods=5, freq='6H')
+        expected = DataFrame({'nums': range(5)}, index=dates)
+        expected.to_sql('foo_table', self.conn, index_label='info_date')
+        result = sql.read_sql_table('foo_table', self.conn,
+                                    index_col='info_date')
+        # result index will gain a name from a set_index operation; expected
+        tm.assert_frame_equal(result, expected, check_names=False)
+
     def test_date_parsing(self):
         # No Parsing
         df = sql.read_sql_table("types_test_data", self.conn)
+        expected_type = object if self.flavor == 'sqlite' else np.datetime64
+        assert issubclass(df.DateCol.dtype.type, expected_type)

         df = sql.read_sql_table("types_test_data", self.conn,
                                 parse_dates=['DateCol'])

From db58d3dbe04b5a72e74e3220e8bb05989d375156 Mon Sep 17 00:00:00 2001
From: Patrick Park
Date: Thu, 8 Nov 2018 07:08:40 -0800
Subject: [PATCH 071/122] DOC: Added note about groupby excluding Decimal
 columns by default (#18953)

---
 doc/source/groupby.rst | 27 +++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index 17a723e2a2f42..755edba352f05 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -995,6 +995,33 @@ Note that ``df.groupby('A').colname.std().`` is more efficient than
 is only interesting over one column (here ``colname``), it may be filtered
 *before* applying the aggregation function.

+.. note::
+    Any object-dtype column, even if it contains numerical values such as
+    ``Decimal`` objects, is considered a "nuisance" column. Nuisance columns
+    are automatically excluded from aggregate functions in groupby.
+
+    If you do wish to include decimal or object columns in an aggregation with
+    other non-nuisance data types, you must do so explicitly.
+
+.. ipython:: python
+
+    from decimal import Decimal
+    df_dec = pd.DataFrame(
+        {'id': [1, 2, 1, 2],
+         'int_column': [1, 2, 3, 4],
+         'dec_column': [Decimal('0.50'), Decimal('0.15'), Decimal('0.25'), Decimal('0.40')]
+        }
+    )
+
+    # Decimal columns can be sum'd explicitly by themselves...
+    df_dec.groupby(['id'])[['dec_column']].sum()
+
+    # ...but cannot be combined with standard data types or they will be excluded
+    df_dec.groupby(['id'])[['int_column', 'dec_column']].sum()
+
+    # Use .agg function to aggregate over standard and "nuisance" data types at the same time
+    df_dec.groupby(['id']).agg({'int_column': 'sum', 'dec_column': 'sum'})
+
 .. 
_groupby.observed: Handling of (un)observed Categorical values From b62634f5885dec636bf2f18c23ecbc76d4a59151 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 8 Nov 2018 16:12:13 +0000 Subject: [PATCH 072/122] Fixes to make validate_docstrings.py not generate warnings or unwanted output (#23552) --- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 14 +- pandas/core/panel.py | 8 +- pandas/core/strings.py | 7 - pandas/errors/__init__.py | 2 +- pandas/plotting/_misc.py | 4 +- scripts/tests/test_validate_docstrings.py | 167 +++++++++++----------- scripts/validate_docstrings.py | 23 ++- 8 files changed, 112 insertions(+), 115 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4d292e956e96b..53cdc46fdd16b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5100,7 +5100,7 @@ def get_ftype_counts(self): 1 b 2 2.0 2 c 3 3.0 - >>> df.get_ftype_counts() + >>> df.get_ftype_counts() # doctest: +SKIP float64:dense 1 int64:dense 1 object:dense 1 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fcced091b3794..d0f190c82aec7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1875,12 +1875,8 @@ def get_duplicates(self): Works on different Index of types. - >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() + >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP [2, 3] - >>> pd.Index([1., 2., 2., 3., 3., 3., 4.]).get_duplicates() - [2.0, 3.0] - >>> pd.Index(['a', 'b', 'b', 'c', 'c', 'c', 'd']).get_duplicates() - ['b', 'c'] Note that for a DatetimeIndex, it does not return a list but a new DatetimeIndex: @@ -1888,22 +1884,22 @@ def get_duplicates(self): >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03', ... '2018-01-03', '2018-01-04', '2018-01-04'], ... format='%Y-%m-%d') - >>> pd.Index(dates).get_duplicates() + >>> pd.Index(dates).get_duplicates() # doctest: +SKIP DatetimeIndex(['2018-01-03', '2018-01-04'], dtype='datetime64[ns]', freq=None) Sorts duplicated elements even when indexes are unordered. - >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() + >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP [2, 3] Return empty array-like structure when all elements are unique. - >>> pd.Index([1, 2, 3, 4]).get_duplicates() + >>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP [] >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'], ... 
format='%Y-%m-%d') - >>> pd.Index(dates).get_duplicates() + >>> pd.Index(dates).get_duplicates() # doctest: +SKIP DatetimeIndex([], dtype='datetime64[ns]', freq=None) """ warnings.warn("'get_duplicates' is deprecated and will be removed in " diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 72b014b018735..eb841e6398976 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1013,21 +1013,21 @@ def apply(self, func, axis='major', **kwargs): Returns a Panel with the square root of each element - >>> p = pd.Panel(np.random.rand(4,3,2)) + >>> p = pd.Panel(np.random.rand(4, 3, 2)) # doctest: +SKIP >>> p.apply(np.sqrt) Equivalent to p.sum(1), returning a DataFrame - >>> p.apply(lambda x: x.sum(), axis=1) + >>> p.apply(lambda x: x.sum(), axis=1) # doctest: +SKIP Equivalent to previous: - >>> p.apply(lambda x: x.sum(), axis='major') + >>> p.apply(lambda x: x.sum(), axis='major') # doctest: +SKIP Return the shapes of each DataFrame over axis 2 (i.e the shapes of items x major), as a Series - >>> p.apply(lambda x: x.shape, axis=(0,1)) + >>> p.apply(lambda x: x.shape, axis=(0,1)) # doctest: +SKIP Returns ------- diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 18a83269a2f0f..bf0c93437f4dc 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2156,13 +2156,6 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): `join`-keyword works as in other methods. >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) - >>> s.str.cat(t, join=None, na_rep='-') - 0 ad - 1 ba - 2 -e - 3 dc - dtype: object - >>> >>> s.str.cat(t, join='left', na_rep='-') 0 aa 1 b- diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 147c43b30d45f..b080ab00972c6 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -133,7 +133,7 @@ class ParserWarning(Warning): >>> csv = u'''a;b;c ... 1;1,8 ... 1;2,1''' - >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') + >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP ... # ParserWarning: Falling back to the 'python' engine... Adding `engine='python'` to `pd.read_csv` removes the Warning: diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index e0074e2cf3aef..f889e08b5d348 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -206,7 +206,7 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): ... 'versicolor', 'setosa', 'virginica', ... 'setosa'] ... }) - >>> rad_viz = pd.plotting.radviz(df, 'Category') + >>> rad_viz = pd.plotting.radviz(df, 'Category') # doctest: +SKIP """ import matplotlib.pyplot as plt import matplotlib.patches as patches @@ -407,7 +407,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): :context: close-figs >>> s = pd.Series(np.random.uniform(size=100)) - >>> fig = pd.plotting.bootstrap_plot(s) + >>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP """ import random import matplotlib.pyplot as plt diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 271c7c3021905..ccd5f56141a6a 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -785,10 +785,10 @@ def test_bad_examples(self, capsys, klass, func, msgs): assert msg in ' '.join(err[1] for err in result['errors']) -class ApiItems(object): +class TestApiItems(object): @property def api_doc(self): - return textwrap.dedent(io.StringIO(''' + return io.StringIO(textwrap.dedent(''' .. 
currentmodule:: itertools Itertools @@ -861,93 +861,90 @@ def test_item_subsection(self, idx, subsection): assert result[idx][3] == subsection -class MainFunction(object): - def test_num_errors_for_validate_one(self, monkeypatch): +class TestMainFunction(object): + def test_exit_status_for_validate_one(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_one', - lambda func_name: {'docstring': 'docstring1', - 'errors': [('ER01', 'err desc'), - ('ER02', 'err desc') - ('ER03', 'err desc')], - 'warnings': [], - 'examples_errors': ''}) - num_errors = validate_docstrings.main(func_name='docstring1', - prefix=None, - errors=[], - output_format='default') - assert num_errors == 3 - - def test_no_num_errors_for_validate_one(self, monkeypatch): - monkeypatch.setattr( - validate_docstrings, 'validate_one', - lambda func_name: {'docstring': 'docstring1', - 'errors': [], - 'warnings': [('WN01', 'warn desc')], - 'examples_errors': ''}) - num_errors = validate_docstrings.main(func_name='docstring1', - prefix=None, - errors=[], - output_format='default') - assert num_errors == 0 - - def test_num_errors_for_validate_all(self, monkeypatch): + validate_docstrings, 'validate_one', lambda func_name: { + 'docstring': 'docstring1', + 'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')], + 'warnings': [], + 'examples_errors': ''}) + exit_status = validate_docstrings.main(func_name='docstring1', + prefix=None, + errors=[], + output_format='default') + assert exit_status == 0 + + def test_exit_status_errors_for_validate_all(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'docstring1': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc'), - ('ER03', 'err desc')]}, - 'docstring2': {'errors': [('ER04', 'err desc'), - ('ER05', 'err desc')]}}) - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=[], - output_format='default') - assert num_errors == 5 - - def test_no_num_errors_for_validate_all(self, monkeypatch): + validate_docstrings, 'validate_all', lambda prefix: { + 'docstring1': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')], + 'file': 'module1.py', + 'file_line': 23}, + 'docstring2': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')], + 'file': 'module2.py', + 'file_line': 925}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert exit_status == 5 + + def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'docstring1': {'errors': [], - 'warnings': [('WN01', 'warn desc')]}, - 'docstring2': {'errors': []}}) - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=[], - output_format='default') - assert num_errors == 0 - - def test_prefix_param_filters_docstrings(self, monkeypatch): + validate_docstrings, 'validate_all', lambda prefix: { + 'docstring1': {'errors': [], + 'warnings': [('WN01', 'warn desc')]}, + 'docstring2': {'errors': []}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='default') + assert exit_status == 0 + + def test_exit_status_for_validate_all_json(self, monkeypatch): + print('EXECUTED') monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc'), - ('ER03', 'err desc')]}, - 'DataFrame.bar': {'errors': [('ER04', 'err desc'), - 
('ER05', 'err desc')]}, - 'Series.foobar': {'errors': [('ER06', 'err desc')]}}) - num_errors = validate_docstrings.main(func_name=None, - prefix='Series.', - errors=[], - output_format='default') - assert num_errors == 4 + validate_docstrings, 'validate_all', lambda prefix: { + 'docstring1': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')]}, + 'docstring2': {'errors': [('ER04', 'err desc'), + ('ER05', 'err desc')]}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=[], + output_format='json') + assert exit_status == 0 def test_errors_param_filters_errors(self, monkeypatch): monkeypatch.setattr( - validate_docstrings, 'validate_all', - lambda: {'Series.foo': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc'), - ('ER03', 'err desc')]}, - 'DataFrame.bar': {'errors': [('ER01', 'err desc'), - ('ER02', 'err desc')]}, - 'Series.foobar': {'errors': [('ER01', 'err desc')]}}) - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=['E01'], - output_format='default') - assert num_errors == 3 - - num_errors = validate_docstrings.main(func_name=None, - prefix=None, - errors=['E03'], - output_format='default') - assert num_errors == 1 + validate_docstrings, 'validate_all', lambda prefix: { + 'Series.foo': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc'), + ('ER03', 'err desc')], + 'file': 'series.py', + 'file_line': 142}, + 'DataFrame.bar': {'errors': [('ER01', 'err desc'), + ('ER02', 'err desc')], + 'file': 'frame.py', + 'file_line': 598}, + 'Series.foobar': {'errors': [('ER01', 'err desc')], + 'file': 'series.py', + 'file_line': 279}}) + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=['ER01'], + output_format='default') + assert exit_status == 3 + + exit_status = validate_docstrings.main(func_name=None, + prefix=None, + errors=['ER03'], + output_format='default') + assert exit_status == 1 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 67ad21ab80b97..ed84e58049cae 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -32,6 +32,15 @@ from io import StringIO except ImportError: from cStringIO import StringIO + +# Template backend makes matplotlib to not plot anything. This is useful +# to avoid that plot windows are open from the doctests while running the +# script. Setting here before matplotlib is loaded. 
+# We don't warn for the number of open plots, as none is actually being opened +os.environ['MPLBACKEND'] = 'Template' +import matplotlib +matplotlib.rc('figure', max_open_warning=10000) + import numpy BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -505,6 +514,9 @@ def validate_pep8(self): file.flush() application.run_checks([file.name]) + # We need this to avoid flake8 printing the names of the files to + # the standard output + application.formatter.write = lambda line, source: None application.report() yield from application.guide.stats.statistics_for('') @@ -733,6 +745,7 @@ def header(title, width=80, char='#'): return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format( full_line=full_line, title_line=title_line) + exit_status = 0 if func_name is None: result = validate_all(prefix) @@ -751,7 +764,7 @@ def header(title, width=80, char='#'): raise ValueError('Unknown output_format "{}"'.format( output_format)) - num_errors, output = 0, '' + output = '' for name, res in result.items(): for err_code, err_desc in res['errors']: # The script would be faster if instead of filtering the @@ -759,7 +772,7 @@ def header(title, width=80, char='#'): # initially. But that would complicate the code too much if errors and err_code not in errors: continue - num_errors += 1 + exit_status += 1 output += output_format.format( name=name, path=res['file'], @@ -767,12 +780,10 @@ def header(title, width=80, char='#'): code=err_code, text='{}: {}'.format(name, err_desc)) - sys.stderr.write(output) + sys.stdout.write(output) else: result = validate_one(func_name) - num_errors = len(result['errors']) - sys.stderr.write(header('Docstring ({})'.format(func_name))) sys.stderr.write('{}\n'.format(result['docstring'])) sys.stderr.write(header('Validation')) @@ -799,7 +810,7 @@ def header(title, width=80, char='#'): sys.stderr.write(header('Doctests')) sys.stderr.write(result['examples_errors']) - return num_errors + return exit_status if __name__ == '__main__': From 2c193a006177b1fcd9d40e18f93b160e3bc9a19b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 8 Nov 2018 19:30:22 +0100 Subject: [PATCH 073/122] Update description of Index._values/values/ndarray_values (#23507) --- pandas/core/indexes/base.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d0f190c82aec7..8470bc6fec490 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -709,7 +709,7 @@ def values(self): @property def _values(self): - # type: () -> Union[ExtensionArray, Index] + # type: () -> Union[ExtensionArray, Index, np.ndarray] # TODO(EA): remove index types as they become extension arrays """The best array representation. @@ -721,18 +721,14 @@ def _values(self): It may differ from the public '.values' method. 
- index | values | _values | _ndarray_values | - ----------------- | -------------- -| ----------- | --------------- | - CategoricalIndex | Categorical | Categorical | codes | - DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] | - - For the following, the ``._values`` is currently ``ndarray[object]``, - but will soon be an ``ExtensionArray`` - - index | values | _values | _ndarray_values | - ----------------- | --------------- | ------------ | --------------- | - PeriodIndex | ndarray[object] | ndarray[obj] | ndarray[int] | - IntervalIndex | ndarray[object] | ndarray[obj] | ndarray[object] | + index | values | _values | _ndarray_values | + ----------------- | --------------- | ------------- | --------------- | + Index | ndarray | ndarray | ndarray | + CategoricalIndex | Categorical | Categorical | ndarray[int] | + DatetimeIndex | ndarray[M8ns] | ndarray[M8ns] | ndarray[M8ns] | + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] | + PeriodIndex | ndarray[object] | PeriodArray | ndarray[int] | + IntervalIndex | IntervalArray | IntervalArray | ndarray[object] | See Also -------- From 6244f35f77bd0d19ad64ad8aee4f07a83e7e5534 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 9 Nov 2018 01:22:53 -0800 Subject: [PATCH 074/122] TST: Tests and Helpers for Datetime/Period Arrays (#23502) --- pandas/_libs/tslibs/offsets.pyx | 1 + pandas/core/arrays/datetimelike.py | 3 ++ pandas/core/arrays/datetimes.py | 22 ++++++++--- pandas/core/arrays/period.py | 3 -- pandas/core/arrays/timedeltas.py | 2 + pandas/core/dtypes/generic.py | 4 ++ pandas/tests/arithmetic/conftest.py | 19 ++++++++++ pandas/tests/arithmetic/test_datetime64.py | 21 ++++++----- pandas/tests/arithmetic/test_period.py | 10 ++--- pandas/tests/arrays/test_datetimes.py | 43 ++++++++++++++++++++++ pandas/tests/dtypes/test_generic.py | 8 ++++ pandas/util/testing.py | 20 +++++++++- 12 files changed, 132 insertions(+), 24 deletions(-) create mode 100644 pandas/tests/arrays/test_datetimes.py diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8f5887754e40d..f29d995136a81 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -346,6 +346,7 @@ class _BaseOffset(object): def __add__(self, other): if getattr(other, "_typ", None) in ["datetimeindex", "periodindex", + "datetimearray", "periodarray", "series", "period", "dataframe"]: # defer to the other class's implementation return other + self diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7f1c86938a354..ed4309395ac1f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -200,6 +200,9 @@ def astype(self, dtype, copy=True): # ------------------------------------------------------------------ # Null Handling + def isna(self): + return self._isnan + @property # NB: override with cache_readonly in immutable subclasses def _isnan(self): """ return if each value is nan""" diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e7edd54c4177b..39a2c7e75027e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -117,28 +117,36 @@ def wrapper(self, other): return ops.invalid_comparison(self, other, op) else: if isinstance(other, list): - # FIXME: This can break for object-dtype with mixed types - other = type(self)(other) - elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): + try: + other = type(self)(other) + except ValueError: + other = np.array(other, 
dtype=np.object_) + elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, + DatetimeArrayMixin)): # Following Timestamp convention, __eq__ is all-False # and __ne__ is all True, others raise TypeError. return ops.invalid_comparison(self, other, op) if is_object_dtype(other): result = op(self.astype('O'), np.array(other)) + o_mask = isna(other) elif not (is_datetime64_dtype(other) or is_datetime64tz_dtype(other)): # e.g. is_timedelta64_dtype(other) return ops.invalid_comparison(self, other, op) else: self._assert_tzawareness_compat(other) - result = meth(self, np.asarray(other)) + if not hasattr(other, 'asi8'): + # ndarray, Series + other = type(self)(other) + result = meth(self, other) + o_mask = other._isnan result = com.values_from_object(result) # Make sure to pass an array to result[...]; indexing with # Series breaks with older version of numpy - o_mask = np.array(isna(other)) + o_mask = np.array(o_mask) if o_mask.any(): result[o_mask] = nat_result @@ -157,6 +165,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): _freq _data """ + _typ = "datetimearray" _bool_ops = ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 'is_leap_year'] @@ -166,6 +175,9 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): # by returning NotImplemented timetuple = None + # ensure that operations with numpy arrays defer to our implementation + __array_priority__ = 1000 + # ----------------------------------------------------------------- # Constructors diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5a75f2706b218..482968fdb4766 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -403,9 +403,6 @@ def take(self, indices, allow_fill=False, fill_value=None): return type(self)(new_values, self.freq) - def isna(self): - return self._data == iNaT - def fillna(self, value=None, method=None, limit=None): # TODO(#20300) # To avoid converting to object, we re-implement here with the changes diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9653121879c0d..0fd69abd96cfa 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -98,6 +98,8 @@ def wrapper(self, other): class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin): + _typ = "timedeltaarray" + @property def _box_func(self): return lambda x: Timedelta(x, unit='ns') diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index f6926a192a724..7a3ff5d295421 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -53,6 +53,10 @@ def _check(cls, inst): ('sparse_array', 'sparse_series')) ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")) +ABCDatetimeArray = create_pandas_abc_type("ABCDatetimeArray", "_typ", + ("datetimearray")) +ABCTimedeltaArray = create_pandas_abc_type("ABCTimedeltaArray", "_typ", + ("timedeltaarray")) ABCPeriodArray = create_pandas_abc_type("ABCPeriodArray", "_typ", ("periodarray", )) ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index cbe26a06d34c6..cf1abc6f79101 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -5,6 +5,7 @@ import pandas as pd from pandas.compat import long +from pandas.core.arrays import PeriodArray, DatetimeArrayMixin as DatetimeArray @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) @@ -171,3 
+172,21 @@ def box_df_broadcast_failure(request): the DataFrame operation tries to broadcast incorrectly. """ return request.param + + +@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, PeriodArray], + ids=lambda x: x.__name__) +def box_with_period(request): + """ + Like `box`, but specific to PeriodDtype for also testing PeriodArray + """ + return request.param + + +@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, DatetimeArray], + ids=lambda x: x.__name__) +def box_with_datetime(request): + """ + Like `box`, but specific to datetime64 for also testing DatetimeArray + """ + return request.param diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 4f1a26ae50c3b..c3ebd8f773aa6 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1037,10 +1037,10 @@ def test_dti_add_sub_float(self, op, other): with pytest.raises(TypeError): op(dti, other) - def test_dti_add_timestamp_raises(self, box): + def test_dti_add_timestamp_raises(self, box_with_datetime): # GH#22163 ensure DataFrame doesn't cast Timestamp to i8 idx = DatetimeIndex(['2011-01-01', '2011-01-02']) - idx = tm.box_expected(idx, box) + idx = tm.box_expected(idx, box_with_datetime) msg = "cannot add" with tm.assert_raises_regex(TypeError, msg): idx + Timestamp('2011-01-01') @@ -1152,16 +1152,17 @@ def test_dti_add_intarray_no_freq(self, box): # ------------------------------------------------------------- # Binary operations DatetimeIndex and timedelta-like - def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours, box): + def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours, + box_with_datetime): # GH#22005, GH#22163 check DataFrame doesn't raise TypeError tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - rng = tm.box_expected(rng, box) + rng = tm.box_expected(rng, box_with_datetime) result = rng + two_hours expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - expected = tm.box_expected(expected, box) + expected = tm.box_expected(expected, box_with_datetime) tm.assert_equal(result, expected) def test_dti_iadd_timedeltalike(self, tz_naive_fixture, two_hours): @@ -1431,13 +1432,13 @@ def test_sub_dti_dti(self): tm.assert_index_equal(result, expected) @pytest.mark.parametrize('freq', [None, 'D']) - def test_sub_period(self, freq, box): + def test_sub_period(self, freq, box_with_datetime): # GH#13078 # not supported, check TypeError p = pd.Period('2011-01-01', freq='D') idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) - idx = tm.box_expected(idx, box) + idx = tm.box_expected(idx, box_with_datetime) with pytest.raises(TypeError): idx - p @@ -1779,7 +1780,7 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names): res3 = dti - other tm.assert_series_equal(res3, expected_sub) - def test_dti_add_offset_tzaware(self, tz_aware_fixture, box): + def test_dti_add_offset_tzaware(self, tz_aware_fixture, box_with_datetime): # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype timezone = tz_aware_fixture if timezone == 'US/Pacific': @@ -1792,8 +1793,8 @@ def test_dti_add_offset_tzaware(self, tz_aware_fixture, box): expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', '2010-11-01 07:00'], freq='H', tz=timezone) - dates = tm.box_expected(dates, box) - expected = tm.box_expected(expected, box) + dates = tm.box_expected(dates, box_with_datetime) + expected = tm.box_expected(expected, box_with_datetime) # TODO: 
parametrize over the scalar being added? radd? sub? offset = dates + pd.offsets.Hour(5) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index c52112a4fa147..3595cf7a2522f 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -579,15 +579,15 @@ def test_pi_add_offset_n_gt1(self, box): result = per.freq + pi tm.assert_equal(result, expected) - def test_pi_add_offset_n_gt1_not_divisible(self, box): + def test_pi_add_offset_n_gt1_not_divisible(self, box_with_period): # GH#23215 # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0 pi = pd.PeriodIndex(['2016-01'], freq='2M') - pi = tm.box_expected(pi, box) + pi = tm.box_expected(pi, box_with_period) expected = pd.PeriodIndex(['2016-04'], freq='2M') - expected = tm.box_expected(expected, box) + expected = tm.box_expected(expected, box_with_period) result = pi + to_offset('3M') tm.assert_equal(result, expected) @@ -901,10 +901,10 @@ def test_pi_ops(self): tm.assert_index_equal(result, exp) @pytest.mark.parametrize('ng', ["str", 1.5]) - def test_pi_ops_errors(self, ng, box): + def test_pi_ops_errors(self, ng, box_with_period): idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq='M', name='idx') - obj = tm.box_expected(idx, box) + obj = tm.box_expected(idx, box_with_period) msg = r"unsupported operand type\(s\)" with tm.assert_raises_regex(TypeError, msg): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py new file mode 100644 index 0000000000000..a15295cfbd81a --- /dev/null +++ b/pandas/tests/arrays/test_datetimes.py @@ -0,0 +1,43 @@ +""" +Tests for DatetimeArray +""" +import operator + +import numpy as np + +import pandas as pd +from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray +import pandas.util.testing as tm + + +class TestDatetimeArrayComparisons(object): + # TODO: merge this into tests/arithmetic/test_datetime64 once it is + # sufficiently robust + + def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): + # arbitrary tz-naive DatetimeIndex + opname = all_compare_operators.strip('_') + op = getattr(operator, opname) + + dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=None) + arr = DatetimeArray(dti) + assert arr.freq == dti.freq + assert arr.tz == dti.tz + + right = dti + + expected = np.ones(len(arr), dtype=bool) + if opname in ['ne', 'gt', 'lt']: + # for these the comparisons should be all-False + expected = ~expected + + result = op(arr, arr) + tm.assert_numpy_array_equal(result, expected) + for other in [right, np.array(right)]: + # TODO: add list and tuple, and object-dtype once those + # are fixed in the constructor + result = op(arr, other) + tm.assert_numpy_array_equal(result, expected) + + result = op(other, arr) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 38d1143f3838b..53fa482bdeaef 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -19,6 +19,8 @@ class TestABCClasses(object): sparse_series = pd.Series([1, 2, 3]).to_sparse() sparse_array = pd.SparseArray(np.random.randn(10)) sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]}) + datetime_array = pd.core.arrays.DatetimeArrayMixin(datetime_index) + timedelta_array = pd.core.arrays.TimedeltaArrayMixin(timedelta_index) def test_abc_types(self): assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) @@ -51,6 +53,12 @@ def test_abc_types(self): 
assert isinstance(pd.Interval(0, 1.5), gt.ABCInterval) assert not isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCInterval) + assert isinstance(self.datetime_array, gt.ABCDatetimeArray) + assert not isinstance(self.datetime_index, gt.ABCDatetimeArray) + + assert isinstance(self.timedelta_array, gt.ABCTimedeltaArray) + assert not isinstance(self.timedelta_index, gt.ABCTimedeltaArray) + def test_setattr_warnings(): # GH7175 - GOTCHA: You can't use dot notation to add a column... diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 96387349eecd7..09c5a68ec28c2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -34,7 +34,9 @@ IntervalIndex, MultiIndex, Panel, PeriodIndex, RangeIndex, Series, TimedeltaIndex, bdate_range) from pandas.core.algorithms import take_1d -from pandas.core.arrays import ExtensionArray, IntervalArray, PeriodArray +from pandas.core.arrays import ( + DatetimeArrayMixin as DatetimeArray, ExtensionArray, IntervalArray, + PeriodArray, period_array) import pandas.core.common as com from pandas.io.common import urlopen @@ -1049,6 +1051,15 @@ def assert_period_array_equal(left, right, obj='PeriodArray'): assert_attr_equal('freq', left, right, obj=obj) +def assert_datetime_array_equal(left, right, obj='DatetimeArray'): + _check_isinstance(left, right, DatetimeArray) + + assert_numpy_array_equal(left._data, right._data, + obj='{obj}._data'.format(obj=obj)) + assert_attr_equal('freq', left, right, obj=obj) + assert_attr_equal('tz', left, right, obj=obj) + + def raise_assert_detail(obj, message, left, right, diff=None): __tracebackhide__ = True @@ -1546,6 +1557,8 @@ def assert_equal(left, right, **kwargs): assert_interval_array_equal(left, right, **kwargs) elif isinstance(left, PeriodArray): assert_period_array_equal(left, right, **kwargs) + elif isinstance(left, DatetimeArray): + assert_datetime_array_equal(left, right, **kwargs) elif isinstance(left, ExtensionArray): assert_extension_array_equal(left, right, **kwargs) elif isinstance(left, np.ndarray): @@ -1573,6 +1586,11 @@ def box_expected(expected, box_cls): expected = pd.Series(expected) elif box_cls is pd.DataFrame: expected = pd.Series(expected).to_frame() + elif box_cls is PeriodArray: + # the PeriodArray constructor is not as flexible as period_array + expected = period_array(expected) + elif box_cls is DatetimeArray: + expected = DatetimeArray(expected) elif box_cls is np.ndarray: expected = np.array(expected) else: From 03d632c5328c0ec9b3c6af5b1cd74935de4318f1 Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Fri, 9 Nov 2018 05:29:51 -0800 Subject: [PATCH 075/122] PERF: define is_all_dates to shortcut inadvertent copy when slicing an IntervalIndex (#23591) --- pandas/core/indexes/interval.py | 8 ++++++++ pandas/tests/indexes/interval/test_interval.py | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 79239ec90ac80..eb4284203d865 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1061,6 +1061,14 @@ def func(self, other): name=result_name) return func + @property + def is_all_dates(self): + """ + This is False even when left/right contain datetime-like objects, + as the check is done on the Interval itself + """ + return False + union = _setop('union') intersection = _setop('intersection') difference = _setop('difference') diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index ac0446373a6a1..258f2dc41fb79 100644 
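As a rough usage sketch of the testing helpers added in this patch (a sketch only, assuming the ``DatetimeArrayMixin`` import path used above): ``tm.box_expected`` wraps an index in the requested container, and ``tm.assert_equal`` dispatches to the matching array comparison.

.. code-block:: python

    import pandas as pd
    import pandas.util.testing as tm
    from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray

    dti = pd.date_range('2016-01-01', periods=3, tz='US/Eastern')
    expected = tm.box_expected(dti, DatetimeArray)  # DatetimeArray(dti)
    result = DatetimeArray(dti)
    # dispatches to assert_datetime_array_equal, checking _data, freq and tz
    tm.assert_equal(result, expected)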
--- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -1150,3 +1150,10 @@ def test_set_closed_errors(self, bad_closed): msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) with tm.assert_raises_regex(ValueError, msg): index.set_closed(bad_closed) + + def test_is_all_dates(self): + # GH 23576 + year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'), + pd.Timestamp('2018-01-01 00:00:00')) + year_2017_index = pd.IntervalIndex([year_2017]) + assert not year_2017_index.is_all_dates From 01ffb036a98994fcdbfc1870fb94da035450496e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Nov 2018 07:41:20 -0600 Subject: [PATCH 076/122] PERF: Speeds up creation of Period, PeriodArray, with Offset freq (#23589) --- pandas/_libs/tslibs/offsets.pyx | 2 ++ pandas/_libs/tslibs/period.pyx | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index f29d995136a81..0495202818eb5 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -84,6 +84,8 @@ cdef to_offset(object obj): Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime imports """ + if isinstance(obj, _BaseOffset): + return obj from pandas.tseries.frequencies import to_offset return to_offset(obj) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index ebcbea0ee30b3..a284d8fb544e7 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1567,7 +1567,6 @@ cdef class _Period(object): @classmethod def _maybe_convert_freq(cls, object freq): - if isinstance(freq, (int, tuple)): code, stride = get_freq_code(freq) freq = get_freq_str(code, stride) From 56b8024bd15a4bb259054e1c9ad2268aeb5a19f5 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Fri, 9 Nov 2018 17:26:34 +0100 Subject: [PATCH 077/122] DOC/CI: Add linting to rst files, and fix issues (#23381) --- ci/code_checks.sh | 10 ++++ ci/deps/travis-36.yaml | 1 + ci/environment-dev.yaml | 1 + ci/requirements_dev.txt | 1 + doc/source/10min.rst | 56 +++++++++++----------- doc/source/advanced.rst | 10 ++-- doc/source/basics.rst | 11 ++--- doc/source/comparison_with_sas.rst | 2 +- doc/source/contributing.rst | 18 ++++--- doc/source/contributing_docstring.rst | 17 +++++-- doc/source/cookbook.rst | 3 +- doc/source/dsintro.rst | 7 ++- doc/source/enhancingperf.rst | 67 +++++++++++++++------------ doc/source/extending.rst | 7 ++- doc/source/gotchas.rst | 14 +++--- doc/source/groupby.rst | 4 +- doc/source/indexing.rst | 14 +++--- doc/source/io.rst | 66 +++++++++++++++++--------- doc/source/missing_data.rst | 7 ++- doc/source/reshaping.rst | 14 ++++-- doc/source/timeseries.rst | 5 +- pandas/core/accessor.py | 3 +- pandas/core/missing.py | 7 +-- setup.cfg | 6 +++ 24 files changed, 215 insertions(+), 136 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 330901ba56fbd..c4b483a794c21 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -44,6 +44,13 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 RET=$(($RET + $?)) ; echo $MSG "DONE" + echo "flake8-rst --version" + flake8-rst --version + + MSG='Linting code-blocks in .rst documentation' ; echo $MSG + flake8-rst doc/source --filename=*.rst + RET=$(($RET + $?)) ; echo $MSG "DONE" + # Check that cython casting is of the form `obj` as opposed to ` obj`; # it doesn't make a difference, but 
we want to be internally consistent. # Note: this grep pattern is (intended to be) equivalent to the python @@ -64,6 +71,9 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime RET=$(($RET + $?)) ; echo $MSG "DONE" + echo "isort --version-number" + isort --version-number + # Imports - Check formatting using isort see setup.cfg for settings MSG='Check import format using isort ' ; echo $MSG isort --recursive --check-only pandas diff --git a/ci/deps/travis-36.yaml b/ci/deps/travis-36.yaml index 352717a842214..8aa551f6194d9 100644 --- a/ci/deps/travis-36.yaml +++ b/ci/deps/travis-36.yaml @@ -9,6 +9,7 @@ dependencies: - fastparquet - flake8>=3.5 - flake8-comprehensions + - flake8-rst - gcsfs - geopandas - html5lib diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml index 3e69b1f725b24..2718c1cd582b6 100644 --- a/ci/environment-dev.yaml +++ b/ci/environment-dev.yaml @@ -7,6 +7,7 @@ dependencies: - NumPy - flake8 - flake8-comprehensions + - flake8-rst - hypothesis>=3.58.0 - isort - moto diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index 6a8b8d64d943b..a1cb20c265974 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -4,6 +4,7 @@ Cython>=0.28.2 NumPy flake8 flake8-comprehensions +flake8-rst hypothesis>=3.58.0 isort moto diff --git a/doc/source/10min.rst b/doc/source/10min.rst index fbbe94a72c71e..b5938a24ce6c5 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -45,7 +45,7 @@ a default integer index: .. ipython:: python - s = pd.Series([1,3,5,np.nan,6,8]) + s = pd.Series([1, 3, 5, np.nan, 6, 8]) s Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index @@ -62,12 +62,12 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s .. ipython:: python - df2 = pd.DataFrame({ 'A' : 1., - 'B' : pd.Timestamp('20130102'), - 'C' : pd.Series(1,index=list(range(4)),dtype='float32'), - 'D' : np.array([3] * 4,dtype='int32'), - 'E' : pd.Categorical(["test","train","test","train"]), - 'F' : 'foo' }) + df2 = pd.DataFrame({'A': 1., + 'B': pd.Timestamp('20130102'), + 'C': pd.Series(1, index=list(range(4)),dtype='float32'), + 'D': np.array([3] * 4, dtype='int32'), + 'E': pd.Categorical(["test", "train", "test", "train"]), + 'F': 'foo'}) df2 The columns of the resulting ``DataFrame`` have different @@ -283,9 +283,9 @@ Using the :func:`~Series.isin` method for filtering: .. ipython:: python df2 = df.copy() - df2['E'] = ['one', 'one','two','three','four','three'] + df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three'] df2 - df2[df2['E'].isin(['two','four'])] + df2[df2['E'].isin(['two', 'four'])] Setting ~~~~~~~ @@ -295,7 +295,7 @@ by the indexes. .. ipython:: python - s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6)) + s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6)) s1 df['F'] = s1 @@ -394,7 +394,7 @@ In addition, pandas automatically broadcasts along the specified dimension. .. ipython:: python - s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2) + s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) s df.sub(s, axis='index') @@ -492,7 +492,7 @@ section. .. 
ipython:: python - df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D']) + df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D']) df s = df.iloc[3] df.append(s, ignore_index=True) @@ -512,12 +512,12 @@ See the :ref:`Grouping section `. .. ipython:: python - df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B' : ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C' : np.random.randn(8), - 'D' : np.random.randn(8)}) + df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) df Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting @@ -532,7 +532,7 @@ apply the ``sum`` function. .. ipython:: python - df.groupby(['A','B']).sum() + df.groupby(['A', 'B']).sum() Reshaping --------- @@ -578,11 +578,11 @@ See the section on :ref:`Pivot Tables `. .. ipython:: python - df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3, - 'B' : ['A', 'B', 'C'] * 4, - 'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2, - 'D' : np.random.randn(12), - 'E' : np.random.randn(12)}) + df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3, + 'B': ['A', 'B', 'C'] * 4, + 'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2, + 'D': np.random.randn(12), + 'E': np.random.randn(12)}) df We can produce pivot tables from this data very easily: @@ -653,7 +653,7 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the .. ipython:: python - df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) + df = pd.DataFrame({"id":[1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) Convert the raw grades to a categorical data type. @@ -753,13 +753,13 @@ Writing to a HDF5 Store. .. ipython:: python - df.to_hdf('foo.h5','df') + df.to_hdf('foo.h5', 'df') Reading from a HDF5 Store. .. ipython:: python - pd.read_hdf('foo.h5','df') + pd.read_hdf('foo.h5', 'df') .. ipython:: python :suppress: @@ -796,7 +796,7 @@ If you are attempting to perform an operation you might see an exception like: .. code-block:: python >>> if pd.Series([False, True, False]): - print("I was true") + ... print("I was true") Traceback ... ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 608e2c8e72ded..24c117a534209 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -318,13 +318,13 @@ As usual, **both sides** of the slicers are included as this is label indexing. .. code-block:: python - df.loc[(slice('A1','A3'),.....), :] + df.loc[(slice('A1', 'A3'), ...), :] # noqa: E999   You should **not** do this:   .. code-block:: python - df.loc[(slice('A1','A3'),.....)] + df.loc[(slice('A1', 'A3'), ...)] # noqa: E999 .. ipython:: python @@ -532,7 +532,7 @@ used to move the values from the ``MultiIndex`` to a column. .. ipython:: python df.rename_axis(index=['abc', 'def']) - + Note that the columns of a ``DataFrame`` are an index, so that using ``rename_axis`` with the ``columns`` argument will change the name of that index. @@ -779,7 +779,7 @@ values **not** in the categories, similarly to how you can reindex **any** panda Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories or a ``TypeError`` will be raised. - .. code-block:: python + .. 
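The truth-value ``ValueError`` shown in the ``10min`` gotcha above has unambiguous spellings; a minimal sketch using only the documented reductions:

.. code-block:: python

    import pandas as pd

    s = pd.Series([False, True, False])
    if s.any():         # at least one element is True
        print("any")
    if not s.all():     # not every element is True
        print("not all")
    if not s.empty:     # the Series has elements
        print("non-empty")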
code-block:: ipython In [9]: df3 = pd.DataFrame({'A' : np.arange(6), 'B' : pd.Series(list('aabbca')).astype('category')}) @@ -1071,7 +1071,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be # OK because 2 and 4 are in the index df.loc[2:4, :] -.. code-block:: python +.. code-block:: ipython # 0 is not in the index In [9]: df.loc[0:4, :] diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 81efbfd6d1403..d19fcedf4e766 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -306,8 +306,8 @@ To evaluate single-element pandas objects in a boolean context, use the method .. code-block:: python - >>> if df: - ... + >>> if df: # noqa: E999 + ... Or @@ -317,7 +317,7 @@ To evaluate single-element pandas objects in a boolean context, use the method These will both raise errors, as you are trying to compare multiple values. - .. code-block:: python + .. code-block:: python-traceback ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). @@ -732,9 +732,8 @@ with the equivalent .. code-block:: python >>> (df.pipe(h) - .pipe(g, arg1=1) - .pipe(f, arg2=2, arg3=3) - ) + ... .pipe(g, arg1=1) + ... .pipe(f, arg2=2, arg3=3)) Pandas encourages the second style, which is known as method chaining. ``pipe`` makes it easy to use your own or another library's functions diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 4d7acdf9ab16c..318bffe44a81b 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -744,7 +744,7 @@ XPORT is a relatively limited format and the parsing of it is not as optimized as some of the other pandas readers. An alternative way to interop data between SAS and pandas is to serialize to csv. -.. code-block:: python +.. code-block:: ipython # version 0.17, 10M rows diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 3ec505998fde0..084f710091a1b 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -792,7 +792,7 @@ Transitioning to ``pytest`` .. code-block:: python class TestReallyCoolFeature(object): - .... + pass Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer testing framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this: @@ -800,7 +800,7 @@ framework that will facilitate testing and developing. Thus, instead of writing .. code-block:: python def test_really_cool_feature(): - .... 
+ pass Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -825,25 +825,30 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place import pandas as pd from pandas.util import testing as tm + @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', - marks=pytest.mark.xfail(reason='to show how it works'))]) + + @pytest.mark.parametrize( + 'dtype', ['float32', pytest.param('int16', marks=pytest.mark.skip), + pytest.param('int32', marks=pytest.mark.xfail( + reason='to show how it works'))]) def test_mark(dtype): assert str(np.dtype(dtype)) == 'float32' + @pytest.fixture def series(): return pd.Series([1, 2, 3]) + @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) def dtype(request): return request.param + def test_series(series, dtype): result = series.astype(dtype) assert result.dtype == dtype @@ -912,6 +917,7 @@ for details `_. st.lists(any_json_value), st.dictionaries(st.text(), any_json_value) )) + @given(value=any_json_value) def test_json_roundtrip(value): result = json.loads(json.dumps(value)) diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst index 38e4baa66ef67..2f8ffc2e07c71 100644 --- a/doc/source/contributing_docstring.rst +++ b/doc/source/contributing_docstring.rst @@ -197,6 +197,8 @@ infinitive verb. """ pass +.. code-block:: python + def astype(dtype): """ Method to cast Series type. @@ -205,6 +207,8 @@ infinitive verb. """ pass +.. code-block:: python + def astype(dtype): """ Cast Series type @@ -213,6 +217,8 @@ infinitive verb. """ pass +.. code-block:: python + def astype(dtype): """ Cast Series type from its current type to the new type defined in @@ -624,6 +630,7 @@ A simple example could be: .. code-block:: python class Series: + def head(self, n=5): """ Return the first elements of the Series. @@ -681,12 +688,11 @@ shown: .. code-block:: python - import numpy as np - import pandas as pd - + import numpy as np # noqa: F401 + import pandas as pd # noqa: F401 Any other module used in the examples must be explicitly imported, one per line (as -recommended in `PEP-8 `_) +recommended in :pep:`8#imports`) and avoiding aliases. Avoid excessive imports, but if needed, imports from the standard library go first, followed by third-party libraries (like matplotlib). @@ -720,6 +726,7 @@ positional arguments ``head(3)``. .. code-block:: python class Series: + def mean(self): """ Compute the mean of the input. @@ -946,12 +953,14 @@ substitute the children's class names in this docstring. """Apply my function to %(klass)s.""" ... + class ChildA(Parent): @Substitution(klass="ChildA") @Appender(Parent.my_function.__doc__) def my_function(self): ... + class ChildB(Parent): @Substitution(klass="ChildB") @Appender(Parent.my_function.__doc__) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 3d26a9c7d3d54..53468e755a722 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -968,7 +968,7 @@ Parsing date components in multi-columns Parsing date components in multi-columns is faster with a format -.. code-block:: python +.. code-block:: ipython In [30]: i = pd.date_range('20000101',periods=10000) @@ -1266,6 +1266,7 @@ The `method` argument within `DataFrame.corr` can accept a callable in addition ... ... return cov_ab / std_a / std_b ... + ... >>> df = pd.DataFrame(np.random.normal(size=(100, 3))) ... 
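The ``Substitution``/``Appender`` pattern shown in the docstring guide above can be exercised directly; a sketch, assuming the decorators' ``pandas.util._decorators`` import path:

.. code-block:: python

    from pandas.util._decorators import Appender, Substitution

    class Parent(object):
        def my_function(self):
            """Apply my function to %(klass)s."""

    class ChildA(Parent):
        @Substitution(klass='ChildA')
        @Appender(Parent.my_function.__doc__)
        def my_function(self):
            pass

    # Appender copies the parent docstring, then Substitution fills the
    # placeholder: 'Apply my function to ChildA.'
    ChildA.my_function.__doc__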
>>> df.corr(method=distcorr) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index d02912294060c..b55f93566c03d 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -566,13 +566,12 @@ To write code compatible with all versions of Python, split the assignment in tw .. code-block:: python >>> dependent = pd.DataFrame({"A": [1, 1, 1]}) - >>> dependent.assign(A=lambda x: x["A"] + 1, - B=lambda x: x["A"] + 2) + >>> dependent.assign(A=lambda x: x["A"] + 1, B=lambda x: x["A"] + 2) For Python 3.5 and earlier the expression creating ``B`` refers to the "old" value of ``A``, ``[1, 1, 1]``. The output is then - .. code-block:: python + .. code-block:: console A B 0 2 3 @@ -582,7 +581,7 @@ To write code compatible with all versions of Python, split the assignment in tw For Python 3.6 and later, the expression creating ``A`` refers to the "new" value of ``A``, ``[2, 2, 2]``, which results in - .. code-block:: python + .. code-block:: console A B 0 2 4 diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 8f8a9fe3e50e0..2ca8a2b7ac0f8 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -298,7 +298,7 @@ advanced Cython techniques: Even faster, with the caveat that a bug in our Cython code (an off-by-one error, for example) might cause a segfault because memory access isn't checked. -For more about ``boundscheck`` and ``wraparound``, see the Cython docs on +For more about ``boundscheck`` and ``wraparound``, see the Cython docs on `compiler directives `__. .. _enhancingperf.numba: @@ -323,39 +323,45 @@ Numba works by generating optimized machine code using the LLVM compiler infrast Jit ~~~ -We demonstrate how to use Numba to just-in-time compile our code. We simply +We demonstrate how to use Numba to just-in-time compile our code. We simply take the plain Python code from above and annotate with the ``@jit`` decorator. .. code-block:: python import numba + @numba.jit def f_plain(x): - return x * (x - 1) + return x * (x - 1) + @numba.jit def integrate_f_numba(a, b, N): - s = 0 - dx = (b - a) / N - for i in range(N): - s += f_plain(a + i * dx) - return s * dx + s = 0 + dx = (b - a) / N + for i in range(N): + s += f_plain(a + i * dx) + return s * dx + @numba.jit def apply_integrate_f_numba(col_a, col_b, col_N): - n = len(col_N) - result = np.empty(n, dtype='float64') - assert len(col_a) == len(col_b) == n - for i in range(n): - result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i]) - return result + n = len(col_N) + result = np.empty(n, dtype='float64') + assert len(col_a) == len(col_b) == n + for i in range(n): + result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i]) + return result + def compute_numba(df): - result = apply_integrate_f_numba(df['a'].values, df['b'].values, df['N'].values) - return pd.Series(result, index=df.index, name='result') + result = apply_integrate_f_numba(df['a'].values, df['b'].values, + df['N'].values) + return pd.Series(result, index=df.index, name='result') -Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a nicer interface by passing/returning pandas objects. +Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a +nicer interface by passing/returning pandas objects. .. 
code-block:: ipython @@ -375,13 +381,16 @@ Consider the following toy example of doubling each observation: import numba + def double_every_value_nonumba(x): - return x*2 + return x * 2 + @numba.vectorize def double_every_value_withnumba(x): - return x*2 + return x * 2 +.. code-block:: ipython # Custom function without numba In [5]: %timeit df['col1_doubled'] = df.a.apply(double_every_value_nonumba) @@ -402,18 +411,18 @@ Caveats Numba will execute on any function, but can only accelerate certain classes of functions. -Numba is best at accelerating functions that apply numerical functions to NumPy -arrays. When passed a function that only uses operations it knows how to +Numba is best at accelerating functions that apply numerical functions to NumPy +arrays. When passed a function that only uses operations it knows how to accelerate, it will execute in ``nopython`` mode. -If Numba is passed a function that includes something it doesn't know how to -work with -- a category that currently includes sets, lists, dictionaries, or -string functions -- it will revert to ``object mode``. In ``object mode``, -Numba will execute but your code will not speed up significantly. If you would -prefer that Numba throw an error if it cannot compile a function in a way that -speeds up your code, pass Numba the argument -``nopython=True`` (e.g. ``@numba.jit(nopython=True)``). For more on -troubleshooting Numba modes, see the `Numba troubleshooting page +If Numba is passed a function that includes something it doesn't know how to +work with -- a category that currently includes sets, lists, dictionaries, or +string functions -- it will revert to ``object mode``. In ``object mode``, +Numba will execute but your code will not speed up significantly. If you would +prefer that Numba throw an error if it cannot compile a function in a way that +speeds up your code, pass Numba the argument +``nopython=True`` (e.g. ``@numba.jit(nopython=True)``). For more on +troubleshooting Numba modes, see the `Numba troubleshooting page `__. Read more in the `Numba docs `__. 
diff --git a/doc/source/extending.rst b/doc/source/extending.rst index 1e8a8e50dd9e3..6c47d0ae8bd84 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -163,6 +163,7 @@ your ``MyExtensionArray`` class, as follows: class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin): pass + MyExtensionArray._add_arithmetic_ops() MyExtensionArray._add_comparison_ops() @@ -205,6 +206,7 @@ To use a test, subclass it: from pandas.tests.extension import base + class TestConstructors(base.BaseConstructorsTests): pass @@ -277,6 +279,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame def _constructor_expanddim(self): return SubclassedDataFrame + class SubclassedDataFrame(DataFrame): @property @@ -297,7 +300,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame >>> type(to_framed) - >>> df = SubclassedDataFrame({'A', [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) + >>> df = SubclassedDataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) >>> df A B C 0 1 4 7 @@ -313,6 +316,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame 0 1 4 1 2 5 2 3 6 + >>> type(sliced1) @@ -322,6 +326,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame 1 2 2 3 Name: A, dtype: int64 + >>> type(sliced2) diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 79e312ca12833..0eb2a4eed8581 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -98,7 +98,7 @@ of the following code should be: .. code-block:: python - >>> if pd.Series([False, True, False]): + >>> if pd.Series([False, True, False]): # noqa: E999 ... Should it be ``True`` because it's not zero-length, or ``False`` because there @@ -107,7 +107,7 @@ are ``False`` values? It is unclear, so instead, pandas raises a ``ValueError``: .. code-block:: python >>> if pd.Series([False, True, False]): - print("I was true") + ... print("I was true") Traceback ... ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). @@ -119,8 +119,8 @@ Alternatively, you might want to compare if the pandas object is ``None``: .. code-block:: python >>> if pd.Series([False, True, False]) is not None: - print("I was not None") - >>> I was not None + ... print("I was not None") + I was not None Below is how to check if any of the values are ``True``: @@ -128,8 +128,8 @@ Below is how to check if any of the values are ``True``: .. code-block:: python >>> if pd.Series([False, True, False]).any(): - print("I am any") - >>> I am any + ... print("I am any") + I am any To evaluate single-element pandas objects in a boolean context, use the method :meth:`~DataFrame.bool`: @@ -316,7 +316,7 @@ Occasionally you may have to deal with data that were created on a machine with a different byte order than the one on which you are running Python. A common symptom of this issue is an error like: -.. code-block:: python +.. code-block:: python-traceback Traceback ... diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 755edba352f05..fb96afaf7d796 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -79,7 +79,7 @@ pandas objects can be split on any of their axes. The abstract definition of grouping is to provide a mapping of labels to group names. To create a GroupBy object (more on what the GroupBy object is later), you may do the following: -.. code-block:: ipython +.. 
code-block:: python # default is axis=0 >>> grouped = obj.groupby(key) @@ -1310,7 +1310,7 @@ arbitrary function, for example: .. code-block:: python - (df.groupby(['Store', 'Product']).pipe(report_func) + df.groupby(['Store', 'Product']).pipe(report_func) where ``report_func`` takes a GroupBy object and creates a report from that. diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 1c63acce6e3fa..5740ab5fa6921 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -537,10 +537,10 @@ A list of indexers where any element is out of bounds will raise an .. code-block:: python - dfl.iloc[[4, 5, 6]] + >>> dfl.iloc[[4, 5, 6]] IndexError: positional indexers are out-of-bounds - dfl.iloc[:, 4] + >>> dfl.iloc[:, 4] IndexError: single positional indexer is out-of-bounds .. _indexing.callable: @@ -1794,7 +1794,7 @@ interpreter executes this code: .. code-block:: python - dfmi.loc[:,('one','second')] = value + dfmi.loc[:, ('one', 'second')] = value # becomes dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) @@ -1827,10 +1827,10 @@ that you've done this: .. code-block:: python def do_something(df): - foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! - # ... many lines here ... - foo['quux'] = value # We don't know whether this will modify df or not! - return foo + foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! + # ... many lines here ... + foo['quux'] = value # We don't know whether this will modify df or not! + return foo Yikes! diff --git a/doc/source/io.rst b/doc/source/io.rst index 9f458b58717d6..0acb0dfbee2d7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1834,8 +1834,7 @@ For example: .. code-block:: python - DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json() # raises - + >>> DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json() # raises RuntimeError: Unhandled numpy dtype 15 can be dealt with by specifying a simple ``default_handler``: @@ -2411,8 +2410,8 @@ columns to strings. .. code-block:: python url_mcc = 'https://en.wikipedia.org/wiki/Mobile_country_code' - dfs = pd.read_html(url_mcc, match='Telekom Albania', header=0, converters={'MNC': - str}) + dfs = pd.read_html(url_mcc, match='Telekom Albania', header=0, + converters={'MNC': str}) .. versionadded:: 0.19 @@ -2724,7 +2723,8 @@ different parameters: data = {} # For when Sheet1's format differs from Sheet2 with pd.ExcelFile('path_to_file.xls') as xls: - data['Sheet1'] = pd.read_excel(xls, 'Sheet1', index_col=None, na_values=['NA']) + data['Sheet1'] = pd.read_excel(xls, 'Sheet1', index_col=None, + na_values=['NA']) data['Sheet2'] = pd.read_excel(xls, 'Sheet2', index_col=1) Note that if the same parsing parameters are used for all sheets, a list @@ -2735,11 +2735,14 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc # using the ExcelFile class data = {} with pd.ExcelFile('path_to_file.xls') as xls: - data['Sheet1'] = read_excel(xls, 'Sheet1', index_col=None, na_values=['NA']) - data['Sheet2'] = read_excel(xls, 'Sheet2', index_col=None, na_values=['NA']) + data['Sheet1'] = read_excel(xls, 'Sheet1', index_col=None, + na_values=['NA']) + data['Sheet2'] = read_excel(xls, 'Sheet2', index_col=None, + na_values=['NA']) # equivalent using the read_excel function - data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA']) + data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], + index_col=None, na_values=['NA']) .. 
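``report_func`` is only referenced by name in the ``groupby`` example above; a sketch of what such a function might look like (the ``Revenue`` column is an assumption):

.. code-block:: python

    import pandas as pd

    def report_func(grouped):
        # receives the GroupBy object produced before .pipe()
        return grouped['Revenue'].agg(['sum', 'mean'])

    df = pd.DataFrame({'Store': ['A', 'A', 'B'],
                       'Product': ['x', 'y', 'x'],
                       'Revenue': [10.0, 12.5, 8.0]})
    df.groupby(['Store', 'Product']).pipe(report_func)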
_io.excel.specifying_sheets: @@ -2899,7 +2902,10 @@ missing data to recover integer dtype: .. code-block:: python - cfun = lambda x: int(x) if x else -1 + def cfun(x): + return int(x) if x else -1 + + read_excel('path_to_file.xls', 'Sheet1', converters={'MyInts': cfun}) dtype Specifications @@ -3040,7 +3046,7 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') # Or via pandas configuration. - from pandas import options + from pandas import options # noqa: E402 options.io.excel.xlsx.writer = 'xlsxwriter' df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') @@ -3067,7 +3073,7 @@ which takes the contents of the clipboard buffer and passes them to the ``read_csv`` method. For instance, you can copy the following text to the clipboard (CTRL-C on many operating systems): -.. code-block:: python +.. code-block:: console A B C x 1 4 p @@ -3476,9 +3482,8 @@ This format is specified by default when using ``put`` or ``to_hdf`` or by ``for .. code-block:: python - pd.DataFrame(randn(10, 2)).to_hdf('test_fixed.h5', 'df') - - pd.read_hdf('test_fixed.h5', 'df', where='index>5') + >>> pd.DataFrame(randn(10, 2)).to_hdf('test_fixed.h5', 'df') + >>> pd.read_hdf('test_fixed.h5', 'df', where='index>5') TypeError: cannot pass a where specification when reading a fixed format. this store must be selected in its entirety @@ -3574,7 +3579,7 @@ will yield a tuple for each group key along with the relative keys of its conten Hierarchical keys cannot be retrieved as dotted (attribute) access as described above for items stored under the root node. - .. code-block:: python + .. code-block:: ipython In [8]: store.foo.bar.bah AttributeError: 'HDFStore' object has no attribute 'foo' @@ -3732,10 +3737,10 @@ The right-hand side of the sub-expression (after a comparison operator) can be: instead of this - .. code-block:: python + .. code-block:: ipython string = "HolyMoly'" - store.select('df', 'index == %s' % string) + store.select('df', 'index == %s' % string) The latter will **not** work and will raise a ``SyntaxError``.Note that there's a single quote followed by a double quote in the ``string`` @@ -3941,7 +3946,7 @@ The default is 50,000 rows returned in a chunk. .. code-block:: python - for df in pd.read_hdf('store.h5','df', chunksize=3): + for df in pd.read_hdf('store.h5', 'df', chunksize=3): print(df) Note, that the chunksize keyword applies to the **source** rows. So if you @@ -4871,7 +4876,8 @@ to pass to :func:`pandas.to_datetime`: .. 
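The ``chunksize`` iteration shown above composes with ordinary aggregation; a sketch using the same file and key names as the example:

.. code-block:: python

    import pandas as pd

    total_rows = 0
    for chunk in pd.read_hdf('store.h5', 'df', chunksize=3):
        # each chunk is an ordinary DataFrame
        total_rows += len(chunk)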
code-block:: python pd.read_sql_table('data', engine, parse_dates={'Date': '%Y-%m-%d'}) - pd.read_sql_table('data', engine, parse_dates={'Date': {'format': '%Y-%m-%d %H:%M:%S'}}) + pd.read_sql_table('data', engine, + parse_dates={'Date': {'format': '%Y-%m-%d %H:%M:%S'}}) You can check if a table exists using :func:`~pandas.io.sql.has_table` @@ -5374,11 +5380,11 @@ And here's the code: import pandas as pd import sqlite3 from numpy.random import randn - from pandas.io import sql sz = 1000000 df = pd.DataFrame({'A': randn(sz), 'B': [1] * sz}) + def test_sql_write(df): if os.path.exists('test.sql'): os.remove('test.sql') @@ -5386,55 +5392,73 @@ And here's the code: df.to_sql(name='test_table', con=sql_db) sql_db.close() + def test_sql_read(): sql_db = sqlite3.connect('test.sql') pd.read_sql_query("select * from test_table", sql_db) sql_db.close() + def test_hdf_fixed_write(df): df.to_hdf('test_fixed.hdf', 'test', mode='w') + def test_hdf_fixed_read(): pd.read_hdf('test_fixed.hdf', 'test') + def test_hdf_fixed_write_compress(df): df.to_hdf('test_fixed_compress.hdf', 'test', mode='w', complib='blosc') + def test_hdf_fixed_read_compress(): pd.read_hdf('test_fixed_compress.hdf', 'test') + def test_hdf_table_write(df): df.to_hdf('test_table.hdf', 'test', mode='w', format='table') + def test_hdf_table_read(): pd.read_hdf('test_table.hdf', 'test') + def test_hdf_table_write_compress(df): - df.to_hdf('test_table_compress.hdf', 'test', mode='w', complib='blosc', format='table') + df.to_hdf('test_table_compress.hdf', 'test', mode='w', + complib='blosc', format='table') + def test_hdf_table_read_compress(): pd.read_hdf('test_table_compress.hdf', 'test') + def test_csv_write(df): df.to_csv('test.csv', mode='w') + def test_csv_read(): pd.read_csv('test.csv', index_col=0) + def test_feather_write(df): df.to_feather('test.feather') + def test_feather_read(): pd.read_feather('test.feather') + def test_pickle_write(df): df.to_pickle('test.pkl') + def test_pickle_read(): pd.read_pickle('test.pkl') + def test_pickle_write_compress(df): df.to_pickle('test.pkl.compress', compression='xz') + def test_pickle_read_compress(): pd.read_pickle('test.pkl.compress', compression='xz') diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index e4b5578af15f0..4864637691607 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -696,9 +696,8 @@ You can also operate on the DataFrame in place: .. code-block:: python - s = pd.Series([True, False, True]) - s.replace({'a string': 'new value', True: False}) # raises - + >>> s = pd.Series([True, False, True]) + >>> s.replace({'a string': 'new value', True: False}) # raises TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' will raise a ``TypeError`` because one of the ``dict`` keys is not of the @@ -728,7 +727,7 @@ rules introduced in the table below. :header: "data type", "Cast to" :widths: 40, 40 - integer, float + integer, float boolean, object float, no cast object, no cast diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 7d9925d800441..6163b6f2ae89a 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -45,13 +45,19 @@ For the curious here is how the above ``DataFrame`` was created: .. 
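The benchmark functions above are meant to be timed one at a time (the published numbers used IPython's ``%timeit``); a plain-``timeit`` driver is a sketch along these lines:

.. code-block:: python

    import timeit

    # one write/read round trip per format; df is the frame defined above
    sql_write = timeit.timeit(lambda: test_sql_write(df), number=1)
    sql_read = timeit.timeit(test_sql_read, number=1)
    csv_write = timeit.timeit(lambda: test_csv_write(df), number=1)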
code-block:: python - import pandas.util.testing as tm; tm.N = 3 + import pandas.util.testing as tm + + tm.N = 3 + + def unpivot(frame): N, K = frame.shape - data = {'value' : frame.values.ravel('F'), - 'variable' : np.asarray(frame.columns).repeat(N), - 'date' : np.tile(np.asarray(frame.index), K)} + data = {'value': frame.values.ravel('F'), + 'variable': np.asarray(frame.columns).repeat(N), + 'date': np.tile(np.asarray(frame.index), K)} return pd.DataFrame(data, columns=['date', 'variable', 'value']) + + df = unpivot(tm.makeTimeDataFrame()) To select out everything for variable ``A`` we could do: diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index a52c80106f100..42fd356bbe65a 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -898,7 +898,7 @@ custom date increment logic, such as adding business days: .. code-block:: python class BDay(DateOffset): - """DateOffset increments between business days""" + """DateOffset increments between business days""" def apply(self, other): ... @@ -2133,7 +2133,8 @@ To convert from an ``int64`` based YYYYMMDD representation. s def conv(x): - return pd.Period(year = x // 10000, month = x//100 % 100, day = x%100, freq='D') + return pd.Period(year=x // 10000, month=x // 100 % 100, + day=x % 100, freq='D') s.apply(conv) s.apply(conv)[2] diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index bc91372e3ac7d..6694737737562 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -204,7 +204,8 @@ def decorator(accessor): .. code-block:: python - def __init__(self, pandas_object): + def __init__(self, pandas_object): # noqa: E999 + ... For consistency with pandas methods, you should raise an ``AttributeError`` if the data passed to your accessor has an incorrect dtype. diff --git a/pandas/core/missing.py b/pandas/core/missing.py index b2daec327d618..222873cd7f81a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -760,9 +760,10 @@ def _interp_limit(invalid, fw_limit, bw_limit): .. code-block:: python - for x in np.where(invalid)[0]: - if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): - yield x + def _interp_limit(invalid, fw_limit, bw_limit): + for x in np.where(invalid)[0]: + if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + yield x """ # handle forward first; the backward direction is the same except # 1. 
operate on the reversed array diff --git a/setup.cfg b/setup.cfg index 17b88d084ebf6..4726a0ddb2fb2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,6 +30,12 @@ exclude = versioneer.py, env # exclude asv benchmark environments from linting +[flake8-rst] +ignore = + F821, # undefined name + W391, # blank line at end of file [Seems to be a bug (v0.4.1)] + + [yapf] based_on_style = pep8 split_before_named_assigns = false From 84ef701389cb425bfcaf2208e5c518d7b70cb878 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 9 Nov 2018 22:05:54 +0100 Subject: [PATCH 078/122] CLN: remove values attribute from datetimelike EAs (#23603) --- pandas/core/arrays/datetimelike.py | 9 ++------- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/tests/extension/test_period.py | 4 +--- 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ed4309395ac1f..3fa4f503d2dd5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -66,7 +66,7 @@ def cmp_method(self, other): with warnings.catch_warnings(record=True): warnings.filterwarnings("ignore", "elementwise", FutureWarning) with np.errstate(all='ignore'): - result = op(self.values, np.asarray(other)) + result = op(self._data, np.asarray(other)) return result @@ -119,15 +119,10 @@ def _box_values(self, values): def __iter__(self): return (self._box_func(v) for v in self.asi8) - @property - def values(self): - """ return the underlying data as an ndarray """ - return self._data.view(np.ndarray) - @property def asi8(self): # do not cache or you'll create a memory leak - return self.values.view('i8') + return self._data.view('i8') # ------------------------------------------------------------------ # Array-like Methods diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 39a2c7e75027e..405056c628ceb 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -886,7 +886,7 @@ def to_period(self, freq=None): freq = get_period_alias(freq) - return PeriodArray._from_datetime64(self.values, freq, tz=self.tz) + return PeriodArray._from_datetime64(self._data, freq, tz=self.tz) def to_perioddelta(self, freq): """ diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 0fd69abd96cfa..cf3ba263d1f81 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -81,7 +81,7 @@ def wrapper(self, other): raise TypeError(msg.format(cls=type(self).__name__, typ=type(other).__name__)) else: - other = type(self)(other).values + other = type(self)(other)._data result = meth(self, other) result = com.values_from_object(result) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3a2f9986760d3..56ab9b6c020c0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -292,7 +292,7 @@ def __new__(cls, data=None, 'set specified tz: {1}') raise TypeError(msg.format(data.tz, tz)) - subarr = data.values + subarr = data._data if freq is None: freq = data.freq diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 83f30aed88e65..3de3f1dfd9dbc 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -75,9 +75,7 @@ def test_combine_add(self, data_repeated): class TestInterface(BasePeriodTests, base.BaseInterfaceTests): - def 
test_no_values_attribute(self, data): - # We have a values attribute. - pass + pass class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests): From e4104e9c5af4c65a997e3da100aada7ecbdbca06 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Fri, 9 Nov 2018 21:24:09 -0700 Subject: [PATCH 079/122] TST: Use intp as expected dtype in IntervalIndex indexing tests (#23609) --- pandas/tests/indexes/interval/test_interval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 258f2dc41fb79..49d093d312cf1 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -412,9 +412,9 @@ def test_get_loc_value(self): assert idx.get_loc(0.5) == 0 assert idx.get_loc(1) == 0 tm.assert_numpy_array_equal(idx.get_loc(1.5), - np.array([0, 1], dtype='int64')) + np.array([0, 1], dtype='intp')) tm.assert_numpy_array_equal(np.sort(idx.get_loc(2)), - np.array([0, 1], dtype='int64')) + np.array([0, 1], dtype='intp')) assert idx.get_loc(3) == 1 pytest.raises(KeyError, idx.get_loc, 3.5) @@ -537,12 +537,12 @@ def test_get_loc_datetimelike_overlapping(self, arrays): value = index[0].mid + Timedelta('12 hours') result = np.sort(index.get_loc(value)) - expected = np.array([0, 1], dtype='int64') + expected = np.array([0, 1], dtype='intp') assert tm.assert_numpy_array_equal(result, expected) interval = Interval(index[0].left, index[1].right) result = np.sort(index.get_loc(interval)) - expected = np.array([0, 1, 2], dtype='int64') + expected = np.array([0, 1, 2], dtype='intp') assert tm.assert_numpy_array_equal(result, expected) # To be removed, replaced by test_interval_new.py (see #16316, #16386) @@ -617,7 +617,7 @@ def test_get_reindexer_datetimelike(self, arrays): target = IntervalIndex.from_tuples(tuples) result = index._get_reindexer(target) - expected = np.array([0, 3], dtype='int64') + expected = np.array([0, 3], dtype='intp') tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize('breaks', [ From eefb76ebfcef33394f8deb4968a07be27b713cbf Mon Sep 17 00:00:00 2001 From: Anjana S Date: Sat, 10 Nov 2018 17:41:53 +0530 Subject: [PATCH 080/122] ENH: Support for partition_cols in to_parquet (#23321) * closes #23283 --- doc/source/io.rst | 37 +++++++++++++++++++++ doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/frame.py | 17 ++++++++-- pandas/io/parquet.py | 53 ++++++++++++++++++++++++------- pandas/tests/io/test_parquet.py | 47 +++++++++++++++++++++++++++ pandas/tests/util/test_testing.py | 7 ++++ pandas/util/testing.py | 20 ++++++++++++ 7 files changed, 167 insertions(+), 15 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 0acb0dfbee2d7..5d29e349e2898 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4673,6 +4673,43 @@ Passing ``index=True`` will *always* write the index, even if that's not the underlying engine's default behavior. +Partitioning Parquet files +'''''''''''''''''''''''''' + +.. versionadded:: 0.24.0 + +Parquet supports partitioning of data based on the values of one or more columns. + +.. ipython:: python + + df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]}) + df.to_parquet(fname='test', engine='pyarrow', partition_cols=['a'], compression=None) + +The `fname` specifies the parent directory to which data will be saved. +The `partition_cols` are the column names by which the dataset will be partitioned. +Columns are partitioned in the order they are given. 
The partition splits are +determined by the unique values in the partition columns. +The above example creates a partitioned dataset that may look like: + +.. code-block:: text + + test + ├── a=0 + │ ├── 0bac803e32dc42ae83fddfd029cbdebc.parquet + │ └── ... + └── a=1 + ├── e6ab24a4f45147b49b54a662f0c412a3.parquet + └── ... + +.. ipython:: python + :suppress: + + from shutil import rmtree + try: + rmtree('test') + except Exception: + pass + .. _io.sql: SQL Queries diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fcbdb391ba83c..3dcaef302d564 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -236,6 +236,7 @@ Other Enhancements - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). - Compatibility with Matplotlib 3.0 (:issue:`22790`). - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) +- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index eff75938b1181..b24f79e89902a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1970,7 +1970,7 @@ def to_feather(self, fname): to_feather(self, fname) def to_parquet(self, fname, engine='auto', compression='snappy', - index=None, **kwargs): + index=None, partition_cols=None, **kwargs): """ Write a DataFrame to the binary parquet format. @@ -1984,7 +1984,11 @@ def to_parquet(self, fname, engine='auto', compression='snappy', Parameters ---------- fname : str - String file path. + File path or Root Directory path. Will be used as Root Directory + path while writing a partitioned dataset. + + .. versionchanged:: 0.24.0 + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' Parquet library to use. If 'auto', then the option ``io.parquet.engine`` is used. The default ``io.parquet.engine`` @@ -1999,6 +2003,12 @@ def to_parquet(self, fname, engine='auto', compression='snappy', .. versionadded:: 0.24.0 + partition_cols : list, optional, default None + Column names by which to partition the dataset + Columns are partitioned in the order they are given + + .. versionadded:: 0.24.0 + **kwargs Additional arguments passed to the parquet library. See :ref:`pandas io ` for more details. @@ -2027,7 +2037,8 @@ def to_parquet(self, fname, engine='auto', compression='snappy', """ from pandas.io.parquet import to_parquet to_parquet(self, fname, engine, - compression=compression, index=index, **kwargs) + compression=compression, index=index, + partition_cols=partition_cols, **kwargs) @Substitution(header='Write out the column names. 
If a list of strings ' 'is given, it is assumed to be aliases for the ' diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 160a26533fb89..3d72b1ec3a47f 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -101,7 +101,8 @@ def __init__(self): self.api = pyarrow def write(self, df, path, compression='snappy', - coerce_timestamps='ms', index=None, **kwargs): + coerce_timestamps='ms', index=None, partition_cols=None, + **kwargs): self.validate_dataframe(df) path, _, _, _ = get_filepath_or_buffer(path, mode='wb') @@ -109,11 +110,16 @@ def write(self, df, path, compression='snappy', from_pandas_kwargs = {} else: from_pandas_kwargs = {'preserve_index': index} - table = self.api.Table.from_pandas(df, **from_pandas_kwargs) - self.api.parquet.write_table( - table, path, compression=compression, - coerce_timestamps=coerce_timestamps, **kwargs) + if partition_cols is not None: + self.api.parquet.write_to_dataset( + table, path, compression=compression, + coerce_timestamps=coerce_timestamps, + partition_cols=partition_cols, **kwargs) + else: + self.api.parquet.write_table( + table, path, compression=compression, + coerce_timestamps=coerce_timestamps, **kwargs) def read(self, path, columns=None, **kwargs): path, _, _, should_close = get_filepath_or_buffer(path) @@ -156,12 +162,23 @@ def __init__(self): ) self.api = fastparquet - def write(self, df, path, compression='snappy', index=None, **kwargs): + def write(self, df, path, compression='snappy', index=None, + partition_cols=None, **kwargs): self.validate_dataframe(df) # thriftpy/protocol/compact.py:339: # DeprecationWarning: tostring() is deprecated. # Use tobytes() instead. + if 'partition_on' in kwargs and partition_cols is not None: + raise ValueError("Cannot use both partition_on and " + "partition_cols. Use partition_cols for " + "partitioning data") + elif 'partition_on' in kwargs: + partition_cols = kwargs.pop('partition_on') + + if partition_cols is not None: + kwargs['file_scheme'] = 'hive' + if is_s3_url(path): # path is s3:// so we need to open the s3file in 'wb' mode. # TODO: Support 'ab' @@ -174,7 +191,8 @@ def write(self, df, path, compression='snappy', index=None, **kwargs): with catch_warnings(record=True): self.api.write(path, df, compression=compression, - write_index=index, **kwargs) + write_index=index, partition_on=partition_cols, + **kwargs) def read(self, path, columns=None, **kwargs): if is_s3_url(path): @@ -194,15 +212,18 @@ def read(self, path, columns=None, **kwargs): def to_parquet(df, path, engine='auto', compression='snappy', index=None, - **kwargs): + partition_cols=None, **kwargs): """ Write a DataFrame to the parquet format. Parameters ---------- - df : DataFrame - path : string - File path + path : str + File path or Root Directory path. Will be used as Root Directory path + while writing a partitioned dataset. + + .. versionchanged:: 0.24.0 + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' Parquet library to use. If 'auto', then the option ``io.parquet.engine`` is used. The default ``io.parquet.engine`` @@ -216,11 +237,19 @@ def to_parquet(df, path, engine='auto', compression='snappy', index=None, engine's default behavior will be used. .. versionadded 0.24.0 + + partition_cols : list, optional, default None + Column names by which to partition the dataset + Columns are partitioned in the order they are given + + .. 
versionadded:: 0.24.0 + kwargs Additional keyword arguments passed to the engine """ impl = get_engine(engine) - return impl.write(df, path, compression=compression, index=index, **kwargs) + return impl.write(df, path, compression=compression, index=index, + partition_cols=partition_cols, **kwargs) def read_parquet(path, engine='auto', columns=None, **kwargs): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3b3e7f757bf60..6024fccb15c76 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1,4 +1,5 @@ """ test parquet compat """ +import os import pytest import datetime @@ -454,6 +455,18 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa): check_round_trip(df_compat, pa, path='s3://pandas-test/pyarrow.parquet') + def test_partition_cols_supported(self, pa, df_full): + # GH #23283 + partition_cols = ['bool', 'int'] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet(path, partition_cols=partition_cols, + compression=None) + import pyarrow.parquet as pq + dataset = pq.ParquetDataset(path, validate_schema=False) + assert len(dataset.partitions.partition_names) == 2 + assert dataset.partitions.partition_names == set(partition_cols) + class TestParquetFastParquet(Base): @@ -519,3 +532,37 @@ def test_s3_roundtrip(self, df_compat, s3_resource, fp): # GH #19134 check_round_trip(df_compat, fp, path='s3://pandas-test/fastparquet.parquet') + + def test_partition_cols_supported(self, fp, df_full): + # GH #23283 + partition_cols = ['bool', 'int'] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet(path, engine="fastparquet", + partition_cols=partition_cols, compression=None) + assert os.path.exists(path) + import fastparquet + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 2 + + def test_partition_on_supported(self, fp, df_full): + # GH #23283 + partition_cols = ['bool', 'int'] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet(path, engine="fastparquet", compression=None, + partition_on=partition_cols) + assert os.path.exists(path) + import fastparquet + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 2 + + def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full): + # GH #23283 + partition_cols = ['bool', 'int'] + df = df_full + with pytest.raises(ValueError): + with tm.ensure_clean_dir() as path: + df.to_parquet(path, engine="fastparquet", compression=None, + partition_on=partition_cols, + partition_cols=partition_cols) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index d1dc91f94e3c4..c10ad72d39f8e 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -876,3 +876,10 @@ def test_datapath_missing(datapath, request): ) assert result == expected + + +def test_create_temp_directory(): + with tm.ensure_clean_dir() as path: + assert os.path.exists(path) + assert os.path.isdir(path) + assert not os.path.exists(path) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 09c5a68ec28c2..c6457545038e0 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -6,6 +6,7 @@ import locale import os import re +from shutil import rmtree import string import subprocess import sys @@ -761,6 +762,25 @@ def ensure_clean(filename=None, return_filelike=False): print("Exception on removing file: {error}".format(error=e)) +@contextmanager +def ensure_clean_dir(): + """ + Get 
a temporary directory path that is removed on close.
+
+    Yields
+    ------
+    Temporary directory path
+    """
+    directory_name = tempfile.mkdtemp(suffix='')
+    try:
+        yield directory_name
+    finally:
+        try:
+            rmtree(directory_name)
+        except Exception:
+            pass
+
+
 # -----------------------------------------------------------------------------
 # Comparators

From af8ca2635c8682aeb323cc37c6c77172a1fbbfe9 Mon Sep 17 00:00:00 2001
From: Jose Rivera-Rubio
Date: Sat, 10 Nov 2018 22:47:24 +0100
Subject: [PATCH 081/122] DOC: Updating Series.resample and DataFrame.resample
 docstrings (#23197)

---
 ci/code_checks.sh      |   2 +-
 pandas/core/generic.py | 218 ++++++++++++++++++++++++++---------
 2 files changed, 140 insertions(+), 80 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index c4b483a794c21..eba96f0c6c2fc 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -151,7 +151,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then

     MSG='Doctests generic.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs"
+        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs"
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     MSG='Doctests top-level reshaping functions' ; echo $MSG
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 53cdc46fdd16b..cfdc6b34274bf 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7500,46 +7500,67 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
              label=None, convention='start', kind=None, loffset=None,
              limit=None, base=0, on=None, level=None):
         """
+        Resample time-series data.
+
         Convenience method for frequency conversion and resampling of time
-        series. Object must have a datetime-like index (DatetimeIndex,
-        PeriodIndex, or TimedeltaIndex), or pass datetime-like values
-        to the on or level keyword.
+        series. Object must have a datetime-like index (`DatetimeIndex`,
+        `PeriodIndex`, or `TimedeltaIndex`), or pass datetime-like values
+        to the `on` or `level` keyword.

         Parameters
         ----------
-        rule : string
-            the offset string or object representing target conversion
-        axis : int, optional, default 0
-        closed : {'right', 'left'}
+        rule : str
+            The offset string or object representing target conversion.
+        how : str
+            Method for down/re-sampling, default to 'mean' for downsampling.
+
+            .. deprecated:: 0.18.0
+               The new syntax is ``.resample(...).mean()``, or
+               ``.resample(...).apply(<func>)``
+        axis : {0 or 'index', 1 or 'columns'}, default 0
+            Which axis to use for up- or down-sampling. For `Series` this
+            will default to 0, i.e. along the rows. Must be
+            `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`.
+        fill_method : str, default None
+            Filling method for upsampling.
+
+            .. deprecated:: 0.18.0
+               The new syntax is ``.resample(...).<func>()``,
+               e.g. ``.resample(...).pad()``
+        closed : {'right', 'left'}, default None
             Which side of bin interval is closed. The default is 'left'
             for all frequency offsets except for 'M', 'A', 'Q', 'BM',
             'BA', 'BQ', and 'W' which all have a default of 'right'.
-        label : {'right', 'left'}
+        label : {'right', 'left'}, default None
             Which bin edge label to label bucket with. The default is 'left'
             for all frequency offsets except for 'M', 'A', 'Q', 'BM',
             'BA', 'BQ', and 'W' which all have a default of 'right'.
- convention : {'start', 'end', 's', 'e'} - For PeriodIndex only, controls whether to use the start or end of - `rule` - kind: {'timestamp', 'period'}, optional + convention : {'start', 'end', 's', 'e'}, default 'start' + For `PeriodIndex` only, controls whether to use the start or + end of `rule`. + kind : {'timestamp', 'period'}, optional, default None Pass 'timestamp' to convert the resulting index to a - ``DateTimeIndex`` or 'period' to convert it to a ``PeriodIndex``. + `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. By default the input representation is retained. - loffset : timedelta - Adjust the resampled time labels + loffset : timedelta, default None + Adjust the resampled time labels. + limit : int, default None + Maximum size gap when reindexing with `fill_method`. + + .. deprecated:: 0.18.0 base : int, default 0 For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0 - on : string, optional + range from 0 through 4. Defaults to 0. + on : str, optional For a DataFrame, column to use instead of index for resampling. Column must be datetime-like. .. versionadded:: 0.19.0 - level : string or int, optional + level : str or int, optional For a MultiIndex, level (name or number) to use for - resampling. Level must be datetime-like. + resampling. `level` must be datetime-like. .. versionadded:: 0.19.0 @@ -7556,6 +7577,12 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, To learn more about the offset strings, please see `this link `__. + See Also + -------- + groupby : Group by mapping, function, label, or list of labels. + Series.resample : Resample a Series. + DataFrame.resample: Resample a DataFrame. + Examples -------- @@ -7612,7 +7639,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, Upsample the series into 30 second bins. - >>> series.resample('30S').asfreq()[0:5] #select first 5 rows + >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows 2000-01-01 00:00:00 0.0 2000-01-01 00:00:30 NaN 2000-01-01 00:01:00 1.0 @@ -7645,8 +7672,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, Pass a custom function via ``apply`` >>> def custom_resampler(array_like): - ... return np.sum(array_like)+5 - + ... return np.sum(array_like) + 5 + ... >>> series.resample('3T').apply(custom_resampler) 2000-01-01 00:00:00 8 2000-01-01 00:03:00 17 @@ -7656,73 +7683,106 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, For a Series with a PeriodIndex, the keyword `convention` can be used to control whether to use the start or end of `rule`. + Resample a year by quarter using 'start' `convention`. Values are + assigned to the first quarter of the period. + >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', - freq='A', - periods=2)) + ... freq='A', + ... periods=2)) >>> s 2012 1 2013 2 Freq: A-DEC, dtype: int64 - - Resample by month using 'start' `convention`. Values are assigned to - the first month of the period. - - >>> s.resample('M', convention='start').asfreq().head() - 2012-01 1.0 - 2012-02 NaN - 2012-03 NaN - 2012-04 NaN - 2012-05 NaN - Freq: M, dtype: float64 - - Resample by month using 'end' `convention`. Values are assigned to - the last month of the period. 
- - >>> s.resample('M', convention='end').asfreq() - 2012-12 1.0 - 2013-01 NaN - 2013-02 NaN - 2013-03 NaN - 2013-04 NaN - 2013-05 NaN - 2013-06 NaN - 2013-07 NaN - 2013-08 NaN - 2013-09 NaN - 2013-10 NaN - 2013-11 NaN - 2013-12 2.0 + >>> s.resample('Q', convention='start').asfreq() + 2012Q1 1.0 + 2012Q2 NaN + 2012Q3 NaN + 2012Q4 NaN + 2013Q1 2.0 + 2013Q2 NaN + 2013Q3 NaN + 2013Q4 NaN + Freq: Q-DEC, dtype: float64 + + Resample quarters by month using 'end' `convention`. Values are + assigned to the last month of the period. + + >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', + ... freq='Q', + ... periods=4)) + >>> q + 2018Q1 1 + 2018Q2 2 + 2018Q3 3 + 2018Q4 4 + Freq: Q-DEC, dtype: int64 + >>> q.resample('M', convention='end').asfreq() + 2018-03 1.0 + 2018-04 NaN + 2018-05 NaN + 2018-06 2.0 + 2018-07 NaN + 2018-08 NaN + 2018-09 3.0 + 2018-10 NaN + 2018-11 NaN + 2018-12 4.0 Freq: M, dtype: float64 - For DataFrame objects, the keyword ``on`` can be used to specify the + For DataFrame objects, the keyword `on` can be used to specify the column instead of the index for resampling. - >>> df = pd.DataFrame(data=9*[range(4)], columns=['a', 'b', 'c', 'd']) - >>> df['time'] = pd.date_range('1/1/2000', periods=9, freq='T') - >>> df.resample('3T', on='time').sum() - a b c d - time - 2000-01-01 00:00:00 0 3 6 9 - 2000-01-01 00:03:00 0 3 6 9 - 2000-01-01 00:06:00 0 3 6 9 - - For a DataFrame with MultiIndex, the keyword ``level`` can be used to - specify on level the resampling needs to take place. - - >>> time = pd.date_range('1/1/2000', periods=5, freq='T') - >>> df2 = pd.DataFrame(data=10*[range(4)], - columns=['a', 'b', 'c', 'd'], - index=pd.MultiIndex.from_product([time, [1, 2]]) - ) - >>> df2.resample('3T', level=0).sum() - a b c d - 2000-01-01 00:00:00 0 6 12 18 - 2000-01-01 00:03:00 0 4 8 12 - - See also - -------- - groupby : Group by mapping, function, label, or list of labels. + >>> d = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) + >>> df = pd.DataFrame(d) + >>> df['week_starting'] = pd.date_range('01/01/2018', + ... periods=8, + ... freq='W') + >>> df + price volume week_starting + 0 10 50 2018-01-07 + 1 11 60 2018-01-14 + 2 9 40 2018-01-21 + 3 13 100 2018-01-28 + 4 14 50 2018-02-04 + 5 18 100 2018-02-11 + 6 17 40 2018-02-18 + 7 19 50 2018-02-25 + >>> df.resample('M', on='week_starting').mean() + price volume + week_starting + 2018-01-31 10.75 62.5 + 2018-02-28 17.00 60.0 + + For a DataFrame with MultiIndex, the keyword `level` can be used to + specify on which level the resampling needs to take place. + + >>> days = pd.date_range('1/1/2000', periods=4, freq='D') + >>> d2 = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) + >>> df2 = pd.DataFrame(d2, + ... index=pd.MultiIndex.from_product([days, + ... ['morning', + ... 'afternoon']] + ... 
)) + >>> df2 + price volume + 2000-01-01 morning 10 50 + afternoon 11 60 + 2000-01-02 morning 9 40 + afternoon 13 100 + 2000-01-03 morning 14 50 + afternoon 18 100 + 2000-01-04 morning 17 40 + afternoon 19 50 + >>> df2.resample('D', level=0).sum() + price volume + 2000-01-01 21 110 + 2000-01-02 22 140 + 2000-01-03 32 150 + 2000-01-04 36 90 """ + from pandas.core.resample import (resample, _maybe_process_deprecations) axis = self._get_axis_number(axis) From d8d62229c3a479ae19ed6820389e740f4be88045 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 10 Nov 2018 14:13:34 -0800 Subject: [PATCH 082/122] MAINT: tm.assert_raises_regex --> pytest.raises (#23592) * MAINT: tm.assert_raises_regex --> pytest.raises pytest.raises has all of the functionality that we need from tm.assert_raises_regex. Closes gh-16521. * Don't remove, just deprecate assert_raises_regex * CLN: Test cleanups and follow-ups --- pandas/tests/arithmetic/test_datetime64.py | 18 +- pandas/tests/arithmetic/test_period.py | 54 +++--- pandas/tests/arithmetic/test_timedelta64.py | 24 +-- pandas/tests/arrays/categorical/test_algos.py | 2 +- .../arrays/categorical/test_analytics.py | 3 +- pandas/tests/arrays/categorical/test_api.py | 4 +- .../arrays/categorical/test_constructors.py | 12 +- .../tests/arrays/categorical/test_dtypes.py | 2 +- .../tests/arrays/categorical/test_indexing.py | 2 +- .../tests/arrays/categorical/test_missing.py | 2 +- .../arrays/categorical/test_operators.py | 8 +- .../tests/arrays/categorical/test_sorting.py | 9 +- pandas/tests/arrays/interval/test_interval.py | 2 +- pandas/tests/arrays/interval/test_ops.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 87 +++++---- pandas/tests/arrays/sparse/test_dtype.py | 3 +- pandas/tests/arrays/sparse/test_libsparse.py | 14 +- pandas/tests/arrays/test_integer.py | 14 +- pandas/tests/arrays/test_period.py | 26 +-- pandas/tests/computation/test_eval.py | 54 +++--- pandas/tests/dtypes/test_common.py | 3 +- pandas/tests/dtypes/test_dtypes.py | 18 +- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/extension/base/constructors.py | 3 +- pandas/tests/extension/base/getitem.py | 5 +- pandas/tests/extension/base/methods.py | 4 +- pandas/tests/extension/base/setitem.py | 7 +- .../tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/extension/test_categorical.py | 3 +- pandas/tests/extension/test_interval.py | 3 +- pandas/tests/extension/test_period.py | 3 +- pandas/tests/frame/test_alter_axes.py | 64 +++---- pandas/tests/frame/test_analytics.py | 23 +-- pandas/tests/frame/test_api.py | 19 +- pandas/tests/frame/test_arithmetic.py | 12 +- .../tests/frame/test_axis_select_reindex.py | 38 ++-- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/frame/test_combine_concat.py | 14 +- pandas/tests/frame/test_constructors.py | 80 ++++---- pandas/tests/frame/test_dtypes.py | 22 +-- pandas/tests/frame/test_indexing.py | 43 +++-- pandas/tests/frame/test_join.py | 11 +- pandas/tests/frame/test_missing.py | 18 +- pandas/tests/frame/test_mutate_columns.py | 6 +- pandas/tests/frame/test_nonunique_indexes.py | 7 +- pandas/tests/frame/test_operators.py | 34 ++-- pandas/tests/frame/test_period.py | 5 +- pandas/tests/frame/test_quantile.py | 2 +- pandas/tests/frame/test_query_eval.py | 22 ++- pandas/tests/frame/test_rank.py | 4 +- pandas/tests/frame/test_replace.py | 15 +- pandas/tests/frame/test_reshape.py | 4 +- pandas/tests/frame/test_sorting.py | 20 +- pandas/tests/frame/test_subclass.py | 2 +- 
pandas/tests/frame/test_timeseries.py | 31 ++- pandas/tests/frame/test_to_csv.py | 16 +- pandas/tests/frame/test_validate.py | 3 +- pandas/tests/generic/test_generic.py | 61 +++--- .../generic/test_label_or_level_utils.py | 28 +-- .../tests/groupby/aggregate/test_aggregate.py | 6 +- pandas/tests/groupby/aggregate/test_cython.py | 6 +- pandas/tests/groupby/aggregate/test_other.py | 4 +- pandas/tests/groupby/test_filters.py | 6 +- pandas/tests/groupby/test_function.py | 14 +- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/groupby/test_grouping.py | 16 +- pandas/tests/groupby/test_rank.py | 4 +- pandas/tests/groupby/test_transform.py | 10 +- pandas/tests/groupby/test_whitelist.py | 2 +- pandas/tests/indexes/common.py | 164 ++++++++-------- pandas/tests/indexes/datetimes/test_astype.py | 2 +- .../indexes/datetimes/test_construction.py | 8 +- .../indexes/datetimes/test_date_range.py | 42 ++--- .../tests/indexes/datetimes/test_datetime.py | 9 +- .../tests/indexes/datetimes/test_indexing.py | 32 ++-- pandas/tests/indexes/datetimes/test_ops.py | 24 ++- .../indexes/datetimes/test_partial_slicing.py | 28 +-- .../indexes/datetimes/test_scalar_compat.py | 10 +- pandas/tests/indexes/datetimes/test_tools.py | 12 +- pandas/tests/indexes/interval/test_astype.py | 16 +- .../indexes/interval/test_construction.py | 30 +-- .../tests/indexes/interval/test_interval.py | 20 +- .../indexes/interval/test_interval_new.py | 2 +- .../indexes/interval/test_interval_range.py | 42 ++--- pandas/tests/indexes/multi/test_analytics.py | 24 +-- pandas/tests/indexes/multi/test_astype.py | 7 +- pandas/tests/indexes/multi/test_compat.py | 49 ++--- .../tests/indexes/multi/test_constructor.py | 73 ++++---- pandas/tests/indexes/multi/test_conversion.py | 6 +- .../tests/indexes/multi/test_equivalence.py | 18 +- pandas/tests/indexes/multi/test_get_set.py | 38 ++-- pandas/tests/indexes/multi/test_indexing.py | 26 ++- pandas/tests/indexes/multi/test_integrity.py | 36 ++-- pandas/tests/indexes/multi/test_join.py | 4 +- pandas/tests/indexes/multi/test_missing.py | 8 +- pandas/tests/indexes/multi/test_names.py | 31 ++- pandas/tests/indexes/multi/test_reindex.py | 19 +- pandas/tests/indexes/multi/test_reshape.py | 2 +- pandas/tests/indexes/multi/test_set_ops.py | 42 ++--- pandas/tests/indexes/multi/test_sorting.py | 12 +- pandas/tests/indexes/period/test_astype.py | 2 +- .../tests/indexes/period/test_construction.py | 30 +-- pandas/tests/indexes/period/test_indexing.py | 33 ++-- pandas/tests/indexes/period/test_ops.py | 14 +- .../indexes/period/test_partial_slicing.py | 12 +- pandas/tests/indexes/period/test_period.py | 10 +- .../tests/indexes/period/test_period_range.py | 16 +- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 8 +- pandas/tests/indexes/test_base.py | 77 ++++---- pandas/tests/indexes/test_category.py | 30 +-- pandas/tests/indexes/test_numeric.py | 20 +- pandas/tests/indexes/test_range.py | 14 +- .../tests/indexes/timedeltas/test_astype.py | 2 +- .../indexes/timedeltas/test_construction.py | 2 +- .../tests/indexes/timedeltas/test_indexing.py | 23 ++- pandas/tests/indexes/timedeltas/test_ops.py | 20 +- .../timedeltas/test_partial_slicing.py | 13 +- .../indexes/timedeltas/test_scalar_compat.py | 9 +- .../indexes/timedeltas/test_timedelta.py | 4 +- .../timedeltas/test_timedelta_range.py | 14 +- pandas/tests/indexes/timedeltas/test_tools.py | 4 +- pandas/tests/indexing/test_categorical.py | 21 +-- pandas/tests/indexing/test_coercion.py | 14 +- 
pandas/tests/indexing/test_floats.py | 6 +- pandas/tests/indexing/test_iloc.py | 14 +- pandas/tests/indexing/test_indexing.py | 19 +- pandas/tests/indexing/test_multiindex.py | 12 +- pandas/tests/indexing/test_scalar.py | 4 +- pandas/tests/internals/test_internals.py | 8 +- pandas/tests/io/formats/test_style.py | 2 +- pandas/tests/io/formats/test_to_csv.py | 6 +- pandas/tests/io/formats/test_to_html.py | 2 +- pandas/tests/io/json/test_compression.py | 10 +- .../tests/io/json/test_json_table_schema.py | 8 +- pandas/tests/io/json/test_pandas.py | 15 +- pandas/tests/io/json/test_readlines.py | 4 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/io/msgpack/test_except.py | 5 +- pandas/tests/io/msgpack/test_limits.py | 11 +- pandas/tests/io/msgpack/test_sequnpack.py | 3 +- pandas/tests/io/parser/c_parser_only.py | 16 +- pandas/tests/io/parser/common.py | 50 ++--- pandas/tests/io/parser/compression.py | 17 +- pandas/tests/io/parser/converters.py | 2 +- pandas/tests/io/parser/dialect.py | 4 +- pandas/tests/io/parser/header.py | 6 +- pandas/tests/io/parser/parse_dates.py | 22 +-- pandas/tests/io/parser/python_parser_only.py | 16 +- pandas/tests/io/parser/quoting.py | 38 ++-- pandas/tests/io/parser/skiprows.py | 5 +- pandas/tests/io/parser/test_read_fwf.py | 15 +- pandas/tests/io/parser/test_unsupported.py | 24 +-- pandas/tests/io/parser/usecols.py | 39 ++-- pandas/tests/io/sas/test_sas.py | 6 +- pandas/tests/io/test_common.py | 9 +- pandas/tests/io/test_excel.py | 12 +- pandas/tests/io/test_html.py | 28 ++- pandas/tests/io/test_pickle.py | 5 +- pandas/tests/io/test_pytables.py | 25 +-- pandas/tests/io/test_sql.py | 2 +- pandas/tests/plotting/test_misc.py | 2 +- pandas/tests/reshape/merge/test_join.py | 4 +- pandas/tests/reshape/merge/test_merge.py | 10 +- pandas/tests/reshape/merge/test_merge_asof.py | 5 +- .../tests/reshape/merge/test_merge_ordered.py | 5 +- pandas/tests/reshape/test_concat.py | 14 +- pandas/tests/reshape/test_melt.py | 2 +- pandas/tests/reshape/test_pivot.py | 10 +- pandas/tests/reshape/test_tile.py | 4 +- .../tests/reshape/test_union_categoricals.py | 14 +- pandas/tests/reshape/test_util.py | 15 +- pandas/tests/scalar/interval/test_interval.py | 31 ++- pandas/tests/scalar/interval/test_ops.py | 3 +- pandas/tests/scalar/period/test_asfreq.py | 20 +- pandas/tests/scalar/period/test_period.py | 44 ++--- .../scalar/timedelta/test_construction.py | 24 +-- .../tests/scalar/timestamp/test_timestamp.py | 12 +- .../tests/scalar/timestamp/test_timezones.py | 12 +- .../tests/scalar/timestamp/test_unary_ops.py | 2 +- .../tests/series/indexing/test_alter_index.py | 4 +- pandas/tests/series/indexing/test_boolean.py | 8 +- pandas/tests/series/indexing/test_indexing.py | 4 +- pandas/tests/series/test_alter_axes.py | 16 +- pandas/tests/series/test_analytics.py | 35 ++-- pandas/tests/series/test_api.py | 29 ++- pandas/tests/series/test_arithmetic.py | 4 +- pandas/tests/series/test_combine_concat.py | 4 +- pandas/tests/series/test_constructors.py | 14 +- pandas/tests/series/test_datetime_values.py | 8 +- pandas/tests/series/test_dtypes.py | 4 +- pandas/tests/series/test_missing.py | 30 +-- pandas/tests/series/test_operators.py | 12 +- pandas/tests/series/test_quantile.py | 2 +- pandas/tests/series/test_rank.py | 4 +- pandas/tests/series/test_replace.py | 8 +- pandas/tests/series/test_timeseries.py | 10 +- pandas/tests/series/test_timezones.py | 10 +- pandas/tests/series/test_validate.py | 4 +- pandas/tests/sparse/frame/test_frame.py | 20 +- pandas/tests/sparse/series/test_series.py | 28 
+-- pandas/tests/sparse/test_indexing.py | 2 +- pandas/tests/test_algos.py | 42 ++--- pandas/tests/test_base.py | 22 +-- pandas/tests/test_errors.py | 7 +- pandas/tests/test_expressions.py | 12 +- pandas/tests/test_multilevel.py | 40 ++-- pandas/tests/test_panel.py | 79 ++++---- pandas/tests/test_register_accessor.py | 2 +- pandas/tests/test_resample.py | 20 +- pandas/tests/test_sorting.py | 12 +- pandas/tests/test_strings.py | 177 ++++++++---------- pandas/tests/test_take.py | 8 +- pandas/tests/test_window.py | 48 ++--- pandas/tests/tools/test_numeric.py | 14 +- pandas/tests/tseries/offsets/test_fiscal.py | 6 +- pandas/tests/tseries/test_frequencies.py | 42 ++--- pandas/tests/tslibs/test_libfrequencies.py | 4 +- pandas/tests/tslibs/test_parsing.py | 4 +- pandas/tests/util/test_hashing.py | 6 +- pandas/tests/util/test_testing.py | 139 +++++++------- pandas/tests/util/test_util.py | 44 +++-- pandas/util/testing.py | 7 + 224 files changed, 1973 insertions(+), 2032 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index c3ebd8f773aa6..73921a18ee5c7 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -687,7 +687,7 @@ def check(get_ser, test_ser): # with 'operate' (from core/ops.py) for the ops that are not # defined op = getattr(get_ser, op_str, None) - with tm.assert_raises_regex(TypeError, 'operate|cannot'): + with pytest.raises(TypeError, match='operate|cannot'): op(test_ser) # ## timedelta64 ### @@ -1042,9 +1042,9 @@ def test_dti_add_timestamp_raises(self, box_with_datetime): idx = DatetimeIndex(['2011-01-01', '2011-01-02']) idx = tm.box_expected(idx, box_with_datetime) msg = "cannot add" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx + Timestamp('2011-01-01') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Timestamp('2011-01-01') + idx # ------------------------------------------------------------- @@ -1268,7 +1268,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture): tm.assert_index_equal(result, expected) msg = 'cannot subtract .*TimedeltaIndex' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi - dti # sub with timedelta64 array @@ -1276,7 +1276,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture): tm.assert_index_equal(result, expected) msg = 'cannot subtract DatetimeIndex from' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi.values - dti def test_dti_isub_tdi(self, tz_naive_fixture): @@ -1292,7 +1292,7 @@ def test_dti_isub_tdi(self, tz_naive_fixture): tm.assert_index_equal(result, expected) msg = 'cannot subtract .*TimedeltaIndex' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi -= dti # isub with timedelta64 array @@ -1303,7 +1303,7 @@ def test_dti_isub_tdi(self, tz_naive_fixture): msg = '|'.join(['cannot perform __neg__ with this index type:', 'ufunc subtract cannot use operands with types', 'cannot subtract DatetimeIndex from']) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi.values -= dti # ------------------------------------------------------------- @@ -1323,9 +1323,9 @@ def test_add_datetimelike_and_dti(self, addend, tz): # GH#9631 dti = DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize(tz) msg = 'cannot add DatetimeIndex and {0}'.format(type(addend).__name__) - with 
tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): dti + addend - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): addend + dti # ------------------------------------------------------------- diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 3595cf7a2522f..687d07082ea33 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -118,27 +118,27 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_df_fail): base = tm.box_expected(base, box) msg = "Input has different freq=A-DEC from " - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= Period('2011', freq='A') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): Period('2011', freq='A') >= base # TODO: Could parametrize over boxes for idx? idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= idx # Different frequency msg = "Input has different freq=4M from " - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= Period('2011', freq='4M') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): Period('2011', freq='4M') >= base idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= idx @pytest.mark.parametrize('freq', ['M', '2M', '3M']) @@ -190,10 +190,10 @@ def test_pi_cmp_nat_mismatched_freq_raises(self, freq): diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') msg = "Input has different freq=4M from PeriodIndex" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): idx1 > diff - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): idx1 == diff # TODO: De-duplicate with test_pi_cmp_nat @@ -708,13 +708,13 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_daily(self, not_daily): other = not_daily rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') msg = 'Input has different freq(=.+)? from Period.*?\\(freq=D\\)' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng - other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng -= other def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): @@ -734,10 +734,10 @@ def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly): rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') msg = 'Input has different freq(=.+)? 
from Period.*?\\(freq=H\\)' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other def test_pi_sub_isub_timedeltalike_hourly(self, two_hours): @@ -768,13 +768,13 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_annual(self, rng = pd.period_range('2014', '2024', freq='A') msg = ('Input has different freq(=.+)? ' 'from Period.*?\\(freq=A-DEC\\)') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng - other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng -= other def test_pi_add_iadd_timedeltalike_M(self): @@ -792,13 +792,13 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, other = mismatched_freq rng = pd.period_range('2014-01', '2016-12', freq='M') msg = 'Input has different freq(=.+)? from Period.*?\\(freq=M\\)' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng - other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng -= other def test_parr_add_sub_td64_nat(self, box): @@ -907,14 +907,14 @@ def test_pi_ops_errors(self, ng, box_with_period): obj = tm.box_expected(idx, box_with_period) msg = r"unsupported operand type\(s\)" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj + ng with pytest.raises(TypeError): # error message differs between PY2 and 3 ng + obj - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj - ng with pytest.raises(TypeError): @@ -1009,13 +1009,13 @@ def test_pi_offset_errors(self): # from Period msg = r"Input has different freq from Period.*?\(freq=D\)" for obj in [idx, ser]: - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): obj + pd.offsets.Hour(2) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): pd.offsets.Hour(2) + obj - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): obj - pd.offsets.Hour(2) def test_pi_sub_period(self): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 902d0716aed8d..f92a772f3eaad 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -161,22 +161,22 @@ def test_tdi_add_timestamp_nat_masking(self): def test_tdi_add_overflow(self): # See GH#14068 msg = "too (big|large) to convert" - with 
tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta(106580, 'D') + Timestamp('2000') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Timestamp('2000') + pd.to_timedelta(106580, 'D') _NaT = int(pd.NaT) + 1 msg = "Overflow in int64 addition" - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta([106580], 'D') + Timestamp('2000') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Timestamp('2000') + pd.to_timedelta([106580], 'D') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta([_NaT]) - Timedelta('1 days') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta(['5 days', _NaT]) - Timedelta('1 days') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): (pd.to_timedelta([_NaT, '5 days', '1 hours']) - pd.to_timedelta(['7 seconds', _NaT, '4 hours'])) @@ -415,7 +415,7 @@ def test_td64arr_sub_timestamp_raises(self, box): msg = ("cannot subtract a datelike from|" "Could not operate|" "cannot perform operation") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx - Timestamp('2011-01-01') def test_td64arr_add_timestamp(self, box, tz_naive_fixture): @@ -1217,9 +1217,9 @@ def test_td64arr_mul_tdscalar_invalid(self, box, scalar_td): # with 'operate' (from core/ops.py) for the ops that are not # defined pattern = 'operate|unsupported|cannot|not supported' - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): td1 * scalar_td - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): scalar_td * td1 def test_td64arr_mul_too_short_raises(self, box): @@ -1399,8 +1399,8 @@ def test_td64arr_pow_invalid(self, scalar_td, box): # with 'operate' (from core/ops.py) for the ops that are not # defined pattern = 'operate|unsupported|cannot|not supported' - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): scalar_td ** td1 - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): td1 ** scalar_td diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index e7dc67c5d6a5b..50f643756c5dc 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -138,5 +138,5 @@ def test_take_fill_value_new_raises(self): # https://github.com/pandas-dev/pandas/issues/23296 cat = pd.Categorical(['a', 'b', 'c']) xpr = r"'fill_value' \('d'\) is not in this Categorical's categories." 
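        # ``match`` is applied as a regular expression via ``re.search``,
        # which is why the literal parentheses around 'd' are escaped in
        # ``xpr`` above. A minimal, self-contained sketch of the idiom
        # being migrated to (illustrative, not part of this test):
        #
        #     import pytest
        #     with pytest.raises(ValueError, match=r"foo \(bar\)"):
        #         raise ValueError("foo (bar) baz")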
- with tm.assert_raises_regex(TypeError, xpr): + with pytest.raises(TypeError, match=xpr): cat.take([0, 1, -1], fill_value='d', allow_fill=True) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 0f292a457bbc2..ea6facd66a1a3 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -305,7 +305,8 @@ def test_numpy_repeat(self): tm.assert_categorical_equal(np.repeat(cat, 2), exp) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(cat, 2, axis=1) def test_isna(self): exp = np.array([False, False, True]) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 54de398473d52..ec90995e6084b 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -50,9 +50,9 @@ def test_set_ordered(self): # removed in 0.19.0 msg = "can\'t set attribute" - with tm.assert_raises_regex(AttributeError, msg): + with pytest.raises(AttributeError, match=msg): cat.ordered = True - with tm.assert_raises_regex(AttributeError, msg): + with pytest.raises(AttributeError, match=msg): cat.ordered = False def test_rename_categories(self): diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 8bd245d2aabae..a473f44d5d4aa 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -26,10 +26,10 @@ def test_validate_ordered(self): # This should be a boolean. ordered = np.array([0, 1, 2]) - with tm.assert_raises_regex(exp_err, exp_msg): + with pytest.raises(exp_err, match=exp_msg): Categorical([1, 2, 3], ordered=ordered) - with tm.assert_raises_regex(exp_err, exp_msg): + with pytest.raises(exp_err, match=exp_msg): Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'], ordered=ordered) @@ -351,13 +351,13 @@ def test_constructor_with_dtype(self, ordered): def test_constructor_dtype_and_others_raises(self): dtype = CategoricalDtype(['a', 'b'], ordered=True) - with tm.assert_raises_regex(ValueError, "Cannot"): + with pytest.raises(ValueError, match="Cannot"): Categorical(['a', 'b'], categories=['a', 'b'], dtype=dtype) - with tm.assert_raises_regex(ValueError, "Cannot"): + with pytest.raises(ValueError, match="Cannot"): Categorical(['a', 'b'], ordered=True, dtype=dtype) - with tm.assert_raises_regex(ValueError, "Cannot"): + with pytest.raises(ValueError, match="Cannot"): Categorical(['a', 'b'], ordered=False, dtype=dtype) @pytest.mark.parametrize('categories', [ @@ -372,7 +372,7 @@ def test_constructor_str_category(self, categories, ordered): tm.assert_categorical_equal(result, expected) def test_constructor_str_unknown(self): - with tm.assert_raises_regex(ValueError, "Unknown `dtype`"): + with pytest.raises(ValueError, match="Unknown `dtype`"): Categorical([1, 2], dtype="foo") def test_constructor_from_categorical_with_dtype(self): diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index 491a7867fee71..66f08355e7516 100644 --- a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -120,7 +120,7 @@ def test_astype(self, ordered): tm.assert_numpy_array_equal(result, expected) msg = 'could not convert string to float' - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): cat.astype(float) # numeric diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index a54ee7381f9eb..8df5728f7d895 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -143,5 +143,5 @@ def test_mask_with_boolean_raises(index): if index: idx = CategoricalIndex(idx) - with tm.assert_raises_regex(ValueError, 'NA / NaN'): + with pytest.raises(ValueError, match='NA / NaN'): s[idx] diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 32698d190d93c..b4b361dabac61 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -70,7 +70,7 @@ def test_fillna_raises(self, fillna_kwargs, msg): # https://github.com/pandas-dev/pandas/issues/19682 cat = Categorical([1, 2, 3]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): cat.fillna(**fillna_kwargs) @pytest.mark.parametrize("named", [True, False]) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index ce15ebfb281f2..f216865faa2ad 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -238,15 +238,17 @@ def test_unordered_different_order_equal(self, ctor): def test_unordered_different_categories_raises(self): c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False) c2 = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False) - with tm.assert_raises_regex(TypeError, - "Categoricals can only be compared"): + + with pytest.raises(TypeError, match=("Categoricals can " + "only be compared")): c1 == c2 def test_compare_different_lengths(self): c1 = Categorical([], categories=['a', 'b']) c2 = Categorical([], categories=['a']) + msg = "Categories are different lengths" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): c1 == c2 def test_compare_unordered_different_order(self): diff --git a/pandas/tests/arrays/categorical/test_sorting.py b/pandas/tests/arrays/categorical/test_sorting.py index 922d9fdb788b1..3d55862cd2cc0 100644 --- a/pandas/tests/arrays/categorical/test_sorting.py +++ b/pandas/tests/arrays/categorical/test_sorting.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest from pandas import Categorical, Index import pandas.util.testing as tm @@ -30,12 +31,12 @@ def test_numpy_argsort(self): check_dtype=False) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, axis=0) + with pytest.raises(ValueError, match=msg): + np.argsort(c, axis=0) msg = "the 'order' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, order='C') + with pytest.raises(ValueError, match=msg): + np.argsort(c, order='C') def test_sort_values(self): diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 9a191dda3a73a..a04579dbbb6b1 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -38,7 +38,7 @@ def test_repeat(self, left_right_dtypes, repeats): ('foo', r'invalid literal for (int|long)\(\) with base 10')]) def test_repeat_errors(self, bad_repeats, msg): array = 
IntervalArray.from_breaks(range(4)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): array.repeat(bad_repeats) @pytest.mark.parametrize('new_closed', [ diff --git a/pandas/tests/arrays/interval/test_ops.py b/pandas/tests/arrays/interval/test_ops.py index 45bf465577ace..bdbd145ed2a80 100644 --- a/pandas/tests/arrays/interval/test_ops.py +++ b/pandas/tests/arrays/interval/test_ops.py @@ -78,5 +78,5 @@ def test_overlaps_invalid_type(self, constructor, other): interval_container = constructor.from_breaks(range(5)) msg = '`other` must be Interval-like, got {other}'.format( other=type(other).__name__) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_container.overlaps(other) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 852c4fb910560..04d7f4d498c2b 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -97,7 +97,7 @@ def test_constructor_object_dtype(self): @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) def test_constructor_na_dtype(self, dtype): - with tm.assert_raises_regex(ValueError, "Cannot convert"): + with pytest.raises(ValueError, match="Cannot convert"): SparseArray([0, 1, np.nan], dtype=dtype) def test_constructor_spindex_dtype(self): @@ -224,13 +224,18 @@ def test_get_item(self): assert self.zarr[7] == 5 errmsg = re.compile("bounds") - tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[11]) - tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[-11]) + + with pytest.raises(IndexError, match=errmsg): + self.arr[11] + + with pytest.raises(IndexError, match=errmsg): + self.arr[-11] + assert self.arr[-1] == self.arr[len(self.arr) - 1] def test_take_scalar_raises(self): msg = "'indices' must be an array, not a scalar '2'." 
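        # For contrast, the array-like spelling is accepted; a sketch,
        # not an assertion made by this test:
        #
        #     self.arr.take([2])  # a one-element list returns a
        #                         # length-1 SparseArray instead of raising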
- with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.arr.take(2) def test_take(self): @@ -258,8 +263,8 @@ def test_take_negative(self): tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) def test_bad_take(self): - tm.assert_raises_regex( - IndexError, "bounds", lambda: self.arr.take([11])) + with pytest.raises(IndexError, match="bounds"): + self.arr.take([11]) def test_take_filling(self): # similar tests as GH 12631 @@ -279,10 +284,11 @@ def test_take_filling(self): expected = SparseArray([np.nan, np.nan, 4]) tm.assert_sp_array_equal(result, expected) - msg = ("Invalid value in 'indices'") - with tm.assert_raises_regex(ValueError, msg): + msg = "Invalid value in 'indices'" + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -2]), allow_fill=True) - with tm.assert_raises_regex(ValueError, msg): + + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -5]), allow_fill=True) with pytest.raises(IndexError): @@ -314,9 +320,9 @@ def test_take_filling_fill_value(self): tm.assert_sp_array_equal(result, expected) msg = ("Invalid value in 'indices'.") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -2]), allow_fill=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -5]), allow_fill=True) with pytest.raises(IndexError): @@ -351,12 +357,15 @@ def setitem(): def setslice(): self.arr[1:5] = 2 - tm.assert_raises_regex(TypeError, "item assignment", setitem) - tm.assert_raises_regex(TypeError, "item assignment", setslice) + with pytest.raises(TypeError, match="item assignment"): + setitem() + + with pytest.raises(TypeError, match="item assignment"): + setslice() def test_constructor_from_too_large_array(self): - tm.assert_raises_regex(TypeError, "expected dimension <= 1 data", - SparseArray, np.arange(10).reshape((2, 5))) + with pytest.raises(TypeError, match="expected dimension <= 1 data"): + SparseArray(np.arange(10).reshape((2, 5))) def test_constructor_from_sparse(self): res = SparseArray(self.zarr) @@ -441,7 +450,7 @@ def test_astype(self): tm.assert_sp_array_equal(result, expected) arr = SparseArray([0, np.nan, 0, 1], fill_value=0) - with tm.assert_raises_regex(ValueError, 'NA'): + with pytest.raises(ValueError, match='NA'): arr.astype('Sparse[i8]') def test_astype_bool(self): @@ -481,12 +490,12 @@ def test_set_fill_value(self): # sparsearray with NaN fill value, why not update one? 
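        # Hedged reading of the code below: the setter does not currently
        # validate the new fill_value against the sparse subtype, so the
        # commented-out ``pytest.raises`` blocks record a stricter behavior
        # that was considered but never enforced; the bare assignments pass.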
# coerces to int # msg = "unable to set fill_value 3\\.1 to int64 dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = 3.1 assert arr.fill_value == 3.1 # msg = "unable to set fill_value nan to int64 dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = np.nan assert np.isnan(arr.fill_value) @@ -496,12 +505,12 @@ def test_set_fill_value(self): # coerces to bool # msg = "unable to set fill_value 0 to bool dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = 0 assert arr.fill_value == 0 # msg = "unable to set fill_value nan to bool dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = np.nan assert np.isnan(arr.fill_value) @@ -510,7 +519,7 @@ def test_set_fill_invalid_non_scalar(self, val): arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool) msg = "fill_value must be a scalar" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): arr.fill_value = val def test_copy_shallow(self): @@ -793,8 +802,8 @@ def test_numpy_all(self, data, pos, neg): # raises with a different message on py2. msg = "the \'out\' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.all, - SparseArray(data), out=np.array([])) + with pytest.raises(ValueError, match=msg): + np.all(SparseArray(data), out=np.array([])) @pytest.mark.parametrize('data,pos,neg', [ ([False, True, False], True, False), @@ -838,8 +847,8 @@ def test_numpy_any(self, data, pos, neg): assert not out msg = "the \'out\' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.any, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.any(SparseArray(data), out=out) def test_sum(self): data = np.arange(10).astype(float) @@ -866,12 +875,12 @@ def test_numpy_sum(self): assert out == 40.0 msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.sum, - SparseArray(data), dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.sum, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), out=out) @pytest.mark.parametrize("data,expected", [ (np.array([1, 2, 3, 4, 5], dtype=float), # non-null data @@ -894,16 +903,16 @@ def test_cumsum(self, data, expected, numpy): if numpy: # numpy compatibility checks. msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - SparseArray(data), dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), out=out) else: axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. 
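            # Because ``match`` is treated as a regex, the parentheses in
            # the expected "axis(=1) out of bounds" message are escaped
            # below; ``str.format`` then substitutes the invalid axis number.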
msg = "axis\\(={axis}\\) out of bounds".format(axis=axis) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): SparseArray(data).cumsum(axis=axis) def test_mean(self): @@ -925,12 +934,12 @@ def test_numpy_mean(self): assert out == 40.0 / 9 msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.mean, - SparseArray(data), dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.mean, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), out=out) def test_ufunc(self): # GH 13853 make sure ufunc is applied to fill_value @@ -1042,7 +1051,7 @@ def test_to_coo(self): def test_non_sparse_raises(self): ser = pd.Series([1, 2, 3]) - with tm.assert_raises_regex(AttributeError, '.sparse'): + with pytest.raises(AttributeError, match='.sparse'): ser.sparse.density diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 75fc325b07a08..7c310693cf26c 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -3,7 +3,6 @@ import pandas as pd from pandas.core.sparse.api import SparseDtype -import pandas.util.testing as tm @pytest.mark.parametrize("dtype, fill_value", [ @@ -138,5 +137,5 @@ def test_parse_subtype(string, expected): "Sparse[bool, True]", ]) def test_construct_from_string_fill_value_raises(string): - with tm.assert_raises_regex(TypeError, 'fill_value in the string is not'): + with pytest.raises(TypeError, match='fill_value in the string is not'): SparseDtype.construct_from_string(string) diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py index cbad7e8e9136d..6e9d790bf85f3 100644 --- a/pandas/tests/arrays/sparse/test_libsparse.py +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -478,37 +478,37 @@ def test_check_integrity(self): # Too many indices than specified in self.length msg = "Too many indices" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=1, indices=[1, 2, 3]) # No index can be negative. msg = "No index can be less than zero" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, -2, 3]) # No index can be negative. msg = "No index can be less than zero" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, -2, 3]) # All indices must be less than the length. msg = "All indices must be less than the length" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 2, 5]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 2, 6]) # Indices must be strictly ascending. 
msg = "Indices must be strictly increasing" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 3, 2]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 3, 3]) def test_int_internal(self): diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 0fe07caed5b85..10f54458e4980 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -453,17 +453,17 @@ def test_construct_cast_invalid(self, dtype): msg = "cannot safely" arr = [1.2, 2.3, 3.7] - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): integer_array(arr, dtype=dtype) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.Series(arr).astype(dtype) arr = [1.2, 2.3, 3.7, np.nan] - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): integer_array(arr, dtype=dtype) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.Series(arr).astype(dtype) @@ -683,11 +683,11 @@ def test_reduce_to_float(op): def test_astype_nansafe(): - # https://github.com/pandas-dev/pandas/pull/22343 + # see gh-22343 arr = integer_array([np.nan, 1, 2], dtype="Int8") + msg = "cannot convert float NaN to integer" - with tm.assert_raises_regex( - ValueError, 'cannot convert float NaN to integer'): + with pytest.raises(ValueError, match=msg): arr.astype('uint32') diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 0125729048cdd..95a1d1781456c 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -53,22 +53,22 @@ def test_from_datetime64_freq_changes(): "Input has different freq"), ]) def test_period_array_raises(data, freq, msg): - with tm.assert_raises_regex(IncompatibleFrequency, msg): + with pytest.raises(IncompatibleFrequency, match=msg): period_array(data, freq) def test_period_array_non_period_series_raies(): ser = pd.Series([1, 2, 3]) - with tm.assert_raises_regex(TypeError, 'dtype'): + with pytest.raises(TypeError, match='dtype'): PeriodArray(ser, freq='D') def test_period_array_freq_mismatch(): arr = period_array(['2000', '2001'], freq='D') - with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + with pytest.raises(IncompatibleFrequency, match='freq'): PeriodArray(arr, freq='M') - with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + with pytest.raises(IncompatibleFrequency, match='freq'): PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) @@ -80,11 +80,11 @@ def test_asi8(): def test_take_raises(): arr = period_array(['2000', '2001'], freq='D') - with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + with pytest.raises(IncompatibleFrequency, match='freq'): arr.take([0, -1], allow_fill=True, fill_value=pd.Period('2000', freq='W')) - with tm.assert_raises_regex(ValueError, 'foo'): + with pytest.raises(ValueError, match='foo'): arr.take([0, -1], allow_fill=True, fill_value='foo') @@ -129,13 +129,13 @@ def test_astype_period(): def test_astype_datetime(other): arr = period_array(['2000', '2001', None], freq='D') # slice off the [ns] so that the regex matches. 
- with tm.assert_raises_regex(TypeError, other[:-4]): + with pytest.raises(TypeError, match=other[:-4]): arr.astype(other) def test_fillna_raises(): arr = period_array(['2000', '2001', '2002'], freq='D') - with tm.assert_raises_regex(ValueError, 'Length'): + with pytest.raises(ValueError, match='Length'): arr.fillna(arr[:2]) @@ -167,23 +167,23 @@ def test_setitem(key, value, expected): def test_setitem_raises_incompatible_freq(): arr = PeriodArray(np.arange(3), freq="D") - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + with pytest.raises(IncompatibleFrequency, match="freq"): arr[0] = pd.Period("2000", freq="A") other = period_array(['2000', '2001'], freq='A') - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + with pytest.raises(IncompatibleFrequency, match="freq"): arr[[0, 1]] = other def test_setitem_raises_length(): arr = PeriodArray(np.arange(3), freq="D") - with tm.assert_raises_regex(ValueError, "length"): + with pytest.raises(ValueError, match="length"): arr[[0, 1]] = [pd.Period("2000", freq="D")] def test_setitem_raises_type(): arr = PeriodArray(np.arange(3), freq="D") - with tm.assert_raises_regex(TypeError, "int"): + with pytest.raises(TypeError, match="int"): arr[0] = 1 @@ -193,5 +193,5 @@ def test_setitem_raises_type(): def tet_sub_period(): arr = period_array(['2000', '2001'], freq='D') other = pd.Period("2000", freq="M") - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + with pytest.raises(IncompatibleFrequency, match="freq"): arr - other diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index eef8646e4d6d2..52945edb14e58 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -694,12 +694,12 @@ def test_disallow_python_keywords(self): # GH 18221 df = pd.DataFrame([[0, 0, 0]], columns=['foo', 'bar', 'class']) msg = "Python keyword not valid identifier in numexpr query" - with tm.assert_raises_regex(SyntaxError, msg): + with pytest.raises(SyntaxError, match=msg): df.query('class == 0') df = pd.DataFrame() df.index.name = 'lambda' - with tm.assert_raises_regex(SyntaxError, msg): + with pytest.raises(SyntaxError, match=msg): df.query('lambda == 0') @@ -1392,11 +1392,11 @@ def test_cannot_item_assign(self, invalid_target): msg = "Cannot assign expression output to target" expression = "a = 1 + 2" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=invalid_target, inplace=True) if hasattr(invalid_target, "copy"): - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=invalid_target, inplace=False) @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) @@ -1404,7 +1404,7 @@ def test_cannot_copy_item(self, invalid_target): msg = "Cannot return a copy of the target" expression = "a = 1 + 2" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=invalid_target, inplace=False) @pytest.mark.parametrize("target", [1, "cat", [1, 2], @@ -1415,7 +1415,7 @@ def test_inplace_no_assignment(self, target): assert self.eval(expression, target=target, inplace=False) == 3 msg = "Cannot operate inplace if there is no assignment" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=target, inplace=True) def test_basic_period_index_boolean_expression(self): @@ -1692,17 +1692,18 @@ def 
test_result_types2(self): def test_undefined_func(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assert_raises_regex( - ValueError, "\"mysin\" is not a supported function"): + msg = "\"mysin\" is not a supported function" + + with pytest.raises(ValueError, match=msg): df.eval("mysin(a)", engine=self.engine, parser=self.parser) def test_keyword_arg(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assert_raises_regex(TypeError, - "Function \"sin\" does not support " - "keyword arguments"): + msg = "Function \"sin\" does not support keyword arguments" + + with pytest.raises(TypeError, match=msg): df.eval("sin(x=a)", engine=self.engine, parser=self.parser) @@ -1763,16 +1764,16 @@ def test_no_new_globals(self, engine, parser): @td.skip_if_no_ne def test_invalid_engine(): - tm.assert_raises_regex(KeyError, 'Invalid engine \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - engine='asdf') + msg = 'Invalid engine \'asdf\' passed' + with pytest.raises(KeyError, match=msg): + pd.eval('x + y', local_dict={'x': 1, 'y': 2}, engine='asdf') @td.skip_if_no_ne def test_invalid_parser(): - tm.assert_raises_regex(KeyError, 'Invalid parser \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - parser='asdf') + msg = 'Invalid parser \'asdf\' passed' + with pytest.raises(KeyError, match=msg): + pd.eval('x + y', local_dict={'x': 1, 'y': 2}, parser='asdf') _parsers = {'python': PythonExprVisitor, 'pytables': pytables.ExprVisitor, @@ -1809,20 +1810,18 @@ def test_invalid_local_variable_reference(engine, parser): for _expr in exprs: if parser != 'pandas': - with tm.assert_raises_regex(SyntaxError, - "The '@' prefix is only"): + with pytest.raises(SyntaxError, match="The '@' prefix is only"): pd.eval(_expr, engine=engine, parser=parser) else: - with tm.assert_raises_regex(SyntaxError, - "The '@' prefix is not"): + with pytest.raises(SyntaxError, match="The '@' prefix is not"): pd.eval(_expr, engine=engine, parser=parser) def test_numexpr_builtin_raises(engine, parser): sin, dotted_line = 1, 2 if engine == 'numexpr': - with tm.assert_raises_regex(NumExprClobberingError, - 'Variables in expression .+'): + msg = 'Variables in expression .+' + with pytest.raises(NumExprClobberingError, match=msg): pd.eval('sin + dotted_line', engine=engine, parser=parser) else: res = pd.eval('sin + dotted_line', engine=engine, parser=parser) @@ -1831,21 +1830,20 @@ def test_numexpr_builtin_raises(engine, parser): def test_bad_resolver_raises(engine, parser): cannot_resolve = 42, 3.0 - with tm.assert_raises_regex(TypeError, 'Resolver of type .+'): + with pytest.raises(TypeError, match='Resolver of type .+'): pd.eval('1 + 2', resolvers=cannot_resolve, engine=engine, parser=parser) def test_empty_string_raises(engine, parser): # GH 13139 - with tm.assert_raises_regex(ValueError, - 'expr cannot be an empty string'): + with pytest.raises(ValueError, match="expr cannot be an empty string"): pd.eval('', engine=engine, parser=parser) def test_more_than_one_expression_raises(engine, parser): - with tm.assert_raises_regex(SyntaxError, - 'only a single expression is allowed'): + with pytest.raises(SyntaxError, match=("only a single expression " + "is allowed")): pd.eval('1 + 1; 2 + 2', engine=engine, parser=parser) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index b5353e34a2311..4dd55321dc71f 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -9,7 +9,6 @@ from pandas.core.sparse.api import SparseDtype import 
pandas.core.dtypes.common as com -import pandas.util.testing as tm import pandas.util._test_decorators as td @@ -19,7 +18,7 @@ class TestPandasDtype(object): # Per issue GH15520 @pytest.mark.parametrize('box', [pd.Timestamp, 'pd.Timestamp', list]) def test_invalid_dtype_error(self, box): - with tm.assert_raises_regex(TypeError, 'not understood'): + with pytest.raises(TypeError, match='not understood'): com.pandas_dtype(box) @pytest.mark.parametrize('dtype', [ diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 2927442f9b6ee..c70a549234a44 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -95,8 +95,8 @@ def test_construction_from_string(self): TypeError, lambda: CategoricalDtype.construct_from_string('foo')) def test_constructor_invalid(self): - with tm.assert_raises_regex(TypeError, - "CategoricalIndex.* must be called"): + msg = "CategoricalIndex.* must be called" + with pytest.raises(TypeError, match=msg): CategoricalDtype("category") def test_is_dtype(self): @@ -455,12 +455,12 @@ def test_construction_not_supported(self, subtype): # GH 19016 msg = ('category, object, and string subtypes are not supported ' 'for IntervalDtype') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype(subtype) def test_construction_errors(self): msg = 'could not construct IntervalDtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype('xx') def test_construction_from_string(self): @@ -475,7 +475,7 @@ def test_construction_from_string_errors(self, string): # these are invalid entirely msg = 'a string needs to be passed, got type' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) @pytest.mark.parametrize('string', [ @@ -484,7 +484,7 @@ def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = 'could not construct IntervalDtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) def test_subclass(self): @@ -698,10 +698,10 @@ def test_categorical_equality_strings(self, categories, ordered, other): assert result is expected def test_invalid_raises(self): - with tm.assert_raises_regex(TypeError, 'ordered'): + with pytest.raises(TypeError, match='ordered'): CategoricalDtype(['a', 'b'], ordered='foo') - with tm.assert_raises_regex(TypeError, 'collection'): + with pytest.raises(TypeError, match='collection'): CategoricalDtype('category') def test_mixed(self): @@ -782,7 +782,7 @@ def test_update_dtype_string(self, ordered): def test_update_dtype_errors(self, bad_dtype): dtype = CategoricalDtype(list('abc'), False) msg = 'a CategoricalDtype must be passed to perform an update, ' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): dtype.update_dtype(bad_dtype) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index e37efce901cbd..1ff3005722341 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -371,7 +371,7 @@ def test_maybe_convert_numeric_infinities(self): tm.assert_numpy_array_equal(out, pos) # too many characters - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): lib.maybe_convert_numeric( np.array(['foo_' + infinity], dtype=object), na_values, maybe_int) 
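A note on the idiom this series applies everywhere: pandas.util.testing.assert_raises_regex is deprecated in favor of the pytest.raises context manager with its match keyword. match is treated as a regular expression and checked with re.search against the string form of the raised exception, which is why several expected messages in the hunks above escape metacharacters such as parentheses and brackets. A minimal, self-contained sketch of the before/after shape follows; parse_positive and its error message are invented for illustration and do not appear anywhere in the patch:

    import pytest


    def parse_positive(value):
        # Hypothetical stand-in for the pandas code under test;
        # invented for this sketch only.
        if value < 0:
            raise ValueError("value must be non-negative")
        return value


    # Old style, removed throughout this series (assert_raises_regex
    # also accepted a callable plus its arguments):
    #   tm.assert_raises_regex(ValueError, "non-negative",
    #                          parse_positive, -1)

    # New style: pytest.raises as a context manager; `match` is
    # re.search'd against str() of the raised exception.
    with pytest.raises(ValueError, match="non-negative"):
        parse_positive(-1)

The block form fails the test both when no exception is raised and when one is raised whose message does not match the pattern, so the converted tests keep exactly the guarantees of the old helper.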
diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 076be53a4a72f..3b966cd8d4774 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -2,7 +2,6 @@ import pandas as pd from pandas.core.internals import ExtensionBlock -import pandas.util.testing as tm from .base import BaseExtensionTests @@ -43,7 +42,7 @@ def test_dataframe_from_series(self, data): def test_series_given_mismatched_index_raises(self, data): msg = 'Length of passed values is 3, index implies 5' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.Series(data[:3], index=[0, 1, 2, 3, 4]) def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 00bb3b5d4eec2..dfc82c6041eae 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -2,7 +2,6 @@ import pytest import pandas as pd -import pandas.util.testing as tm from .base import BaseExtensionTests @@ -168,7 +167,7 @@ def test_take(self, data, na_value, na_cmp): assert result[0] == data[0] assert na_cmp(result[1], na_value) - with tm.assert_raises_regex(IndexError, "out of bounds"): + with pytest.raises(IndexError, match="out of bounds"): data.take([len(data) + 1]) def test_take_empty(self, data, na_value, na_cmp): @@ -180,7 +179,7 @@ def test_take_empty(self, data, na_value, na_cmp): with pytest.raises(IndexError): empty.take([-1]) - with tm.assert_raises_regex(IndexError, "cannot do a non-empty take"): + with pytest.raises(IndexError, match="cannot do a non-empty take"): empty.take([0, 1]) def test_take_negative(self, data): diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 60de5d4db03d9..e9a89c1af2f22 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -125,8 +125,8 @@ def test_fillna_copy_series(self, data_missing): assert ser._values is arr def test_fillna_length_mismatch(self, data_missing): - with (tm.assert_raises_regex(ValueError, - "Length of 'value' does not match.")): + msg = "Length of 'value' does not match." 
+ with pytest.raises(ValueError, match=msg): data_missing.fillna(data_missing.take([1])) def test_combine_le(self, data_repeated): diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 72316b5b7eb91..3d798b2af5c43 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -4,7 +4,6 @@ import pytest import pandas as pd -import pandas.util.testing as tm from .base import BaseExtensionTests @@ -34,12 +33,12 @@ def test_setitem_sequence_mismatched_length_raises(self, data, as_array): value = data._from_sequence(value) xpr = 'cannot set using a {} indexer with a different length' - with tm.assert_raises_regex(ValueError, xpr.format('list-like')): + with pytest.raises(ValueError, match=xpr.format('list-like')): ser[[0, 1]] = value # Ensure no modifications made before the exception self.assert_series_equal(ser, original) - with tm.assert_raises_regex(ValueError, xpr.format('slice')): + with pytest.raises(ValueError, match=xpr.format('slice')): ser[slice(3)] = value self.assert_series_equal(ser, original) @@ -164,7 +163,7 @@ def test_setitem_expand_with_extension(self, data): def test_setitem_frame_invalid_length(self, data): df = pd.DataFrame({"A": [1] * len(data)}) xpr = "Length of values does not match length of index" - with tm.assert_raises_regex(ValueError, xpr): + with pytest.raises(ValueError, match=xpr): df['B'] = data[:5] @pytest.mark.xfail(reason="GH#20441: setitem on extension types.", diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index af5f6bf0a2f65..01efd7ec7e590 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -207,7 +207,7 @@ class TestSetitem(BaseDecimal, base.BaseSetitemTests): def test_series_constructor_coerce_data_to_extension_dtype_raises(): xpr = ("Cannot cast data to extension dtype 'decimal'. 
Pass the " "extension array directly.") - with tm.assert_raises_regex(ValueError, xpr): + with pytest.raises(ValueError, match=xpr): pd.Series([0, 1, 2], dtype=DecimalDtype()) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index b7c61496f0bf0..a9fb22bb72497 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -272,7 +272,7 @@ def test_error(self, data, all_arithmetic_operators): def test_add_series_with_extension_array(self, data): ser = pd.Series(data) - with tm.assert_raises_regex(TypeError, "unsupported"): + with pytest.raises(TypeError, match="unsupported"): ser + data def _check_divmod_op(self, s, op, other, exc=NotImplementedError): diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index b1d08a5620bf3..7fd389e19325c 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -22,7 +22,6 @@ from pandas import Categorical from pandas.api.types import CategoricalDtype from pandas.tests.extension import base -import pandas.util.testing as tm def make_data(): @@ -213,7 +212,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): def test_add_series_with_extension_array(self, data): ser = pd.Series(data) - with tm.assert_raises_regex(TypeError, "cannot perform"): + with pytest.raises(TypeError, match="cannot perform"): ser + data def _check_divmod_op(self, s, op, other, exc=NotImplementedError): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 2c7bc79c324b4..d67c0d0a9c05a 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -21,7 +21,6 @@ from pandas import Interval from pandas.core.arrays import IntervalArray from pandas.tests.extension import base -import pandas.util.testing as tm def make_data(): @@ -137,7 +136,7 @@ def test_fillna_series(self): def test_non_scalar_raises(self, data_missing): msg = "Got a 'list' instead." 
- with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): data_missing.fillna([1, 1]) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 3de3f1dfd9dbc..2e629ccb2981e 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -8,7 +8,6 @@ import pandas as pd from pandas.core.arrays import PeriodArray from pandas.tests.extension import base -import pandas.util.testing as tm @pytest.fixture @@ -114,7 +113,7 @@ def test_add_series_with_extension_array(self, data): s = pd.Series(data) msg = (r"unsupported operand type\(s\) for \+: " r"\'PeriodArray\' and \'PeriodArray\'") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): s + data def test_error(self): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 0752c125b75eb..2b4d1e6f25c65 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -29,7 +29,7 @@ def test_set_index_directly(self, float_string_frame): df.index = idx tm.assert_index_equal(df.index, idx) - with tm.assert_raises_regex(ValueError, 'Length mismatch'): + with pytest.raises(ValueError, match='Length mismatch'): df.index = idx[::2] def test_set_index(self, float_string_frame): @@ -38,7 +38,7 @@ def test_set_index(self, float_string_frame): df = df.set_index(idx) tm.assert_index_equal(df.index, idx) - with tm.assert_raises_regex(ValueError, 'Length mismatch'): + with pytest.raises(ValueError, match='Length mismatch'): df.set_index(idx[::2]) def test_set_index_cast(self): @@ -134,7 +134,7 @@ def test_set_index_pass_single_array(self, frame_of_index_cols, if box == list: # list of strings gets interpreted as list of keys msg = "['one', 'two', 'three', 'one', 'two']" - with tm.assert_raises_regex(KeyError, msg): + with pytest.raises(KeyError, match=msg): df.set_index(key, drop=drop, append=append) else: # np.array/tuple/iter/list-of-list "forget" the name of B @@ -232,12 +232,10 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, def test_set_index_verify_integrity(self, frame_of_index_cols): df = frame_of_index_cols - with tm.assert_raises_regex(ValueError, - 'Index has duplicate keys'): + with pytest.raises(ValueError, match='Index has duplicate keys'): df.set_index('A', verify_integrity=True) # with MultiIndex - with tm.assert_raises_regex(ValueError, - 'Index has duplicate keys'): + with pytest.raises(ValueError, match='Index has duplicate keys'): df.set_index([df['A'], df['A']], verify_integrity=True) @pytest.mark.parametrize('append', [True, False]) @@ -245,21 +243,21 @@ def test_set_index_verify_integrity(self, frame_of_index_cols): def test_set_index_raise(self, frame_of_index_cols, drop, append): df = frame_of_index_cols - with tm.assert_raises_regex(KeyError, "['foo', 'bar', 'baz']"): + with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): # column names are A-E, as well as one tuple df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append) # non-existent key in list with arrays - with tm.assert_raises_regex(KeyError, 'X'): + with pytest.raises(KeyError, match='X'): df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append) msg = 'The parameter "keys" may only contain a combination of.*' # forbidden type, e.g. set - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): df.set_index(set(df['A']), drop=drop, append=append) # forbidden type in list, e.g. 
set
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             df.set_index(['A', df['A'], set(df['A'])],
                          drop=drop, append=append)
 
@@ -427,7 +425,7 @@ def test_set_index_empty_column(self):
     def test_set_columns(self, float_string_frame):
         cols = Index(np.arange(len(float_string_frame.columns)))
         float_string_frame.columns = cols
-        with tm.assert_raises_regex(ValueError, 'Length mismatch'):
+        with pytest.raises(ValueError, match='Length mismatch'):
             float_string_frame.columns = cols[::2]
 
     def test_dti_set_index_reindex(self):
@@ -575,13 +573,13 @@ def test_rename_axis_mapper(self):
         assert result.columns.name == 'meh'
 
         # Test different error cases
-        with tm.assert_raises_regex(TypeError, 'Must pass'):
+        with pytest.raises(TypeError, match='Must pass'):
             df.rename_axis(index='wrong')
 
-        with tm.assert_raises_regex(ValueError, 'Length of names'):
+        with pytest.raises(ValueError, match='Length of names'):
             df.rename_axis(index=['wrong'])
 
-        with tm.assert_raises_regex(TypeError, 'bogus'):
+        with pytest.raises(TypeError, match='bogus'):
             df.rename_axis(bogus=None)
 
     def test_rename_multiindex(self):
@@ -858,9 +856,9 @@ def test_reset_index_level(self):
 
         # Missing levels - for both MultiIndex and single-level Index:
         for idx_lev in ['A', 'B'], ['A']:
-            with tm.assert_raises_regex(KeyError, 'Level E '):
+            with pytest.raises(KeyError, match='Level E '):
                 df.set_index(idx_lev).reset_index(level=['A', 'E'])
-            with tm.assert_raises_regex(IndexError, 'Too many levels'):
+            with pytest.raises(IndexError, match='Too many levels'):
                 df.set_index(idx_lev).reset_index(level=[0, 1, 2])
 
     def test_reset_index_right_dtype(self):
@@ -1054,35 +1052,35 @@ def test_rename_positional_named(self):
         tm.assert_frame_equal(result, expected)
 
     def test_rename_axis_style_raises(self):
-        # https://github.com/pandas-dev/pandas/issues/12392
-        df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1'])
+        # see gh-12392
+        df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"])
 
         # Named target and axis
-        with tm.assert_raises_regex(TypeError, None):
+        over_spec_msg = ("Cannot specify both 'axis' and "
+                         "any of 'index' or 'columns'")
+        with pytest.raises(TypeError, match=over_spec_msg):
             df.rename(index=str.lower, axis=1)
 
-        with tm.assert_raises_regex(TypeError, None):
-            df.rename(index=str.lower, axis='columns')
-
-        with tm.assert_raises_regex(TypeError, None):
-            df.rename(index=str.lower, axis='columns')
+        with pytest.raises(TypeError, match=over_spec_msg):
+            df.rename(index=str.lower, axis="columns")
 
-        with tm.assert_raises_regex(TypeError, None):
-            df.rename(columns=str.lower, axis='columns')
+        with pytest.raises(TypeError, match=over_spec_msg):
+            df.rename(columns=str.lower, axis="columns")
 
-        with tm.assert_raises_regex(TypeError, None):
+        with pytest.raises(TypeError, match=over_spec_msg):
             df.rename(index=str.lower, axis=0)
 
         # Multiple targets and axis
-        with tm.assert_raises_regex(TypeError, None):
-            df.rename(str.lower, str.lower, axis='columns')
+        with pytest.raises(TypeError, match=over_spec_msg):
+            df.rename(str.lower, str.lower, axis="columns")
 
         # Too many targets
-        with tm.assert_raises_regex(TypeError, None):
+        over_spec_msg = "Cannot specify all of 'mapper', 'index', 'columns'."
+ with pytest.raises(TypeError, match=over_spec_msg): df.rename(str.lower, str.lower, str.lower) # Duplicates - with tm.assert_raises_regex(TypeError, "multiple values"): + with pytest.raises(TypeError, match="multiple values"): df.rename(id, mapper=id) def test_reindex_api_equivalence(self): @@ -1279,7 +1277,7 @@ def test_set_axis_inplace(self): # wrong values for the "axis" parameter for axis in 3, 'foo': - with tm.assert_raises_regex(ValueError, 'No axis named'): + with pytest.raises(ValueError, match='No axis named'): df.set_axis(list('abc'), axis=axis, inplace=False) def test_set_axis_prior_to_deprecation_signature(self): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index ab4eaf02f38dd..c9481fef4aa36 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -101,7 +101,8 @@ def wrapper(x): assert lcd_dtype == result1.dtype # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + with pytest.raises(ValueError, match='No axis named 2'): + f(axis=2) # all NA case if has_skipna: @@ -189,7 +190,8 @@ def wrapper(x): check_dtype=False) # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + with pytest.raises(ValueError, match='No axis named 2'): + f(axis=2) # all NA case if has_skipna: @@ -343,7 +345,7 @@ def test_corr_invalid_method(self): df = pd.DataFrame(np.random.normal(size=(10, 2))) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.corr(method="____") def test_cov(self, float_frame, float_string_frame): @@ -1469,7 +1471,7 @@ def test_any_all_level_axis_none_raises(self, method): names=['out', 'in']) ) xpr = "Must specify 'axis' when aggregating by level." 
- with tm.assert_raises_regex(ValueError, xpr): + with pytest.raises(ValueError, match=xpr): getattr(df, method)(axis=None, level='out') # ---------------------------------------------------------------------- @@ -1757,7 +1759,7 @@ def test_numpy_round(self): tm.assert_frame_equal(out, expected) msg = "the 'out' parameter is not supported" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): np.round(df, decimals=0, out=df) def test_round_mixed_type(self): @@ -1997,8 +1999,7 @@ def test_dot(self): expected = a.dot(a.iloc[0]) tm.assert_series_equal(result, expected) - with tm.assert_raises_regex(ValueError, - 'Dot product shape mismatch'): + with pytest.raises(ValueError, match='Dot product shape mismatch'): a.dot(row[:-1]) a = np.random.rand(1, 5) @@ -2015,7 +2016,7 @@ def test_dot(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, 'aligned'): + with pytest.raises(ValueError, match='aligned'): df.dot(df2) @pytest.mark.skipif(not PY35, @@ -2075,7 +2076,7 @@ def test_matmul(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, 'aligned'): + with pytest.raises(ValueError, match='aligned'): operator.matmul(df, df2) @@ -2144,7 +2145,7 @@ def test_n(self, df_strings, nselect_method, n, order): error_msg = self.dtype_error_msg_template.format( column='b', method=nselect_method, dtype='object') - with tm.assert_raises_regex(TypeError, error_msg): + with pytest.raises(TypeError, match=error_msg): getattr(df, nselect_method)(n, order) else: ascending = nselect_method == 'nsmallest' @@ -2162,7 +2163,7 @@ def test_n_error(self, df_main_dtypes, nselect_method, columns): # escape some characters that may be in the repr error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)") .replace("[", "\\[").replace("]", "\\]")) - with tm.assert_raises_regex(TypeError, error_msg): + with pytest.raises(TypeError, match=error_msg): getattr(df, nselect_method)(2, columns) def test_n_all_dtypes(self, df_main_dtypes): diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index d6d932d235eec..295a603850984 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -107,14 +107,17 @@ def test_get_axis(self, float_frame): assert f._get_axis(0) is f.index assert f._get_axis(1) is f.columns - tm.assert_raises_regex( - ValueError, 'No axis named', f._get_axis_number, 2) - tm.assert_raises_regex( - ValueError, 'No axis.*foo', f._get_axis_name, 'foo') - tm.assert_raises_regex( - ValueError, 'No axis.*None', f._get_axis_name, None) - tm.assert_raises_regex(ValueError, 'No axis named', - f._get_axis_number, None) + with pytest.raises(ValueError, match='No axis named'): + f._get_axis_number(2) + + with pytest.raises(ValueError, match='No axis.*foo'): + f._get_axis_name('foo') + + with pytest.raises(ValueError, match='No axis.*None'): + f._get_axis_name(None) + + with pytest.raises(ValueError, match='No axis named'): + f._get_axis_number(None) def test_keys(self, float_frame): getkeys = float_frame.keys diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index b71af4b777022..3cdb223a813b7 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -173,7 +173,7 @@ def _check_unaligned_frame(meth, op, df, other): # NAs 
msg = "Unable to coerce to Series/DataFrame" tm.assert_frame_equal(f(np.nan), o(df, np.nan)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): f(ndim_5) # Series @@ -382,7 +382,7 @@ def test_arith_flex_frame_raise(self, all_arithmetic_operators, for dim in range(3, 6): arr = np.ones((1,) * dim) msg = "Unable to coerce to Series/DataFrame" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): getattr(float_frame, op)(arr) def test_arith_flex_frame_corner(self, float_frame): @@ -397,10 +397,10 @@ def test_arith_flex_frame_corner(self, float_frame): result = float_frame[:0].add(float_frame) tm.assert_frame_equal(result, float_frame * np.nan) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): float_frame.add(float_frame.iloc[0], fill_value=3) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): float_frame.add(float_frame.iloc[0], axis='index', fill_value=3) def test_arith_flex_series(self, simple_frame): @@ -441,10 +441,10 @@ def test_arith_flex_zero_len_raises(self): df_len0 = pd.DataFrame([], columns=['A', 'B']) df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): df.add(ser_len0, fill_value='E') - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): df_len0.sub(df['A'], axis=None, fill_value=3) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 6186ce4d45ef2..de6ac251d117b 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -476,36 +476,36 @@ def test_reindex_positional_warns(self): def test_reindex_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 df = pd.DataFrame({"A": [1, 2, 3], 'B': [4, 5, 6]}) - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex([0, 1], ['A'], axis=1) - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex([0, 1], ['A'], axis='index') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='index') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(columns=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], columns=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, 'Cannot specify all'): + with pytest.raises(TypeError, match='Cannot specify all'): df.reindex([0, 1], [0], ['A']) # Mixing styles - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): 
df.reindex(index=[0, 1], axis='index') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') # Duplicates - with tm.assert_raises_regex(TypeError, "multiple values"): + with pytest.raises(TypeError, match="multiple values"): df.reindex([0, 1], labels=[0, 1]) def test_reindex_single_named_indexer(self): @@ -820,23 +820,23 @@ def test_filter(self): tm.assert_frame_equal(filtered, expected) # pass in None - with tm.assert_raises_regex(TypeError, 'Must pass'): + with pytest.raises(TypeError, match='Must pass'): self.frame.filter() - with tm.assert_raises_regex(TypeError, 'Must pass'): + with pytest.raises(TypeError, match='Must pass'): self.frame.filter(items=None) - with tm.assert_raises_regex(TypeError, 'Must pass'): + with pytest.raises(TypeError, match='Must pass'): self.frame.filter(axis=1) # test mutually exclusive arguments - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', like='bbi') - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', axis=1) - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$') - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi', axis=0) - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi') # objects @@ -1160,5 +1160,5 @@ def test_drop_empty_list(self, index, drop_labels): @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]]) def test_drop_non_empty_list(self, index, drop_labels): # GH 21494 - with tm.assert_raises_regex(KeyError, 'not found in axis'): + with pytest.raises(KeyError, match='not found in axis'): pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 8a7d7d790a1b4..224e56777f6b4 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -474,7 +474,7 @@ def test_convert_objects(self, float_string_frame): # via astype, but errors converted = float_string_frame.copy() - with tm.assert_raises_regex(ValueError, 'invalid literal'): + with pytest.raises(ValueError, match='invalid literal'): converted['H'].astype('int32') # mixed in a single column diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 3b8d6e6c55ed1..22c5d146e1a06 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -127,13 +127,13 @@ def test_append_series_dict(self): columns=['foo', 'bar', 'baz', 'qux']) series = df.loc[4] - with tm.assert_raises_regex(ValueError, - 'Indexes have overlapping values'): + msg = 'Indexes have overlapping values' + with pytest.raises(ValueError, match=msg): df.append(series, verify_integrity=True) + series.name = None - with tm.assert_raises_regex(TypeError, - 'Can only append a Series if ' - 'ignore_index=True'): + msg = 'Can only append a Series if ignore_index=True' + 
with pytest.raises(TypeError, match=msg): df.append(series, verify_integrity=True) result = df.append(series[::-1], ignore_index=True) @@ -321,7 +321,7 @@ def test_update_raise(self): other = DataFrame([[2., nan], [nan, 7]], index=[1, 3], columns=[1, 2]) - with tm.assert_raises_regex(ValueError, "Data overlaps"): + with pytest.raises(ValueError, match="Data overlaps"): df.update(other, raise_conflict=True) def test_update_from_non_df(self): @@ -470,7 +470,7 @@ def test_concat_axis_parameter(self): assert_frame_equal(concatted_1_series, expected_columns_series) # Testing ValueError - with tm.assert_raises_regex(ValueError, 'No axis named'): + with pytest.raises(ValueError, match='No axis named'): pd.concat([series1, series2], axis='something') def test_concat_numerical_names(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 442ce27a730a6..c71d5d9f977f6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -292,10 +292,10 @@ def test_constructor_dict(self): # GH10856 # dict with scalar values should raise error, even if columns passed msg = 'If using all scalar values, you must pass an index' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame({'a': 0.7}) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame({'a': 0.7}, columns=['a']) @pytest.mark.parametrize("scalar", [2, np.nan, None, 'D']) @@ -377,40 +377,43 @@ def test_constructor_multi_index(self): def test_constructor_error_msgs(self): msg = "Empty data passed with indices specified." # passing an empty array with columns specified. - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(np.empty(0), columns=list('abc')) msg = "Mixing dicts with non-Series may lead to ambiguous ordering." 
# mix dict and array, wrong size - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # wrong size ndarray, GH 3105 msg = r"Shape of passed values is \(3, 4\), indices imply \(3, 3\)" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], index=pd.date_range('2000-01-01', periods=3)) # higher dim raise exception - with tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): + with pytest.raises(ValueError, match='Must pass 2-d input'): DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) # wrong size axis labels - with tm.assert_raises_regex(ValueError, "Shape of passed values " - r"is \(3, 2\), indices " - r"imply \(3, 1\)"): + msg = ("Shape of passed values " + r"is \(3, 2\), indices " + r"imply \(3, 1\)") + with pytest.raises(ValueError, match=msg): DataFrame(np.random.rand(2, 3), columns=['A', 'B', 'C'], index=[1]) - with tm.assert_raises_regex(ValueError, "Shape of passed values " - r"is \(3, 2\), indices " - r"imply \(2, 2\)"): + msg = ("Shape of passed values " + r"is \(3, 2\), indices " + r"imply \(2, 2\)") + with pytest.raises(ValueError, match=msg): DataFrame(np.random.rand(2, 3), columns=['A', 'B'], index=[1, 2]) - with tm.assert_raises_regex(ValueError, "If using all scalar " - "values, you must pass " - "an index"): + msg = ("If using all scalar " + "values, you must pass " + "an index") + with pytest.raises(ValueError, match=msg): DataFrame({'a': False, 'b': True}) def test_constructor_with_embedded_frames(self): @@ -637,14 +640,14 @@ def _check_basic_constructor(self, empty): # wrong size axis labels msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(mat, columns=['A', 'B', 'C'], index=[1]) msg = r'Shape of passed values is \(3, 2\), indices imply \(2, 2\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(mat, columns=['A', 'B'], index=[1, 2]) # higher dim raise exception - with tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): + with pytest.raises(ValueError, match='Must pass 2-d input'): DataFrame(empty((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) @@ -829,7 +832,7 @@ def test_constructor_arrays_and_scalars(self): exp = DataFrame({'a': df['a'].values, 'b': [True] * 10}) tm.assert_frame_equal(df, exp) - with tm.assert_raises_regex(ValueError, 'must pass an index'): + with pytest.raises(ValueError, match='must pass an index'): DataFrame({'a': False, 'b': True}) def test_constructor_DataFrame(self): @@ -862,7 +865,7 @@ def test_constructor_more(self): # can't cast mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1) - with tm.assert_raises_regex(ValueError, 'cast'): + with pytest.raises(ValueError, match='cast'): DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(self.frame._series)) @@ -1108,8 +1111,7 @@ class CustomDict(dict): def test_constructor_ragged(self): data = {'A': randn(10), 'B': randn(8)} - with tm.assert_raises_regex(ValueError, - 'arrays must all be same length'): + with pytest.raises(ValueError, match='arrays must all be same length'): DataFrame(data) def test_constructor_scalar(self): @@ -1131,7 +1133,7 @@ def test_constructor_mixed_dict_and_Series(self): assert result.index.is_monotonic # ordering ambiguous, raise 
exception - with tm.assert_raises_regex(ValueError, 'ambiguous ordering'): + with pytest.raises(ValueError, match='ambiguous ordering'): DataFrame({'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) # this is OK though @@ -1185,10 +1187,10 @@ def test_from_dict_columns_parameter(self): tm.assert_frame_equal(result, expected) msg = "cannot use columns parameter with orient='columns'" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), orient='columns', columns=['one', 'two']) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), columns=['one', 'two']) @@ -1299,9 +1301,8 @@ def test_constructor_from_items(self): tm.assert_frame_equal(recons, self.mixed_frame) assert recons['A'].dtype == np.float64 - with tm.assert_raises_regex(TypeError, - "Must pass columns with " - "orient='index'"): + msg = "Must pass columns with orient='index'" + with pytest.raises(TypeError, match=msg): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): DataFrame.from_items(row_items, orient='index') @@ -1331,16 +1332,16 @@ def test_constructor_from_items(self): def test_constructor_from_items_scalars(self): # GH 17312 - with tm.assert_raises_regex(ValueError, - r'The value in each \(key, value\) ' - 'pair must be an array, Series, or dict'): + msg = (r'The value in each \(key, value\) ' + 'pair must be an array, Series, or dict') + with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): DataFrame.from_items([('A', 1), ('B', 4)]) - with tm.assert_raises_regex(ValueError, - r'The value in each \(key, value\) ' - 'pair must be an array, Series, or dict'): + msg = (r'The value in each \(key, value\) ' + 'pair must be an array, Series, or dict') + with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], @@ -1363,8 +1364,8 @@ def test_constructor_mix_series_nonseries(self): 'B': list(self.frame['B'])}, columns=['A', 'B']) tm.assert_frame_equal(df, self.frame.loc[:, ['A', 'B']]) - with tm.assert_raises_regex(ValueError, 'does not match ' - 'index length'): + msg = 'does not match index length' + with pytest.raises(ValueError, match=msg): DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])[:-2]}) def test_constructor_miscast_na_int_dtype(self): @@ -1419,8 +1420,9 @@ def test_constructor_single_value(self): pytest.raises(ValueError, DataFrame, 'a', [1, 2]) pytest.raises(ValueError, DataFrame, 'a', columns=['a', 'c']) - with tm.assert_raises_regex(TypeError, 'incompatible data ' - 'and dtype'): + + msg = 'incompatible data and dtype' + with pytest.raises(TypeError, match=msg): DataFrame('a', [1, 2], ['a', 'c'], float) def test_constructor_with_datetimes(self): @@ -1783,7 +1785,7 @@ def test_from_records_to_records(self): # wrong length msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame.from_records(arr, index=index[:-1]) indexed_frame = DataFrame.from_records(arr, index='f1') diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 2dbf3e9784749..2ad6da084e451 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -329,9 +329,8 @@ def 
test_select_dtypes_not_an_attr_but_still_valid_dtype(self): def test_select_dtypes_empty(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))}) - with tm.assert_raises_regex(ValueError, 'at least one of ' - 'include or exclude ' - 'must be nonempty'): + msg = 'at least one of include or exclude must be nonempty' + with pytest.raises(ValueError, match=msg): df.select_dtypes() def test_select_dtypes_bad_datetime64(self): @@ -341,10 +340,10 @@ def test_select_dtypes_bad_datetime64(self): 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('now', periods=3).values}) - with tm.assert_raises_regex(ValueError, '.+ is too specific'): + with pytest.raises(ValueError, match='.+ is too specific'): df.select_dtypes(include=['datetime64[D]']) - with tm.assert_raises_regex(ValueError, '.+ is too specific'): + with pytest.raises(ValueError, match='.+ is too specific'): df.select_dtypes(exclude=['datetime64[as]']) def test_select_dtypes_datetime_with_tz(self): @@ -373,7 +372,7 @@ def test_select_dtypes_str_raises(self, dtype, arg): msg = "string dtypes are not allowed" kwargs = {arg: [dtype]} - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): df.select_dtypes(**kwargs) def test_select_dtypes_bad_arg_raises(self): @@ -384,8 +383,9 @@ def test_select_dtypes_bad_arg_raises(self): 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('now', periods=3).values}) - with tm.assert_raises_regex(TypeError, 'data type.' - '*not understood'): + + msg = 'data type.*not understood' + with pytest.raises(TypeError, match=msg): df.select_dtypes(['blargy, blarg, blarg']) def test_select_dtypes_typecodes(self): @@ -514,7 +514,7 @@ def test_astype_cast_nan_inf_int(self, val, dtype): msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" df = DataFrame([val]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.astype(dtype) def test_astype_str(self, text_dtype): @@ -661,10 +661,10 @@ def test_astype_categorical(self, dtype): def test_astype_categoricaldtype_class_raises(self, cls): df = DataFrame({"A": ['a', 'a', 'b', 'c']}) xpr = "Expected an instance of {}".format(cls.__name__) - with tm.assert_raises_regex(TypeError, xpr): + with pytest.raises(TypeError, match=xpr): df.astype({"A": cls}) - with tm.assert_raises_regex(TypeError, xpr): + with pytest.raises(TypeError, match=xpr): df['A'].astype(cls) @pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16']) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 2467b2a89472b..b0e7fe2e25a6c 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -54,7 +54,7 @@ def test_getitem(self): assert self.frame[key] is not None assert 'random' not in self.frame - with tm.assert_raises_regex(KeyError, 'random'): + with pytest.raises(KeyError, match='random'): self.frame['random'] df = self.frame.copy() @@ -129,7 +129,7 @@ def test_getitem_listlike(self, idx_type, levels): assert_frame_equal(result, expected) idx = idx_type(keys + [missing]) - with tm.assert_raises_regex(KeyError, 'not in index'): + with pytest.raises(KeyError, match='not in index'): frame[idx] def test_getitem_callable(self): @@ -153,13 +153,12 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A'], check_names=False) assert_series_equal(self.frame['A'], data['B'], check_names=False) - with tm.assert_raises_regex(ValueError, - 'Columns must be 
same length as key'):
+        msg = 'Columns must be same length as key'
+        with pytest.raises(ValueError, match=msg):
             data[['A']] = self.frame[['A', 'B']]
 
-        with tm.assert_raises_regex(ValueError, 'Length of values '
-                                    'does not match '
-                                    'length of index'):
+        msg = 'Length of values does not match length of index'
+        with pytest.raises(ValueError, match=msg):
             data['A'] = range(len(data.index) - 1)
 
         df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_)
@@ -242,13 +241,13 @@ def test_getitem_boolean(self):
         subframe = self.tsframe[indexer]
         tm.assert_index_equal(subindex, subframe.index)
-        with tm.assert_raises_regex(ValueError, 'Item wrong length'):
+        with pytest.raises(ValueError, match='Item wrong length'):
             self.tsframe[indexer[:-1]]
 
         subframe_obj = self.tsframe[indexer_obj]
         assert_frame_equal(subframe_obj, subframe)
 
-        with tm.assert_raises_regex(ValueError, 'boolean values only'):
+        with pytest.raises(ValueError, match='boolean values only'):
             self.tsframe[self.tsframe]
 
         # test that Series work
@@ -545,7 +544,7 @@ def test_setitem_boolean(self):
             assert_almost_equal(df.values, values)
 
         msg = "Must pass DataFrame or 2-d ndarray with boolean values only"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             df[df * 0] = 2
 
         # index with DataFrame
@@ -1466,7 +1465,7 @@ def test_getitem_fancy_ints(self):
 
     def test_getitem_setitem_fancy_exceptions(self):
         ix = self.frame.iloc
-        with tm.assert_raises_regex(IndexingError, 'Too many indexers'):
+        with pytest.raises(IndexingError, match='Too many indexers'):
             ix[:, :, :]
 
         with pytest.raises(IndexingError):
@@ -1803,7 +1802,7 @@ def testit(df):
         with pytest.raises(KeyError):
             self.frame.lookup([self.frame.index[0]], ['xyz'])
 
-        with tm.assert_raises_regex(ValueError, 'same size'):
+        with pytest.raises(ValueError, match='same size'):
             self.frame.lookup(['a', 'b', 'c'], ['a'])
 
     def test_set_value(self):
@@ -2513,7 +2512,7 @@ def test_boolean_indexing(self):
         df1[df1 > 2.0 * df2] = -1
         assert_frame_equal(df1, expected)
 
-        with tm.assert_raises_regex(ValueError, 'Item wrong length'):
+        with pytest.raises(ValueError, match='Item wrong length'):
             df1[df1.index[:-1] > 2] = -1
 
     def test_boolean_indexing_mixed(self):
@@ -2547,7 +2546,7 @@ def test_boolean_indexing_mixed(self):
         msg = ("boolean setting on mixed-type|"
                "not supported between|"
                "unorderable types")
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             # TODO: This message should be the same in PY2/PY3
             df[df > 0.3] = 1
 
@@ -2733,7 +2732,7 @@ def test_where_invalid_input_single(self, cond):
         df = DataFrame({"a": [1, 2, 3]})
         msg = "Boolean array expected for the condition"
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.where(cond)
 
     @pytest.mark.parametrize("cond", [
@@ -2751,7 +2750,7 @@ def test_where_invalid_input_multiple(self, cond):
         df = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]})
         msg = "Boolean array expected for the condition"
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.where(cond)
 
     def test_where_dataframe_col_match(self):
@@ -2773,7 +2772,7 @@ def test_where_ndframe_align(self):
         df = DataFrame([[1, 2, 3], [4, 5, 6]])
 
         cond = [True]
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.where(cond)
 
         expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]])
@@ -2782,7 +2781,7 @@
         tm.assert_frame_equal(out, expected)
 
         cond = np.array([False, True, False, True])
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.where(cond)
 
         expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]])
@@ -2872,9 +2871,9 @@ def test_where_none(self):
         # GH 7656
         df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, {
             'A': np.nan, 'B': 'Test', 'C': np.nan}])
-        expected = df.where(~isna(df), None)
-        with tm.assert_raises_regex(TypeError, 'boolean setting '
-                                    'on mixed-type'):
+        msg = 'boolean setting on mixed-type'
+
+        with pytest.raises(TypeError, match=msg):
             df.where(~isna(df), None, inplace=True)
 
     def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self):
@@ -3162,7 +3161,7 @@ def test_type_error_multiindex(self):
 
         dg = df.pivot_table(index='i', columns='c', values=['x', 'y'])
 
-        with tm.assert_raises_regex(TypeError, "is an invalid key"):
+        with pytest.raises(TypeError, match="is an invalid key"):
             str(dg[:, 0])
 
         index = Index(range(2), name='i')
diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py
index ccdba6df2521a..1c7f3ed834289 100644
--- a/pandas/tests/frame/test_join.py
+++ b/pandas/tests/frame/test_join.py
@@ -94,13 +94,13 @@ def test_join_index(frame):
     tm.assert_index_equal(joined.index, frame.index.sort_values())
     tm.assert_index_equal(joined.columns, expected_columns)
 
-    tm.assert_raises_regex(
-        ValueError, 'join method', f.join, f2, how='foo')
+    with pytest.raises(ValueError, match='join method'):
+        f.join(f2, how='foo')
 
     # corner case - overlapping columns
+    msg = 'columns overlap but no suffix'
    for how in ('outer', 'left', 'inner'):
-        with tm.assert_raises_regex(ValueError, 'columns overlap but '
-                                    'no suffix'):
+        with pytest.raises(ValueError, match=msg):
             frame.join(frame, how=how)
 
 
@@ -131,7 +131,8 @@ def test_join_index_series(frame):
     tm.assert_frame_equal(joined, frame, check_names=False)
 
     s.name = None
-    tm.assert_raises_regex(ValueError, 'must have a name', df.join, s)
+    with pytest.raises(ValueError, match='must have a name'):
+        df.join(s)
 
 
 def test_join_overlap(frame):
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index 9d1bd9e9a0234..200e134838949 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -330,8 +330,8 @@ def test_na_actions_categorical(self):
         res = df.fillna(value={"cats": 3, "vals": "b"})
         tm.assert_frame_equal(res, df_exp_fill)
 
-        with tm.assert_raises_regex(ValueError, "fill value must be "
-                                    "in categories"):
+        with pytest.raises(ValueError, match=("fill value must "
+                                              "be in categories")):
             df.fillna(value={"cats": 4, "vals": "c"})
 
         res = df.fillna(method='pad')
@@ -555,8 +554,7 @@ def test_fillna_dict_series(self):
         assert_frame_equal(result, expected)
 
         # disable this for now
-        with tm.assert_raises_regex(NotImplementedError,
-                                    'column by column'):
+        with pytest.raises(NotImplementedError, match='column by column'):
             df.fillna(df.max(1), axis=1)
 
     def test_fillna_dataframe(self):
@@ -596,7 +595,7 @@ def test_fillna_columns(self):
         assert_frame_equal(result, expected)
 
     def test_fillna_invalid_method(self):
-        with tm.assert_raises_regex(ValueError, 'ffil'):
+        with pytest.raises(ValueError, match='ffil'):
             self.frame.fillna(method='ffil')
 
     def test_fillna_invalid_value(self):
@@ -820,11 +819,10 @@ def test_interp_raise_on_all_object_dtype(self):
             'A': [1, 2, 3],
             'B': [4, 5, 6]},
             dtype='object')
-        with tm.assert_raises_regex(
-                TypeError,
-                "Cannot interpolate with all object-dtype columns "
-                "in the DataFrame. Try setting at least one "
-                "column to a numeric dtype."):
+        msg = ("Cannot interpolate with all object-dtype columns "
+               "in the DataFrame. Try setting at least one "
+               "column to a numeric dtype.")
+        with pytest.raises(TypeError, match=msg):
             df.interpolate()
 
     def test_interp_inplace(self):
diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py
index 102cc52aa46cb..03ca3941f6031 100644
--- a/pandas/tests/frame/test_mutate_columns.py
+++ b/pandas/tests/frame/test_mutate_columns.py
@@ -126,7 +126,7 @@ def test_insert_error_msmgs(self):
         s = DataFrame({'foo': ['a', 'b', 'c', 'a'], 'fiz': [
             'g', 'h', 'i', 'j']}).set_index('foo')
         msg = 'cannot reindex from a duplicate axis'
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df['newcol'] = s
 
         # GH 4107, more descriptive error message
@@ -134,7 +134,7 @@ def test_insert_error_msmgs(self):
                        columns=['a', 'b', 'c', 'd'])
 
         msg = 'incompatible index of inserted column with frame index'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             df['gr'] = df.groupby(['b', 'c']).count()
 
     def test_insert_benchmark(self):
@@ -178,7 +178,7 @@ def test_insert(self):
         result = Series(dict(float32=2, float64=4, int32=1))
         assert (df.get_dtype_counts().sort_index() == result).all()
 
-        with tm.assert_raises_regex(ValueError, 'already exists'):
+        with pytest.raises(ValueError, match='already exists'):
             df.insert(1, 'a', df['b'])
         pytest.raises(ValueError, df.insert, 1, 'c', df['b'])
 
diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py
index 0b32ec89d3909..df88bee3b35bf 100644
--- a/pandas/tests/frame/test_nonunique_indexes.py
+++ b/pandas/tests/frame/test_nonunique_indexes.py
@@ -51,7 +51,7 @@ def check(result, expected=None):
                         [2, 1, 3, 5, 'bah']],
                        columns=['foo', 'bar', 'foo', 'hello', 'string'])
         check(df, expected)
-        with tm.assert_raises_regex(ValueError, 'Length of value'):
+        with pytest.raises(ValueError, match='Length of value'):
             df.insert(0, 'AnotherColumn', range(len(df.index) - 1))
 
         # insert same dtype
@@ -101,8 +101,9 @@ def check(result, expected=None):
         check(df, expected)
 
         # insert a dup
-        tm.assert_raises_regex(ValueError, 'cannot insert',
-                               df.insert, 2, 'new_col', 4.)
+        with pytest.raises(ValueError, match='cannot insert'):
+            df.insert(2, 'new_col', 4.)
+
         df.insert(2, 'new_col', 4., allow_duplicates=True)
         expected = DataFrame([[1, 1, 4., 5., 'bah', 3],
                               [1, 2, 4., 5., 'bah', 3],
diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
index 65459735e639b..89d45639f3e03 100644
--- a/pandas/tests/frame/test_operators.py
+++ b/pandas/tests/frame/test_operators.py
@@ -496,8 +496,7 @@ def test_comp(func):
             tm.assert_numpy_array_equal(result.values,
                                         func(df1.values, df2.values))
 
-            with tm.assert_raises_regex(ValueError,
-                                        'dim must be <= 2'):
+            with pytest.raises(ValueError, match='dim must be <= 2'):
                 func(df1, ndim_5)
 
             result2 = func(self.simple, row)
@@ -508,9 +507,8 @@ def test_comp(func):
             tm.assert_numpy_array_equal(result3.values,
                                         func(self.frame.values, 0))
 
-            with tm.assert_raises_regex(ValueError,
-                                        'Can only compare identically'
-                                        '-labeled DataFrame'):
+            msg = 'Can only compare identically-labeled DataFrame'
+            with pytest.raises(ValueError, match=msg):
                 func(self.simple, self.simple[:2])
 
         test_comp(operator.eq)
@@ -551,11 +549,11 @@ def test_boolean_comparison(self):
         msg1d = 'Unable to coerce to Series, length must be 2: given 3'
         msg2d = 'Unable to coerce to DataFrame, shape must be'
         msg2db = 'operands could not be broadcast together with shapes'
-        with tm.assert_raises_regex(ValueError, msg1d):
+        with pytest.raises(ValueError, match=msg1d):
             # wrong shape
             df > lst
 
-        with tm.assert_raises_regex(ValueError, msg1d):
+        with pytest.raises(ValueError, match=msg1d):
             # wrong shape
             result = df > tup
 
@@ -566,10 +564,10 @@ def test_boolean_comparison(self):
         result = df.values > b_r
         assert_numpy_array_equal(result, expected.values)
 
-        with tm.assert_raises_regex(ValueError, msg2d):
+        with pytest.raises(ValueError, match=msg2d):
             df > b_c
 
-        with tm.assert_raises_regex(ValueError, msg2db):
+        with pytest.raises(ValueError, match=msg2db):
             df.values > b_c
 
         # ==
@@ -577,10 +575,10 @@ def test_boolean_comparison(self):
         result = df == b
         assert_frame_equal(result, expected)
 
-        with tm.assert_raises_regex(ValueError, msg1d):
+        with pytest.raises(ValueError, match=msg1d):
             result = df == lst
 
-        with tm.assert_raises_regex(ValueError, msg1d):
+        with pytest.raises(ValueError, match=msg1d):
             result = df == tup
 
         # broadcasts like ndarray (GH#23000)
@@ -590,7 +588,7 @@ def test_boolean_comparison(self):
         result = df.values == b_r
         assert_numpy_array_equal(result, expected.values)
 
-        with tm.assert_raises_regex(ValueError, msg2d):
+        with pytest.raises(ValueError, match=msg2d):
             df == b_c
 
         assert df.values.shape != b_c.shape
@@ -601,10 +599,10 @@ def test_boolean_comparison(self):
         expected.index = df.index
         expected.columns = df.columns
 
-        with tm.assert_raises_regex(ValueError, msg1d):
+        with pytest.raises(ValueError, match=msg1d):
             result = df == lst
 
-        with tm.assert_raises_regex(ValueError, msg1d):
+        with pytest.raises(ValueError, match=msg1d):
             result = df == tup
 
     def test_combine_generic(self):
@@ -774,10 +772,10 @@ def test_alignment_non_pandas(self):
         msg = 'Unable to coerce to Series, length must be 3: given 2'
         for val in [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]:
 
-            with tm.assert_raises_regex(ValueError, msg):
+            with pytest.raises(ValueError, match=msg):
                 align(df, val, 'index')
 
-            with tm.assert_raises_regex(ValueError, msg):
+            with pytest.raises(ValueError, match=msg):
                 align(df, val, 'columns')
 
         val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
@@ -791,10 +789,10 @@ def test_alignment_non_pandas(self):
         # shape mismatch
         msg = 'Unable to coerce to DataFrame, shape must be'
         val = np.array([[1, 2, 3], [4, 5, 6]])
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             align(df, val, 'index')
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             align(df, val, 'columns')
 
         val = np.zeros((3, 3, 3))
diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py
index d52b848bebad1..2a8add1a5de92 100644
--- a/pandas/tests/frame/test_period.py
+++ b/pandas/tests/frame/test_period.py
@@ -1,3 +1,4 @@
+import pytest
 import numpy as np
 from numpy.random import randn
 from datetime import timedelta
@@ -111,8 +112,8 @@ def _get_with_delta(delta, freq='A-DEC'):
         tm.assert_index_equal(result.columns, exp_index)
 
         # invalid axis
-        tm.assert_raises_regex(
-            ValueError, 'axis', df.to_timestamp, axis=2)
+        with pytest.raises(ValueError, match='axis'):
+            df.to_timestamp(axis=2)
 
         result1 = df.to_timestamp('5t', axis=1)
         result2 = df.to_timestamp('t', axis=1)
diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py
index 3dbac79fed02b..a7c91dd36b2d2 100644
--- a/pandas/tests/frame/test_quantile.py
+++ b/pandas/tests/frame/test_quantile.py
@@ -220,7 +220,7 @@ def test_quantile_datetime(self):
     def test_quantile_invalid(self):
         msg = 'percentiles should all be in the interval \\[0, 1\\]'
         for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
-            with tm.assert_raises_regex(ValueError, msg):
+            with pytest.raises(ValueError, match=msg):
                 self.tsframe.quantile(invalid)
 
     def test_quantile_box(self):
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 3c6f0f0b2ab94..9ab7b04725978 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -141,10 +141,10 @@ def test_query_non_str(self):
         df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'b']})
 
         msg = "expr must be a string to be evaluated"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.query(lambda x: x.B == "b")
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.query(111)
 
     def test_query_empty_string(self):
@@ -152,7 +152,7 @@ def test_query_empty_string(self):
         df = pd.DataFrame({'A': [1, 2, 3]})
 
         msg = "expr cannot be an empty string"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.query('')
 
     def test_eval_resolvers_as_list(self):
@@ -524,8 +524,8 @@ def test_query_builtin(self):
         df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list('abc'))
 
         df.index.name = 'sin'
-        with tm.assert_raises_regex(NumExprClobberingError,
-                                    'Variables in expression.+'):
+        msg = 'Variables in expression.+'
+        with pytest.raises(NumExprClobberingError, match=msg):
             df.query('sin > 5', engine=engine, parser=parser)
 
     def test_query(self):
@@ -657,9 +657,11 @@ def test_query_undefined_local(self):
         from pandas.core.computation.ops import UndefinedVariableError
         engine, parser = self.engine, self.parser
         skip_if_no_pandas_parser(parser)
+
         df = DataFrame(np.random.rand(10, 2), columns=list('ab'))
-        with tm.assert_raises_regex(UndefinedVariableError,
-                                    "local variable 'c' is not defined"):
+        msg = "local variable 'c' is not defined"
+
+        with pytest.raises(UndefinedVariableError, match=msg):
             df.query('a == @c', engine=engine, parser=parser)
 
     def test_index_resolvers_come_after_columns_with_the_same_name(self):
@@ -1037,7 +1039,7 @@ def test_bool_arith_expr(self, parser, engine):
 
     @pytest.mark.parametrize('op', ['+', '-', '*', '/'])
     def test_invalid_type_for_operator_raises(self, parser, engine, op):
         df = DataFrame({'a': [1, 2], 'b': ['c', 'd']})
-        with tm.assert_raises_regex(TypeError,
-                                    r"unsupported operand type\(s\) "
-                                    "for .+: '.+' and '.+'"):
+        msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'"
+
+        with pytest.raises(TypeError, match=msg):
             df.eval('a {0} b'.format(op), engine=engine, parser=parser)
diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py
index 3134686c2a2d9..078c48539de16 100644
--- a/pandas/tests/frame/test_rank.py
+++ b/pandas/tests/frame/test_rank.py
@@ -194,11 +194,11 @@ def test_rank_na_option(self):
 
         # bad values throw error
         msg = "na_option must be one of 'keep', 'top', or 'bottom'"
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.frame.rank(na_option='bad', ascending=False)
 
         # invalid type
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.frame.rank(na_option=True, ascending=False)
 
     def test_rank_axis(self):
diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py
index bf755b1dac4b8..bfb358a3e8c45 100644
--- a/pandas/tests/frame/test_replace.py
+++ b/pandas/tests/frame/test_replace.py
@@ -17,9 +17,6 @@
 from pandas.util.testing import (assert_series_equal,
                                  assert_frame_equal)
-
-import pandas.util.testing as tm
-
 from pandas.tests.frame.common import TestData
 
 
@@ -612,9 +609,9 @@ def test_replace_with_empty_list(self):
         assert_frame_equal(result, expected)
 
         # GH 19266
-        with tm.assert_raises_regex(ValueError, "cannot assign mismatch"):
+        with pytest.raises(ValueError, match="cannot assign mismatch"):
             df.replace({np.nan: []})
-        with tm.assert_raises_regex(ValueError, "cannot assign mismatch"):
+        with pytest.raises(ValueError, match="cannot assign mismatch"):
             df.replace({np.nan: ['dummy', 'alt']})
 
     def test_replace_series_dict(self):
@@ -923,7 +920,7 @@ def test_replace_bool_with_bool(self):
 
     def test_replace_with_dict_with_bool_keys(self):
         df = DataFrame({0: [True, False], 1: [False, True]})
-        with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'):
+        with pytest.raises(TypeError, match='Cannot compare types .+'):
             df.replace({'asdf': 'asdb', True: 'yes'})
 
     def test_replace_truthy(self):
@@ -934,8 +931,7 @@ def test_replace_truthy(self):
 
     def test_replace_int_to_int_chain(self):
         df = DataFrame({'a': lrange(1, 5)})
-        with tm.assert_raises_regex(ValueError,
-                                    "Replacement not allowed .+"):
+        with pytest.raises(ValueError, match="Replacement not allowed .+"):
             df.replace({'a': dict(zip(range(1, 5), range(2, 6)))})
 
     def test_replace_str_to_str_chain(self):
@@ -943,8 +939,7 @@ def test_replace_str_to_str_chain(self):
         astr = a.astype(str)
         bstr = np.arange(2, 6).astype(str)
         df = DataFrame({'a': astr})
-        with tm.assert_raises_regex(ValueError,
-                                    "Replacement not allowed .+"):
+        with pytest.raises(ValueError, match="Replacement not allowed .+"):
             df.replace({'a': dict(zip(astr, bstr))})
 
     def test_replace_swapping_bug(self):
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index ab3d6ca3b19f7..a53b01466c7a4 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -66,7 +66,7 @@ def test_pivot_duplicates(self):
         data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'],
                           'b': ['one', 'two', 'one', 'one', 'two'],
                           'c': [1., 2., 3., 3., 4.]})
-        with tm.assert_raises_regex(ValueError, 'duplicate entries'):
+        with pytest.raises(ValueError, match='duplicate entries'):
             data.pivot('a', 'b', 'c')
 
     def test_pivot_empty(self):
@@ -317,7 +317,7 @@ def test_unstack_fill_frame_categorical(self):
 
         # Fill with non-category results in a TypeError
         msg = r"'fill_value' \('d'\) is not in"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             data.unstack(fill_value='d')
 
         # Fill with category value replaces missing values as expected
diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py
index b99e8983b5ba1..dd70d3df7d1b9 100644
--- a/pandas/tests/frame/test_sorting.py
+++ b/pandas/tests/frame/test_sorting.py
@@ -79,7 +79,7 @@ def test_sort_values(self):
             assert_frame_equal(sorted_df, expected)
 
         msg = r'Length of ascending \(5\) != length of by \(2\)'
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5)
 
     def test_sort_values_inplace(self):
@@ -269,7 +269,7 @@ def test_sort_datetimes(self):
 
     def test_frame_column_inplace_sort_exception(self):
         s = self.frame['A']
-        with tm.assert_raises_regex(ValueError, "This Series is a view"):
+        with pytest.raises(ValueError, match="This Series is a view"):
             s.sort_values(inplace=True)
 
         cp = s.copy()
@@ -447,26 +447,26 @@ def test_sort_index_duplicates(self):
         df = DataFrame([lrange(5, 9), lrange(4)],
                        columns=['a', 'a', 'b', 'b'])
 
-        with tm.assert_raises_regex(ValueError, 'not unique'):
+        with pytest.raises(ValueError, match='not unique'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 df.sort_index(by='a')
-        with tm.assert_raises_regex(ValueError, 'not unique'):
+        with pytest.raises(ValueError, match='not unique'):
             df.sort_values(by='a')
 
-        with tm.assert_raises_regex(ValueError, 'not unique'):
+        with pytest.raises(ValueError, match='not unique'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 df.sort_index(by=['a'])
-        with tm.assert_raises_regex(ValueError, 'not unique'):
+        with pytest.raises(ValueError, match='not unique'):
             df.sort_values(by=['a'])
 
-        with tm.assert_raises_regex(ValueError, 'not unique'):
+        with pytest.raises(ValueError, match='not unique'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 # multi-column 'by' is separate codepath
                 df.sort_index(by=['a', 'b'])
-        with tm.assert_raises_regex(ValueError, 'not unique'):
+        with pytest.raises(ValueError, match='not unique'):
             # multi-column 'by' is separate codepath
             df.sort_values(by=['a', 'b'])
 
@@ -474,11 +474,11 @@ def test_sort_index_duplicates(self):
         # GH4370
         df = DataFrame(np.random.randn(4, 2),
                        columns=MultiIndex.from_tuples([('a', 0), ('a', 1)]))
-        with tm.assert_raises_regex(ValueError, 'level'):
+        with pytest.raises(ValueError, match='level'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 df.sort_index(by='a')
-        with tm.assert_raises_regex(ValueError, 'level'):
+        with pytest.raises(ValueError, match='level'):
             df.sort_values(by='a')
 
         # convert tuples to a list of tuples
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py
index f6980a8585436..b27f60d437f57 100644
--- a/pandas/tests/frame/test_subclass.py
+++ b/pandas/tests/frame/test_subclass.py
@@ -156,7 +156,7 @@ class A(DataFrame):
             @property
             def bar(self):
                 return self.i_dont_exist
-        with tm.assert_raises_regex(AttributeError, '.*i_dont_exist.*'):
+        with pytest.raises(AttributeError, match='.*i_dont_exist.*'):
             A().bar
 
     def test_subclass_align(self):
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index eecbdc0130f02..5794630e72419 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -18,8 +18,7 @@
 from pandas.util.testing import (assert_series_equal,
                                  assert_frame_equal,
-                                 assert_index_equal,
-                                 assert_raises_regex)
+                                 assert_index_equal)
 
 import pandas.util.testing as tm
 from pandas.compat import product
@@ -276,9 +275,9 @@ def test_shift(self):
         assert_frame_equal(shifted2, shifted3)
         assert_frame_equal(ps, shifted2.shift(-1, 'B'))
 
-        tm.assert_raises_regex(ValueError,
-                               'does not match PeriodIndex freq',
-                               ps.shift, freq='D')
+        msg = 'does not match PeriodIndex freq'
+        with pytest.raises(ValueError, match=msg):
+            ps.shift(freq='D')
 
         # shift other axis
         # GH 6371
@@ -360,8 +359,8 @@ def test_tshift(self):
         shifted3 = ps.tshift(freq=offsets.BDay())
         assert_frame_equal(shifted, shifted3)
 
-        tm.assert_raises_regex(
-            ValueError, 'does not match', ps.tshift, freq='M')
+        with pytest.raises(ValueError, match='does not match'):
+            ps.tshift(freq='M')
 
         # DatetimeIndex
         shifted = self.tsframe.tshift(1)
@@ -437,16 +436,16 @@ def test_truncate_nonsortedindex(self):
         df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e']},
                           index=[5, 3, 2, 9, 0])
-        with tm.assert_raises_regex(ValueError,
-                                    'truncate requires a sorted index'):
+        msg = 'truncate requires a sorted index'
+        with pytest.raises(ValueError, match=msg):
             df.truncate(before=3, after=9)
 
         rng = pd.date_range('2011-01-01', '2012-01-01', freq='W')
         ts = pd.DataFrame({'A': np.random.randn(len(rng)),
                            'B': np.random.randn(len(rng))},
                           index=rng)
-        with tm.assert_raises_regex(ValueError,
-                                    'truncate requires a sorted index'):
+        msg = 'truncate requires a sorted index'
+        with pytest.raises(ValueError, match=msg):
             ts.sort_values('A', ascending=False).truncate(before='2011-11',
                                                           after='2011-12')
 
@@ -455,8 +454,8 @@ def test_truncate_nonsortedindex(self):
                            2: np.random.randn(5),
                            0: np.random.randn(5)},
                           columns=[3, 20, 2, 0])
-        with tm.assert_raises_regex(ValueError,
-                                    'truncate requires a sorted index'):
+        msg = 'truncate requires a sorted index'
+        with pytest.raises(ValueError, match=msg):
             df.truncate(before=2, after=20, axis=1)
 
     def test_asfreq(self):
@@ -822,17 +821,17 @@ def test_tz_convert_and_localize(self, fn):
 
         # Bad Inputs
 
         # Not DatetimeIndex / PeriodIndex
-        with assert_raises_regex(TypeError, 'DatetimeIndex'):
+        with pytest.raises(TypeError, match='DatetimeIndex'):
             df = DataFrame(index=int_idx)
             df = getattr(df, fn)('US/Pacific')
 
         # Not DatetimeIndex / PeriodIndex
-        with assert_raises_regex(TypeError, 'DatetimeIndex'):
+        with pytest.raises(TypeError, match='DatetimeIndex'):
             df = DataFrame(np.ones(5),
                            MultiIndex.from_arrays([int_idx, l0]))
             df = getattr(df, fn)('US/Pacific', level=0)
 
         # Invalid level
-        with assert_raises_regex(ValueError, 'not valid'):
+        with pytest.raises(ValueError, match='not valid'):
             df = DataFrame(index=l0)
             df = getattr(df, fn)('US/Pacific', level=1)
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index aa91b7510a2b5..b56375d0a8670 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -621,12 +621,12 @@ def _make_frame(names=None):
 
             for i in [6, 7]:
                 msg = 'len of {i}, but only 5 lines in file'.format(i=i)
-                with tm.assert_raises_regex(ParserError, msg):
+                with pytest.raises(ParserError, match=msg):
                     read_csv(path, header=lrange(i), index_col=0)
 
             # write with cols
-            with tm.assert_raises_regex(TypeError, 'cannot specify cols '
-                                        'with a MultiIndex'):
+            msg = 'cannot specify cols with a MultiIndex'
+            with pytest.raises(TypeError, match=msg):
                 df.to_csv(path, columns=['foo', 'bar'])
 
         with ensure_clean('__tmp_to_csv_multiindex__') as path:
@@ -1124,11 +1124,11 @@ def test_to_csv_quoting(self):
         assert result == expected
 
         msg = "need to escape, but no escapechar set"
-        tm.assert_raises_regex(csv.Error, msg, df.to_csv,
-                               quoting=csv.QUOTE_NONE)
-        tm.assert_raises_regex(csv.Error, msg, df.to_csv,
-                               quoting=csv.QUOTE_NONE,
-                               escapechar=None)
+        with pytest.raises(csv.Error, match=msg):
+            df.to_csv(quoting=csv.QUOTE_NONE)
+
+        with pytest.raises(csv.Error, match=msg):
+            df.to_csv(quoting=csv.QUOTE_NONE, escapechar=None)
 
         expected_rows = [',c_bool,c_float,c_int,c_string',
                          '0,True,1.0,42.0,a',
diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py
index 2de0e866f6e70..c609712b471e7 100644
--- a/pandas/tests/frame/test_validate.py
+++ b/pandas/tests/frame/test_validate.py
@@ -1,7 +1,6 @@
 from pandas.core.frame import DataFrame
 
 import pytest
-import pandas.util.testing as tm
 
 
 @pytest.fixture
@@ -29,5 +28,5 @@ def test_validate_bool_args(self, dataframe, func, inplace):
         elif func == "sort_values":
             kwargs["by"] = ["a"]
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             getattr(dataframe, func)(**kwargs)
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
index 46bb6303d8908..753e6161d8052 100644
--- a/pandas/tests/generic/test_generic.py
+++ b/pandas/tests/generic/test_generic.py
@@ -15,8 +15,7 @@
 import pandas.io.formats.printing as printing
 from pandas.compat import range, zip, PY3
-from pandas.util.testing import (assert_raises_regex,
-                                 assert_series_equal,
+from pandas.util.testing import (assert_series_equal,
                                  assert_panel_equal,
                                  assert_frame_equal)
 
@@ -476,16 +475,16 @@ def test_unexpected_keyword(self):  # GH8597
         ts = df['joe'].copy()
         ts[2] = np.nan
 
-        with assert_raises_regex(TypeError, 'unexpected keyword'):
+        with pytest.raises(TypeError, match='unexpected keyword'):
             df.drop('joe', axis=1, in_place=True)
 
-        with assert_raises_regex(TypeError, 'unexpected keyword'):
+        with pytest.raises(TypeError, match='unexpected keyword'):
             df.reindex([1, 0], inplace=True)
 
-        with assert_raises_regex(TypeError, 'unexpected keyword'):
+        with pytest.raises(TypeError, match='unexpected keyword'):
             ca.fillna(0, inplace=True)
 
-        with assert_raises_regex(TypeError, 'unexpected keyword'):
+        with pytest.raises(TypeError, match='unexpected keyword'):
             ts.fillna(0, in_place=True)
 
     # See gh-12301
@@ -494,13 +493,13 @@ def test_stat_unexpected_keyword(self):
         starwars = 'Star Wars'
         errmsg = 'unexpected keyword'
 
-        with assert_raises_regex(TypeError, errmsg):
+        with pytest.raises(TypeError, match=errmsg):
             obj.max(epic=starwars)  # stat_function
-        with assert_raises_regex(TypeError, errmsg):
+        with pytest.raises(TypeError, match=errmsg):
             obj.var(epic=starwars)  # stat_function_ddof
-        with assert_raises_regex(TypeError, errmsg):
+        with pytest.raises(TypeError, match=errmsg):
             obj.sum(epic=starwars)  # cum_function
-        with assert_raises_regex(TypeError, errmsg):
+        with pytest.raises(TypeError, match=errmsg):
             obj.any(epic=starwars)  # logical_function
 
     def test_api_compat(self):
@@ -520,13 +519,13 @@ def test_stat_non_defaults_args(self):
         out = np.array([0])
         errmsg = "the 'out' parameter is not supported"
 
-        with assert_raises_regex(ValueError, errmsg):
+        with pytest.raises(ValueError, match=errmsg):
             obj.max(out=out)  # stat_function
-        with assert_raises_regex(ValueError, errmsg):
+        with pytest.raises(ValueError, match=errmsg):
             obj.var(out=out)  # stat_function_ddof
-        with assert_raises_regex(ValueError, errmsg):
+        with pytest.raises(ValueError, match=errmsg):
             obj.sum(out=out)  # cum_function
-        with assert_raises_regex(ValueError, errmsg):
+        with pytest.raises(ValueError, match=errmsg):
             obj.any(out=out)  # logical_function
 
     def test_truncate_out_of_bounds(self):
@@ -807,23 +806,23 @@ def test_transpose(self):
         for p in [tm.makePanel()]:
             tm.assert_panel_equal(p.transpose(2, 0, 1)
                                   .transpose(1, 2, 0), p)
-            tm.assert_raises_regex(TypeError, msg, p.transpose,
-                                   2, 0, 1, axes=(2, 0, 1))
+            with pytest.raises(TypeError, match=msg):
+                p.transpose(2, 0, 1, axes=(2, 0, 1))
 
     def test_numpy_transpose(self):
         msg = "the 'axes' parameter is not supported"
 
         s = tm.makeFloatSeries()
-        tm.assert_series_equal(
-            np.transpose(s), s)
-        tm.assert_raises_regex(ValueError, msg,
-                               np.transpose, s, axes=1)
+        tm.assert_series_equal(np.transpose(s), s)
+
+        with pytest.raises(ValueError, match=msg):
+            np.transpose(s, axes=1)
 
         df = tm.makeTimeDataFrame()
-        tm.assert_frame_equal(np.transpose(
-            np.transpose(df)), df)
-        tm.assert_raises_regex(ValueError, msg,
-                               np.transpose, df, axes=1)
+        tm.assert_frame_equal(np.transpose(np.transpose(df)), df)
+
+        with pytest.raises(ValueError, match=msg):
+            np.transpose(df, axes=1)
 
         with catch_warnings(record=True):
             simplefilter("ignore", FutureWarning)
@@ -869,16 +868,16 @@ def test_take_invalid_kwargs(self):
         for obj in (s, df, p):
             msg = r"take\(\) got an unexpected keyword argument 'foo'"
-            tm.assert_raises_regex(TypeError, msg, obj.take,
-                                   indices, foo=2)
+            with pytest.raises(TypeError, match=msg):
+                obj.take(indices, foo=2)
 
             msg = "the 'out' parameter is not supported"
-            tm.assert_raises_regex(ValueError, msg, obj.take,
-                                   indices, out=indices)
+            with pytest.raises(ValueError, match=msg):
+                obj.take(indices, out=indices)
 
             msg = "the 'mode' parameter is not supported"
-            tm.assert_raises_regex(ValueError, msg, obj.take,
-                                   indices, mode='clip')
+            with pytest.raises(ValueError, match=msg):
+                obj.take(indices, mode='clip')
 
     def test_equals(self):
         s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
@@ -1018,7 +1017,7 @@ def test_pipe_panel(self):
             assert_panel_equal(result, expected)
 
         with pytest.raises(ValueError):
-            result = wp.pipe((f, 'y'), x=1, y=1)
+            wp.pipe((f, 'y'), x=1, y=1)
 
     @pytest.mark.parametrize('box', [pd.Series, pd.DataFrame])
     def test_axis_classmethods(self, box):
diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py
index 4d78270c856ae..5cb5e935752a7 100644
--- a/pandas/tests/generic/test_label_or_level_utils.py
+++ b/pandas/tests/generic/test_label_or_level_utils.py
@@ -128,7 +128,7 @@ def test_is_level_reference_series_axis1_error(df):
 
     # Make series with L1 as index
     s = df.set_index('L1').L2
 
-    with tm.assert_raises_regex(ValueError, "No axis named 1"):
+    with pytest.raises(ValueError, match="No axis named 1"):
         s._is_level_reference('L1', axis=1)
 
 
@@ -138,7 +138,7 @@ def test_is_level_reference_panel_error(panel):
     msg = ("_is_level_reference is not implemented for {type}"
           .format(type=type(panel)))
 
-    with tm.assert_raises_regex(NotImplementedError, msg):
+    with pytest.raises(NotImplementedError, match=msg):
         panel._is_level_reference('L1', axis=0)
 
 
@@ -146,7 +146,7 @@ def test_is_label_reference_panel_error(panel):
     msg = ("_is_label_reference is not implemented for {type}"
           .format(type=type(panel)))
 
-    with tm.assert_raises_regex(NotImplementedError, msg):
+    with pytest.raises(NotImplementedError, match=msg):
         panel._is_label_reference('L1', axis=0)
 
 
@@ -154,7 +154,7 @@ def test_is_label_or_level_reference_panel_error(panel):
     msg = ("_is_label_or_level_reference is not implemented for {type}"
           .format(type=type(panel)))
 
-    with tm.assert_raises_regex(NotImplementedError, msg):
+    with pytest.raises(NotImplementedError, match=msg):
         panel._is_label_or_level_reference('L1', axis=0)
 
 
@@ -176,7 +176,7 @@ def test_check_label_or_level_ambiguity_df(df_ambig, axis):
 
     # df_ambig has both an on-axis level and off-axis label named L1
    # Therefore, L1 is ambiguous.
-    with tm.assert_raises_regex(ValueError, msg):
+    with pytest.raises(ValueError, match=msg):
         df_ambig._check_label_or_level_ambiguity("L1", axis=axis)
 
     # df_ambig has an on-axis level named L2,, and it is not ambiguous.
@@ -209,7 +209,7 @@ def test_check_label_or_level_ambiguity_series_axis1_error(df):
 
     # Make series with L1 as index
     s = df.set_index('L1').L2
 
-    with tm.assert_raises_regex(ValueError, "No axis named 1"):
+    with pytest.raises(ValueError, match="No axis named 1"):
         s._check_label_or_level_ambiguity('L1', axis=1)
 
 
@@ -219,7 +219,7 @@ def test_check_label_or_level_ambiguity_panel_error(panel):
     msg = ("_check_label_or_level_ambiguity is not implemented for {type}"
          .format(type=type(panel)))
 
-    with tm.assert_raises_regex(NotImplementedError, msg):
+    with pytest.raises(NotImplementedError, match=msg):
         panel._check_label_or_level_ambiguity("L1", axis=0)
 
 
@@ -294,7 +294,7 @@ def test_get_label_or_level_values_df_duplabels(df_duplabels, axis):
     else:
         expected_msg = "The index label 'L2' is not unique"
 
-    with tm.assert_raises_regex(ValueError, expected_msg):
+    with pytest.raises(ValueError, match=expected_msg):
         assert_label_values(df_duplabels, ['L2'], axis=axis)
 
 
@@ -316,7 +316,7 @@ def test_get_label_or_level_values_series_axis1_error(df):
 
     # Make series with L1 as index
     s = df.set_index('L1').L2
 
-    with tm.assert_raises_regex(ValueError, "No axis named 1"):
+    with pytest.raises(ValueError, match="No axis named 1"):
         s._get_label_or_level_values('L1', axis=1)
 
 
@@ -326,7 +326,7 @@ def test_get_label_or_level_values_panel_error(panel):
     msg = ("_get_label_or_level_values is not implemented for {type}"
          .format(type=type(panel)))
 
-    with tm.assert_raises_regex(NotImplementedError, msg):
+    with pytest.raises(NotImplementedError, match=msg):
         panel._get_label_or_level_values('L1', axis=0)
 
 
@@ -371,7 +371,7 @@ def test_drop_labels_or_levels_df(df_levels, axis):
     assert_labels_dropped(df_levels, expected_labels, axis=axis)
     assert_levels_dropped(df_levels, expected_levels, axis=axis)
 
-    with tm.assert_raises_regex(ValueError, "not valid labels or levels"):
+    with pytest.raises(ValueError, match="not valid labels or levels"):
         df_levels._drop_labels_or_levels('L4', axis=axis)
 
 
@@ -383,14 +383,14 @@ def test_drop_labels_or_levels_series(df):
     s = df.set_index('L1').L2
     assert_levels_dropped(s, ['L1'], axis=0)
 
-    with tm.assert_raises_regex(ValueError, "not valid labels or levels"):
+    with pytest.raises(ValueError, match="not valid labels or levels"):
         s._drop_labels_or_levels('L4', axis=0)
 
     # Make series with L1 and L2 as index
     s = df.set_index(['L1', 'L2']).L3
     assert_levels_dropped(s, ['L1', 'L2'], axis=0)
 
-    with tm.assert_raises_regex(ValueError, "not valid labels or levels"):
+    with pytest.raises(ValueError, match="not valid labels or levels"):
         s._drop_labels_or_levels('L4', axis=0)
 
 
@@ -400,5 +400,5 @@ def test_drop_labels_or_levels_panel_error(panel):
     msg = ("_drop_labels_or_levels is not implemented for {type}"
          .format(type=type(panel)))
 
-    with tm.assert_raises_regex(NotImplementedError, msg):
+    with pytest.raises(NotImplementedError, match=msg):
         panel._drop_labels_or_levels('L1', axis=0)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index b0d6a0e83440a..52bfee66f94f8 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -27,9 +27,9 @@ def test_agg_must_agg(df):
     grouped = df.groupby('A')['C']
 
     msg = "Must produce aggregated value"
-    with tm.assert_raises_regex(Exception, msg):
+    with pytest.raises(Exception, match=msg):
         grouped.agg(lambda x: x.describe())
-    with tm.assert_raises_regex(Exception, msg):
+    with pytest.raises(Exception, match=msg):
         grouped.agg(lambda x: x.index[:2])
 
 
@@ -217,7 +217,7 @@ def test_agg_multiple_functions_too_many_lambdas(df):
     funcs = ['mean', lambda x: x.mean(), lambda x: x.std()]
 
     msg = 'Function names must be unique, found multiple named '
-    with tm.assert_raises_regex(SpecificationError, msg):
+    with pytest.raises(SpecificationError, match=msg):
         grouped.agg(funcs)
 
 
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index d8a545b323674..d0e1f04238366 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -82,12 +82,12 @@ def test_cython_agg_nothing_to_agg():
                        'b': ['foo', 'bar'] * 25})
     msg = "No numeric types to aggregate"
 
-    with tm.assert_raises_regex(DataError, msg):
+    with pytest.raises(DataError, match=msg):
         frame.groupby('a')['b'].mean()
 
     frame = DataFrame({'a': np.random.randint(0, 5, 50),
                        'b': ['foo', 'bar'] * 25})
-    with tm.assert_raises_regex(DataError, msg):
+    with pytest.raises(DataError, match=msg):
         frame[['b']].groupby(frame['a']).mean()
 
 
@@ -96,7 +96,7 @@ def test_cython_agg_nothing_to_agg_with_dates():
                        'b': ['foo', 'bar'] * 25,
                        'dates': pd.date_range('now', periods=50, freq='T')})
     msg = "No numeric types to aggregate"
-    with tm.assert_raises_regex(DataError, msg):
+    with pytest.raises(DataError, match=msg):
         frame.groupby('b').dates.mean()
 
 
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index c35405ad739c9..fca863b4d8eb0 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -268,7 +268,7 @@ def test_agg_nested_dicts():
     g = df.groupby(['A', 'B'])
 
     msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
-    with tm.assert_raises_regex(SpecificationError, msg):
+    with pytest.raises(SpecificationError, match=msg):
         g.aggregate({'r1': {'C': ['mean', 'sum']},
                      'r2': {'D': ['mean', 'sum']}})
 
@@ -302,7 +302,7 @@ def raiseException(df):
         pprint_thing(df.to_string())
         raise TypeError('test')
 
-    with tm.assert_raises_regex(TypeError, 'test'):
+    with pytest.raises(TypeError, match='test'):
         df.groupby(0).agg(raiseException)
 
 
diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py
index 873d9f6076b69..205b06c5b679f 100644
--- a/pandas/tests/groupby/test_filters.py
+++ b/pandas/tests/groupby/test_filters.py
@@ -542,8 +542,7 @@ def test_filter_enforces_scalarness():
         ['worst', 'd', 'y'],
         ['best', 'd', 'z'],
     ], columns=['a', 'b', 'c'])
-    with tm.assert_raises_regex(TypeError,
-                                'filter function returned a.*'):
+    with pytest.raises(TypeError, match='filter function returned a.*'):
         df.groupby('c').filter(lambda g: g['a'] == 'best')
 
 
@@ -557,8 +556,7 @@ def test_filter_non_bool_raises():
         ['worst', 'd', 1],
         ['best', 'd', 1],
     ], columns=['a', 'b', 'c'])
-    with tm.assert_raises_regex(TypeError,
-                                'filter function returned a.*'):
+    with pytest.raises(TypeError, match='filter function returned a.*'):
         df.groupby('a').filter(lambda g: g.c.mean())
 
 
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 775747ce0c6c1..646445623778b 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -517,18 +517,20 @@ def test_nsmallest():
     tm.assert_series_equal(gb.nsmallest(3, keep='last'), e)
 
 
-def test_numpy_compat():
+@pytest.mark.parametrize("func", [
+    'mean', 'var', 'std', 'cumprod', 'cumsum'
+])
+def test_numpy_compat(func):
     # see gh-12811
     df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]})
     g = df.groupby('A')
 
     msg = "numpy operations are not valid with groupby"
 
-    for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'):
-        tm.assert_raises_regex(UnsupportedFunctionCall, msg,
-                               getattr(g, func), 1, 2, 3)
-        tm.assert_raises_regex(UnsupportedFunctionCall, msg,
-                               getattr(g, func), foo=1)
+    with pytest.raises(UnsupportedFunctionCall, match=msg):
+        getattr(g, func)(1, 2, 3)
+    with pytest.raises(UnsupportedFunctionCall, match=msg):
+        getattr(g, func)(foo=1)
 
 
 def test_cummin_cummax():
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 3cdd0965ccfd0..e92e5a70b263f 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -629,7 +629,7 @@ def test_as_index_series_column_slice_raises(df):
     grouped = df.groupby('A', as_index=False)
     msg = r"Column\(s\) C already selected"
 
-    with tm.assert_raises_regex(IndexError, msg):
+    with pytest.raises(IndexError, match=msg):
         grouped['C'].__getitem__('D')
 
 
@@ -1679,7 +1679,7 @@ def test_tuple_correct_keyerror():
     df = pd.DataFrame(1,
                       index=range(3),
                       columns=pd.MultiIndex.from_product([[1, 2], [3, 4]]))
-    with tm.assert_raises_regex(KeyError, "(7, 8)"):
+    with pytest.raises(KeyError, match="(7, 8)"):
         df.groupby((7, 8)).mean()
 
 
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index e7c0881b11871..546a37bf3d56a 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -21,7 +21,7 @@
 # selection
 # --------------------------------
 
-class TestSelection():
+class TestSelection(object):
 
     def test_select_bad_cols(self):
         df = DataFrame([[1, 2]], columns=['A', 'B'])
@@ -29,7 +29,7 @@ def test_select_bad_cols(self):
         pytest.raises(KeyError, g.__getitem__, ['C'])  # g[['C']]
 
         pytest.raises(KeyError, g.__getitem__, ['A', 'C'])  # g[['A', 'C']]
-        with tm.assert_raises_regex(KeyError, '^[^A]+$'):
+        with pytest.raises(KeyError, match='^[^A]+$'):
             # A should not be referenced as a bad column...
             # will have to rethink regex if you change message!
             g[['A', 'C']]
@@ -506,18 +506,14 @@ def test_groupby_args(self, mframe):
         # PR8618 and issue 8015
         frame = mframe
 
-        def j():
+        msg = "You have to supply one of 'by' and 'level'"
+        with pytest.raises(TypeError, match=msg):
             frame.groupby()
 
-        tm.assert_raises_regex(TypeError, "You have to supply one of "
-                               "'by' and 'level'", j)
-
-        def k():
+        msg = "You have to supply one of 'by' and 'level'"
+        with pytest.raises(TypeError, match=msg):
             frame.groupby(by=None, level=None)
 
-        tm.assert_raises_regex(TypeError, "You have to supply one of "
-                               "'by' and 'level'", k)
-
     @pytest.mark.parametrize('sort,labels', [
         [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]],
         [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]]
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index f337af4d39e54..e7e91572c56d1 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -249,7 +249,7 @@ def test_rank_object_raises(ties_method, ascending, na_option, pct, vals):
     df = DataFrame({'key': ['foo'] * 5, 'val': vals})
 
-    with tm.assert_raises_regex(TypeError, "not callable"):
+    with pytest.raises(TypeError, match="not callable"):
         df.groupby('key').rank(method=ties_method,
                                ascending=ascending,
                                na_option=na_option, pct=pct)
@@ -269,7 +269,7 @@ def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals):
     df = DataFrame({'key': ['foo'] * 5, 'val': vals})
     msg = "na_option must be one of 'keep', 'top', or 'bottom'"
 
-    with tm.assert_raises_regex(ValueError, msg):
+    with pytest.raises(ValueError, match=msg):
         df.groupby('key').rank(method=ties_method,
                                ascending=ascending,
                                na_option=na_option, pct=pct)
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 4cf63a321a47a..dbbf6e583796f 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -658,11 +658,11 @@ def test_transform_with_non_scalar_group():
     df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
                       columns=cols,
                       index=['A', 'C', 'G', 'T'])
-    tm.assert_raises_regex(ValueError, 'transform must return '
-                           'a scalar value for each '
-                           'group.*',
-                           df.groupby(axis=1, level=1).transform,
-                           lambda z: z.div(z.sum(axis=1), axis=0))
+
+    msg = 'transform must return a scalar value for each group.*'
+    with pytest.raises(ValueError, match=msg):
+        df.groupby(axis=1, level=1).transform(
+            lambda z: z.div(z.sum(axis=1), axis=0))
 
 
 @pytest.mark.parametrize('cols,exp,comp_func', [
diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py
index ae033f7b3f251..d5096ee99c8b0 100644
--- a/pandas/tests/groupby/test_whitelist.py
+++ b/pandas/tests/groupby/test_whitelist.py
@@ -263,7 +263,7 @@ def test_groupby_blacklist(df_letters):
     for obj in (df, s):
         gb = obj.groupby(df.letters)
         msg = fmt.format(bl, type(gb).__name__)
-        with tm.assert_raises_regex(AttributeError, msg):
+        with pytest.raises(AttributeError, match=msg):
             getattr(gb, bl)
 
 
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index c5cbaea23df76..4b0daac34c2e3 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -125,39 +125,40 @@ def test_create_index_existing_name(self):
 
     def test_numeric_compat(self):
         idx = self.create_index()
-        tm.assert_raises_regex(TypeError, "cannot perform __mul__",
-                               lambda: idx * 1)
-        tm.assert_raises_regex(TypeError, "cannot perform __rmul__",
-                               lambda: 1 * idx)
-
-        div_err = "cannot perform __truediv__" if PY3 \
-            else "cannot perform __div__"
-        tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1)
+        with pytest.raises(TypeError, match="cannot perform __mul__"):
+            idx * 1
+        with pytest.raises(TypeError, match="cannot perform __rmul__"):
+            1 * idx
+
+        div_err = ("cannot perform __truediv__" if PY3
+                   else "cannot perform __div__")
+        with pytest.raises(TypeError, match=div_err):
+            idx / 1
+
         div_err = div_err.replace(' __', ' __r')
-        tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx)
-        tm.assert_raises_regex(TypeError, "cannot perform __floordiv__",
-                               lambda: idx // 1)
-        tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__",
-                               lambda: 1 // idx)
+        with pytest.raises(TypeError, match=div_err):
+            1 / idx
+        with pytest.raises(TypeError, match="cannot perform __floordiv__"):
+            idx // 1
+        with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
+            1 // idx
 
     def test_logical_compat(self):
         idx = self.create_index()
-        tm.assert_raises_regex(TypeError, 'cannot perform all',
-                               lambda: idx.all())
-        tm.assert_raises_regex(TypeError, 'cannot perform any',
-                               lambda: idx.any())
+        with pytest.raises(TypeError, match='cannot perform all'):
+            idx.all()
+        with pytest.raises(TypeError, match='cannot perform any'):
+            idx.any()
 
     def test_boolean_context_compat(self):
 
         # boolean context compat
         idx = self.create_index()
 
-        def f():
+        with pytest.raises(ValueError, match='The truth value of a'):
             if idx:
                 pass
 
-        tm.assert_raises_regex(ValueError, 'The truth value of a', f)
-
     def test_reindex_base(self):
         idx = self.create_index()
         expected = np.arange(idx.size, dtype=np.intp)
@@ -165,7 +166,7 @@ def test_reindex_base(self):
         actual = idx.get_indexer(idx)
         tm.assert_numpy_array_equal(expected, actual)
 
-        with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
+        with pytest.raises(ValueError, match='Invalid fill method'):
             idx.get_indexer(idx, method='invalid')
 
     def test_get_indexer_consistency(self):
@@ -180,8 +181,8 @@ def test_get_indexer_consistency(self):
                 assert indexer.dtype == np.intp
             else:
                 e = "Reindexing only valid with uniquely valued Index objects"
-                with tm.assert_raises_regex(InvalidIndexError, e):
-                    indexer = index.get_indexer(index[0:2])
+                with pytest.raises(InvalidIndexError, match=e):
+                    index.get_indexer(index[0:2])
 
                 indexer, _ = index.get_indexer_non_unique(index[0:2])
                 assert isinstance(indexer, np.ndarray)
@@ -227,9 +228,8 @@ def test_repr_max_seq_item_setting(self):
             assert '...' not in str(idx)
 
     def test_wrong_number_names(self, indices):
-        def testit(ind):
-            ind.names = ["apple", "banana", "carrot"]
-        tm.assert_raises_regex(ValueError, "^Length", testit, indices)
+        with pytest.raises(ValueError, match="^Length"):
+            indices.names = ["apple", "banana", "carrot"]
 
     def test_set_name_methods(self, indices):
         new_name = "This is the new name for this index"
@@ -247,10 +247,10 @@ def test_set_name_methods(self, indices):
         assert res is None
         assert indices.name == new_name
         assert indices.names == [new_name]
-        # with tm.assert_raises_regex(TypeError, "list-like"):
+        # with pytest.raises(TypeError, match="list-like"):
         #    # should still fail even if it would be the right length
         #    ind.set_names("a")
-        with tm.assert_raises_regex(ValueError, "Level must be None"):
+        with pytest.raises(ValueError, match="Level must be None"):
             indices.set_names("a", level=0)
 
         # rename in place just leaves tuples and other containers alone
@@ -261,8 +261,9 @@ def test_set_name_methods(self, indices):
 
     def test_hash_error(self, indices):
         index = indices
-        tm.assert_raises_regex(TypeError, "unhashable type: %r" %
-                               type(index).__name__, hash, indices)
+        with pytest.raises(TypeError, match=("unhashable type: %r" %
+                                             type(index).__name__)):
+            hash(indices)
 
     def test_copy_name(self):
         # gh-12309: Check that the "name" argument
@@ -511,16 +512,16 @@ def test_numpy_argsort(self):
             # backwards compatibility concerns
             if isinstance(type(ind), (CategoricalIndex, RangeIndex)):
                 msg = "the 'axis' parameter is not supported"
-                tm.assert_raises_regex(ValueError, msg,
-                                       np.argsort, ind, axis=1)
+                with pytest.raises(ValueError, match=msg):
+                    np.argsort(ind, axis=1)
 
                 msg = "the 'kind' parameter is not supported"
-                tm.assert_raises_regex(ValueError, msg, np.argsort,
-                                       ind, kind='mergesort')
+                with pytest.raises(ValueError, match=msg):
+                    np.argsort(ind, kind='mergesort')
 
                 msg = "the 'order' parameter is not supported"
-                tm.assert_raises_regex(ValueError, msg, np.argsort,
-                                       ind, order=('a', 'b'))
+                with pytest.raises(ValueError, match=msg):
+                    np.argsort(ind, order=('a', 'b'))
 
     def test_pickle(self, indices):
         self.verify_pickle(indices)
@@ -551,16 +552,16 @@ def test_take_invalid_kwargs(self):
         indices = [1, 2]
 
         msg = r"take\(\) got an unexpected keyword argument 'foo'"
-        tm.assert_raises_regex(TypeError, msg, idx.take,
-                               indices, foo=2)
+        with pytest.raises(TypeError, match=msg):
+            idx.take(indices, foo=2)
 
         msg = "the 'out' parameter is not supported"
-        tm.assert_raises_regex(ValueError, msg, idx.take,
-                               indices, out=indices)
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, out=indices)
 
         msg = "the 'mode' parameter is not supported"
-        tm.assert_raises_regex(ValueError, msg, idx.take,
-                               indices, mode='clip')
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, mode='clip')
 
     def test_repeat(self):
         rep = 2
@@ -580,8 +581,8 @@ def test_numpy_repeat(self):
         tm.assert_index_equal(np.repeat(i, rep), expected)
 
         msg = "the 'axis' parameter is not supported"
-        tm.assert_raises_regex(ValueError, msg, np.repeat,
-                               i, rep, axis=0)
+        with pytest.raises(ValueError, match=msg):
+            np.repeat(i, rep, axis=0)
 
     @pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
     def test_where(self, klass):
@@ -597,19 +598,16 @@ def test_where(self, klass):
         result = i.where(klass(cond))
         tm.assert_index_equal(result, expected)
 
-    def test_setops_errorcases(self):
+    @pytest.mark.parametrize("case", [0.5, "xxx"])
+    @pytest.mark.parametrize("method", ["intersection", "union",
+                                        "difference", "symmetric_difference"])
+    def test_set_ops_error_cases(self, case, method):
         for name, idx in compat.iteritems(self.indices):
-            # # non-iterable input
-            cases = [0.5, 'xxx']
-            methods = [idx.intersection, idx.union, idx.difference,
-                       idx.symmetric_difference]
-
-            for method in methods:
-                for case in cases:
-                    tm.assert_raises_regex(TypeError,
-                                           "Input must be Index "
-                                           "or array-like",
-                                           method, case)
+            # non-iterable input
+
+            msg = "Input must be Index or array-like"
+            with pytest.raises(TypeError, match=msg):
+                getattr(idx, method)(case)
 
     def test_intersection_base(self):
         for name, idx in compat.iteritems(self.indices):
@@ -628,8 +626,8 @@ def test_intersection_base(self):
             for case in cases:
                 if isinstance(idx, PeriodIndex):
                     msg = "can only call with other PeriodIndex-ed objects"
-                    with tm.assert_raises_regex(ValueError, msg):
-                        result = first.intersection(case)
+                    with pytest.raises(ValueError, match=msg):
+                        first.intersection(case)
                 elif isinstance(idx, CategoricalIndex):
                     pass
                 else:
@@ -638,8 +636,8 @@ def test_intersection_base(self):
 
             if isinstance(idx, MultiIndex):
                 msg = "other must be a MultiIndex or a list of tuples"
-                with tm.assert_raises_regex(TypeError, msg):
-                    result = first.intersection([1, 2, 3])
+                with pytest.raises(TypeError, match=msg):
+                    first.intersection([1, 2, 3])
 
     def test_union_base(self):
         for name, idx in compat.iteritems(self.indices):
@@ -655,8 +653,8 @@ def test_union_base(self):
             for case in cases:
                 if isinstance(idx, PeriodIndex):
                     msg = "can only call with other PeriodIndex-ed objects"
-                    with tm.assert_raises_regex(ValueError, msg):
-                        result = first.union(case)
+                    with pytest.raises(ValueError, match=msg):
+                        first.union(case)
                 elif isinstance(idx, CategoricalIndex):
                     pass
                 else:
@@ -665,8 +663,8 @@ def test_union_base(self):
 
             if isinstance(idx, MultiIndex):
                 msg = "other must be a MultiIndex or a list of tuples"
-                with tm.assert_raises_regex(TypeError, msg):
-                    result = first.union([1, 2, 3])
+                with pytest.raises(TypeError, match=msg):
+                    first.union([1, 2, 3])
 
     def test_difference_base(self):
         for name, idx in compat.iteritems(self.indices):
@@ -686,8 +684,8 @@ def test_difference_base(self):
             for case in cases:
                 if isinstance(idx, PeriodIndex):
                     msg = "can only call with other PeriodIndex-ed objects"
-                    with tm.assert_raises_regex(ValueError, msg):
-                        result = first.difference(case)
+                    with pytest.raises(ValueError, match=msg):
+                        first.difference(case)
                 elif isinstance(idx, CategoricalIndex):
                     pass
                 elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
@@ -700,8 +698,8 @@ def test_difference_base(self):
 
             if isinstance(idx, MultiIndex):
                 msg = "other must be a MultiIndex or a list of tuples"
-                with tm.assert_raises_regex(TypeError, msg):
-                    result = first.difference([1, 2, 3])
+                with pytest.raises(TypeError, match=msg):
+                    first.difference([1, 2, 3])
 
     def test_symmetric_difference(self):
         for name, idx in compat.iteritems(self.indices):
@@ -720,8 +718,8 @@ def test_symmetric_difference(self):
             for case in cases:
                 if isinstance(idx, PeriodIndex):
                     msg = "can only call with other PeriodIndex-ed objects"
-                    with tm.assert_raises_regex(ValueError, msg):
-                        result = first.symmetric_difference(case)
+                    with pytest.raises(ValueError, match=msg):
+                        first.symmetric_difference(case)
                 elif isinstance(idx, CategoricalIndex):
                     pass
                 else:
@@ -730,7 +728,7 @@ def test_symmetric_difference(self):
 
             if isinstance(idx, MultiIndex):
                 msg = "other must be a MultiIndex or a list of tuples"
-                with tm.assert_raises_regex(TypeError, msg):
+                with pytest.raises(TypeError, match=msg):
                     first.symmetric_difference([1, 2, 3])
 
     def test_insert_base(self):
@@ -767,7 +765,7 @@ def test_delete_base(self):
 
         with pytest.raises((IndexError, ValueError)):
             # either depending on numpy version
-            result = idx.delete(len(idx))
+            idx.delete(len(idx))
 
     def test_equals(self):
 
@@ -799,7 +797,7 @@ def test_equals_op(self):
         index_b = index_a[0:-1]
         index_c = index_a[0:-1].append(index_a[-2:-1])
         index_d = index_a[0:1]
-        with tm.assert_raises_regex(ValueError, "Lengths must match"):
+        with pytest.raises(ValueError, match="Lengths must match"):
             index_a == index_b
         expected1 = np.array([True] * n)
         expected2 = np.array([True] * (n - 1) + [False])
@@ -811,7 +809,7 @@ def test_equals_op(self):
         array_b = np.array(index_a[0:-1])
         array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
         array_d = np.array(index_a[0:1])
-        with tm.assert_raises_regex(ValueError, "Lengths must match"):
+        with pytest.raises(ValueError, match="Lengths must match"):
             index_a == array_b
         tm.assert_numpy_array_equal(index_a == array_a, expected1)
         tm.assert_numpy_array_equal(index_a == array_c, expected2)
@@ -821,23 +819,23 @@ def test_equals_op(self):
         series_b = Series(array_b)
         series_c = Series(array_c)
         series_d = Series(array_d)
-        with tm.assert_raises_regex(ValueError, "Lengths must match"):
+        with pytest.raises(ValueError, match="Lengths must match"):
             index_a == series_b
 
         tm.assert_numpy_array_equal(index_a == series_a, expected1)
         tm.assert_numpy_array_equal(index_a == series_c, expected2)
 
         # cases where length is 1 for one of them
-        with tm.assert_raises_regex(ValueError, "Lengths must match"):
+        with pytest.raises(ValueError, match="Lengths must match"):
             index_a == index_d
-        with tm.assert_raises_regex(ValueError, "Lengths must match"):
+        with pytest.raises(ValueError, match="Lengths must match"):
             index_a == series_d
-        with tm.assert_raises_regex(ValueError, "Lengths must match"):
+        with pytest.raises(ValueError, match="Lengths must match"):
             index_a == array_d
         msg = "Can only compare identically-labeled Series objects"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             series_a == series_d
-        with tm.assert_raises_regex(ValueError, "Lengths must match"):
+        with pytest.raises(ValueError, match="Lengths must match"):
             series_a == array_d
 
         # comparing with a scalar should broadcast; note that we are excluding
@@ -947,7 +945,7 @@ def test_fillna(self):
         elif isinstance(index, MultiIndex):
             idx = index.copy()
             msg = "isna is not defined for MultiIndex"
-            with tm.assert_raises_regex(NotImplementedError, msg):
+            with pytest.raises(NotImplementedError, match=msg):
                 idx.fillna(idx[0])
         else:
             idx = index.copy()
@@ -956,7 +954,7 @@ def test_fillna(self):
             assert result is not idx
 
             msg = "'value' must be a scalar, passed: "
-            with tm.assert_raises_regex(TypeError, msg):
+            with pytest.raises(TypeError, match=msg):
                 idx.fillna([idx[0]])
 
             idx = index.copy()
@@ -990,7 +988,7 @@ def test_nulls(self):
         elif isinstance(index, MultiIndex):
             idx = index.copy()
             msg = "isna is not defined for MultiIndex"
-            with tm.assert_raises_regex(NotImplementedError, msg):
+            with pytest.raises(NotImplementedError, match=msg):
                 idx.isna()
         else:
diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py
index a9cfc551e073b..4b8ead71ed74c 100644
--- a/pandas/tests/indexes/datetimes/test_astype.py
+++ b/pandas/tests/indexes/datetimes/test_astype.py
@@ -169,7 +169,7 @@ def test_astype_raises(self, dtype):
         # GH 13149, GH 13209
         idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
         msg = 'Cannot cast DatetimeIndex to dtype'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             idx.astype(dtype)
 
     def test_index_convert_to_datetime_array(self):
diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py
index 7a251a8ecfb28..04b2c4f280588 100644
--- a/pandas/tests/indexes/datetimes/test_construction.py
+++ b/pandas/tests/indexes/datetimes/test_construction.py
@@ -253,8 +253,7 @@ def test_construction_dti_with_mixed_timezones(self):
                              Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                             name='idx')
 
-        with tm.assert_raises_regex(TypeError,
-                                    'data is already tz-aware'):
+        with pytest.raises(TypeError, match='data is already tz-aware'):
             DatetimeIndex([Timestamp('2011-01-01 10:00'),
                            Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                           tz='Asia/Tokyo', name='idx')
@@ -264,8 +263,7 @@ def test_construction_dti_with_mixed_timezones(self):
                            Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                           tz='US/Eastern', name='idx')
 
-        with tm.assert_raises_regex(TypeError,
-                                    'data is already tz-aware'):
+        with pytest.raises(TypeError, match='data is already tz-aware'):
             # passing tz should results in DatetimeIndex, then mismatch raises
             # TypeError
             Index([pd.NaT, Timestamp('2011-01-01 10:00'),
@@ -314,7 +312,7 @@ def test_constructor_coverage(self):
         tm.assert_index_equal(rng, exp)
 
         msg = 'periods must be a number, got foo'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             DatetimeIndex(start='1/1/2000', periods='foo', freq='D')
 
         pytest.raises(ValueError, DatetimeIndex, start='1/1/2000',
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
index b6bab272c8c0a..06b52dfc407cf 100644
--- a/pandas/tests/indexes/datetimes/test_date_range.py
+++ b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -167,7 +167,7 @@ def test_date_range_ambiguous_arguments(self):
 
         msg = ('Of the four parameters: start, end, periods, and '
                'freq, exactly three must be specified')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range(start, end, periods=10, freq='s')
 
     def test_date_range_convenience_periods(self):
@@ -245,25 +245,25 @@ def test_range_misspecified(self):
         msg = ('Of the four parameters: start, end, periods, and '
               'freq, exactly three must be specified')
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range(start='1/1/2000')
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range(end='1/1/2000')
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range(periods=10)
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range(start='1/1/2000', freq='H')
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range(end='1/1/2000', freq='H')
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range(periods=10, freq='H')
 
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range()
 
     @pytest.mark.parametrize('f', [compat.long, int])
@@ -311,7 +311,7 @@ def test_construct_with_different_start_end_string_format(self):
 
     def test_error_with_zero_monthends(self):
         msg = r'Offset <0 \* MonthEnds> did not increment date'
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             date_range('1/1/2000', '1/1/2001', freq=MonthEnd(0))
 
     def test_range_bug(self):
@@ -515,7 +515,7 @@ def test_timezone_comparaison_bug(self):
     def test_timezone_comparaison_assert(self):
         start = Timestamp('20130220 10:00', tz='US/Eastern')
         msg = 'Inferred time zone not equal to passed time zone'
-        with tm.assert_raises_regex(AssertionError, msg):
+        with pytest.raises(AssertionError, match=msg):
             date_range(start, periods=2, tz='Europe/Berlin')
 
     def test_negative_non_tick_frequency_descending_dates(self,
@@ -613,14 +613,14 @@ def test_constructor(self):
         bdate_range(end=START, periods=20, freq=BDay())
 
         msg = 'periods must be a number, got B'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             date_range('2011-1-1', '2012-1-1', 'B')
 
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             bdate_range('2011-1-1', '2012-1-1', 'B')
 
         msg = 'freq must be specified for bdate_range; use date_range instead'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             bdate_range(START, END, periods=10, freq=None)
 
     def test_naive_aware_conflicts(self):
@@ -628,10 +628,10 @@ def test_naive_aware_conflicts(self):
         aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong")
 
         msg = 'tz-naive.*tz-aware'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             naive.join(aware)
 
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             aware.join(naive)
 
     def test_misc(self):
@@ -688,10 +688,10 @@ def test_constructor(self):
         bdate_range(end=START, periods=20, freq=CDay())
 
         msg = 'periods must be a number, got C'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             date_range('2011-1-1', '2012-1-1', 'C')
 
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             bdate_range('2011-1-1', '2012-1-1', 'C')
 
     def test_misc(self):
@@ -726,7 +726,7 @@ def test_cdaterange_weekmask(self):
         # raise with non-custom freq
         msg = ('a custom frequency string is required when holidays or '
                'weekmask are passed, got frequency B')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             bdate_range('2013-05-01', periods=3,
                         weekmask='Sun Mon Tue Wed Thu')
 
@@ -739,7 +739,7 @@ def test_cdaterange_holidays(self):
         # raise with non-custom freq
         msg = ('a custom frequency string is required when holidays or '
                'weekmask are passed, got frequency B')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             bdate_range('2013-05-01', periods=3, holidays=['2013-05-01'])
 
     def test_cdaterange_weekmask_and_holidays(self):
@@ -752,7 +752,7 @@ def test_cdaterange_weekmask_and_holidays(self):
         # raise with non-custom freq
         msg = ('a custom frequency string is required when holidays or '
                'weekmask are passed, got frequency B')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             bdate_range('2013-05-01', periods=3,
                         weekmask='Sun Mon Tue Wed Thu',
                         holidays=['2013-05-01'])
@@ -767,5 +767,5 @@ def test_all_custom_freq(self, freq):
 
         bad_freq = freq + 'FOO'
         msg = 'invalid custom frequency string: {freq}'
-        with tm.assert_raises_regex(ValueError, msg.format(freq=bad_freq)):
+        with pytest.raises(ValueError, match=msg.format(freq=bad_freq)):
             bdate_range(START, END, freq=bad_freq)
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index cea56bf803083..4363777d25235 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -95,8 +95,8 @@ def test_week_of_month_frequency(self):
 
     def test_hash_error(self):
         index = date_range('20010101', periods=10)
-        with tm.assert_raises_regex(TypeError, "unhashable type: %r" %
-                                    type(index).__name__):
+        with pytest.raises(TypeError, match=("unhashable type: %r" %
                                              type(index).__name__)):
             hash(index)
 
     def test_stringified_slice_with_tz(self):
@@ -303,9 +303,8 @@ def test_join_with_period_index(self, join_type):
                                c_idx_type='p', r_idx_type='dt')
         s = df.iloc[:5, 0]
 
-        with tm.assert_raises_regex(ValueError,
-                                    'can only call with other '
-                                    'PeriodIndex-ed objects'):
+        msg = 'can only call with other PeriodIndex-ed objects'
+        with pytest.raises(ValueError, match=msg):
             df.columns.join(s.index, how=join_type)
 
     def test_factorize(self):
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index b66475612fe40..f75b5867e1511 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -181,16 +181,16 @@ def test_take_invalid_kwargs(self):
         indices = [1, 6, 5, 9, 10, 13, 15, 3]
 
         msg = r"take\(\) got an unexpected keyword argument 'foo'"
-        tm.assert_raises_regex(TypeError, msg, idx.take,
-                               indices, foo=2)
+        with pytest.raises(TypeError, match=msg):
+            idx.take(indices, foo=2)
 
         msg = "the 'out' parameter is not supported"
-        tm.assert_raises_regex(ValueError, msg, idx.take,
-                               indices, out=indices)
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, out=indices)
 
         msg = "the 'mode' parameter is not supported"
-        tm.assert_raises_regex(ValueError, msg, idx.take,
-                               indices, mode='clip')
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, mode='clip')
 
     # TODO: This method came from test_datetime; de-dup with version above
     @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'Asia/Tokyo'])
@@ -237,9 +237,9 @@ def test_take_fill_value(self):
 
         msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             idx.take(np.array([1, 0, -2]), fill_value=True)
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             idx.take(np.array([1, 0, -5]), fill_value=True)
 
         with pytest.raises(IndexError):
@@ -268,9 +268,9 @@ def test_take_fill_value_with_timezone(self):
 
         msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             idx.take(np.array([1, 0, -2]), fill_value=True)
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             idx.take(np.array([1, 0, -5]), fill_value=True)
 
         with pytest.raises(IndexError):
@@ -411,8 +411,8 @@ def test_delete(self):
             assert result.freq == expected.freq
 
         with pytest.raises((IndexError, ValueError)):
-            # either depeidnig on numpy version
-            result = idx.delete(5)
+            # either depending on numpy version
+            idx.delete(5)
 
         for tz in [None, 'Asia/Tokyo', 'US/Pacific']:
             idx = date_range(start='2000-01-01 09:00', periods=10, freq='H',
@@ -508,8 +508,7 @@ def test_get_loc(self):
                            tolerance=np.timedelta64(1, 'D')) == 1
         assert idx.get_loc('2000-01-01T12', method='nearest',
                            tolerance=timedelta(1)) == 1
-        with tm.assert_raises_regex(ValueError,
-                                    'unit abbreviation w/o a number'):
+        with pytest.raises(ValueError, match='unit abbreviation w/o a number'):
idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') with pytest.raises(KeyError): idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') @@ -583,12 +582,11 @@ def test_get_indexer(self): with pytest.raises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') - def test_reasonable_keyerror(self): + def test_reasonable_key_error(self): # GH#1062 index = DatetimeIndex(['1/3/2000']) - with pytest.raises(KeyError) as excinfo: + with pytest.raises(KeyError, match='2000'): index.get_loc('1/1/2000') - assert '2000' in str(excinfo.value) @pytest.mark.parametrize('key', [pd.Timedelta(0), pd.Timedelta(1), diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 2cb7482cda617..d599af6180bfb 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -84,17 +84,21 @@ def test_numpy_minmax(self): assert np.max(dr) == Timestamp('2016-01-20 00:00:00', freq='D') errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, errmsg, np.min, dr, out=0) - tm.assert_raises_regex(ValueError, errmsg, np.max, dr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.min(dr, out=0) + + with pytest.raises(ValueError, match=errmsg): + np.max(dr, out=0) assert np.argmin(dr) == 0 assert np.argmax(dr) == 5 errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex( - ValueError, errmsg, np.argmin, dr, out=0) - tm.assert_raises_regex( - ValueError, errmsg, np.argmax, dr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmin(dr, out=0) + + with pytest.raises(ValueError, match=errmsg): + np.argmax(dr, out=0) def test_repeat_range(self, tz_naive_fixture): tz = tz_naive_fixture @@ -148,8 +152,8 @@ def test_repeat(self, tz_naive_fixture): assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - tm.assert_raises_regex(ValueError, msg, np.repeat, - rng, reps, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(rng, reps, axis=1) def test_resolution(self, tz_naive_fixture): tz = tz_naive_fixture @@ -415,11 +419,11 @@ def test_freq_setter_errors(self): # setting with an incompatible freq msg = ('Inferred frequency 2D from passed values does not conform to ' 'passed frequency 5D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.freq = '5D' # setting with non-freq string - with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid frequency'): idx.freq = 'foo' def test_offset_deprecated(self): diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 27e53c15238be..e6e19c6a8200d 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -66,12 +66,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), date_range('2014-01-01', periods=20, freq='MS')) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] + with 
pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] def test_slice_bounds_empty(self): # GH 14354 @@ -222,8 +222,8 @@ def test_partial_slice_second_precision(self): tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) assert s[Timestamp('2005-1-1 00:00:59.999990')] == s.iloc[0] - tm.assert_raises_regex(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) + with pytest.raises(KeyError, match='2005-1-1 00:00:00'): + s['2005-1-1 00:00:00'] def test_partial_slicing_dataframe(self): # GH14856 @@ -349,14 +349,14 @@ def test_partial_slice_doesnt_require_monotonicity(self): timestamp = pd.Timestamp('2014-01-10') tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) - tm.assert_raises_regex(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) + with pytest.raises(KeyError, + match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic[timestamp:] tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - tm.assert_raises_regex(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) + with pytest.raises(KeyError, + match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic.loc[timestamp:] def test_loc_datetime_length_one(self): # GH16071 diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index b644cb5844d9b..81f4c77009ce4 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -97,14 +97,16 @@ def test_round(self, tz_naive_fixture): assert elt.round(freq='H') == expected_elt msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): rng.round(freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): elt.round(freq='foo') msg = " is a non-fixed frequency" - tm.assert_raises_regex(ValueError, msg, rng.round, freq='M') - tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') + with pytest.raises(ValueError, match=msg): + rng.round(freq='M') + with pytest.raises(ValueError, match=msg): + elt.round(freq='M') # GH#14440 & GH#15578 index = DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 8c66b68c94946..c24c1025ea63c 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -586,7 +586,7 @@ def test_week_without_day_and_calendar_year(self, date, format): # GH16774 msg = "Cannot use '%W' or '%U' without day and year" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.to_datetime(date, format=format) def test_iso_8601_strings_with_same_offset(self): @@ -865,7 +865,7 @@ def test_dataframe(self, cache): msg = ("cannot assemble the datetimes: time data .+ does not " r"match format '%Y%m%d' \(match\)") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) result = to_datetime(df2, errors='coerce', cache=cache) expected = Series([Timestamp('20150204 00:00:00'), @@ -875,7 +875,7 @@ def test_dataframe(self, cache): # extra columns msg = ("extra keys have been passed to the datetime assemblage: " r"\[foo\]") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df2 = df.copy() df2['foo'] = 1 
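Several conversions above keep raw-string patterns such as r"\(match\)" because the match argument is a regular expression applied with re.search against str of the raised exception, not a literal substring test. When the expected message contains regex metacharacters, either escape them by hand in a raw string (as the to_datetime hunks do) or wrap the literal text in re.escape. A small sketch with a made-up error message:

    import re
    import pytest

    def test_match_is_a_regex():
        # parentheses are metacharacters once the text is used as `match`
        msg = "invalid input (expected a number)"
        with pytest.raises(ValueError, match=re.escape(msg)):
            raise ValueError(msg)
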
to_datetime(df2, cache=cache) @@ -888,7 +888,7 @@ def test_dataframe(self, cache): ['year', 'month', 'second'], ['month', 'day'], ['year', 'day', 'second']]: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df[c], cache=cache) # duplicates @@ -897,7 +897,7 @@ def test_dataframe(self, cache): 'month': [2, 20], 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) df2 = DataFrame({'year': [2015, 2016], @@ -905,7 +905,7 @@ def test_dataframe(self, cache): 'day': [4, 5], 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) @pytest.mark.parametrize('cache', [True, False]) diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index 4389a22641b72..8bcd6ef5dcc5a 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -49,12 +49,12 @@ def test_astype_category(self, index): 'datetime64[ns, US/Eastern]']) def test_astype_cannot_cast(self, index, dtype): msg = 'Cannot cast IntervalIndex to dtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) def test_astype_invalid_dtype(self, index): msg = "data type 'fake_dtype' not understood" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype('fake_dtype') @@ -127,7 +127,7 @@ def test_subtype_integer(self, subtype): # raises with NA msg = 'Cannot convert NA to integer' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.insert(0, np.nan).astype(dtype) @pytest.mark.xfail(reason='GH#15832', strict=True) @@ -152,7 +152,7 @@ def test_subtype_integer_errors(self): def test_subtype_datetimelike(self, index, subtype): dtype = IntervalDtype(subtype) msg = 'Cannot convert .* to .*; subtypes are incompatible' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) @@ -183,7 +183,7 @@ def test_subtype_integer(self, index, subtype): def test_subtype_float(self, index): dtype = IntervalDtype('float64') msg = 'Cannot convert .* to .*; subtypes are incompatible' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) def test_subtype_datetimelike(self): @@ -192,15 +192,15 @@ def test_subtype_datetimelike(self): msg = 'Cannot convert .* to .*; subtypes are incompatible' index = interval_range(Timestamp('2018-01-01'), periods=10) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) index = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) # timedelta -> datetime raises dtype = IntervalDtype('datetime64[ns]') index = interval_range(Timedelta('0 days'), periods=10) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index a937dbc40a843..d07c11012a86b 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ 
b/pandas/tests/indexes/interval/test_construction.py @@ -107,7 +107,7 @@ def test_constructor_string(self, constructor, breaks): # GH 19016 msg = ('category, object, and string subtypes are not supported ' 'for IntervalIndex') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(**self.get_kwargs_from_breaks(breaks)) @pytest.mark.parametrize('cat_constructor', [ @@ -132,30 +132,30 @@ def test_generic_errors(self, constructor): # invalid closed msg = "invalid option for 'closed': invalid" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(closed='invalid', **filler) # unsupported dtype msg = 'dtype must be an IntervalDtype, got int64' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(dtype='int64', **filler) # invalid dtype msg = "data type 'invalid' not understood" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(dtype='invalid', **filler) # no point in nesting periods in an IntervalIndex periods = period_range('2000-01-01', periods=10) periods_kwargs = self.get_kwargs_from_breaks(periods) msg = 'Period dtypes are not supported, use a PeriodIndex instead' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(**periods_kwargs) # decreasing values decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1)) msg = 'left side of interval must be <= right side' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(**decreasing_kwargs) @@ -178,14 +178,14 @@ def test_constructor_errors(self): data = Categorical(list('01234abcde'), ordered=True) msg = ('category, object, and string subtypes are not supported ' 'for IntervalIndex') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalIndex.from_arrays(data[:-1], data[1:]) # unequal length left = [0, 1, 2] right = [2, 3] msg = 'left and right must have the same length' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays(left, right) @pytest.mark.parametrize('left_subtype, right_subtype', [ @@ -224,7 +224,7 @@ def test_constructor_errors(self): data = Categorical(list('01234abcde'), ordered=True) msg = ('category, object, and string subtypes are not supported ' 'for IntervalIndex') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalIndex.from_breaks(data) def test_length_one(self): @@ -261,17 +261,17 @@ def test_constructor_errors(self): # non-tuple tuples = [(0, 1), 2, (3, 4)] msg = 'IntervalIndex.from_tuples received an invalid item, 2' - with tm.assert_raises_regex(TypeError, msg.format(t=tuples)): + with pytest.raises(TypeError, match=msg.format(t=tuples)): IntervalIndex.from_tuples(tuples) # too few/many items tuples = [(0, 1), (2,), (3, 4)] msg = 'IntervalIndex.from_tuples requires tuples of length 2, got {t}' - with tm.assert_raises_regex(ValueError, msg.format(t=tuples)): + with pytest.raises(ValueError, match=msg.format(t=tuples)): IntervalIndex.from_tuples(tuples) tuples = [(0, 1), (2, 3, 4), (5, 6)] - with tm.assert_raises_regex(ValueError, msg.format(t=tuples)): + with pytest.raises(ValueError, match=msg.format(t=tuples)): IntervalIndex.from_tuples(tuples) def test_na_tuples(self): @@ -318,19 +318,19 @@ def test_constructor_errors(self, constructor): # mismatched 
closed within intervals with no constructor override ivs = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')] msg = 'intervals must all be closed on the same side' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(ivs) # scalar msg = (r'IntervalIndex\(...\) must be called with a collection of ' 'some kind, 5 was passed') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(5) # not an interval msg = ("type <(class|type) 'numpy.int64'> with value 0 " "is not an interval") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor([0, 1]) @pytest.mark.parametrize('data, closed', [ diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 49d093d312cf1..d5f62429ddb73 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -214,13 +214,13 @@ def test_insert(self, data): # invalid type msg = 'can only insert Interval objects and NA into an IntervalIndex' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): data.insert(1, 'foo') # invalid closed msg = 'inserted item must be closed on the same side as the index' for closed in {'left', 'right', 'both', 'neither'} - {item.closed}: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) @@ -690,7 +690,7 @@ def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): msg = ('Cannot index an IntervalIndex of subtype {dtype1} with ' 'values of dtype {dtype2}') msg = re.escape(msg.format(dtype1=breaks1.dtype, dtype2=breaks2.dtype)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) # To be removed, replaced by test_interval_new.py (see #16316, #16386) @@ -842,7 +842,7 @@ def test_set_operation_errors(self, closed, op_name): # non-IntervalIndex msg = ('the other index needs to be an IntervalIndex too, but ' 'was type Int64Index') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): set_op(Index([1, 2, 3])) # mixed closed @@ -850,14 +850,14 @@ def test_set_operation_errors(self, closed, op_name): 'that are closed on the same side') for other_closed in {'right', 'left', 'both', 'neither'} - {closed}: other = self.create_index(closed=other_closed) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): set_op(other) # GH 19016: incompatible dtypes other = interval_range(Timestamp('20180101'), periods=9, closed=closed) msg = ('can only do {op} between two IntervalIndex objects that have ' 'compatible dtypes').format(op=op_name) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): set_op(other) def test_isin(self, closed): @@ -934,9 +934,9 @@ def test_comparison(self): actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) - with tm.assert_raises_regex(TypeError, 'unorderable types'): + with pytest.raises(TypeError, match='unorderable types'): self.index > 0 - with tm.assert_raises_regex(TypeError, 'unorderable types'): + with pytest.raises(TypeError, match='unorderable types'): self.index <= 0 with pytest.raises(TypeError): self.index > np.arange(2) @@ -1039,7 +1039,7 @@ def test_append(self, 
closed): for other_closed in {'left', 'right', 'both', 'neither'} - {closed}: index_other_closed = IntervalIndex.from_arrays( [0, 1], [1, 2], closed=other_closed) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): @@ -1148,7 +1148,7 @@ def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): diff --git a/pandas/tests/indexes/interval/test_interval_new.py b/pandas/tests/indexes/interval/test_interval_new.py index 80905e13e9525..b4510f8f62bdf 100644 --- a/pandas/tests/indexes/interval/test_interval_new.py +++ b/pandas/tests/indexes/interval/test_interval_new.py @@ -199,7 +199,7 @@ def test_get_indexer_errors(self, tuples, closed): msg = ('cannot handle overlapping indices; use ' 'IntervalIndex.get_indexer_non_unique') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.get_indexer([0, 2]) @pytest.mark.parametrize('query, expected', [ diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index 9e11c357c075d..87bbf53cd56e0 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -232,84 +232,84 @@ def test_errors(self): msg = ('Of the four parameters: start, end, periods, and freq, ' 'exactly three must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start=0) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(end=5) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(periods=2) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range() # too many params - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start=0, end=5, periods=6, freq=1.5) # mixed units msg = 'start, end, freq need to be type compatible' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=0, end=Timestamp('20130101'), freq=2) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=0, end=Timedelta('1 day'), freq=2) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=0, end=10, freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timestamp('20130101'), end=10, freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timestamp('20130101'), end=Timedelta('1 day'), freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timestamp('20130101'), end=Timestamp('20130110'), freq=2) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timedelta('1 day'), end=10, freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): 
interval_range(start=Timedelta('1 day'), end=Timestamp('20130110'), freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timedelta('1 day'), end=Timedelta('10 days'), freq=2) # invalid periods msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=0, periods='foo') # invalid start msg = 'start must be numeric or datetime-like, got foo' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start='foo', periods=10) # invalid end msg = r'end must be numeric or datetime-like, got \(0, 1\]' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(end=Interval(0, 1), periods=10) # invalid freq for datetime-like msg = 'freq must be numeric or convertible to DateOffset, got foo' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start=0, end=10, freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start=Timestamp('20130101'), periods=10, freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(end=Timedelta('1 day'), periods=10, freq='foo') # mixed tz start = Timestamp('2017-01-01', tz='US/Eastern') end = Timestamp('2017-01-07', tz='US/Pacific') msg = 'Start and end cannot both be tz-aware with different timezones' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=start, end=end) diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 8d602b0bb2b1d..05adaada01ee5 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -77,8 +77,8 @@ def f(): def test_reorder_levels(idx): # this blows up - tm.assert_raises_regex(IndexError, '^Too many levels', - idx.reorder_levels, [2, 1, 0]) + with pytest.raises(IndexError, match='^Too many levels'): + idx.reorder_levels([2, 1, 0]) def test_numpy_repeat(): @@ -93,8 +93,8 @@ def test_numpy_repeat(): tm.assert_index_equal(np.repeat(m, reps), expected) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex( - ValueError, msg, np.repeat, m, reps, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(m, reps, axis=1) def test_append_mixed_dtypes(): @@ -151,16 +151,16 @@ def test_take_invalid_kwargs(idx): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') def test_take_fill_value(): @@ -195,9 +195,9 @@ def test_take_fill_value(): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 5da96717bc077..70d79ddfdc22e 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -3,7 +3,6 @@ import numpy as np import pytest -import pandas.util.testing as tm from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.util.testing import assert_copy @@ -15,7 +14,7 @@ def test_astype(idx): assert_copy(actual.labels, expected.labels) assert [level.name for level in actual.levels] == list(expected.names) - with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): + with pytest.raises(TypeError, match="^Setting.*dtype.*object"): idx.astype(np.dtype(int)) @@ -23,10 +22,10 @@ def test_astype(idx): def test_astype_category(idx, ordered): # GH 18630 msg = '> 1 ndim Categorical are not supported at this time' - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.astype(CategoricalDtype(ordered=ordered)) if ordered is False: # dtype='category' defaults to ordered=False, so only test once - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.astype('category') diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index f05b53522fa31..23ea0c306d47c 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -11,27 +11,34 @@ def test_numeric_compat(idx): - tm.assert_raises_regex(TypeError, "cannot perform __mul__", - lambda: idx * 1) - tm.assert_raises_regex(TypeError, "cannot perform __rmul__", - lambda: 1 * idx) - - div_err = "cannot perform __truediv__" if PY3 \ - else "cannot perform __div__" - tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) - div_err = div_err.replace(' __', ' __r') - tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) - tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", - lambda: idx // 1) - tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__", - lambda: 1 // idx) - - -def test_logical_compat(idx): - tm.assert_raises_regex(TypeError, 'cannot perform all', - lambda: idx.all()) - tm.assert_raises_regex(TypeError, 'cannot perform any', - lambda: idx.any()) + with pytest.raises(TypeError, match="cannot perform __mul__"): + idx * 1 + + with pytest.raises(TypeError, match="cannot perform __rmul__"): + 1 * idx + + div_err = ("cannot perform __truediv__" if PY3 + else "cannot perform __div__") + with pytest.raises(TypeError, match=div_err): + idx / 1 + + div_err = div_err.replace(" __", " __r") + with pytest.raises(TypeError, match=div_err): + 1 / idx + + with pytest.raises(TypeError, match="cannot perform __floordiv__"): + idx // 1 + + with pytest.raises(TypeError, match="cannot perform __rfloordiv__"): + 1 // idx + + +@pytest.mark.parametrize("method", ["all", "any"]) +def test_logical_compat(idx, method): + msg = "cannot perform {method}".format(method=method) + + with pytest.raises(TypeError, match=msg): + getattr(idx, method)() def test_boolean_context_compat(idx): diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 833de283e5367..fb15d674613d4 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ 
b/pandas/tests/indexes/multi/test_constructor.py @@ -25,13 +25,14 @@ def test_constructor_single_level(): def test_constructor_no_levels(): - tm.assert_raises_regex(ValueError, "non-zero number " - "of levels/labels", - MultiIndex, levels=[], labels=[]) + msg = "non-zero number of levels/labels" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=[], labels=[]) + both_re = re.compile('Must pass both levels and labels') - with tm.assert_raises_regex(TypeError, both_re): + with pytest.raises(TypeError, match=both_re): MultiIndex(levels=[]) - with tm.assert_raises_regex(TypeError, both_re): + with pytest.raises(TypeError, match=both_re): MultiIndex(labels=[]) @@ -39,44 +40,48 @@ def test_constructor_nonhashable_names(): # GH 20527 levels = [[1, 2], [u'one', u'two']] labels = [[0, 0, 1, 1], [0, 1, 0, 1]] - names = ((['foo'], ['bar'])) + names = (['foo'], ['bar']) message = "MultiIndex.name must be a hashable type" - tm.assert_raises_regex(TypeError, message, - MultiIndex, levels=levels, - labels=labels, names=names) + with pytest.raises(TypeError, match=message): + MultiIndex(levels=levels, labels=labels, names=names) # With .rename() mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=('foo', 'bar')) renamed = [['foor'], ['barr']] - tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) + with pytest.raises(TypeError, match=message): + mi.rename(names=renamed) + # With .set_names() - tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) + with pytest.raises(TypeError, match=message): + mi.set_names(names=renamed) def test_constructor_mismatched_label_levels(idx): labels = [np.array([1]), np.array([2]), np.array([3])] levels = ["a"] - tm.assert_raises_regex(ValueError, "Length of levels and labels " - "must be the same", MultiIndex, - levels=levels, labels=labels) + + msg = "Length of levels and labels must be the same" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=levels, labels=labels) + length_error = re.compile('>= length of level') label_error = re.compile(r'Unequal label lengths: \[4, 2\]') # important to check that it's looking at the right thing. - with tm.assert_raises_regex(ValueError, length_error): + with pytest.raises(ValueError, match=length_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) - with tm.assert_raises_regex(ValueError, label_error): + with pytest.raises(ValueError, match=label_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) # external API - with tm.assert_raises_regex(ValueError, length_error): + with pytest.raises(ValueError, match=length_error): idx.copy().set_levels([['a'], ['b']]) - with tm.assert_raises_regex(ValueError, label_error): + with pytest.raises(ValueError, match=label_error): idx.copy().set_labels([[0, 0, 0, 0], [0, 0]]) @@ -121,8 +126,8 @@ def test_from_arrays_iterator(idx): tm.assert_index_equal(result, idx) # invalid iterator input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of array-likes."): + msg = "Input must be a list / sequence of array-likes." 
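Where the old helper encouraged looping over cases inside one test (as in the test_logical_compat and test_setops_errorcases rewrites above), the conversion also lifts the loop into @pytest.mark.parametrize, so each case fails and reports independently. A standalone sketch of the same shape, with a hypothetical object in place of the idx fixture:

    import pytest

    class NoReduce(object):
        # stand-in for an index that rejects logical reductions
        def all(self):
            raise TypeError("cannot perform all")

        def any(self):
            raise TypeError("cannot perform any")

    @pytest.mark.parametrize("method", ["all", "any"])
    def test_logical_ops_disallowed(method):
        msg = "cannot perform {method}".format(method=method)
        with pytest.raises(TypeError, match=msg):
            getattr(NoReduce(), method)()
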
+ with pytest.raises(TypeError, match=msg): MultiIndex.from_arrays(0) @@ -217,8 +222,8 @@ def test_from_arrays_index_series_categorical(): def test_from_arrays_empty(): # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): + msg = "Must pass non-zero number of levels/labels" + with pytest.raises(ValueError, match=msg): MultiIndex.from_arrays(arrays=[]) # 1 level @@ -261,15 +266,15 @@ def test_from_arrays_invalid_input(invalid_array): ]) def test_from_arrays_different_lengths(idx1, idx2): # see gh-13599 - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + msg = '^all arrays must be same length$' + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays([idx1, idx2]) def test_from_tuples(): - tm.assert_raises_regex(TypeError, 'Cannot infer number of levels ' - 'from empty list', - MultiIndex.from_tuples, []) + msg = 'Cannot infer number of levels from empty list' + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples([]) expected = MultiIndex(levels=[[1, 3], [2, 4]], labels=[[0, 1], [0, 1]], @@ -291,8 +296,8 @@ def test_from_tuples_iterator(): tm.assert_index_equal(result, expected) # input non-iterables - with tm.assert_raises_regex( - TypeError, 'Input must be a list / sequence of tuple-likes.'): + msg = 'Input must be a list / sequence of tuple-likes.' + with pytest.raises(TypeError, match=msg): MultiIndex.from_tuples(0) @@ -311,8 +316,8 @@ def test_from_tuples_index_values(idx): def test_from_product_empty_zero_levels(): # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): + msg = "Must pass non-zero number of levels/labels" + with pytest.raises(ValueError, match=msg): MultiIndex.from_product([]) @@ -422,8 +427,8 @@ def test_from_product_iterator(): tm.assert_index_equal(result, expected) # Invalid non-iterable input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of iterables."): + msg = "Input must be a list / sequence of iterables." + with pytest.raises(TypeError, match=msg): MultiIndex.from_product(0) diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 1daccefcfe876..79494a7c77cbd 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- - +import pytest import numpy as np import pandas as pd @@ -51,11 +51,11 @@ def test_to_frame(): tm.assert_frame_equal(result, expected) msg = "'name' must be a list / sequence of column names." - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.to_frame(name='first') msg = "'name' should have same length as number of levels on index." 
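A few of the MultiIndex conversions (both_re, length_error, label_error, mutable_regex) pass a precompiled pattern rather than a string. That works because match is ultimately handed to re.search, which accepts either form. A sketch assuming nothing beyond the standard library:

    import re
    import pytest

    both_re = re.compile('Must pass both levels and labels')

    def test_compiled_pattern():
        # a compiled regex is accepted by `match` just like a pattern string
        with pytest.raises(TypeError, match=both_re):
            raise TypeError('Must pass both levels and labels')
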
- with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.to_frame(name=['first']) # Tests for datetime index diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 41cb2409f0532..bd1f313897ea2 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- - import numpy as np +import pytest from pandas.compat import lrange, lzip, range @@ -35,7 +35,7 @@ def test_equals_op(idx): index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -47,7 +47,7 @@ def test_equals_op(idx): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -57,23 +57,23 @@ def test_equals_op(idx): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == index_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == array_d msg = "Can only compare identically-labeled Series objects" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): series_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index e72b76ed07269..a5f586bd98d5f 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -26,8 +26,8 @@ def test_get_level_number_integer(idx): assert idx._get_level_number(1) == 0 assert idx._get_level_number(0) == 1 pytest.raises(IndexError, idx._get_level_number, 2) - tm.assert_raises_regex(KeyError, 'Level fourth not found', - idx._get_level_number, 'fourth') + with pytest.raises(KeyError, match='Level fourth not found'): + idx._get_level_number('fourth') def test_get_level_values(idx): @@ -125,7 +125,7 @@ def test_set_name_methods(idx, index_names): ind = idx.set_names(new_names) assert idx.names == index_names assert ind.names == new_names - with tm.assert_raises_regex(ValueError, "^Length"): + with pytest.raises(ValueError, match="^Length"): ind.set_names(new_names + new_names) new_names2 
= [name + "SUFFIX2" for name in new_names] res = ind.set_names(new_names2, inplace=True) @@ -226,23 +226,23 @@ def test_set_levels(idx): # GH 13754 original_index = idx.copy() for inplace in [True, False]: - with tm.assert_raises_regex(ValueError, "^On"): + with pytest.raises(ValueError, match="^On"): idx.set_levels(['c'], level=0, inplace=inplace) assert_matching(idx.levels, original_index.levels, check_dtype=True) - with tm.assert_raises_regex(ValueError, "^On"): + with pytest.raises(ValueError, match="^On"): idx.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) assert_matching(idx.labels, original_index.labels, check_dtype=True) - with tm.assert_raises_regex(TypeError, "^Levels"): + with pytest.raises(TypeError, match="^Levels"): idx.set_levels('c', level=0, inplace=inplace) assert_matching(idx.levels, original_index.levels, check_dtype=True) - with tm.assert_raises_regex(TypeError, "^Labels"): + with pytest.raises(TypeError, match="^Labels"): idx.set_labels(1, level=0, inplace=inplace) assert_matching(idx.labels, original_index.labels, check_dtype=True) @@ -323,46 +323,46 @@ def test_set_levels_labels_names_bad_input(idx): levels, labels = idx.levels, idx.labels names = idx.names - with tm.assert_raises_regex(ValueError, 'Length of levels'): + with pytest.raises(ValueError, match='Length of levels'): idx.set_levels([levels[0]]) - with tm.assert_raises_regex(ValueError, 'Length of labels'): + with pytest.raises(ValueError, match='Length of labels'): idx.set_labels([labels[0]]) - with tm.assert_raises_regex(ValueError, 'Length of names'): + with pytest.raises(ValueError, match='Length of names'): idx.set_names([names[0]]) # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of lists-like'): idx.set_levels(levels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of lists-like'): idx.set_labels(labels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list-like'): + with pytest.raises(TypeError, match='list-like'): idx.set_names(names[0]) # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of lists-like'): idx.set_levels(levels[0], level=[0, 1]) - with tm.assert_raises_regex(TypeError, 'list-like'): + with pytest.raises(TypeError, match='list-like'): idx.set_levels(levels, level=0) # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of lists-like'): idx.set_labels(labels[0], level=[0, 1]) - with tm.assert_raises_regex(TypeError, 'list-like'): + with pytest.raises(TypeError, match='list-like'): idx.set_labels(labels, level=0) # should have equal lengths - with tm.assert_raises_regex(ValueError, 'Length of names'): + with pytest.raises(ValueError, match='Length of names'): idx.set_names(names[0], level=[0, 1]) - with tm.assert_raises_regex(TypeError, 'Names must be a'): + with pytest.raises(TypeError, match='Names must be a'): idx.set_names(names, level=0) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 563027364134d..23f48db751804 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -54,19 +54,17 @@ def 
test_slice_locs_with_type_mismatch(): df = tm.makeTimeDataFrame() stacked = df.stack() idx = stacked.index - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, (1, 3)) - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, - df.index[5] + timedelta( - seconds=30), (5, 2)) + with pytest.raises(TypeError, match='^Level type mismatch'): + idx.slice_locs((1, 3)) + with pytest.raises(TypeError, match='^Level type mismatch'): + idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) df = tm.makeCustomDataframe(5, 5) stacked = df.stack() idx = stacked.index - with tm.assert_raises_regex(TypeError, '^Level type mismatch'): + with pytest.raises(TypeError, match='^Level type mismatch'): idx.slice_locs(timedelta(seconds=30)) # TODO: Try creating a UnicodeDecodeError in exception message - with tm.assert_raises_regex(TypeError, '^Level type mismatch'): + with pytest.raises(TypeError, match='^Level type mismatch'): idx.slice_locs(df.index[1], (16, "a")) @@ -75,9 +73,9 @@ def test_slice_locs_not_sorted(): lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " - "MultiIndex lexsort depth", - index.slice_locs, (1, 0, 1), (2, 1, 0)) + msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" + with pytest.raises(KeyError, match=msg): + index.slice_locs((1, 0, 1), (2, 1, 0)) # works sorted_index, _ = index.sortlevel(0) @@ -172,7 +170,7 @@ def test_get_indexer(): idx2 = Index(lrange(20)) msg = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, msg): + with pytest.raises(InvalidIndexError, match=msg): idx1.get_indexer(idx2) @@ -218,8 +216,8 @@ def test_get_indexer_consistency(idx): assert indexer.dtype == np.intp else: e = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, e): - indexer = idx.get_indexer(idx[0:2]) + with pytest.raises(InvalidIndexError, match=e): + idx.get_indexer(idx[0:2]) indexer, _ = idx.get_indexer_non_unique(idx[0:2]) assert isinstance(indexer, np.ndarray) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 4d08fa7cef7a4..2ec08fa89d133 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -139,16 +139,16 @@ def take_invalid_kwargs(): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') def test_isna_behavior(idx): @@ -183,8 +183,8 @@ def test_million_record_attribute_error(): df = pd.DataFrame({'a': r, 'b': r}, index=pd.MultiIndex.from_tuples([(x, x) for x in r])) - with tm.assert_raises_regex(AttributeError, - "'Series' object has no attribute 'foo'"): + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): df['a'].foo() @@ -197,18 +197,18 @@ def 
test_metadata_immutable(idx): levels, labels = idx.levels, idx.labels # shouldn't be able to set at either the top level or base level mutable_regex = re.compile('does not support mutable operations') - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): levels[0] = levels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): levels[0][0] = levels[0][0] # ditto for labels - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): labels[0] = labels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): labels[0][0] = labels[0][0] # and for names names = idx.names - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): names[0] = names[0] @@ -248,8 +248,9 @@ def test_rangeindex_fallback_coercion_bug(): def test_hash_error(indices): index = indices - tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__, hash, indices) + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): + hash(indices) def test_mutability(indices): @@ -259,9 +260,8 @@ def test_mutability(indices): def test_wrong_number_names(indices): - def testit(ind): - ind.names = ["apple", "banana", "carrot"] - tm.assert_raises_regex(ValueError, "^Length", testit, indices) + with pytest.raises(ValueError, match="^Length"): + indices.names = ["apple", "banana", "carrot"] def test_memory_usage(idx): diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 8d89ad9f1cd0c..f50ee29ba31cd 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -46,8 +46,8 @@ def test_join_level_corner_case(idx): result = index.join(idx, level='second') assert isinstance(result, MultiIndex) - tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", - idx.join, idx, level=1) + with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"): + idx.join(idx, level=1) def test_join_self(idx, join_type): diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 7a91ac6d96220..73e6579cf7771 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -20,7 +20,7 @@ def test_fillna(idx): elif isinstance(index, MultiIndex): idx = index.copy() msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.fillna(idx[0]) else: idx = index.copy() @@ -29,7 +29,7 @@ def test_fillna(idx): assert result is not idx msg = "'value' must be a scalar, passed: " - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.fillna([idx[0]]) idx = index.copy() @@ -71,7 +71,7 @@ def test_dropna(): tm.assert_index_equal(idx.dropna(how='all'), exp) msg = "invalid how option: xxx" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.dropna(how='xxx') @@ -80,7 +80,7 @@ def test_nulls(idx): # as these are adequately tested for function elsewhere msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.isna() diff --git a/pandas/tests/indexes/multi/test_names.py 
b/pandas/tests/indexes/multi/test_names.py index 68e8bb0cf58f2..1f63f1ef100c1 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- - +import pytest import pandas as pd import pandas.util.testing as tm from pandas import MultiIndex @@ -92,23 +92,22 @@ def test_names(idx, index_names): # setting bad names on existing index = idx - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", - list(index.names) + ["third"]) - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", []) + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", list(index.names) + ["third"]) + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", []) # initializing with bad names (should always be equivalent) major_axis, minor_axis = idx.levels major_labels, minor_labels = idx.labels - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first']) - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first', 'second', 'third']) + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) # names are assigned index.names = ["a", "b"] @@ -120,5 +119,5 @@ def test_names(idx, index_names): def test_duplicate_level_names_access_raises(idx): # GH19029 idx.names = ['foo', 'foo'] - tm.assert_raises_regex(ValueError, 'name foo occurs multiple times', - idx._get_level_number, 'foo') + with pytest.raises(ValueError, match='name foo occurs multiple times'): + idx._get_level_number('foo') diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index f7651ac258d48..049096ad92c76 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- - +import pytest import numpy as np import pandas as pd @@ -40,13 +40,11 @@ def test_reindex_level(idx): exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) - tm.assert_raises_regex(TypeError, "Fill method not supported", - idx.reindex, idx, - method='pad', level='second') + with pytest.raises(TypeError, match="Fill method not supported"): + idx.reindex(idx, method='pad', level='second') - tm.assert_raises_regex(TypeError, "Fill method not supported", - index.reindex, index, method='bfill', - level='first') + with pytest.raises(TypeError, match="Fill method not supported"): + index.reindex(index, method='bfill', level='first') def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): @@ -96,7 +94,7 @@ def test_reindex_base(idx): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): + with pytest.raises(ValueError, match='Invalid fill method'): idx.get_indexer(idx, method='invalid') @@ -104,6 +102,7 @@ def test_reindex_non_unique(): idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)]) a = pd.Series(np.arange(4), index=idx) 
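The test_names and test_wrong_number_names rewrites above illustrate two further details: arbitrary statements, including attribute assignment, can sit inside the with block (the old callable form needed setattr or a lambda for these), and a leading ^ in the pattern anchors the search at the start of the message, since re.search honors anchors. A self-contained sketch with a hypothetical names property:

    import pytest

    class Box(object):
        _names = ("a", "b")

        @property
        def names(self):
            return self._names

        @names.setter
        def names(self, value):
            if len(value) != 2:
                raise ValueError("Length of names must be 2")
            self._names = tuple(value)

    def test_setter_raises():
        box = Box()
        with pytest.raises(ValueError, match="^Length"):
            box.names = ["a", "b", "c"]  # assignment raises inside the block
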
    new_idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
-    with tm.assert_raises_regex(ValueError,
-                                'cannot handle a non-unique multi-index!'):
+
+    msg = 'cannot handle a non-unique multi-index!'
+    with pytest.raises(ValueError, match=msg):
         a.reindex(new_idx)
diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py
index 7750379bff445..dd747a0283e45 100644
--- a/pandas/tests/indexes/multi/test_reshape.py
+++ b/pandas/tests/indexes/multi/test_reshape.py
@@ -27,7 +27,7 @@ def test_insert(idx):

     # key wrong length
     msg = "Item must have length equal to number of levels"
-    with tm.assert_raises_regex(ValueError, msg):
+    with pytest.raises(ValueError, match=msg):
         idx.insert(0, ('foo2',))

     left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]],
diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py
index 46d7a27e02aec..34da3df4fb16e 100644
--- a/pandas/tests/indexes/multi/test_set_ops.py
+++ b/pandas/tests/indexes/multi/test_set_ops.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-

+import pytest
 import numpy as np

 import pandas as pd
@@ -7,18 +8,14 @@ from pandas import MultiIndex, Series


-def test_setops_errorcases(idx):
-    # # non-iterable input
-    cases = [0.5, 'xxx']
-    methods = [idx.intersection, idx.union, idx.difference,
-               idx.symmetric_difference]
-
-    for method in methods:
-        for case in cases:
-            tm.assert_raises_regex(TypeError,
-                                   "Input must be Index "
-                                   "or array-like",
-                                   method, case)
+@pytest.mark.parametrize("case", [0.5, "xxx"])
+@pytest.mark.parametrize("method", ["intersection", "union",
+                                    "difference", "symmetric_difference"])
+def test_set_ops_error_cases(idx, case, method):
+    # non-iterable input
+    msg = "Input must be Index or array-like"
+    with pytest.raises(TypeError, match=msg):
+        getattr(idx, method)(case)


 def test_intersection_base(idx):
@@ -36,8 +33,8 @@ def test_intersection_base(idx):
     assert tm.equalContents(result, second)

     msg = "other must be a MultiIndex or a list of tuples"
-    with tm.assert_raises_regex(TypeError, msg):
-        result = first.intersection([1, 2, 3])
+    with pytest.raises(TypeError, match=msg):
+        first.intersection([1, 2, 3])


 def test_union_base(idx):
@@ -55,8 +52,8 @@ def test_union_base(idx):
     assert tm.equalContents(result, everything)

     msg = "other must be a MultiIndex or a list of tuples"
-    with tm.assert_raises_regex(TypeError, msg):
-        result = first.union([1, 2, 3])
+    with pytest.raises(TypeError, match=msg):
+        first.union([1, 2, 3])


 def test_difference_base(idx):
@@ -75,8 +72,8 @@ def test_difference_base(idx):
     assert tm.equalContents(result, answer)

     msg = "other must be a MultiIndex or a list of tuples"
-    with tm.assert_raises_regex(TypeError, msg):
-        result = first.difference([1, 2, 3])
+    with pytest.raises(TypeError, match=msg):
+        first.difference([1, 2, 3])


 def test_symmetric_difference(idx):
@@ -94,7 +91,7 @@ def test_symmetric_difference(idx):
     assert tm.equalContents(result, answer)

     msg = "other must be a MultiIndex or a list of tuples"
-    with tm.assert_raises_regex(TypeError, msg):
+    with pytest.raises(TypeError, match=msg):
         first.symmetric_difference([1, 2, 3])


@@ -159,9 +156,10 @@ def test_difference(idx):
                                        'foo', 'two'), ('qux', 'one'),
                                        ('qux', 'two')])
     expected.names = first.names
     assert first.names == result.names
-    tm.assert_raises_regex(TypeError, "other must be a MultiIndex "
-                           "or a list of tuples",
-                           first.difference, [1, 2, 3, 4, 5])
+
+    msg = "other must be a MultiIndex or a list of tuples"
+    with pytest.raises(TypeError, match=msg):
+
first.difference([1, 2, 3, 4, 5]) def test_union(idx): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 80e2b811ac062..7ad9b43e4c723 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -80,16 +80,16 @@ def test_numpy_argsort(idx): # backwards compatibility concerns if isinstance(type(idx), (CategoricalIndex, RangeIndex)): msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, - np.argsort, idx, axis=1) + with pytest.raises(ValueError, match=msg): + np.argsort(idx, axis=1) msg = "the 'kind' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - idx, kind='mergesort') + with pytest.raises(ValueError, match=msg): + np.argsort(idx, kind='mergesort') msg = "the 'order' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - idx, order=('a', 'b')) + with pytest.raises(ValueError, match=msg): + np.argsort(idx, order=('a', 'b')) def test_unsortedindex(): diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index f7c2bf3d6bf4f..3c384eed0a848 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -15,7 +15,7 @@ def test_astype_raises(self, dtype): # GH#13149, GH#13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') msg = 'Cannot cast PeriodArray to dtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.astype(dtype) def test_astype_conversion(self): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index fb74244d815c2..1ebc0ecb2fc02 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -199,7 +199,7 @@ def test_constructor_dtype(self): assert res.dtype == 'period[M]' msg = 'specified freq and dtype are different' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex(['2011-01'], freq='M', dtype='period[D]') def test_constructor_empty(self): @@ -208,7 +208,7 @@ def test_constructor_empty(self): assert len(idx) == 0 assert idx.freq == 'M' - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): pd.PeriodIndex([]) def test_constructor_pi_nat(self): @@ -234,35 +234,35 @@ def test_constructor_pi_nat(self): idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M') tm.assert_index_equal(idx, exp) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex([pd.NaT, pd.NaT]) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex(np.array([pd.NaT, pd.NaT])) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex(['NaT', 'NaT']) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex(np.array(['NaT', 'NaT'])) def test_constructor_incompat_freq(self): msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, 
match=msg): PeriodIndex([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')]) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')])) # first element is pd.NaT - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')]) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')])) @@ -339,15 +339,15 @@ def test_constructor_freq_mult(self): msg = ('Frequency must be positive, because it' ' represents span: -1M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(['2011-01'], freq='-1M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(['2011-01'], freq='0M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range('2011-01', periods=3, freq='0M') @pytest.mark.parametrize('freq', ['A', 'M', 'D', 'T', 'S']) @@ -442,12 +442,12 @@ def test_constructor_error(self): end_intv = Period('2006-12-31', ('w', 1)) msg = 'start and end must have same freq' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(start=start, end=end_intv) msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(start=start) @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 880e37c59c9c4..c92769311d848 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -101,10 +101,9 @@ def test_getitem_partial(self): tm.assert_series_equal(exp, result) ts = ts[10:].append(ts[10:]) - tm.assert_raises_regex(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) + msg = "left slice bound for non-unique label: '2008'" + with pytest.raises(KeyError, match=msg): + ts[slice('2008', '2009')] def test_getitem_datetime(self): rng = period_range(start='2012-01-01', periods=10, freq='W-MON') @@ -313,9 +312,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -362,9 +361,9 @@ def test_get_loc(self): assert idx0.get_loc(p2) == expected_idx1_p2 assert idx0.get_loc(str(p2)) == expected_idx1_p2 - tm.assert_raises_regex(KeyError, - "Cannot interpret 'foo' as period", - idx0.get_loc, 'foo') + msg = "Cannot interpret 'foo' as period" + with pytest.raises(KeyError, match=msg): + 
idx0.get_loc('foo') pytest.raises(KeyError, idx0.get_loc, 1.1) pytest.raises(TypeError, idx0.get_loc, idx0) @@ -379,9 +378,10 @@ def test_get_loc(self): assert idx1.get_loc(p2) == expected_idx1_p2 assert idx1.get_loc(str(p2)) == expected_idx1_p2 - tm.assert_raises_regex(KeyError, - "Cannot interpret 'foo' as period", - idx1.get_loc, 'foo') + msg = "Cannot interpret 'foo' as period" + with pytest.raises(KeyError, match=msg): + idx1.get_loc('foo') + pytest.raises(KeyError, idx1.get_loc, 1.1) pytest.raises(TypeError, idx1.get_loc, idx1) @@ -564,12 +564,13 @@ def test_get_loc2(self): tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-02T12', method='nearest', tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, - 'unit abbreviation w/o a number'): + + msg = 'unit abbreviation w/o a number' + with pytest.raises(ValueError, match=msg): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') msg = 'Input has different freq from PeriodArray\\(freq=D\\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') with pytest.raises(KeyError): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') @@ -599,7 +600,7 @@ def test_get_indexer2(self): np.array([0, -1, 1], dtype=np.intp)) msg = 'Input has different freq from PeriodArray\\(freq=H\\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.get_indexer(target, 'nearest', tolerance='1 minute') tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index ede5256db2f1d..01347db4db3b2 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -65,17 +65,19 @@ def test_numpy_minmax(self): assert np.max(pr) == Period('2016-01-20', freq='D') errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, errmsg, np.min, pr, out=0) - tm.assert_raises_regex(ValueError, errmsg, np.max, pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.min(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(pr, out=0) assert np.argmin(pr) == 0 assert np.argmax(pr) == 5 errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex( - ValueError, errmsg, np.argmin, pr, out=0) - tm.assert_raises_regex( - ValueError, errmsg, np.argmax, pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmin(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(pr, out=0) def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index fcf1156266880..137a7be987d5b 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -41,12 +41,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M')) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts[::0] + with pytest.raises(ValueError, 
match='slice step cannot be zero'): + ts.loc[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 8d10cb8e42a94..ddb3fe686534a 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -92,8 +92,8 @@ def test_difference_freq(self): def test_hash_error(self): index = period_range('20010101', periods=10) - with tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__): + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): hash(index) def test_make_time_series(self): @@ -452,8 +452,8 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(index, 2), expected) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex( - ValueError, msg, np.repeat, index, 2, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(index, 2, axis=1) def test_pindex_multiples(self): pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') @@ -568,5 +568,5 @@ def test_maybe_convert_timedelta(): assert pi._maybe_convert_timedelta(2) == 2 offset = offsets.BusinessDay() - with tm.assert_raises_regex(ValueError, 'freq'): + with pytest.raises(ValueError, match='freq'): pi._maybe_convert_timedelta(offset) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 11d38df1dd49c..aa300111ba67a 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -65,31 +65,31 @@ def test_errors(self): # not enough params msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(start='2017Q1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(end='2017Q1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(periods=5) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range() # too many params - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') # start/end NaT msg = 'start and end must not be NaT' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(start=NaT, end='2018Q1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(start='2017Q1', end=NaT) # invalid periods param msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): period_range(start='2017Q1', periods='foo') diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index c4dd23b1708db..c8b7d82855519 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -106,7 +106,7 @@ def test_union_misc(self): index.union(index2) msg = 'can only call with other PeriodIndex-ed objects' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.join(index.to_timestamp()) index3 = 
period_range('1/1/2000', '1/20/2000', freq='2D') diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 8d09273bde63d..c8e1e6c1f3525 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -180,7 +180,7 @@ def test_to_period_monthish(self): assert prng.freq == 'M' msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range('01-Jan-2012', periods=8, freq='EOM') def test_period_dt64_round_trip(self): @@ -219,11 +219,11 @@ def test_searchsorted(self, freq): assert pidx.searchsorted(p2) == 3 msg = "Input has different freq=H from PeriodIndex" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): pidx.searchsorted(pd.Period('2014-01-01', freq='H')) msg = "Input has different freq=5D from PeriodIndex" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) @@ -260,7 +260,7 @@ def test_to_timestamp_pi_nat(self): msg = ('Frequency must be positive, because it' ' represents span: -2A') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): result.to_period(freq='-2A') def test_to_timestamp_preserve_name(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 724dffc49dd3b..666420a6a9b06 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -267,7 +267,7 @@ def test_constructor_int_dtype_nan_raises(self, dtype): # see gh-15187 data = [np.nan] msg = "cannot convert" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Index(data, dtype=dtype) @pytest.mark.parametrize("klass,dtype,na_val", [ @@ -464,29 +464,28 @@ def test_constructor_empty_special(self, empty, klass): assert isinstance(empty, klass) assert not len(empty) - def test_constructor_nonhashable_name(self, indices): + def test_constructor_non_hashable_name(self, indices): # GH 20527 if isinstance(indices, MultiIndex): pytest.skip("multiindex handled in test_multi.py") - name = ['0'] message = "Index.name must be a hashable type" - tm.assert_raises_regex(TypeError, message, name=name) + renamed = [['1']] # With .rename() - renamed = [['1']] - tm.assert_raises_regex(TypeError, message, - indices.rename, name=renamed) + with pytest.raises(TypeError, match=message): + indices.rename(name=renamed) + # With .set_names() - tm.assert_raises_regex(TypeError, message, - indices.set_names, names=renamed) + with pytest.raises(TypeError, match=message): + indices.set_names(names=renamed) def test_constructor_overflow_int64(self): # see gh-15832 msg = ("The elements provided in the data cannot " "all be casted to the dtype int64") - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Index([np.iinfo(np.uint64).max - 1], dtype="int64") @pytest.mark.xfail(reason="see GH#21311: Index " @@ -494,7 +493,7 @@ def test_constructor_overflow_int64(self): strict=True) def test_constructor_cast(self): msg = "could not convert string to float" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Index(["a", "b", "c"], dtype=float) def test_view_with_args(self): @@ -1327,10 +1326,10 @@ def test_get_indexer_invalid(self): # GH10411 index = 
Index(np.arange(10)) - with tm.assert_raises_regex(ValueError, 'tolerance argument'): + with pytest.raises(ValueError, match='tolerance argument'): index.get_indexer([1, 0], tolerance=1) - with tm.assert_raises_regex(ValueError, 'limit argument'): + with pytest.raises(ValueError, match='limit argument'): index.get_indexer([1, 0], limit=1) @pytest.mark.parametrize( @@ -1378,7 +1377,7 @@ def test_get_indexer_nearest_listlike_tolerance(self, tolerance, def test_get_indexer_nearest_error(self): index = Index(np.arange(10)) - with tm.assert_raises_regex(ValueError, 'limit argument'): + with pytest.raises(ValueError, match='limit argument'): index.get_indexer([1, 0], method='nearest', limit=1) with pytest.raises(ValueError, match='tolerance size must match'): @@ -1465,7 +1464,7 @@ def test_get_loc_raises_bad_label(self, method): else: msg = 'invalid key' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.get_loc([1, 2], method=method) @pytest.mark.parametrize("method,loc", [ @@ -1478,32 +1477,32 @@ def test_get_loc_tolerance(self, method, loc): @pytest.mark.parametrize("method", ['pad', 'backfill', 'nearest']) def test_get_loc_outside_tolerance_raises(self, method): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(KeyError, '1.1'): + with pytest.raises(KeyError, match='1.1'): index.get_loc(1.1, method, tolerance=0.05) def test_get_loc_bad_tolerance_raises(self): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(ValueError, 'must be numeric'): + with pytest.raises(ValueError, match='must be numeric'): index.get_loc(1.1, 'nearest', tolerance='invalid') def test_get_loc_tolerance_no_method_raises(self): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(ValueError, 'tolerance .* valid if'): + with pytest.raises(ValueError, match='tolerance .* valid if'): index.get_loc(1.1, tolerance=1) def test_get_loc_raises_missized_tolerance(self): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(ValueError, 'tolerance size must match'): + with pytest.raises(ValueError, match='tolerance size must match'): index.get_loc(1.1, 'nearest', tolerance=[1, 1]) def test_get_loc_raises_object_nearest(self): index = pd.Index(['a', 'c']) - with tm.assert_raises_regex(TypeError, 'unsupported operand type'): + with pytest.raises(TypeError, match='unsupported operand type'): index.get_loc('a', method='nearest') def test_get_loc_raises_object_tolerance(self): index = pd.Index(['a', 'c']) - with tm.assert_raises_regex(TypeError, 'unsupported operand type'): + with pytest.raises(TypeError, match='unsupported operand type'): index.get_loc('a', method='pad', tolerance='invalid') @pytest.mark.parametrize("dtype", [int, float]) @@ -1585,10 +1584,10 @@ def test_slice_locs_na(self): def test_slice_locs_na_raises(self): index = Index([np.nan, 1, 2]) - with tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): index.slice_locs(start=1.5) - with tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): index.slice_locs(end=1.5) @pytest.mark.parametrize("in_slice,expected", [ @@ -1627,7 +1626,7 @@ def test_drop_by_str_label(self): @pytest.mark.parametrize("keys", [['foo', 'bar'], ['1', 'bar']]) def test_drop_by_str_label_raises_missing_keys(self, keys): - with tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): self.strIndex.drop(keys) def test_drop_by_str_label_errors_ignore(self): @@ -1656,7 +1655,7 @@ def test_drop_by_numeric_label_loc(self): def 
test_drop_by_numeric_label_raises_missing_keys(self): index = Index([1, 2, 3]) - with tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): index.drop([3, 4]) @pytest.mark.parametrize("key,expected", [ @@ -1789,7 +1788,7 @@ def test_isin_level_kwarg(self, level, index): # Float64Index overrides isin, so must be checked separately Float64Index([1.0, 2.0, 3.0, 4.0])]) def test_isin_level_kwarg_raises_bad_index(self, level, index): - with tm.assert_raises_regex(IndexError, 'Too many levels'): + with pytest.raises(IndexError, match='Too many levels'): index.isin([], level=level) @pytest.mark.parametrize("level", [1.0, 'foobar', 'xyzzy', np.nan]) @@ -1797,7 +1796,7 @@ def test_isin_level_kwarg_raises_bad_index(self, level, index): Index(['qux', 'baz', 'foo', 'bar']), Float64Index([1.0, 2.0, 3.0, 4.0])]) def test_isin_level_kwarg_raises_key(self, level, index): - with tm.assert_raises_regex(KeyError, 'must be same as name'): + with pytest.raises(KeyError, match='must be same as name'): index.isin([], level=level) @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) @@ -1860,7 +1859,7 @@ def test_str_attribute(self, method): MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), PeriodIndex(start='2000', end='2010', freq='A')]) def test_str_attribute_raises(self, index): - with tm.assert_raises_regex(AttributeError, 'only use .str accessor'): + with pytest.raises(AttributeError, match='only use .str accessor'): index.str.repeat(2) @pytest.mark.parametrize("expand,expected", [ @@ -1951,14 +1950,14 @@ def test_take_fill_value_none_raises(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.take(np.array([1, 0, -5]), fill_value=True) def test_take_bad_bounds_raises(self): index = pd.Index(list('ABC'), name='xxx') - with tm.assert_raises_regex(IndexError, 'out of bounds'): + with pytest.raises(IndexError, match='out of bounds'): index.take(np.array([1, -5])) @pytest.mark.parametrize("name", [None, 'foobar']) @@ -2032,7 +2031,7 @@ def test_equals_op_multiindex_identify(self): def test_equals_op_mismatched_multiindex_raises(self, index): df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): df.index == index def test_equals_op_index_vs_mi_same_length(self): @@ -2240,7 +2239,7 @@ def test_iadd_preserves_name(self): def test_cached_properties_not_settable(self): index = pd.Index([1, 2, 3]) - with tm.assert_raises_regex(AttributeError, "Can't set attribute"): + with pytest.raises(AttributeError, match="Can't set attribute"): index.is_unique = False def test_get_duplicates_deprecated(self): @@ -2277,10 +2276,10 @@ def create_index(self): def test_argsort(self): index = self.create_index() if PY36: - with tm.assert_raises_regex(TypeError, "'>|<' not supported"): + with pytest.raises(TypeError, match="'>|<' not supported"): result = index.argsort() elif PY3: - with tm.assert_raises_regex(TypeError, "unorderable types"): + with pytest.raises(TypeError, match="unorderable types"): result = index.argsort() else: result = index.argsort() @@ -2290,10 +2289,10 @@ def test_argsort(self): def test_numpy_argsort(self): index = self.create_index() if PY36: - with 
tm.assert_raises_regex(TypeError, "'>|<' not supported"): + with pytest.raises(TypeError, match="'>|<' not supported"): result = np.argsort(index) elif PY3: - with tm.assert_raises_regex(TypeError, "unorderable types"): + with pytest.raises(TypeError, match="unorderable types"): result = np.argsort(index) else: result = np.argsort(index) @@ -2462,7 +2461,7 @@ def test_dropna_dt_like(self, how, index, expected): def test_dropna_invalid_how_raises(self): msg = "invalid how option: xxx" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.Index([1, 2, 3]).dropna(how='xxx') def test_get_combined_index(self): @@ -2586,7 +2585,7 @@ def test_generated_op_names(opname, indices): @pytest.mark.parametrize('index_maker', tm.index_subclass_makers_generator()) def test_index_subclass_constructor_wrong_kwargs(index_maker): # GH #19348 - with tm.assert_raises_regex(TypeError, 'unexpected keyword argument'): + with pytest.raises(TypeError, match='unexpected keyword argument'): index_maker(foo='bar') diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 5c4e4d2417957..6c5a70d76e3b5 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -342,7 +342,7 @@ def test_append(self): result = ci.append([]) tm.assert_index_equal(result, ci, exact=True) - # appending with different categories or reoreded is not ok + # appending with different categories or reordered is not ok pytest.raises( TypeError, lambda: ci.append(ci.values.set_categories(list('abcd')))) @@ -481,7 +481,7 @@ def test_reindex_base(self): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, "Invalid fill method"): + with pytest.raises(ValueError, match="Invalid fill method"): idx.get_indexer(idx, method="invalid") def test_reindexing(self): @@ -758,7 +758,7 @@ def test_equals_categorical(self): assert (ci1 == ci1.values).all() # invalid comparisons - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): ci1 == Index(['a', 'b', 'c']) pytest.raises(TypeError, lambda: ci1 == ci2) pytest.raises( @@ -1000,8 +1000,8 @@ def test_fillna_categorical(self): tm.assert_index_equal(idx.fillna(1.0), exp) # fill by value not in categories raises ValueError - with tm.assert_raises_regex(ValueError, - 'fill value must be in categories'): + msg = 'fill value must be in categories' + with pytest.raises(ValueError, match=msg): idx.fillna(2.0) def test_take_fill_value(self): @@ -1055,9 +1055,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -1093,9 +1093,9 @@ def test_take_fill_value_datetime(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -1106,16 +1106,16 @@ def 
test_take_invalid_kwargs(self): indices = [1, 0, -1] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') @pytest.mark.parametrize('dtype, engine_type', [ (np.int8, libindex.Int8Engine), diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 8373cbc89149a..c125db16bcbff 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -232,7 +232,7 @@ def test_astype(self): def test_type_coercion_fail(self, any_int_dtype): # see gh-15832 msg = "Trying to coerce float values to integers" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Index([1, 2, 3.5], dtype=any_int_dtype) def test_type_coercion_valid(self, float_dtype): @@ -288,7 +288,7 @@ def test_get_loc(self): pytest.raises(KeyError, idx.get_loc, True) pytest.raises(KeyError, idx.get_loc, False) - with tm.assert_raises_regex(ValueError, 'must be numeric'): + with pytest.raises(ValueError, match='must be numeric'): idx.get_loc(1.4, method='nearest', tolerance='foo') with pytest.raises(ValueError, match='must contain numeric elements'): @@ -393,9 +393,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -540,7 +540,7 @@ def test_take_fill_value(self): "{name} cannot contain NA").format(name=name) # fill_value=True - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -549,9 +549,9 @@ def test_take_fill_value(self): expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -613,11 +613,11 @@ def test_constructor_corner(self): # preventing casting arr = np.array([1, '2', 3, '4'], dtype=object) - with tm.assert_raises_regex(TypeError, 'casting'): + with pytest.raises(TypeError, match='casting'): Int64Index(arr) arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] - with tm.assert_raises_regex(TypeError, 'casting'): + with pytest.raises(TypeError, match='casting'): Int64Index(arr_with_floats) def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): @@ -625,7 +625,7 @@ def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): # see gh-15832 msg = "Trying to coerce negative values to unsigned integers" - with tm.assert_raises_regex(OverflowError, msg): + 
with pytest.raises(OverflowError, match=msg): Index([-1], dtype=uint_dtype) def test_coerce_list(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index efea9b58ecb7a..d0f8768456bc5 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -63,11 +63,9 @@ def test_binops_pow(self): self.check_binop(ops, scalars, idxs) def test_too_many_names(self): - def testit(): + with pytest.raises(ValueError, match="^Length"): self.index.names = ["roger", "harold"] - tm.assert_raises_regex(ValueError, "^Length", testit) - def test_constructor(self): index = RangeIndex(5) expected = np.arange(5, dtype=np.int64) @@ -91,7 +89,7 @@ def test_constructor(self): tm.assert_index_equal(Index(expected), index) msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): RangeIndex() for index in [RangeIndex(0), RangeIndex(start=0), RangeIndex(stop=0), @@ -103,7 +101,7 @@ def test_constructor(self): assert index._step == 1 tm.assert_index_equal(Index(expected), index) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): RangeIndex(name='Foo') for index in [RangeIndex(0, name='Foo'), @@ -765,7 +763,7 @@ def test_take_fill_value(self): # fill_value msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -775,9 +773,9 @@ def test_take_fill_value(self): tm.assert_index_equal(result, expected) msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 54f1ac601fd69..1a0481b730618 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -75,5 +75,5 @@ def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) msg = 'Cannot cast TimedeltaIndex to dtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.astype(dtype) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index a5cfad98b31c1..1abda624777c8 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -57,7 +57,7 @@ def test_constructor_coverage(self): tm.assert_index_equal(rng, exp) msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): TimedeltaIndex(start='1 days', periods='foo', freq='D') pytest.raises(ValueError, TimedeltaIndex, start='1 days', diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index e0e932efafd55..bfed4114929b7 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -101,16 +101,16 @@ def test_take_invalid_kwargs(self): indices = [1, 6, 5, 9, 10, 13, 
15, 3] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') # TODO: This method came from test_timedelta; de-dup with version above def test_take2(self): @@ -151,9 +151,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -239,8 +239,8 @@ def test_delete(self): assert result.freq == expected.freq with pytest.raises((IndexError, ValueError)): - # either depeidnig on numpy version - result = idx.delete(5) + # either depending on numpy version + idx.delete(5) def test_delete_slice(self): idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') @@ -285,8 +285,7 @@ def test_get_loc(self): assert idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)) == 1 - with tm.assert_raises_regex(ValueError, - 'unit abbreviation w/o a number'): + with pytest.raises(ValueError, match='unit abbreviation w/o a number'): idx.get_loc(idx[1], method='nearest', tolerance='foo') with pytest.raises( diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index a8cfdd0add178..2fc0a49d789fd 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -61,17 +61,19 @@ def test_numpy_minmax(self): assert np.max(td) == Timedelta('16820 days') errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, errmsg, np.min, td, out=0) - tm.assert_raises_regex(ValueError, errmsg, np.max, td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.min(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(td, out=0) assert np.argmin(td) == 0 assert np.argmax(td) == 5 errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex( - ValueError, errmsg, np.argmin, td, out=0) - tm.assert_raises_regex( - ValueError, errmsg, np.argmax, td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmin(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(td, out=0) def test_value_counts_unique(self): # GH 7735 @@ -317,16 +319,16 @@ def test_freq_setter_errors(self): # setting with an incompatible freq msg = ('Inferred frequency 2D from passed values does not conform to ' 'passed frequency 5D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.freq = '5D' # setting with a non-fixed frequency msg = r'<2 \* BusinessDays> is a non-fixed frequency' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.freq = '2B' # setting with non-freq string - with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid 
frequency'): idx.freq = 'foo' diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index 4dfce3dbe23a6..62bf2a0b4a1cf 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -2,7 +2,6 @@ import pytest import pandas as pd -import pandas.util.testing as tm from pandas import Series, Timedelta, timedelta_range from pandas.util.testing import assert_series_equal @@ -78,9 +77,9 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py index b1d8a12943dca..abd08e37681dd 100644 --- a/pandas/tests/indexes/timedeltas/test_scalar_compat.py +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -3,6 +3,7 @@ Tests for TimedeltaIndex methods behaving like their Timedelta counterparts """ +import pytest import numpy as np import pandas as pd @@ -51,13 +52,13 @@ def test_tdi_round(self): assert elt.round(freq='H') == expected_elt msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): td.round(freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): elt.round(freq='foo') msg = " is a non-fixed frequency" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): td.round(freq='M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): elt.round(freq='M') diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 9bc2e93f8468c..1d068971fad2d 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -199,8 +199,8 @@ def test_pickle(self): def test_hash_error(self): index = timedelta_range('1 days', periods=10) - with tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__): + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): hash(index) def test_append_join_nondatetimeindex(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index e77c03465d047..238fd861a92ab 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -36,10 +36,10 @@ def test_timedelta_range(self): arr = np.arange(10).reshape(2, 5) df = pd.DataFrame(np.arange(10).reshape(2, 5)) for arg in (arr, df): - with tm.assert_raises_regex(TypeError, "1-d array"): + with pytest.raises(TypeError, match="1-d array"): to_timedelta(arg) for errors in ['ignore', 'raise', 'coerce']: - with tm.assert_raises_regex(TypeError, "1-d 
array"): + with pytest.raises(TypeError, match="1-d array"): to_timedelta(arg, errors=errors) # issue10583 @@ -65,18 +65,18 @@ def test_errors(self): # not enough params msg = ('Of the four parameters: start, end, periods, and freq, ' 'exactly three must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(start='0 days') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(end='5 days') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(periods=2) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range() # too many params - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(start='0 days', end='5 days', periods=10, freq='H') diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 95a77f1b7fe44..b56dd3cababb9 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -110,8 +110,8 @@ def test_to_timedelta_invalid(self): # bad value for errors parameter msg = "errors must be one of" - tm.assert_raises_regex(ValueError, msg, to_timedelta, - ['foo'], errors='never') + with pytest.raises(ValueError, match=msg): + to_timedelta(['foo'], errors='never') # these will error pytest.raises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 3a235e1eeb0dc..b7443e242137b 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -362,10 +362,9 @@ def test_loc_listlike_dtypes(self): exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assert_raises_regex( - KeyError, - 'a list-indexer must only include values that are ' - 'in the categories'): + msg = ('a list-indexer must only include ' + 'values that are in the categories') + with pytest.raises(KeyError, match=msg): df.loc[['a', 'x']] # duplicated categories and codes @@ -387,10 +386,9 @@ def test_loc_listlike_dtypes(self): ]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assert_raises_regex( - KeyError, - 'a list-indexer must only include values ' - 'that are in the categories'): + msg = ('a list-indexer must only include values ' + 'that are in the categories') + with pytest.raises(KeyError, match=msg): df.loc[['a', 'x']] # contains unused category @@ -417,10 +415,9 @@ def test_loc_listlike_dtypes(self): categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assert_raises_regex( - KeyError, - 'a list-indexer must only include values ' - 'that are in the categories'): + msg = ('a list-indexer must only include values ' + 'that are in the categories') + with pytest.raises(KeyError, match=msg): df.loc[['a', 'x']] def test_get_indexer_array(self): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 65110d4955294..2bc3aefcf7eb1 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -374,14 +374,14 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype): msg = "Passed item and index have different timezone" if fill_val.tz: - 
with tm.assert_raises_regex(ValueError, msg):
+            with pytest.raises(ValueError, match=msg):
                 obj.insert(1, pd.Timestamp('2012-01-01'))

-            with tm.assert_raises_regex(ValueError, msg):
+            with pytest.raises(ValueError, match=msg):
                 obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))

         msg = "cannot insert DatetimeIndex with incompatible label"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             obj.insert(1, 1)

         pytest.xfail("ToDo: must coerce to object")
@@ -397,12 +397,12 @@ def test_insert_index_timedelta64(self):

         # ToDo: must coerce to object
         msg = "cannot insert TimedeltaIndex with incompatible label"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             obj.insert(1, pd.Timestamp('2012-01-01'))

         # ToDo: must coerce to object
         msg = "cannot insert TimedeltaIndex with incompatible label"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             obj.insert(1, 1)

     @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
@@ -603,7 +603,7 @@ def test_where_index_datetime(self):

         msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
                "of some kind")
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             obj.where(cond, fill_val)

         values = pd.Index(pd.date_range(fill_val, periods=4))
@@ -628,7 +628,7 @@ def test_where_index_datetimetz(self):

         msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
                "of some kind")
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             obj.where(cond, fill_val)

         values = pd.Index(pd.date_range(fill_val, periods=4))
diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
index 0a55b3f67dd3f..de91b8f4a796c 100644
--- a/pandas/tests/indexing/test_floats.py
+++ b/pandas/tests/indexing/test_floats.py
@@ -50,11 +50,9 @@ def test_scalar_error(self):

         s = Series(np.arange(len(i)), index=i)

-        def f():
+        msg = 'Cannot index by location index'
+        with pytest.raises(TypeError, match=msg):
             s.iloc[3.0]
-        tm.assert_raises_regex(TypeError,
-                               'Cannot index by location index',
-                               f)

         def f():
             s.iloc[3.0] = 0
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 6d0b516d8ebf0..53d07aeef304a 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -21,12 +21,10 @@ def test_iloc_exceeds_bounds(self):

         # GH6296
         # iloc should allow indexers that exceed the bounds
         df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE'))
-        expected = df

         # lists of positions should raise IndexErrror!
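Because match is interpreted as a regular expression, patterns elsewhere in this series escape metacharacters by hand, for example r"take\(\) got an unexpected keyword argument 'foo'" and r"\['y'\] not in index". When the expected message is meant literally, re.escape can build the pattern instead. A small sketch; take here is a hypothetical stand-in whose only job is to trigger the TypeError:

    import re
    import pytest

    def take(indices):
        # hypothetical stand-in; accepts no keyword arguments
        return list(indices)

    def test_unexpected_keyword_message():
        expected = "take() got an unexpected keyword argument 'foo'"
        # re.escape turns the literal message into a safe pattern
        with pytest.raises(TypeError, match=re.escape(expected)):
            take([0], foo=2)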
- with tm.assert_raises_regex(IndexError, - 'positional indexers ' - 'are out-of-bounds'): + msg = 'positional indexers are out-of-bounds' + with pytest.raises(IndexError, match=msg): df.iloc[:, [0, 1, 2, 3, 4, 5]] pytest.raises(IndexError, lambda: df.iloc[[1, 30]]) pytest.raises(IndexError, lambda: df.iloc[[1, -30]]) @@ -38,14 +36,14 @@ def test_iloc_exceeds_bounds(self): # still raise on a single indexer msg = 'single positional indexer is out-of-bounds' - with tm.assert_raises_regex(IndexError, msg): + with pytest.raises(IndexError, match=msg): df.iloc[30] pytest.raises(IndexError, lambda: df.iloc[-30]) # GH10779 # single positive/negative indexer exceeding Series bounds should raise # an IndexError - with tm.assert_raises_regex(IndexError, msg): + with pytest.raises(IndexError, match=msg): s.iloc[30] pytest.raises(IndexError, lambda: s.iloc[-30]) @@ -136,8 +134,8 @@ def test_iloc_getitem_invalid_scalar(self, dims): else: s = DataFrame(np.arange(100).reshape(10, 10)) - tm.assert_raises_regex(TypeError, 'Cannot index by location index', - lambda: s.iloc['a']) + with pytest.raises(TypeError, match='Cannot index by location index'): + s.iloc['a'] def test_iloc_array_not_mutating_negative_indices(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 3b95ba8e4b9d8..4236a80bc98f1 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -837,15 +837,14 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: s[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: s.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + s[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + s.loc[::0] with catch_warnings(record=True): simplefilter("ignore") - tm.assert_raises_regex(ValueError, - 'slice step cannot be zero', - lambda: s.ix[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + s.ix[::0] def test_indexing_assignment_dict_already_exists(self): df = DataFrame({'x': [1, 2, 6], @@ -1062,18 +1061,18 @@ def test_validate_indices_ok(): def test_validate_indices_low(): indices = np.asarray([0, -2]) - with tm.assert_raises_regex(ValueError, "'indices' contains"): + with pytest.raises(ValueError, match="'indices' contains"): validate_indices(indices, 2) def test_validate_indices_high(): indices = np.asarray([0, 1, 2]) - with tm.assert_raises_regex(IndexError, "indices are out"): + with pytest.raises(IndexError, match="indices are out"): validate_indices(indices, 2) def test_validate_indices_empty(): - with tm.assert_raises_regex(IndexError, "indices are out"): + with pytest.raises(IndexError, match="indices are out"): validate_indices(np.array([0, 1]), 0) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index dcf148f199d52..ea17844a75033 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -308,9 +308,9 @@ def test_getitem_partial_int(self): tm.assert_frame_equal(result, expected) # missing item: - with tm.assert_raises_regex(KeyError, '1'): + with pytest.raises(KeyError, match='1'): df[1] - with tm.assert_raises_regex(KeyError, r"'\[1\] not in index'"): + with pytest.raises(KeyError, match=r"'\[1\] not in index'"): df[[1]] def 
test_loc_multiindex_indexer_none(self): @@ -851,10 +851,10 @@ def f(): assert df.index.lexsort_depth == 2 df = df.sort_index(level=1, axis=0) assert df.index.lexsort_depth == 0 - with tm.assert_raises_regex( - UnsortedIndexError, - 'MultiIndex slicing requires the index to be ' - r'lexsorted: slicing on levels \[1\], lexsort depth 0'): + + msg = ('MultiIndex slicing requires the index to be ' + r'lexsorted: slicing on levels \[1\], lexsort depth 0') + with pytest.raises(UnsortedIndexError, match=msg): df.loc[(slice(None), slice('bar')), :] # GH 16734: not sorted, but no real slicing diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index d45209fd277f1..fbbfdfefb67e6 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -151,8 +151,8 @@ def test_at_to_fail(self): df.columns = ['x', 'x', 'z'] # Check that we get the correct value in the KeyError - tm.assert_raises_regex(KeyError, r"\['y'\] not in index", - lambda: df[['x', 'y', 'z']]) + with pytest.raises(KeyError, match=r"\['y'\] not in index"): + df[['x', 'y', 'z']] def test_at_with_tz(self): # gh-15822 diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b327b158adc24..97790920d46f7 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1055,8 +1055,8 @@ def test_zero_step_raises(self): def test_unbounded_slice_raises(self): def assert_unbounded_slice_error(slc): - tm.assert_raises_regex(ValueError, "unbounded slice", - lambda: BlockPlacement(slc)) + with pytest.raises(ValueError, match="unbounded slice"): + BlockPlacement(slc) assert_unbounded_slice_error(slice(None, None)) assert_unbounded_slice_error(slice(10, None)) @@ -1247,7 +1247,7 @@ def test_binop_other(self, op, value, dtype): if (op, dtype) in invalid: with pytest.raises(TypeError): - result = op(s, e.value) + op(s, e.value) else: # FIXME: Since dispatching to Series, this test no longer # asserts anything meaningful @@ -1281,5 +1281,5 @@ def test_validate_ndim(): placement = slice(2) msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): make_block(values, placement, ndim=2) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index e407573c9a462..6027fc08624df 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1209,7 +1209,7 @@ def test_text_color_threshold(self, c_map, expected): def test_text_color_threshold_raises(self, text_color_threshold): df = pd.DataFrame([[1, 2], [2, 4]], columns=['A', 'B']) msg = "`text_color_threshold` must be a value from 0 to 1." - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.style.background_gradient( text_color_threshold=text_color_threshold)._compute() diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 228373a7bf545..3792da4b29ef9 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -54,7 +54,7 @@ def test_to_csv_defualt_encoding(self): # Python 3 is uft-8. 
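The to_csv hunks just below nest pytest.raises inside tm.ensure_clean, so the temporary file is still cleaned up when the expected error fires. The same shape using only stock pytest, with the tmp_path fixture standing in for tm.ensure_clean and a stand-in writer for DataFrame.to_csv:

    import pytest

    def write_csv(path, quotechar='"'):
        # stand-in writer; rejects a missing quotechar roughly the way
        # DataFrame.to_csv does when quoting is requested
        if quotechar is None:
            raise TypeError("quotechar must be a one-character string")
        with open(path, "w") as f:
            f.write("a,b\n1,2\n")

    def test_write_csv_rejects_none_quotechar(tmp_path):
        target = tmp_path / "test.csv"
        with pytest.raises(TypeError, match="quotechar"):
            write_csv(str(target), quotechar=None)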
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 228373a7bf545..3792da4b29ef9 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -54,7 +54,7 @@ def test_to_csv_defualt_encoding(self):
         # Python 3 is uft-8.
         if pd.compat.PY2:
             # the encoding argument parameter should be utf-8
-            with tm.assert_raises_regex(UnicodeEncodeError, 'ascii'):
+            with pytest.raises(UnicodeEncodeError, match='ascii'):
                 df.to_csv(path)
         else:
             df.to_csv(path)
@@ -85,7 +85,7 @@ def test_to_csv_quotechar(self):
             assert f.read() == expected

         with tm.ensure_clean('test.csv') as path:
-            with tm.assert_raises_regex(TypeError, 'quotechar'):
+            with pytest.raises(TypeError, match='quotechar'):
                 df.to_csv(path, quoting=1, quotechar=None)

     def test_to_csv_doublequote(self):
@@ -103,7 +103,7 @@ def test_to_csv_doublequote(self):
         from _csv import Error
         with tm.ensure_clean('test.csv') as path:
-            with tm.assert_raises_regex(Error, 'escapechar'):
+            with pytest.raises(Error, match='escapechar'):
                 df.to_csv(path, doublequote=False)  # no escapechar set

     def test_to_csv_escapechar(self):
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
index 035b2d4c3347c..0416cf6da7912 100644
--- a/pandas/tests/io/formats/test_to_html.py
+++ b/pandas/tests/io/formats/test_to_html.py
@@ -1580,7 +1580,7 @@ def test_to_html_invalid_justify(self, justify):
         df = DataFrame()
         msg = "Invalid value for justify parameter"

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df.to_html(justify=justify)

     def test_to_html_index(self):
diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index b411744f7bac2..c50b6f68b8839 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import pandas.util.testing as tm
 import pandas.util._test_decorators as td
-from pandas.util.testing import assert_frame_equal, assert_raises_regex
+from pandas.util.testing import assert_frame_equal


 def test_compression_roundtrip(compression):
@@ -81,15 +81,15 @@ def test_write_unsupported_compression_type():
     df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
     with tm.ensure_clean() as path:
         msg = "Unrecognized compression type: unsupported"
-        assert_raises_regex(ValueError, msg, df.to_json,
-                            path, compression="unsupported")
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(path, compression="unsupported")


 def test_read_unsupported_compression_type():
     with tm.ensure_clean() as path:
         msg = "Unrecognized compression type: unsupported"
-        assert_raises_regex(ValueError, msg, pd.read_json,
-                            path, compression="unsupported")
+        with pytest.raises(ValueError, match=msg):
+            pd.read_json(path, compression="unsupported")


 @pytest.mark.parametrize("to_infer", [True, False])
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index 829953c144caa..0b4ff2c34297a 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -409,8 +409,8 @@ def test_convert_json_field_to_pandas_type(self, inp, exp):
     @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
     def test_convert_json_field_to_pandas_type_raises(self, inp):
         field = {'type': inp}
-        with tm.assert_raises_regex(ValueError, "Unsupported or invalid field "
-                                    "type: {}".format(inp)):
+        with pytest.raises(ValueError, match=("Unsupported or invalid field "
+                                              "type: {}".format(inp))):
             convert_json_field_to_pandas_type(field)

     def test_categorical(self):
@@ -480,7 +480,7 @@ def test_timestamp_in_columns(self):
                                              ['a'], [1]], names=["A", "a"]))
     ])
     def test_overlapping_names(self, case):
-        with tm.assert_raises_regex(ValueError, 'Overlapping'):
+        with pytest.raises(ValueError, match='Overlapping'):
             case.to_json(orient='table')

     def test_mi_falsey_name(self):
@@ -526,7 +526,7 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn):
     def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
         df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
         out = df.to_json(orient="table")
-        with tm.assert_raises_regex(NotImplementedError, 'can not yet read '):
+        with pytest.raises(NotImplementedError, match='can not yet read '):
             pd.read_json(out, orient="table")

     def test_comprehensive(self):
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 04f0220839523..d047970ce2f08 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -344,8 +344,7 @@ def test_frame_from_json_bad_data(self):
         json = StringIO('{"badkey":["A","B"],'
                         '"index":["2","3"],'
                         '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
-        with tm.assert_raises_regex(ValueError,
-                                    r"unexpected key\(s\): badkey"):
+        with pytest.raises(ValueError, match=r"unexpected key\(s\): badkey"):
             read_json(json, orient="split")

     def test_frame_from_json_nones(self):
@@ -839,7 +838,7 @@ def test_misc_example(self):
 DataFrame\\.index values are different \\(100\\.0 %\\)
 \\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
 \\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
-        with tm.assert_raises_regex(AssertionError, error_msg):
+        with pytest.raises(AssertionError, match=error_msg):
             assert_frame_equal(result, expected, check_index_type=False)

         result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
@@ -1122,9 +1121,7 @@ def test_to_jsonl(self):

     def test_latin_encoding(self):
         if compat.PY2:
-            tm.assert_raises_regex(
-                TypeError, r'\[unicode\] is not implemented as a table column')
-            return
+            pytest.skip("[unicode] is not implemented as a table column")

         # GH 13774
         pytest.skip("encoding not implemented in .to_json(), "
@@ -1229,7 +1226,7 @@ def test_index_false_error_to_json(self, orient):
         df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])

-        with tm.assert_raises_regex(ValueError, "'index=False' is only "
-                                    "valid when 'orient' is "
-                                    "'split' or 'table'"):
+        msg = ("'index=False' is only valid when "
+               "'orient' is 'split' or 'table'")
+        with pytest.raises(ValueError, match=msg):
             df.to_json(orient=orient, index=False)
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index 3f61f702b7c9c..25750f4fd23b5 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -81,7 +81,7 @@ def test_readjson_chunks(lines_json_df, chunksize):

 def test_readjson_chunksize_requires_lines(lines_json_df):
     msg = "chunksize can only be passed if lines=True"
-    with tm.assert_raises_regex(ValueError, msg):
+    with pytest.raises(ValueError, match=msg):
         pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2)


@@ -138,7 +138,7 @@ def test_readjson_chunks_closes(chunksize):

 def test_readjson_invalid_chunksize(lines_json_df, chunksize):
     msg = r"'chunksize' must be an integer >=1"
-    with tm.assert_raises_regex(ValueError, msg):
+    with pytest.raises(ValueError, match=msg):
         pd.read_json(StringIO(lines_json_df), lines=True,
                      chunksize=chunksize)
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index 6706a29e78ae8..4ad4f71791079 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -707,7 +707,7 @@ def my_handler(_):
         def my_handler_raises(_):
             raise TypeError("I raise for anything")

-        with tm.assert_raises_regex(TypeError, "I raise for anything"):
+        with pytest.raises(TypeError, match="I raise for anything"):
             ujson.encode(_TestObject("foo"),
                          default_handler=my_handler_raises)

         def my_int_handler(_):
diff --git a/pandas/tests/io/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py
index 5a803c5eba34b..8e8d43a16eee9 100644
--- a/pandas/tests/io/msgpack/test_except.py
+++ b/pandas/tests/io/msgpack/test_except.py
@@ -4,7 +4,6 @@
 from pandas.io.msgpack import packb, unpackb

 import pytest
-import pandas.util.testing as tm


 class DummyException(Exception):
@@ -15,7 +14,7 @@ class TestExceptions(object):

     def test_raise_on_find_unsupported_value(self):
         msg = "can\'t serialize datetime"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             packb(datetime.now())

     def test_raise_from_object_hook(self):
@@ -35,5 +34,5 @@ def hook(_):

     def test_invalid_value(self):
         msg = "Unpack failed: error"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             unpackb(b"\xd9\x97#DL_")
diff --git a/pandas/tests/io/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py
index e4abd4ddb8d13..2d759d6117f2a 100644
--- a/pandas/tests/io/msgpack/test_limits.py
+++ b/pandas/tests/io/msgpack/test_limits.py
@@ -4,7 +4,6 @@
 from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType

 import pytest
-import pandas.util.testing as tm


 class TestLimits(object):
@@ -41,7 +40,7 @@ def test_max_str_len(self):
         unpacker.feed(packed)

         msg = "3 exceeds max_str_len"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             unpacker.unpack()

     def test_max_bin_len(self):
@@ -56,7 +55,7 @@ def test_max_bin_len(self):
         unpacker.feed(packed)

         msg = "3 exceeds max_bin_len"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             unpacker.unpack()

     def test_max_array_len(self):
@@ -71,7 +70,7 @@ def test_max_array_len(self):
         unpacker.feed(packed)

         msg = "3 exceeds max_array_len"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             unpacker.unpack()

     def test_max_map_len(self):
@@ -86,7 +85,7 @@ def test_max_map_len(self):
         unpacker.feed(packed)

         msg = "3 exceeds max_map_len"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             unpacker.unpack()

     def test_max_ext_len(self):
@@ -101,5 +100,5 @@ def test_max_ext_len(self):
         unpacker.feed(packed)

         msg = "4 exceeds max_ext_len"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             unpacker.unpack()
diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py
index dc6fc5ef916b4..be0a23f60f18a 100644
--- a/pandas/tests/io/msgpack/test_sequnpack.py
+++ b/pandas/tests/io/msgpack/test_sequnpack.py
@@ -5,7 +5,6 @@
 from pandas.io.msgpack import OutOfData

 import pytest
-import pandas.util.testing as tm


 class TestPack(object):
@@ -16,7 +15,7 @@ def test_partial_data(self):

         for data in [b"\xa5", b"h", b"a", b"l", b"l"]:
             unpacker.feed(data)
-            with tm.assert_raises_regex(StopIteration, msg):
+            with pytest.raises(StopIteration, match=msg):
                 next(iter(unpacker))

         unpacker.feed(b"o")
diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py
index edcfe1c0768cd..88db1080642c5 100644
--- a/pandas/tests/io/parser/c_parser_only.py
+++ b/pandas/tests/io/parser/c_parser_only.py
@@ -7,6 +7,7 @@
 further arguments when parsing.
 """

+from io import TextIOWrapper
 import os
 import sys
 import tarfile
@@ -14,7 +15,7 @@
 import numpy as np
 import pytest

-from pandas.compat import StringIO, lrange, range
+from pandas.compat import PY3, BytesIO, StringIO, lrange, range
 import pandas.util._test_decorators as td

 import pandas as pd
@@ -34,9 +35,8 @@ def test_buffer_overflow(self, malf):
         # see gh-9205: test certain malformed input files that cause
         # buffer overflows in tokenizer.c
         cperr = 'Buffer overflow caught - possible malformed input file.'
-        with pytest.raises(pd.errors.ParserError) as excinfo:
+        with pytest.raises(pd.errors.ParserError, match=cperr):
             self.read_table(StringIO(malf))
-        assert cperr in str(excinfo.value)

     def test_buffer_rd_bytes(self):
         # see gh-12098: src->buffer in the C parser can be freed twice leading
@@ -99,7 +99,7 @@ def test_dtype_and_names_error(self):
 3.0 3
 """
         # fallback casting, but not castable
-        with tm.assert_raises_regex(ValueError, 'cannot safely convert'):
+        with pytest.raises(ValueError, match='cannot safely convert'):
             self.read_csv(StringIO(data), sep=r'\s+', header=None,
                           names=['a', 'b'], dtype={'a': np.int32})

@@ -455,6 +455,14 @@ def __next__(self):

         tm.assert_frame_equal(result, expected)

+    def test_buffer_rd_bytes_bad_unicode(self):
+        # see gh-22748
+        t = BytesIO(b"\xB0")
+        if PY3:
+            t = TextIOWrapper(t, encoding='ascii', errors='surrogateescape')
+        with pytest.raises(UnicodeError):
+            self.read_csv(t, encoding='UTF-8')
+
     @pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"])
     def test_read_tarfile(self, tar_suffix):
         # see gh-16530
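test_buffer_overflow above also drops the `as excinfo` capture, since `match` covers a message-only assertion on its own. The capture form is still available when a test needs more than the message; a small sketch (the `fail` helper is made up for illustration):

    import pytest

    def fail():
        raise ValueError("bad value: 42")

    # Equivalent to pytest.raises(ValueError, match="bad value"), but the
    # ExceptionInfo object stays available for further assertions.
    with pytest.raises(ValueError) as excinfo:
        fail()
    assert "bad value" in str(excinfo.value)
    assert excinfo.type is ValueError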
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index da8118ef3e123..18690a18f7cb3 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -4,7 +4,6 @@
 from collections import OrderedDict
 import csv
 from datetime import datetime
-from io import TextIOWrapper
 import os
 import platform
 import re
@@ -45,7 +44,7 @@ def test_empty_decimal_marker(self):
 """
         # Parsers support only length-1 decimals
         msg = 'Only length-1 decimal markers supported'
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), decimal='')

     def test_bad_stream_exception(self):
@@ -67,7 +66,7 @@ def test_bad_stream_exception(self):
                 handle, utf8.encode, utf8.decode,
                 codec.streamreader, codec.streamwriter) as stream:

-            with tm.assert_raises_regex(UnicodeDecodeError, msg):
+            with pytest.raises(UnicodeDecodeError, match=msg):
                 self.read_csv(stream)

     def test_read_csv(self):
@@ -128,7 +127,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 4, saw 5'
-        with tm.assert_raises_regex(Exception, msg):
+        with pytest.raises(Exception, match=msg):
             self.read_table(StringIO(data), sep=',',
                             header=1, comment='#')

@@ -142,7 +141,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 6, saw 5'
-        with tm.assert_raises_regex(Exception, msg):
+        with pytest.raises(Exception, match=msg):
             it = self.read_table(StringIO(data), sep=',',
                                  header=1, comment='#',
                                  iterator=True, chunksize=1,
@@ -159,7 +158,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 6, saw 5'
-        with tm.assert_raises_regex(Exception, msg):
+        with pytest.raises(Exception, match=msg):
             it = self.read_table(StringIO(data), sep=',',
                                  header=1, comment='#',
                                  iterator=True, chunksize=1,
                                  skiprows=[2])
@@ -175,7 +174,7 @@ def test_malformed(self):
 2,3,4
 """
         msg = 'Expected 3 fields in line 6, saw 5'
-        with tm.assert_raises_regex(Exception, msg):
+        with pytest.raises(Exception, match=msg):
             it = self.read_table(StringIO(data), sep=',',
                                  header=1, comment='#',
                                  iterator=True, chunksize=1,
                                  skiprows=[2])
@@ -192,7 +191,7 @@ def test_malformed(self):
 footer
 """
         msg = 'Expected 3 fields in line 4, saw 5'
-        with tm.assert_raises_regex(Exception, msg):
+        with pytest.raises(Exception, match=msg):
             self.read_table(StringIO(data), sep=',',
                             header=1, comment='#',
                             skipfooter=1)

@@ -367,13 +366,13 @@ def test_read_nrows(self):

         msg = r"'nrows' must be an integer >=0"

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(self.data1), nrows=1.2)

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(self.data1), nrows='foo')

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(self.data1), nrows=-1)

     def test_read_chunksize(self):
@@ -389,13 +388,13 @@ def test_read_chunksize(self):
         # with invalid chunksize value:
         msg = r"'chunksize' must be an integer >=1"

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(self.data1), chunksize=1.3)

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(self.data1), chunksize='foo')

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(self.data1), chunksize=0)

     def test_read_chunksize_and_nrows(self):
@@ -1081,7 +1080,7 @@ def test_uneven_lines_with_usecols(self):
         # make sure that an error is still thrown
         # when the 'usecols' parameter is not provided
         msg = r"Expected \d+ fields in line \d+, saw \d+"
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             df = self.read_csv(StringIO(csv))

         expected = DataFrame({
@@ -1107,10 +1106,10 @@ def test_read_empty_with_usecols(self):
         # throws the correct error, with or without usecols
         errmsg = "No columns to parse from file"

-        with tm.assert_raises_regex(EmptyDataError, errmsg):
+        with pytest.raises(EmptyDataError, match=errmsg):
             self.read_csv(StringIO(''))

-        with tm.assert_raises_regex(EmptyDataError, errmsg):
+        with pytest.raises(EmptyDataError, match=errmsg):
             self.read_csv(StringIO(''), usecols=usecols)

         expected = DataFrame(columns=usecols, index=[0], dtype=np.float64)
@@ -1149,8 +1148,7 @@ def test_trailing_spaces(self):
     def test_raise_on_sep_with_delim_whitespace(self):
         # see gh-6607
         data = 'a b c\n1 2 3'
-        with tm.assert_raises_regex(ValueError,
-                                    'you can only specify one'):
+        with pytest.raises(ValueError, match='you can only specify one'):
             self.read_table(StringIO(data), sep=r'\s', delim_whitespace=True)

     def test_single_char_leading_whitespace(self):
@@ -1395,7 +1393,7 @@ def test_null_byte_char(self):
             tm.assert_frame_equal(out, expected)
         else:
             msg = "NULL byte detected"
-            with tm.assert_raises_regex(ParserError, msg):
+            with pytest.raises(ParserError, match=msg):
                 self.read_csv(StringIO(data), names=cols)

     def test_utf8_bom(self):
@@ -1537,7 +1535,7 @@ class InvalidBuffer(object):

         msg = "Invalid file path or buffer object type"

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(InvalidBuffer())

         # gh-16135: we want to ensure that "tell" and "seek"
@@ -1560,7 +1558,7 @@ def seek(self, pos, whence=0):

         tm.assert_frame_equal(result, expected)

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(mock.Mock())

     @tm.capture_stderr
@@ -1595,11 +1593,3 @@ def test_skip_bad_lines(self):
         val = sys.stderr.getvalue()

         assert 'Skipping line 3' in val
         assert 'Skipping line 5' in val
-
-    def test_buffer_rd_bytes_bad_unicode(self):
-        # Regression test for #22748
-        t = BytesIO(b"\xB0")
-        if PY3:
-            t = TextIOWrapper(t, encoding='ascii', errors='surrogateescape')
-        with pytest.raises(UnicodeError):
-            pd.read_csv(t, encoding='UTF-8')
diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index 2d32e383c7fee..e5ada41c06762 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -52,19 +52,18 @@ def test_zip(self):
             for file_name in inner_file_names:
                 tmp.writestr(file_name, data)

-            tm.assert_raises_regex(ValueError, 'Multiple files',
-                                   self.read_csv, path, compression='zip')
+            with pytest.raises(ValueError, match='Multiple files'):
+                self.read_csv(path, compression='zip')

-            tm.assert_raises_regex(ValueError, 'Multiple files',
-                                   self.read_csv, path,
-                                   compression='infer')
+            with pytest.raises(ValueError, match='Multiple files'):
+                self.read_csv(path, compression='infer')

         with tm.ensure_clean() as path:
-            with zipfile.ZipFile(path, mode='w') as tmp:
+            with zipfile.ZipFile(path, mode='w'):
                 pass

-            tm.assert_raises_regex(ValueError, 'Zero files',
-                                   self.read_csv, path, compression='zip')
+            with pytest.raises(ValueError, match='Zero files'):
+                self.read_csv(path, compression='zip')

         with tm.ensure_clean() as path:
             with open(path, 'wb') as f:
@@ -133,5 +132,5 @@ def test_read_csv_compressed_utf16_example(self, datapath):
     def test_invalid_compression(self):
         msg = 'Unrecognized compression type: sfark'

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv('test_file.zip', compression='sfark')
diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py
index f17ad019469ab..f8a498172eaf9 100644
--- a/pandas/tests/io/parser/converters.py
+++ b/pandas/tests/io/parser/converters.py
@@ -24,7 +24,7 @@ def test_converters_type_must_be_dict(self):
         data = """index,A,B,C,D
 foo,2,3,4,5
 """
-        with tm.assert_raises_regex(TypeError, 'Type converters.+'):
+        with pytest.raises(TypeError, match='Type converters.+'):
             self.read_csv(StringIO(data), converters=0)

     def test_converters(self):
diff --git a/pandas/tests/io/parser/dialect.py b/pandas/tests/io/parser/dialect.py
index 480ce9ef361d0..aa89f3167788a 100644
--- a/pandas/tests/io/parser/dialect.py
+++ b/pandas/tests/io/parser/dialect.py
@@ -7,6 +7,8 @@

 import csv

+import pytest
+
 from pandas.compat import StringIO
 from pandas.errors import ParserWarning

@@ -61,7 +63,7 @@ class InvalidDialect(object):
         data = 'a\n1'
         msg = 'Invalid dialect'

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), dialect=InvalidDialect)

     def test_dialect_conflict(self):
diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py
index 3a6db0fafa7c6..fe7a16e6447b3 100644
--- a/pandas/tests/io/parser/header.py
+++ b/pandas/tests/io/parser/header.py
@@ -21,7 +21,7 @@ class HeaderTests(object):

     def test_read_with_bad_header(self):
         errmsg = r"but only \d+ lines in file"

-        with tm.assert_raises_regex(ValueError, errmsg):
+        with pytest.raises(ValueError, match=errmsg):
             s = StringIO(',,')
             self.read_csv(s, header=[10])

@@ -322,9 +322,9 @@ def test_non_int_header(self):
         # GH 16338
         msg = 'header must be integer or list of integers'
         data = """1,2\n3,4"""
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), sep=',', header=['a', 'b'])
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), sep=',', header='string_header')

     def test_singleton_header(self):
diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py
index 4c2c5b754f9bb..751fb01e32a6a 100644
--- a/pandas/tests/io/parser/parse_dates.py
+++ b/pandas/tests/io/parser/parse_dates.py
@@ -429,11 +429,10 @@ def test_read_with_parse_dates_scalar_non_bool(self):
         data = """A,B,C
 1,2,2003-11-1"""

-        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
-                               StringIO(data), parse_dates="C")
-        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
-                               StringIO(data), parse_dates="C",
-                               index_col="C")
+        with pytest.raises(TypeError, match=errmsg):
+            self.read_csv(StringIO(data), parse_dates="C")
+        with pytest.raises(TypeError, match=errmsg):
+            self.read_csv(StringIO(data), parse_dates="C", index_col="C")

     def test_read_with_parse_dates_invalid_type(self):
         errmsg = ("Only booleans, lists, and "
@@ -442,13 +441,12 @@ def test_read_with_parse_dates_invalid_type(self):
         data = """A,B,C
 1,2,2003-11-1"""

-        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
-                               StringIO(data), parse_dates=(1,))
-        tm.assert_raises_regex(TypeError, errmsg,
-                               self.read_csv, StringIO(data),
-                               parse_dates=np.array([4, 5]))
-        tm.assert_raises_regex(TypeError, errmsg, self.read_csv,
-                               StringIO(data), parse_dates={1, 3, 3})
+        with pytest.raises(TypeError, match=errmsg):
+            self.read_csv(StringIO(data), parse_dates=(1,))
+        with pytest.raises(TypeError, match=errmsg):
+            self.read_csv(StringIO(data), parse_dates=np.array([4, 5]))
+        with pytest.raises(TypeError, match=errmsg):
+            self.read_csv(StringIO(data), parse_dates={1, 3, 3})

     def test_parse_dates_empty_string(self):
         # see gh-2263
diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py
index c3c87bca24a47..590736f720e67 100644
--- a/pandas/tests/io/parser/python_parser_only.py
+++ b/pandas/tests/io/parser/python_parser_only.py
@@ -36,17 +36,17 @@ def test_invalid_skipfooter(self):

         # see gh-15925 (comment)
         msg = "skipfooter must be an integer"

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(text), skipfooter="foo")

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(text), skipfooter=1.5)

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(text), skipfooter=True)

         msg = "skipfooter cannot be negative"

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(text), skipfooter=-1)

     def test_sniff_delimiter(self):
@@ -220,13 +220,13 @@ def test_multi_char_sep_quotes(self):
         data = 'a,,b\n1,,a\n2,,"2,,b"'

         msg = 'ignored when a multi-char delimiter is used'

-        with tm.assert_raises_regex(ParserError, msg):
+        with pytest.raises(ParserError, match=msg):
             self.read_csv(StringIO(data), sep=',,')

         # We expect no match, so there should be an assertion
         # error out of the inner context manager.
         with pytest.raises(AssertionError):
-            with tm.assert_raises_regex(ParserError, msg):
+            with pytest.raises(ParserError, match=msg):
                 self.read_csv(StringIO(data), sep=',,',
                               quoting=csv.QUOTE_NONE)

@@ -255,11 +255,11 @@ def test_skipfooter_bad_row(self):
         for data in ('a\n1\n"b"a',
                      'a,b,c\ncat,foo,bar\ndog,foo,"baz'):

-            with tm.assert_raises_regex(ParserError, msg):
+            with pytest.raises(ParserError, match=msg):
                 self.read_csv(StringIO(data), skipfooter=1)

             # We expect no match, so there should be an assertion
             # error out of the inner context manager.
             with pytest.raises(AssertionError):
-                with tm.assert_raises_regex(ParserError, msg):
+                with pytest.raises(ParserError, match=msg):
                     self.read_csv(StringIO(data))
diff --git a/pandas/tests/io/parser/quoting.py b/pandas/tests/io/parser/quoting.py
index 270a5430e6da4..a8a1cc5451f37 100644
--- a/pandas/tests/io/parser/quoting.py
+++ b/pandas/tests/io/parser/quoting.py
@@ -7,6 +7,8 @@

 import csv

+import pytest
+
 from pandas.compat import PY3, StringIO, u
 from pandas.errors import ParserError

@@ -22,29 +24,29 @@ def test_bad_quote_char(self):

         # Python 2.x: "...must be an 1-character..."
         # Python 3.x: "...must be a 1-character..."
         msg = '"quotechar" must be a(n)? 1-character string'
-        tm.assert_raises_regex(TypeError, msg, self.read_csv,
-                               StringIO(data), quotechar='foo')
+        with pytest.raises(TypeError, match=msg):
+            self.read_csv(StringIO(data), quotechar='foo')

         msg = 'quotechar must be set if quoting enabled'
-        tm.assert_raises_regex(TypeError, msg, self.read_csv,
-                               StringIO(data), quotechar=None,
-                               quoting=csv.QUOTE_MINIMAL)
+        with pytest.raises(TypeError, match=msg):
+            self.read_csv(StringIO(data), quotechar=None,
+                          quoting=csv.QUOTE_MINIMAL)

         msg = '"quotechar" must be string, not int'
-        tm.assert_raises_regex(TypeError, msg, self.read_csv,
-                               StringIO(data), quotechar=2)
+        with pytest.raises(TypeError, match=msg):
+            self.read_csv(StringIO(data), quotechar=2)

     def test_bad_quoting(self):
         data = '1,2,3'

         msg = '"quoting" must be an integer'
-        tm.assert_raises_regex(TypeError, msg, self.read_csv,
-                               StringIO(data), quoting='foo')
+        with pytest.raises(TypeError, match=msg):
+            self.read_csv(StringIO(data), quoting='foo')

         # quoting must in the range [0, 3]
         msg = 'bad "quoting" value'
-        tm.assert_raises_regex(TypeError, msg, self.read_csv,
-                               StringIO(data), quoting=5)
+        with pytest.raises(TypeError, match=msg):
+            self.read_csv(StringIO(data), quoting=5)

     def test_quote_char_basic(self):
         data = 'a,b,c\n1,2,"cat"'
@@ -70,13 +72,13 @@ def test_null_quote_char(self):

         # sanity checks
         msg = 'quotechar must be set if quoting enabled'

-        tm.assert_raises_regex(TypeError, msg, self.read_csv,
-                               StringIO(data), quotechar=None,
-                               quoting=csv.QUOTE_MINIMAL)
+        with pytest.raises(TypeError, match=msg):
+            self.read_csv(StringIO(data), quotechar=None,
+                          quoting=csv.QUOTE_MINIMAL)

-        tm.assert_raises_regex(TypeError, msg, self.read_csv,
-                               StringIO(data), quotechar='',
-                               quoting=csv.QUOTE_MINIMAL)
+        with pytest.raises(TypeError, match=msg):
+            self.read_csv(StringIO(data), quotechar='',
+                          quoting=csv.QUOTE_MINIMAL)

         # no errors should be raised if quoting is None
         expected = DataFrame([[1, 2, 3]],
@@ -163,7 +165,7 @@ def test_unbalanced_quoting(self):
         else:
             regex = "unexpected end of data"

-        with tm.assert_raises_regex(ParserError, regex):
+        with pytest.raises(ParserError, match=regex):
             self.read_csv(StringIO(data))

         expected = DataFrame([[1, 2, 3]],
                              columns=["a", "b", "c"])
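The nested blocks in test_multi_char_sep_quotes and test_skipfooter_bad_row assert the opposite condition: an error is raised but its message does not match. When the inner `pytest.raises(..., match=...)` sees a non-matching message it fails, and (with the pytest versions pinned here, where that failure surfaces as an `AssertionError`) the outer block catches exactly that failure. A reduced sketch with a made-up exception message:

    import pytest

    def boom():
        raise RuntimeError("some unrelated message")

    # The inner raises() catches the RuntimeError, but the pattern does not
    # match, so the inner block itself fails; the outer block asserts that.
    with pytest.raises(AssertionError):
        with pytest.raises(RuntimeError, match="expected pattern"):
            boom()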
diff --git a/pandas/tests/io/parser/skiprows.py b/pandas/tests/io/parser/skiprows.py
index 5d1b3b207a240..a051ee9b22d10 100644
--- a/pandas/tests/io/parser/skiprows.py
+++ b/pandas/tests/io/parser/skiprows.py
@@ -8,6 +8,7 @@
 from datetime import datetime

 import numpy as np
+import pytest

 from pandas.compat import StringIO, lrange, range
 from pandas.errors import EmptyDataError
@@ -215,11 +216,11 @@ def test_skiprows_callable(self):

         skiprows = lambda x: True
         msg = "No columns to parse from file"
-        with tm.assert_raises_regex(EmptyDataError, msg):
+        with pytest.raises(EmptyDataError, match=msg):
             self.read_csv(StringIO(data), skiprows=skiprows)

         # This is a bad callable and should raise.
         msg = "by zero"
         skiprows = lambda x: 1 / 0
-        with tm.assert_raises_regex(ZeroDivisionError, msg):
+        with pytest.raises(ZeroDivisionError, match=msg):
             self.read_csv(StringIO(data), skiprows=skiprows)
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index f7846f7824ba5..bb64a85590c8b 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -69,11 +69,10 @@ def test_fwf(self):
             StringIO(data3), colspecs=colspecs, delimiter='~', header=None)
         tm.assert_frame_equal(df, expected)

-        with tm.assert_raises_regex(ValueError,
-                                    "must specify only one of"):
+        with pytest.raises(ValueError, match="must specify only one of"):
             read_fwf(StringIO(data3), colspecs=colspecs, widths=[6, 10, 10, 7])

-        with tm.assert_raises_regex(ValueError, "Must specify either"):
+        with pytest.raises(ValueError, match="Must specify either"):
             read_fwf(StringIO(data3), colspecs=None, widths=None)

     def test_BytesIO_input(self):
@@ -96,9 +95,8 @@ def test_fwf_colspecs_is_list_or_tuple(self):
 bar2,12,13,14,15
 """

-        with tm.assert_raises_regex(TypeError,
-                                    'column specifications must '
-                                    'be a list or tuple.+'):
+        msg = 'column specifications must be a list or tuple.+'
+        with pytest.raises(TypeError, match=msg):
             pd.io.parsers.FixedWidthReader(StringIO(data),
                                            {'a': 1}, ',', '#')

@@ -112,9 +110,8 @@ def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self):
 bar2,12,13,14,15
 """

-        with tm.assert_raises_regex(TypeError,
-                                    'Each column specification '
-                                    'must be.+'):
+        msg = 'Each column specification must be.+'
+        with pytest.raises(TypeError, match=msg):
             read_fwf(StringIO(data), [('a', 1)])

     def test_fwf_colspecs_None(self):
diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
index 4437b0db9054e..8c6dbd64c785d 100644
--- a/pandas/tests/io/parser/test_unsupported.py
+++ b/pandas/tests/io/parser/test_unsupported.py
@@ -33,7 +33,7 @@ def test_mangle_dupe_cols_false(self):
         msg = 'is not supported'

         for engine in ('c', 'python'):
-            with tm.assert_raises_regex(ValueError, msg):
+            with pytest.raises(ValueError, match=msg):
                 read_csv(StringIO(data), engine=engine,
                          mangle_dupe_cols=False)

@@ -43,14 +43,14 @@ def test_c_engine(self):
         msg = 'does not support'

         # specify C engine with unsupported options (raise)
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(StringIO(data), engine='c',
                      sep=None, delim_whitespace=False)
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(StringIO(data), engine='c', sep=r'\s')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128))
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(StringIO(data), engine='c', skipfooter=1)

         # specify C-unsupported options without python-unsupported options
@@ -70,9 +70,9 @@ def test_c_engine(self):
 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""

         msg = 'Error tokenizing data'

-        with tm.assert_raises_regex(ParserError, msg):
+        with pytest.raises(ParserError, match=msg):
             read_csv(StringIO(text), sep='\\s+')
-        with tm.assert_raises_regex(ParserError, msg):
+        with pytest.raises(ParserError, match=msg):
             read_csv(StringIO(text), engine='c', sep='\\s+')

         msg = "Only length-1 thousands markers supported"
         data = """A|B|C
 1|2,334|5
 10|13|10.
 """
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(StringIO(data), thousands=',,')
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(StringIO(data), thousands='')

         msg = "Only length-1 line terminators supported"
         data = 'a,b,c~~1,2,3~~4,5,6'
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(StringIO(data), lineterminator='~~')

     def test_python_engine(self, python_engine):
@@ -104,7 +104,7 @@ def test_python_engine(self, python_engine):
                    'with the %r engine' % (default, python_engine))

             kwargs = {default: object()}
-            with tm.assert_raises_regex(ValueError, msg):
+            with pytest.raises(ValueError, match=msg):
                 read_csv(StringIO(data), engine=python_engine, **kwargs)

     def test_python_engine_file_no_next(self, python_engine):
@@ -122,7 +122,7 @@ def read(self):
         data = "a\n1"
         msg = "The 'python' engine cannot iterate"

-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_csv(NoNextBuffer(data), engine=python_engine)
diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py
index 82d45b163d16a..e9bb72be124d3 100644
--- a/pandas/tests/io/parser/usecols.py
+++ b/pandas/tests/io/parser/usecols.py
@@ -31,7 +31,7 @@ def test_raise_on_mixed_dtype_usecols(self):
         usecols = [0, 'b', 2]

-        with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg):
+        with pytest.raises(ValueError, match=self.msg_validate_usecols_arg):
             self.read_csv(StringIO(data), usecols=usecols)

     def test_usecols(self):
@@ -97,7 +97,7 @@ def test_usecols_single_string(self):

         usecols = 'foo'

-        with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg):
+        with pytest.raises(ValueError, match=self.msg_validate_usecols_arg):
             self.read_csv(StringIO(data), usecols=usecols)

     def test_usecols_index_col_False(self):
@@ -363,10 +363,10 @@ def test_usecols_with_mixed_encoding_strings(self):
 3.568935038,7,False,a
 '''

-        with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg):
+        with pytest.raises(ValueError, match=self.msg_validate_usecols_arg):
             self.read_csv(StringIO(s), usecols=[u'AAA', b'BBB'])

-        with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg):
+        with pytest.raises(ValueError, match=self.msg_validate_usecols_arg):
             self.read_csv(StringIO(s), usecols=[b'AAA', u'BBB'])

     def test_usecols_with_multibyte_characters(self):
@@ -499,21 +499,21 @@ def test_raise_on_usecols_names_mismatch(self):
         tm.assert_frame_equal(df, expected)

         usecols = ['a', 'b', 'c', 'f']
-        with tm.assert_raises_regex(ValueError,
-                                    self.msg_validate_usecols_names.format(
-                                        r"\['f'\]")):
+        msg = self.msg_validate_usecols_names.format(r"\['f'\]")
+
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), usecols=usecols)

         usecols = ['a', 'b', 'f']
-        with tm.assert_raises_regex(ValueError,
-                                    self.msg_validate_usecols_names.format(
-                                        r"\['f'\]")):
+        msg = self.msg_validate_usecols_names.format(r"\['f'\]")
+
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), usecols=usecols)

         usecols = ['a', 'b', 'f', 'g']
-        with tm.assert_raises_regex(ValueError,
-                                    self.msg_validate_usecols_names.format(
-                                        r"\[('f', 'g'|'g', 'f')\]")):
+        msg = self.msg_validate_usecols_names.format(
+            r"\[('f', 'g'|'g', 'f')\]")
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), usecols=usecols)

         names = ['A', 'B', 'C', 'D']
@@ -537,13 +537,14 @@ def test_raise_on_usecols_names_mismatch(self):
         # tm.assert_frame_equal(df, expected)

         usecols = ['A', 'B', 'C', 'f']
-        with tm.assert_raises_regex(ValueError,
-                                    self.msg_validate_usecols_names.format(
-                                        r"\['f'\]")):
+        msg = self.msg_validate_usecols_names.format(r"\['f'\]")
+
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), header=0,
                           names=names, usecols=usecols)
+
         usecols = ['A', 'B', 'f']
-        with tm.assert_raises_regex(ValueError,
-                                    self.msg_validate_usecols_names.format(
-                                        r"\['f'\]")):
+        msg = self.msg_validate_usecols_names.format(r"\['f'\]")
+
+        with pytest.raises(ValueError, match=msg):
             self.read_csv(StringIO(data), names=names, usecols=usecols)
diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py
index b85f6b6bbd5ce..016dc56b4d800 100644
--- a/pandas/tests/io/sas/test_sas.py
+++ b/pandas/tests/io/sas/test_sas.py
@@ -1,8 +1,8 @@
+import pytest
+
 from pandas.compat import StringIO
 from pandas import read_sas

-import pandas.util.testing as tm
-

 class TestSas(object):

@@ -12,5 +12,5 @@ def test_sas_buffer_format(self):

         msg = ("If this is a buffer object rather than a string "
                "name, you must specify a format string")
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             read_sas(b)
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 73e29e6eb9a6a..2f2b792588a92 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -269,14 +269,15 @@ def test_constructor_bad_file(self, mmap_file):
             msg = "[Errno 22]"
             err = mmap.error

-        tm.assert_raises_regex(err, msg, icom.MMapWrapper, non_file)
+        with pytest.raises(err, match=msg):
+            icom.MMapWrapper(non_file)

         target = open(mmap_file, 'r')
         target.close()

         msg = "I/O operation on closed file"
-        tm.assert_raises_regex(
-            ValueError, msg, icom.MMapWrapper, target)
+        with pytest.raises(ValueError, match=msg):
+            icom.MMapWrapper(target)

     def test_get_attr(self, mmap_file):
         with open(mmap_file, 'r') as target:
@@ -307,5 +308,5 @@ def test_unknown_engine(self):
         with tm.ensure_clean() as path:
             df = tm.makeDataFrame()
             df.to_csv(path)
-            with tm.assert_raises_regex(ValueError, 'Unknown engine'):
+            with pytest.raises(ValueError, match='Unknown engine'):
                 pd.read_csv(path, engine='pyt')
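Because `match` is interpreted as a regular expression, literal brackets in expected messages have to be escaped, which is why the usecols patterns above read r"\['f'\]". A short sketch with an invented message:

    import re
    import pytest

    def reject():
        raise ValueError("columns ['f'] not in index")

    # Escape the metacharacters by hand in a raw string...
    with pytest.raises(ValueError, match=r"\['f'\] not in index"):
        reject()

    # ...or escape the whole literal message in one go.
    with pytest.raises(ValueError,
                       match=re.escape("columns ['f'] not in index")):
        reject()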
pytest.raises(TypeError, match="Cannot specify both"): excel.parse(sheetname='Sheet1', sheet_name='Sheet1') @@ -1040,7 +1040,7 @@ def test_read_excel_nrows_greater_than_nrows_in_file(self, ext): def test_read_excel_nrows_non_integer_parameter(self, ext): # GH 16645 msg = "'nrows' must be an integer >=0" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows='5') @@ -2133,7 +2133,7 @@ def test_write_append_mode_raises(self, merge_cells, ext, engine): msg = "Append mode is not supported with xlwt!" with ensure_clean(ext) as f: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): ExcelWriter(f, engine=engine, mode='a') @@ -2191,7 +2191,7 @@ def test_write_append_mode_raises(self, merge_cells, ext, engine): msg = "Append mode is not supported with xlsxwriter!" with ensure_clean(ext) as f: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): ExcelWriter(f, engine=engine, mode='a') @@ -2215,7 +2215,7 @@ def test_ExcelWriter_dispatch(self, klass, ext): assert isinstance(writer, klass) def test_ExcelWriter_dispatch_raises(self): - with tm.assert_raises_regex(ValueError, 'No engine'): + with pytest.raises(ValueError, match='No engine'): ExcelWriter('nothing') @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index fea3c23121ab2..4201f751959b5 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -56,7 +56,7 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): def test_bs4_version_fails(monkeypatch, datapath): import bs4 monkeypatch.setattr(bs4, '__version__', '4.2') - with tm.assert_raises_regex(ValueError, "minimum version"): + with pytest.raises(ValueError, match="minimum version"): read_html(datapath("io", "data", "spam.html"), flavor='bs4') @@ -65,7 +65,7 @@ def test_invalid_flavor(): flavor = "invalid flavor" msg = r"\{" + flavor + r"\} is not a valid set of flavors" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_html(url, "google", flavor=flavor) @@ -204,8 +204,8 @@ def test_skiprows_ndarray(self): assert_framelist_equal(df1, df2) def test_skiprows_invalid(self): - with tm.assert_raises_regex(TypeError, 'is not a valid type ' - 'for skipping rows'): + with pytest.raises(TypeError, match=('is not a valid type ' + 'for skipping rows')): self.read_html(self.spam_data, '.*Water.*', skiprows='asdf') def test_index(self): @@ -288,7 +288,7 @@ def test_file_url(self): @pytest.mark.slow def test_invalid_table_attrs(self): url = self.banklist_data - with tm.assert_raises_regex(ValueError, 'No tables found'): + with pytest.raises(ValueError, match='No tables found'): self.read_html(url, 'First Federal Bank of Florida', attrs={'id': 'tasdfable'}) @@ -341,8 +341,8 @@ def test_regex_idempotency(self): assert isinstance(df, DataFrame) def test_negative_skiprows(self): - with tm.assert_raises_regex(ValueError, - r'\(you passed a negative value\)'): + msg = r'\(you passed a negative value\)' + with pytest.raises(ValueError, match=msg): self.read_html(self.spam_data, 'Water', skiprows=-1) @network @@ -822,10 +822,9 @@ def test_parse_dates_combine(self): def test_computer_sales_page(self, datapath): data = datapath('io', 'data', 'computer_sales_page.html') - with tm.assert_raises_regex(ParserError, - r"Passed header=\[0,1\] are " - r"too many rows for this " - 
r"multi_index of columns"): + msg = (r"Passed header=\[0,1\] are too many " + r"rows for this multi_index of columns") + with pytest.raises(ParserError, match=msg): self.read_html(data, header=[0, 1]) data = datapath('io', 'data', 'computer_sales_page.html') @@ -839,10 +838,9 @@ def test_wikipedia_states_table(self, datapath): assert result['sq mi'].dtype == np.dtype('float64') def test_parser_error_on_empty_header_row(self): - with tm.assert_raises_regex(ParserError, - r"Passed header=\[0,1\] are " - r"too many rows for this " - r"multi_index of columns"): + msg = (r"Passed header=\[0,1\] are too many " + r"rows for this multi_index of columns") + with pytest.raises(ParserError, match=msg): self.read_html(""" diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index a47c3c01fc80e..85d467650d5c4 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -370,8 +370,7 @@ def test_write_explicit(self, compression, get_random_path): @pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) def test_write_explicit_bad(self, compression, get_random_path): - with tm.assert_raises_regex(ValueError, - "Unrecognized compression type"): + with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean(get_random_path) as path: df = tm.makeDataFrame() df.to_pickle(path, compression=compression) @@ -474,7 +473,7 @@ def test_read_bad_versions(self, protocol, get_random_path): # For Python 2, HIGHEST_PROTOCOL should be 2. msg = ("pickle protocol {protocol} asked for; the highest available " "protocol is 2").format(protocol=protocol) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): with tm.ensure_clean(get_random_path) as path: df = tm.makeDataFrame() df.to_pickle(path, protocol=protocol) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b6cf660cf171e..7e1b43e81f9c1 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1084,9 +1084,7 @@ def test_encoding(self): def test_latin_encoding(self): if compat.PY2: - tm.assert_raises_regex( - TypeError, r'\[unicode\] is not implemented as a table column') - return + pytest.skip("[unicode] is not implemented as a table column") values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'], [b'E\xc9, 17', b'a', b'b', b'c'], @@ -2598,8 +2596,8 @@ def test_terms(self): for t in terms: store.select('wp', t) - with tm.assert_raises_regex( - TypeError, 'Only named functions are supported'): + with pytest.raises(TypeError, + match='Only named functions are supported'): store.select( 'wp', 'major_axis == (lambda x: x)("20130101")') @@ -2610,9 +2608,8 @@ def test_terms(self): expected = Panel({-1: wpneg[-1]}) tm.assert_panel_equal(res, expected) - with tm.assert_raises_regex(NotImplementedError, - 'Unary addition ' - 'not supported'): + msg = 'Unary addition not supported' + with pytest.raises(NotImplementedError, match=msg): store.select('wpneg', 'items == +1') def test_term_compat(self): @@ -4520,9 +4517,8 @@ def f(): pytest.raises(ClosedFileError, store.get_storer, 'df2') pytest.raises(ClosedFileError, store.remove, 'df2') - def f(): + with pytest.raises(ClosedFileError, match='file is not open'): store.select('df') - tm.assert_raises_regex(ClosedFileError, 'file is not open', f) def test_pytables_native_read(self, datapath): with ensure_clean_store( @@ -4971,9 +4967,8 @@ def test_to_hdf_with_object_column_names(self): df = DataFrame(np.random.randn(10, 2), columns=index(2)) with 
ensure_clean_path(self.path) as path: with catch_warnings(record=True): - with tm.assert_raises_regex( - ValueError, ("cannot have non-object label " - "DataIndexableCol")): + msg = "cannot have non-object label DataIndexableCol" + with pytest.raises(ValueError, match=msg): df.to_hdf(path, 'df', format='table', data_columns=True) @@ -5155,14 +5150,14 @@ def test_query_compare_column_type(self): pd.Timedelta(1, 's')]: query = 'date {op} v'.format(op=op) with pytest.raises(TypeError): - result = store.select('test', where=query) + store.select('test', where=query) # strings to other columns must be convertible to type v = 'a' for col in ['int', 'float', 'real_date']: query = '{col} {op} v'.format(op=op, col=col) with pytest.raises(ValueError): - result = store.select('test', where=query) + store.select('test', where=query) for v, col in zip(['1', '1.1', '2014-01-01'], ['int', 'float', 'real_date']): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 777b04bbae97d..6bb7800b72110 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -993,7 +993,7 @@ def test_database_uri_string(self): pass db_uri = "postgresql+pg8000://user:pass@host/dbname" - with tm.assert_raises_regex(ImportError, "pg8000"): + with pytest.raises(ImportError, match="pg8000"): sql.read_sql("select * from table", db_uri) def _make_iris_table_metadata(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 54d17a4773749..1f0a0d6bfee95 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -22,7 +22,7 @@ def test_import_error_message(): # GH-19810 df = DataFrame({"A": [1, 2]}) - with tm.assert_raises_regex(ImportError, 'matplotlib is required'): + with pytest.raises(ImportError, match='matplotlib is required'): df.plot() diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index e965ff7a78a39..5387a1043e00e 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -237,9 +237,9 @@ def test_join_on_fails_with_wrong_object_type(self, wrong_type): # Edited test to remove the Series object from test parameters df = DataFrame({'a': [1, 1]}) - with tm.assert_raises_regex(TypeError, str(type(wrong_type))): + with pytest.raises(TypeError, match=str(type(wrong_type))): merge(wrong_type, df, left_on='a', right_on='a') - with tm.assert_raises_regex(TypeError, str(type(wrong_type))): + with pytest.raises(TypeError, match=str(type(wrong_type))): merge(df, wrong_type, left_on='a', right_on='a') def test_join_on_pass_vector(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2b4a7952ae738..d9297cdc5ab3e 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -275,7 +275,7 @@ def test_no_overlap_more_informative_error(self): 'left_index={lidx}, right_index={ridx}' .format(lon=None, ron=None, lidx=False, ridx=False)) - with tm.assert_raises_regex(MergeError, msg): + with pytest.raises(MergeError, match=msg): merge(df1, df2) def test_merge_non_unique_indexes(self): @@ -1472,7 +1472,7 @@ def test_different(self, right_vals): "If you wish to proceed you should use " "pd.concat".format(lk_dtype=left['A'].dtype, rk_dtype=right['A'].dtype)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.merge(left, right, on='A') @pytest.mark.parametrize('d1', [np.int64, np.int32, @@ -1599,7 +1599,7 @@ 
def test_merge_incompat_dtypes(self, df1_vals, df2_vals): "you should use pd.concat".format(lk_dtype=df1['A'].dtype, rk_dtype=df2['A'].dtype)) msg = re.escape(msg) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.merge(df1, df2, on=['A']) # Check that error still raised when swapping order of dataframes @@ -1608,7 +1608,7 @@ def test_merge_incompat_dtypes(self, df1_vals, df2_vals): "you should use pd.concat".format(lk_dtype=df2['A'].dtype, rk_dtype=df1['A'].dtype)) msg = re.escape(msg) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.merge(df2, df1, on=['A']) @@ -1938,6 +1938,6 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nms, nm): left_index=left_index, right_index=right_index) tm.assert_frame_equal(result, expected) else: - with tm.assert_raises_regex(ValueError, 'a Series without a name'): + with pytest.raises(ValueError, match='a Series without a name'): result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index cf39293f47082..71db7844a9db5 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -6,7 +6,6 @@ from pandas import (merge_asof, read_csv, to_datetime, Timedelta) from pandas.core.reshape.merge import MergeError -from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal @@ -1005,7 +1004,7 @@ def test_merge_datatype_error(self): right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7], 'a': [1, 2, 3, 6, 7]}) - with tm.assert_raises_regex(MergeError, msg): + with pytest.raises(MergeError, match=msg): merge_asof(left, right, on='a') @pytest.mark.parametrize('func', [lambda x: x, lambda x: to_datetime(x)], @@ -1019,7 +1018,7 @@ def test_merge_on_nans(self, func, side): df_null = pd.DataFrame({'a': nulls, 'left_val': ['a', 'b', 'c']}) df = pd.DataFrame({'a': non_nulls, 'right_val': [1, 6, 11]}) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): if side == 'left': merge_asof(df_null, df, on='a') else: diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 42d8eb7273ee1..0f8ecc6370bfd 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -1,6 +1,6 @@ +import pytest import pandas as pd from pandas import DataFrame, merge_ordered -from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal from numpy import nan @@ -76,7 +76,8 @@ def test_empty_sequence_concat(self): ([None, None], none_pat) ] for df_seq, pattern in test_cases: - tm.assert_raises_regex(ValueError, pattern, pd.concat, df_seq) + with pytest.raises(ValueError, match=pattern): + pd.concat(df_seq) pd.concat([pd.DataFrame()]) pd.concat([None, pd.DataFrame()]) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 673658c29fe75..c7fba47a8f27c 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -147,12 +147,10 @@ def test_concatlike_same_dtypes(self): tm.assert_index_equal(res, exp) # cannot append non-index - with tm.assert_raises_regex(TypeError, - 'all inputs must be Index'): + with pytest.raises(TypeError, match='all inputs must be Index'): pd.Index(vals1).append(vals2) - with 
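Note the conversion only touches assertions that check a message; bare `pytest.raises(Err, callable, ...)` calls, like the lambda-based iloc checks earlier in this series, are left in the older callable form, which pytest also supports. A sketch of the two equivalent invocations (`divide` is illustrative only):

    import pytest

    def divide(a, b):
        return a / b

    # Callable form: pytest.raises(ExcType, func, *args, **kwargs).
    pytest.raises(ZeroDivisionError, divide, 1, 0)

    # Context-manager form, used whenever the message matters.
    with pytest.raises(ZeroDivisionError, match="division"):
        divide(1, 0)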
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 673658c29fe75..c7fba47a8f27c 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -147,12 +147,10 @@ def test_concatlike_same_dtypes(self):
         tm.assert_index_equal(res, exp)

         # cannot append non-index
-        with tm.assert_raises_regex(TypeError,
-                                    'all inputs must be Index'):
+        with pytest.raises(TypeError, match='all inputs must be Index'):
             pd.Index(vals1).append(vals2)

-        with tm.assert_raises_regex(TypeError,
-                                    'all inputs must be Index'):
+        with pytest.raises(TypeError, match='all inputs must be Index'):
             pd.Index(vals1).append([pd.Index(vals2), vals3])

         # ----- Series ----- #
@@ -202,16 +200,16 @@ def test_concatlike_same_dtypes(self):
         msg = (r'cannot concatenate object of type \"(.+?)\";'
                ' only pd.Series, pd.DataFrame, and pd.Panel'
                r' \(deprecated\) objs are valid')
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             pd.Series(vals1).append(vals2)

-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             pd.Series(vals1).append([pd.Series(vals2), vals3])

-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             pd.concat([pd.Series(vals1), vals2])

-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             pd.concat([pd.Series(vals1), pd.Series(vals2), vals3])

     def test_concatlike_dtypes_coercion(self):
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index e83a2cb483de7..16ecb07c5f413 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -112,7 +112,7 @@ def test_tuple_vars_fail_with_multiindex(self):
         for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b),
                                     (tuple_a, tuple_b)):
-            with tm.assert_raises_regex(ValueError, r'MultiIndex'):
+            with pytest.raises(ValueError, match=r'MultiIndex'):
                 self.df1.melt(id_vars=id_vars, value_vars=value_vars)

     def test_custom_var_name(self):
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 1cb036dccf23c..69572f75fea1b 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1674,22 +1674,22 @@ def test_crosstab_errors(self):
                         'c': [1, 1, np.nan, 1, 1]})

         error = 'values cannot be used without an aggfunc.'
-        with tm.assert_raises_regex(ValueError, error):
+        with pytest.raises(ValueError, match=error):
             pd.crosstab(df.a, df.b, values=df.c)

         error = 'aggfunc cannot be used without values'
-        with tm.assert_raises_regex(ValueError, error):
+        with pytest.raises(ValueError, match=error):
             pd.crosstab(df.a, df.b, aggfunc=np.mean)

         error = 'Not a valid normalize argument'
-        with tm.assert_raises_regex(ValueError, error):
+        with pytest.raises(ValueError, match=error):
             pd.crosstab(df.a, df.b, normalize='42')

-        with tm.assert_raises_regex(ValueError, error):
+        with pytest.raises(ValueError, match=error):
             pd.crosstab(df.a, df.b, normalize=42)

         error = 'Not a valid margins argument'
-        with tm.assert_raises_regex(ValueError, error):
+        with pytest.raises(ValueError, match=error):
             pd.crosstab(df.a, df.b, normalize='all', margins=42)

     def test_crosstab_with_categorial_columns(self):
diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index 44de3e93d42bf..f04e9a55a6c8d 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -205,8 +205,8 @@ def test_qcut_specify_quantiles(self):
         tm.assert_categorical_equal(factor, expected)

     def test_qcut_all_bins_same(self):
-        tm.assert_raises_regex(ValueError, "edges.*unique", qcut,
-                               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)
+        with pytest.raises(ValueError, match="edges.*unique"):
+            qcut([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)

     def test_cut_out_of_bounds(self):
         arr = np.random.randn(100)
diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py
index 8743d11118200..80538b0c6de4e 100644
--- a/pandas/tests/reshape/test_union_categoricals.py
+++ b/pandas/tests/reshape/test_union_categoricals.py
@@ -58,11 +58,11 @@ def test_union_categorical(self):
         s = Categorical([0, 1.2, 2])
         s2 = Categorical([2, 3, 4])
         msg = 'dtype of categories must be the same'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             union_categoricals([s, s2])

         msg = 'No Categoricals to union'
-        with tm.assert_raises_regex(ValueError, msg):
+        with pytest.raises(ValueError, match=msg):
             union_categoricals([])

     def test_union_categoricals_nan(self):
@@ -143,7 +143,7 @@ def test_union_categoricals_ordered(self):
         c2 = Categorical([1, 2, 3], ordered=False)

         msg = 'Categorical.ordered must be the same'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             union_categoricals([c1, c2])

         res = union_categoricals([c1, c1])
@@ -161,7 +161,7 @@ def test_union_categoricals_ordered(self):
         c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)

         msg = "to union ordered Categoricals, all categories must be the same"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             union_categoricals([c1, c2])

     def test_union_categoricals_ignore_order(self):
@@ -174,7 +174,7 @@ def test_union_categoricals_ignore_order(self):
         tm.assert_categorical_equal(res, exp)

         msg = 'Categorical.ordered must be the same'
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             union_categoricals([c1, c2], ignore_order=False)

         res = union_categoricals([c1, c1], ignore_order=True)
@@ -212,10 +212,10 @@ def test_union_categoricals_ignore_order(self):
         tm.assert_categorical_equal(result, expected)

         msg = "to union ordered Categoricals, all categories must be the same"
-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             union_categoricals([c1, c2], ignore_order=False)

-        with tm.assert_raises_regex(TypeError, msg):
+        with pytest.raises(TypeError, match=msg):
             union_categoricals([c1, c2])

     def test_union_categoricals_sort(self):
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index e4a9591b95c26..e7e1626bdb2da 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -1,4 +1,4 @@
-
+import pytest
 import numpy as np
 from pandas import date_range, Index
 import pandas.util.testing as tm
@@ -41,9 +41,12 @@ def test_empty(self):
         expected = []
         assert result == expected

-    def test_invalid_input(self):
-        invalid_inputs = [1, [1], [1, 2], [[1], 2],
-                          'a', ['a'], ['a', 'b'], [['a'], 'b']]
+    @pytest.mark.parametrize("X", [
+        1, [1], [1, 2], [[1], 2],
+        'a', ['a'], ['a', 'b'], [['a'], 'b']
+    ])
+    def test_invalid_input(self, X):
         msg = "Input must be a list-like of list-likes"
-        for X in invalid_inputs:
-            tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X)
+
+        with pytest.raises(TypeError, match=msg):
+            cartesian_product(X=X)
with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval / interval - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval / 'foo' def test_math_floordiv(self, closed): @@ -200,19 +199,19 @@ def test_math_floordiv(self, closed): assert result == expected msg = r"unsupported operand type\(s\) for //" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval // interval - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval // 'foo' def test_constructor_errors(self): msg = "invalid option for 'closed': foo" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Interval(0, 1, closed='foo') msg = 'left side of interval must be <= right side' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Interval(1, 0) @pytest.mark.parametrize('tz_left, tz_right', [ diff --git a/pandas/tests/scalar/interval/test_ops.py b/pandas/tests/scalar/interval/test_ops.py index 7eca24aa8af25..869ff205c2f51 100644 --- a/pandas/tests/scalar/interval/test_ops.py +++ b/pandas/tests/scalar/interval/test_ops.py @@ -2,7 +2,6 @@ import pytest from pandas import Interval, Timedelta, Timestamp -import pandas.util.testing as tm @pytest.fixture(params=[ @@ -57,5 +56,5 @@ def test_overlaps_invalid_type(self, other): interval = Interval(0, 1) msg = '`other` must be an Interval, got {other}'.format( other=type(other).__name__) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval.overlaps(other) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 432d55ef5967a..23762fda8c22a 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -329,26 +329,26 @@ def test_conv_weekly(self): assert ival_W.asfreq('W') == ival_W msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): ival_W.asfreq('WK') def test_conv_weekly_legacy(self): # frequency conversion tests: from Weekly Frequency msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK', year=2007, month=1, day=1) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-SAT', year=2007, month=1, day=6) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-FRI', year=2007, month=1, day=5) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-THU', year=2007, month=1, day=4) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-WED', year=2007, month=1, day=3) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-TUE', year=2007, month=1, day=2) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-MON', year=2007, month=1, day=1) def test_conv_business(self): @@ -742,10 +742,10 @@ def test_asfreq_MS(self): assert initial.asfreq(freq="M", how="S") == Period('2013-01', 'M') msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with 
pytest.raises(ValueError, match=msg): initial.asfreq(freq="MS", how="S") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.Period('2013-01', 'MS') assert _period_code_map.get("MS") is None diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 66e8541d2c911..6d5686463f2ae 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -142,11 +142,11 @@ def test_period_cons_mult(self): msg = ('Frequency must be positive, because it' ' represents span: -3M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='-3M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='0M') def test_period_cons_combined(self): @@ -196,28 +196,28 @@ def test_period_cons_combined(self): msg = ('Frequency must be positive, because it' ' represents span: -25H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='-1D1H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='-1H1D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq='-1D1H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq='-1H1D') msg = ('Frequency must be positive, because it' ' represents span: 0D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='0D0H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq='0D0H') # You can only combine together day and intraday offsets msg = ('Invalid frequency: 1W1D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='1W1D') msg = ('Invalid frequency: 1D1W') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='1D1W') @pytest.mark.parametrize('tzstr', ['Europe/Brussels', @@ -528,9 +528,9 @@ def test_period_deprecated_freq(self): msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG for exp, freqs in iteritems(cases): for freq in freqs: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2016-03-01 09:00', freq=freq) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq=freq) # check supported freq-aliases still works @@ -774,7 +774,7 @@ def test_properties_weekly_legacy(self): assert exp.days_in_month == 29 msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK', year=2007, month=1, day=7) def test_properties_daily(self): @@ -1036,14 +1036,14 @@ def test_add_raises(self): dt1 = Period(freq='D', year=2008, month=1, day=1) dt2 = Period(freq='D', year=2008, month=1, day=2) msg = r"unsupported operand type\(s\)" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): dt1 + "str" msg = r"unsupported operand type\(s\)" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): "str" + dt1 - 
with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): dt1 + dt2 boxes = [lambda x: x, lambda x: pd.Series([x]), lambda x: pd.Index([x])] @@ -1061,13 +1061,13 @@ def test_add_timestamp_raises(self, rbox, lbox): msg = (r"cannot add|unsupported operand|" r"can only operate on a|incompatible type|" r"ufunc add cannot use operands") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): lbox(ts) + rbox(per) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): lbox(per) + rbox(ts) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): lbox(per) + rbox(per) def test_sub(self): @@ -1079,7 +1079,7 @@ def test_sub(self): assert dt2 - dt1 == 14 * off msg = r"Input has different freq=M from Period\(freq=D\)" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): dt1 - Period('2011-02', freq='M') def test_add_offset(self): @@ -1435,10 +1435,10 @@ def test_period_ops_offset(self): assert result == exp msg = r"Input cannot be converted to Period\(freq=D\)" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): p + offsets.Hour(2) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): p - offsets.Hour(2) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index d648140aa7347..4165b1aec705f 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd -import pandas.util.testing as tm from pandas import Timedelta @@ -90,15 +89,16 @@ def test_construction(): Timedelta('3.1415') # invalid construction - tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", - lambda: Timedelta()) - tm.assert_raises_regex(ValueError, - "unit abbreviation w/o a number", - lambda: Timedelta('foo')) - tm.assert_raises_regex(ValueError, - "cannot construct a Timedelta from the " - "passed arguments, allowed keywords are ", - lambda: Timedelta(day=10)) + with pytest.raises(ValueError, match="cannot construct a Timedelta"): + Timedelta() + + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + Timedelta('foo') + + msg = ("cannot construct a Timedelta from " + "the passed arguments, allowed keywords are ") + with pytest.raises(ValueError, match=msg): + Timedelta(day=10) # floats expected = np.timedelta64( @@ -190,8 +190,8 @@ def test_iso_constructor(fmt, exp): 'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S', 'P1DT0H0M0.S']) def test_iso_constructor_raises(fmt): - with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration ' - 'format - {}'.format(fmt)): + with pytest.raises(ValueError, match=('Invalid ISO 8601 Duration ' + 'format - {}'.format(fmt))): Timedelta(fmt) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 7af0b281aeaa5..47f91fdf25756 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -334,20 +334,20 @@ def test_constructor_with_stringoffset(self): assert result == eval(repr(result)) def test_constructor_invalid(self): - with tm.assert_raises_regex(TypeError, 'Cannot convert input'): + with pytest.raises(TypeError, 
match='Cannot convert input'): Timestamp(slice(2)) - with tm.assert_raises_regex(ValueError, 'Cannot convert Period'): + with pytest.raises(ValueError, match='Cannot convert Period'): Timestamp(Period('1000-01-01')) def test_constructor_invalid_tz(self): # GH#17690 - with tm.assert_raises_regex(TypeError, 'must be a datetime.tzinfo'): + with pytest.raises(TypeError, match='must be a datetime.tzinfo'): Timestamp('2017-10-22', tzinfo='US/Eastern') - with tm.assert_raises_regex(ValueError, 'at most one of'): + with pytest.raises(ValueError, match='at most one of'): Timestamp('2017-10-22', tzinfo=utc, tz='UTC') - with tm.assert_raises_regex(ValueError, "Invalid frequency:"): + with pytest.raises(ValueError, match="Invalid frequency:"): # GH#5168 # case where user tries to pass tz as an arg, not kwarg, gets # interpreted as a `freq` @@ -577,7 +577,7 @@ def test_construct_timestamp_preserve_original_frequency(self): def test_constructor_invalid_frequency(self): # GH 22311 - with tm.assert_raises_regex(ValueError, "Invalid frequency:"): + with pytest.raises(ValueError, match="Invalid frequency:"): Timestamp('2012-01-01', freq=[]) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 827ad3581cd49..6755d0bd4ae27 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -64,14 +64,14 @@ def test_tz_localize_ambiguous(self): ts.tz_localize('US/Eastern', ambiguous='infer') # GH#8025 - with tm.assert_raises_regex(TypeError, - 'Cannot localize tz-aware Timestamp, ' - 'use tz_convert for conversions'): + msg = ('Cannot localize tz-aware Timestamp, ' + 'use tz_convert for conversions') + with pytest.raises(TypeError, match=msg): Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') - with tm.assert_raises_regex(TypeError, - 'Cannot convert tz-naive Timestamp, ' - 'use tz_localize to localize'): + msg = ('Cannot convert tz-naive Timestamp, ' + 'use tz_localize to localize') + with pytest.raises(TypeError, match=msg): Timestamp('2011-01-01').tz_convert('Asia/Tokyo') @pytest.mark.parametrize('stamp, tz', [ diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 0c477a021df4d..a9a60c4119605 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -76,7 +76,7 @@ def test_round_nonstandard_freq(self): def test_round_invalid_arg(self): stamp = Timestamp('2000-01-05 05:09:15.13') - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): stamp.round('foo') @pytest.mark.parametrize('test_input, rounder, freq, expected', [ diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 57a087221f411..e2cffe653d935 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -520,7 +520,7 @@ def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc): - with tm.assert_raises_regex(error_type, error_desc): + with pytest.raises(error_type, match=error_desc): Series(data, index=index).drop(drop_labels, axis=axis) @@ -557,5 +557,5 @@ def test_drop_empty_list(index, drop_labels): ]) def test_drop_non_empty_list(data, index, drop_labels): # GH 21494 and GH 16877 - with 
tm.assert_raises_regex(KeyError, 'not found in axis'): + with pytest.raises(KeyError, match='not found in axis'): pd.Series(data=data, index=index).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 5d1b81ba7dc1c..b94104a89627a 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -322,11 +322,11 @@ def test_where_invalid_input(cond): s = Series([1, 2, 3]) msg = "Boolean array expected for the condition" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.where(cond) msg = "Array conditional must be same shape as self" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.where([True]) @@ -335,7 +335,7 @@ def test_where_ndframe_align(): s = Series([1, 2, 3]) cond = [True] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.where(cond) expected = Series([1, np.nan, np.nan]) @@ -344,7 +344,7 @@ def test_where_ndframe_align(): tm.assert_series_equal(out, expected) cond = np.array([False, True, False, True]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.where(cond) expected = Series([np.nan, 2, np.nan]) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 1582bd119c806..f969619d5acb0 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -394,9 +394,9 @@ def test_setslice(test_data): @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_basic_getitem_setitem_corner(test_data): # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] - with tm.assert_raises_regex(ValueError, 'tuple-index'): + with pytest.raises(ValueError, match='tuple-index'): test_data.ts[:, 2] - with tm.assert_raises_regex(ValueError, 'tuple-index'): + with pytest.raises(ValueError, match='tuple-index'): test_data.ts[:, 2] = 2 # weird lists. 
[slice(0, 5)] will work but not two slices diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 00e145680c7a6..79de3dc3be19f 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -80,7 +80,7 @@ def test_rename_axis_supported(self): s = Series(range(5)) s.rename({}, axis=0) s.rename({}, axis='index') - with tm.assert_raises_regex(ValueError, 'No axis named 5'): + with pytest.raises(ValueError, match='No axis named 5'): s.rename({}, axis=5) def test_set_name_attribute(self): @@ -169,7 +169,7 @@ def test_reset_index_level(self): drop=True) tm.assert_frame_equal(result, df[['C']]) - with tm.assert_raises_regex(KeyError, 'Level E '): + with pytest.raises(KeyError, match='Level E '): s.reset_index(level=['A', 'E']) # With single-level Index @@ -184,7 +184,7 @@ def test_reset_index_level(self): result = s.reset_index(level=levels[0], drop=True) tm.assert_series_equal(result, df['B']) - with tm.assert_raises_regex(IndexError, 'Too many levels'): + with pytest.raises(IndexError, match='Too many levels'): s.reset_index(level=[0, 1, 2]) # Check that .reset_index([],drop=True) doesn't fail @@ -241,7 +241,7 @@ def test_rename_axis_mapper(self): result = s.rename_axis(index=['foo', 'goo']) assert result.index.names == ['foo', 'goo'] - with tm.assert_raises_regex(TypeError, 'unexpected'): + with pytest.raises(TypeError, match='unexpected'): s.rename_axis(columns='wrong') def test_rename_axis_inplace(self, datetime_series): @@ -289,7 +289,7 @@ def test_set_axis_inplace(self): # wrong values for the "axis" parameter for axis in [2, 'foo']: - with tm.assert_raises_regex(ValueError, 'No axis named'): + with pytest.raises(ValueError, match='No axis named'): s.set_axis(list('abcd'), axis=axis, inplace=False) def test_set_axis_prior_to_deprecation_signature(self): @@ -308,14 +308,14 @@ def test_reset_index_drop_errors(self): # KeyError raised for series index when passed level name is missing s = Series(range(4)) - with tm.assert_raises_regex(KeyError, 'must be same as name'): + with pytest.raises(KeyError, match='must be same as name'): s.reset_index('wrong', drop=True) - with tm.assert_raises_regex(KeyError, 'must be same as name'): + with pytest.raises(KeyError, match='must be same as name'): s.reset_index('wrong') # KeyError raised for series when level to be dropped is missing s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) - with tm.assert_raises_regex(KeyError, 'not found'): + with pytest.raises(KeyError, match='not found'): s.reset_index('wrong', drop=True) def test_droplevel(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index cbcfa629c8928..a5a7cc2217864 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -26,7 +26,7 @@ assert_series_equal) -class TestSeriesAnalytics(): +class TestSeriesAnalytics(object): @pytest.mark.parametrize("use_bottleneck", [True, False]) @pytest.mark.parametrize("method, unit", [ @@ -561,8 +561,8 @@ def _check_stat_op(self, name, alternate, string_series_, # Unimplemented numeric_only parameter. 
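The hunks below show the second conversion shape used throughout this patch: tm.assert_raises_regex also accepted a callable plus its arguments. pytest.raises still supports that legacy call form, but it takes no match argument there, so the regex check only survives the migration in the context-manager form. A minimal sketch, with a hypothetical stat_op standing in for the wrapped Series methods:

    import pytest

    def stat_op(values, numeric_only=False):
        # hypothetical stand-in for the stat ops exercised below
        if numeric_only:
            raise NotImplementedError("sum")
        return sum(values)

    # legacy callable form (no regex check possible):
    #   pytest.raises(NotImplementedError, stat_op, [1, 2], numeric_only=True)
    # context-manager form, as used in this patch:
    with pytest.raises(NotImplementedError, match="sum"):
        stat_op([1, 2], numeric_only=True)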
if 'numeric_only' in compat.signature(f).args: - tm.assert_raises_regex(NotImplementedError, name, f, - string_series_, numeric_only=True) + with pytest.raises(NotImplementedError, match=name): + f(string_series_, numeric_only=True) def _check_accum_op(self, name, datetime_series_, check_dtype=True): func = getattr(np, name) @@ -601,12 +601,12 @@ def test_numpy_compress(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.compress, - cond, s, axis=1) + with pytest.raises(ValueError, match=msg): + np.compress(cond, s, axis=1) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.compress, - cond, s, out=s) + with pytest.raises(ValueError, match=msg): + np.compress(cond, s, out=s) def test_round(self, datetime_series): datetime_series.index.name = "index_name" @@ -624,7 +624,7 @@ def test_numpy_round(self): assert_series_equal(out, expected) msg = "the 'out' parameter is not supported" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): np.round(s, decimals=0, out=s) def test_built_in_round(self): @@ -789,7 +789,7 @@ def test_corr_invalid_method(self): s2 = pd.Series(np.random.randn(10)) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s1.corr(s2, method="____") def test_corr_callable_method(self, datetime_series): @@ -1254,8 +1254,8 @@ def test_numpy_argmin_deprecated(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argmin, - s, out=data) + with pytest.raises(ValueError, match=msg): + np.argmin(s, out=data) def test_idxmax(self, string_series): # test idxmax @@ -1322,8 +1322,8 @@ def test_numpy_argmax_deprecated(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argmax, - s, out=data) + with pytest.raises(ValueError, match=msg): + np.argmax(s, out=data) def test_ptp(self): # GH21614 @@ -1392,7 +1392,8 @@ def test_numpy_repeat(self): assert_series_equal(np.repeat(s, 2), expected) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.repeat, s, 2, axis=0) + with pytest.raises(ValueError, match=msg): + np.repeat(s, 2, axis=0) def test_searchsorted(self): s = Series([1, 2, 3]) @@ -1929,7 +1930,7 @@ def test_error(self, r): args = 2, len(r), 0, -1 methods = r.nlargest, r.nsmallest for method, arg in product(methods, args): - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): method(arg) def test_nsmallest_nlargest(self, s_main_dtypes_split): @@ -1959,9 +1960,9 @@ def test_misc(self): assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]]) msg = 'keep must be either "first", "last"' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.nsmallest(keep='invalid') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.nlargest(keep='invalid') # GH 15297 diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index bd0d02014dcdb..f944d6f8c9d08 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -457,8 +457,7 @@ def test_str_attribute(self): # str 
accessor only valid with string values s = Series(range(5)) - with tm.assert_raises_regex(AttributeError, - 'only use .str accessor'): + with pytest.raises(AttributeError, match='only use .str accessor'): s.str.repeat(2) def test_empty_method(self): @@ -525,26 +524,25 @@ def test_cat_accessor_api(self): assert isinstance(s.cat, CategoricalAccessor) invalid = Series([1]) - with tm.assert_raises_regex(AttributeError, - "only use .cat accessor"): + with pytest.raises(AttributeError, match="only use .cat accessor"): invalid.cat assert not hasattr(invalid, 'cat') def test_cat_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 c = Series(list('aabbcde')).astype('category') - with tm.assert_raises_regex(AttributeError, - "You cannot add any new attribute"): + with pytest.raises(AttributeError, + match="You cannot add any new attribute"): c.cat.xlabel = "a" def test_categorical_delegations(self): # invalid accessor pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat) - tm.assert_raises_regex( - AttributeError, - r"Can only use .cat accessor with a 'category' dtype", - lambda: Series([1, 2, 3]).cat) + with pytest.raises(AttributeError, + match=(r"Can only use .cat accessor " + r"with a 'category' dtype")): + Series([1, 2, 3]).cat() pytest.raises(AttributeError, lambda: Series(['a', 'b', 'c']).cat) pytest.raises(AttributeError, lambda: Series(np.arange(5.)).cat) pytest.raises(AttributeError, @@ -674,9 +672,9 @@ def test_str_accessor_api_for_categorical(self): tm.assert_series_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') - with tm.assert_raises_regex(AttributeError, - "Can only use .str " - "accessor with string"): + msg = "Can only use .str accessor with string" + + with pytest.raises(AttributeError, match=msg): invalid.str assert not hasattr(invalid, 'str') @@ -760,7 +758,8 @@ def test_dt_accessor_api_for_categorical(self): tm.assert_almost_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') - with tm.assert_raises_regex( - AttributeError, "Can only use .dt accessor with datetimelike"): + msg = "Can only use .dt accessor with datetimelike" + + with pytest.raises(AttributeError, match=msg): invalid.dt assert not hasattr(invalid, 'str') diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 979775633f644..d1d6aa8b51c0d 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -82,7 +82,7 @@ def test_add_series_with_period_index(self): tm.assert_series_equal(result, expected) msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" - with tm.assert_raises_regex(IncompatibleFrequency, msg): + with pytest.raises(IncompatibleFrequency, match=msg): ts + ts.asfreq('D', how="end") def test_operators_datetimelike(self): @@ -139,7 +139,7 @@ def test_comparison_flex_basic(self): # msg = 'No axis named 1 for object type' for op in ['eq', 'ne', 'le', 'le', 'gt', 'ge']: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): getattr(left, op)(right, axis=1) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index a685eb7e9fbd3..3f137bf686715 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -51,9 +51,9 @@ def test_append_duplicates(self): exp, check_index_type=True) msg = 'Indexes have overlapping values:' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, 
match=msg): s1.append(s2, verify_integrity=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.concat([s1, s2], verify_integrity=True) def test_combine_scalar(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 494321c5190a6..ce0cf0d5c089e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -33,7 +33,7 @@ def test_invalid_dtype(self): msg = 'not understood' invalid_list = [pd.Timestamp, 'pd.Timestamp', list] for dtype in invalid_list: - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Series([], name='time', dtype=dtype) def test_scalar_conversion(self): @@ -560,19 +560,19 @@ def test_constructor_pass_nan_nat(self): def test_constructor_cast(self): msg = "could not convert string to float" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Series(["a", "b", "c"], dtype=float) def test_constructor_unsigned_dtype_overflow(self, uint_dtype): # see gh-15832 msg = 'Trying to coerce negative values to unsigned integers' - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Series([-1], dtype=uint_dtype) def test_constructor_coerce_float_fail(self, any_int_dtype): # see gh-15832 msg = "Trying to coerce float values to integers" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Series([1, 2, 3.5], dtype=any_int_dtype) def test_constructor_coerce_float_valid(self, float_dtype): @@ -1162,7 +1162,7 @@ def test_constructor_cant_cast_datetimelike(self, index): # PeriodIndex or PeriodArray type(index).__name__.rstrip("Index") ) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Series(index, dtype=float) # ints are ok @@ -1200,7 +1200,7 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype): # see gh-15524, gh-15987 msg = "dtype has no unit. 
Please pass in" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Series([], dtype=dtype) @pytest.mark.parametrize("dtype,msg", [ @@ -1210,7 +1210,7 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype): def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg): # see gh-15524, gh-15987 - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Series([], dtype=dtype) @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index f3ae2b1e6ad15..b1c92c2b82a56 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -216,7 +216,7 @@ def get_dir(s): # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') - with tm.assert_raises_regex(ValueError, "modifications"): + with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy @@ -314,8 +314,8 @@ def test_dt_namespace_accessor_categorical(self): def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(date_range('20130101', periods=5, freq='D')) - with tm.assert_raises_regex(AttributeError, - "You cannot add any new attribute"): + with pytest.raises(AttributeError, + match="You cannot add any new attribute"): s.dt.xlabel = "a" @pytest.mark.parametrize('time_locale', [ @@ -481,7 +481,7 @@ def test_dt_accessor_api(self): Series(np.random.randn(5))]) def test_dt_accessor_invalid(self, ser): # GH#9322 check that series with incorrect dtypes don't have attr - with tm.assert_raises_regex(AttributeError, "only use .dt accessor"): + with pytest.raises(AttributeError, match="only use .dt accessor"): ser.dt assert not hasattr(ser, 'dt') diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 75017f2d22794..79b1bc10b9f4b 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -68,7 +68,7 @@ def test_astype_cast_nan_inf_int(self, dtype, value): msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' s = Series([value]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.astype(dtype) @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) @@ -404,7 +404,7 @@ def test_astype_generic_timestamp_no_frequency(self, dtype): s = Series(data) msg = "dtype has no unit. 
Please pass in" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.astype(dtype) @pytest.mark.parametrize("dtype", np.typecodes['All']) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c38b7c0083a21..dc58b46f90609 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -401,31 +401,31 @@ def test_fillna_categorical_raise(self): data = ['a', np.nan, 'b', np.nan, np.nan] s = Series(Categorical(data, categories=['a', 'b'])) - with tm.assert_raises_regex(ValueError, - "fill value must be in categories"): + with pytest.raises(ValueError, + match="fill value must be in categories"): s.fillna('d') - with tm.assert_raises_regex(ValueError, - "fill value must be in categories"): + with pytest.raises(ValueError, + match="fill value must be in categories"): s.fillna(Series('d')) - with tm.assert_raises_regex(ValueError, - "fill value must be in categories"): + with pytest.raises(ValueError, + match="fill value must be in categories"): s.fillna({1: 'd', 3: 'a'}) - with tm.assert_raises_regex(TypeError, - '"value" parameter must be a scalar or ' - 'dict, but you passed a "list"'): + msg = ('"value" parameter must be a scalar or ' + 'dict, but you passed a "list"') + with pytest.raises(TypeError, match=msg): s.fillna(['a', 'b']) - with tm.assert_raises_regex(TypeError, - '"value" parameter must be a scalar or ' - 'dict, but you passed a "tuple"'): + msg = ('"value" parameter must be a scalar or ' + 'dict, but you passed a "tuple"') + with pytest.raises(TypeError, match=msg): s.fillna(('a', 'b')) - with tm.assert_raises_regex(TypeError, - '"value" parameter must be a scalar, dict ' - 'or Series, but you passed a "DataFrame"'): + msg = ('"value" parameter must be a scalar, dict ' + 'or Series, but you passed a "DataFrame"') + with pytest.raises(TypeError, match=msg): s.fillna(DataFrame({1: ['a'], 3: ['b']})) def test_fillna_nat(self): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 35bd99ff2eda8..77e43a346c824 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -604,23 +604,23 @@ def test_comp_ops_df_compat(self): for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: msg = "Can only compare identically-labeled Series objects" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left == right - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left != right - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left < right msg = "Can only compare identically-labeled DataFrame objects" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left.to_frame() == right.to_frame() - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left.to_frame() != right.to_frame() - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left.to_frame() < right.to_frame() diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 027814c618303..4f462e11e9bb9 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -44,7 +44,7 @@ def test_quantile(self): msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with tm.assert_raises_regex(ValueError, 
msg): + with pytest.raises(ValueError, match=msg): self.ts.quantile(invalid) def test_quantile_multi(self): diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 40a30cc8cf09a..9772ceecfc7b1 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -185,11 +185,11 @@ def test_rank_categorical(self): # Test invalid values for na_option msg = "na_option must be one of 'keep', 'top', or 'bottom'" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): na_ser.rank(na_option='bad', ascending=False) # invalid type - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): na_ser.rank(na_option=True, ascending=False) # Test with pct=True diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 7efde1fbdd1f5..3a9c210017625 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -77,7 +77,7 @@ def test_replace(self): # make sure that we aren't just masking a TypeError because bools don't # implement indexing - with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): + with pytest.raises(TypeError, match='Cannot compare types .+'): ser.replace([1, 2], [np.nan, 0]) ser = pd.Series([0, 1, 2, 3, 4]) @@ -137,9 +137,9 @@ def test_replace_with_empty_list(self): tm.assert_series_equal(result, expected) # GH 19266 - with tm.assert_raises_regex(ValueError, "cannot assign mismatch"): + with pytest.raises(ValueError, match="cannot assign mismatch"): s.replace({np.nan: []}) - with tm.assert_raises_regex(ValueError, "cannot assign mismatch"): + with pytest.raises(ValueError, match="cannot assign mismatch"): s.replace({np.nan: ['dummy', 'alt']}) def test_replace_mixed_types(self): @@ -205,7 +205,7 @@ def test_replace_bool_with_bool(self): def test_replace_with_dict_with_bool_keys(self): s = pd.Series([True, False, True]) - with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): + with pytest.raises(TypeError, match='Cannot compare types .+'): s.replace({'asdf': 'asdb', True: 'yes'}) def test_replace2(self): diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 21f80f181c34d..1681255f7e6bd 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -248,14 +248,16 @@ def test_truncate_nonsortedindex(self): s = pd.Series(['a', 'b', 'c', 'd', 'e'], index=[5, 3, 2, 9, 0]) - with tm.assert_raises_regex(ValueError, - 'truncate requires a sorted index'): + msg = 'truncate requires a sorted index' + + with pytest.raises(ValueError, match=msg): s.truncate(before=3, after=9) rng = pd.date_range('2011-01-01', '2012-01-01', freq='W') ts = pd.Series(np.random.randn(len(rng)), index=rng) - with tm.assert_raises_regex(ValueError, - 'truncate requires a sorted index'): + msg = 'truncate requires a sorted index' + + with pytest.raises(ValueError, match=msg): ts.sort_values(ascending=False).truncate(before='2011-11', after='2011-12') diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 3c9701758f12c..bdf5944cab408 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -31,8 +31,9 @@ def test_series_tz_localize(self): # Can't localize if already tz-aware rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') ts = Series(1, index=rng) - tm.assert_raises_regex(TypeError, 'Already tz-aware', - ts.tz_localize, 'US/Eastern') + + with 
pytest.raises(TypeError, match='Already tz-aware'): + ts.tz_localize('US/Eastern') @pytest.mark.filterwarnings('ignore::FutureWarning') def test_tz_localize_errors_deprecation(self): @@ -123,8 +124,9 @@ def test_series_tz_convert(self): # can't convert tz-naive rng = date_range('1/1/2011', periods=200, freq='D') ts = Series(1, index=rng) - tm.assert_raises_regex(TypeError, "Cannot convert tz-naive", - ts.tz_convert, 'US/Eastern') + + with pytest.raises(TypeError, match="Cannot convert tz-naive"): + ts.tz_convert('US/Eastern') def test_series_tz_convert_to_utc(self): base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index a6cbb058dbc9d..8f7c16f2c3132 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,7 +1,5 @@ import pytest -import pandas.util.testing as tm - class TestSeriesValidate(object): """Tests for error handling related to data types of method arguments.""" @@ -17,5 +15,5 @@ def test_validate_bool_args(self, string_series, func, inplace): if func == "_set_name": kwargs["name"] = "hello" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): getattr(string_series, func)(**kwargs) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 10074a2e5ad99..dd73ec69c3b9a 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -152,10 +152,10 @@ def test_constructor_ndarray(self, float_frame): level=1) # wrong length index / columns - with tm.assert_raises_regex(ValueError, "^Index length"): + with pytest.raises(ValueError, match="^Index length"): SparseDataFrame(float_frame.values, index=float_frame.index[:-1]) - with tm.assert_raises_regex(ValueError, "^Column length"): + with pytest.raises(ValueError, match="^Column length"): SparseDataFrame(float_frame.values, columns=float_frame.columns[:-1]) @@ -638,7 +638,7 @@ def test_set_index(self, float_frame): def test_ctor_reindex(self): idx = pd.Index([0, 1, 2, 3]) - with tm.assert_raises_regex(ValueError, ''): + with pytest.raises(ValueError, match=''): pd.SparseDataFrame({"A": [1, 2]}, index=idx) def test_append(self, float_frame): @@ -870,8 +870,7 @@ def test_join(self, float_frame): right = float_frame.loc[:, ['B', 'D']] pytest.raises(Exception, left.join, right) - with tm.assert_raises_regex(ValueError, - 'Other Series must have a name'): + with pytest.raises(ValueError, match='Other Series must have a name'): float_frame.join(Series( np.random.randn(len(float_frame)), index=float_frame.index)) @@ -1130,7 +1129,8 @@ def test_numpy_transpose(self): tm.assert_sp_frame_equal(result, sdf) msg = "the 'axes' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1) + with pytest.raises(ValueError, match=msg): + np.transpose(sdf, axes=1) def test_combine_first(self, float_frame): df = float_frame @@ -1300,12 +1300,12 @@ def test_numpy_cumsum(self, float_frame): tm.assert_sp_frame_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - float_frame, dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.cumsum(float_frame, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - float_frame, out=result) + with pytest.raises(ValueError, match=msg): + np.cumsum(float_frame, out=result) def 
test_numpy_func_call(self, float_frame): # no exception should be raised even though diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index 6a5821519866e..9c7dbd85edcbb 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -553,12 +553,12 @@ def test_numpy_take(self): np.take(sp.to_dense(), indices, axis=0)) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.take, - sp, indices, out=np.empty(sp.shape)) + with pytest.raises(ValueError, match=msg): + np.take(sp, indices, out=np.empty(sp.shape)) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.take, - sp, indices, out=None, mode='clip') + with pytest.raises(ValueError, match=msg): + np.take(sp, indices, out=None, mode='clip') def test_setitem(self): self.bseries[5] = 7. @@ -776,9 +776,9 @@ def _check_all(values, first, second): first_series = SparseSeries(values1, sparse_index=IntIndex(length, index1), fill_value=nan) - with tm.assert_raises_regex(TypeError, - 'new index must be a SparseIndex'): - reindexed = first_series.sparse_reindex(0) # noqa + with pytest.raises(TypeError, + match='new index must be a SparseIndex'): + first_series.sparse_reindex(0) def test_repr(self): # TODO: These aren't used @@ -870,7 +870,7 @@ def _check_matches(indices, expected): # must have NaN fill value data = {'a': SparseSeries(np.arange(7), sparse_index=expected2, fill_value=0)} - with tm.assert_raises_regex(TypeError, "NaN fill value"): + with pytest.raises(TypeError, match="NaN fill value"): spf.homogenize(data) def test_fill_value_corner(self): @@ -1444,7 +1444,7 @@ def test_cumsum(self): axis = 1 # Series is 1-D, so only axis = 0 is valid. 
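One more convenience of the context-manager form, relevant to the axis checks below: it can yield an ExcInfo object when a test needs more than a regex match. A minimal sketch, assuming a hypothetical cumsum_1d helper in place of the real SparseSeries method:

    import pytest

    def cumsum_1d(axis=0):
        # hypothetical stand-in for cumsum on a 1-D sparse structure
        if axis != 0:
            raise ValueError("No axis named {axis}".format(axis=axis))

    with pytest.raises(ValueError, match="No axis named") as excinfo:
        cumsum_1d(axis=1)
    # the captured exception remains available after the block
    assert "1" in str(excinfo.value)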
msg = "No axis named {axis}".format(axis=axis) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.bseries.cumsum(axis=axis) def test_numpy_cumsum(self): @@ -1457,12 +1457,12 @@ def test_numpy_cumsum(self): tm.assert_series_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - self.bseries, dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.cumsum(self.bseries, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - self.zbseries, out=result) + with pytest.raises(ValueError, match=msg): + np.cumsum(self.zbseries, out=result) def test_numpy_func_call(self): # no exception should be raised even though @@ -1520,7 +1520,7 @@ def test_to_sparse(): def test_constructor_mismatched_raises(): msg = "Length of passed values is 2, index implies 3" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): SparseSeries([1, 2], index=[1, 2, 3]) diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 1ecb1f4e8de58..fb10473ec78a8 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -451,7 +451,7 @@ def tests_indexing_with_sparse(self, kind, fill): msg = ("iLocation based boolean indexing cannot " "use an indexable as a mask") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.iloc[indexer] diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d491df587fb4a..3642c4ee98a9e 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1328,7 +1328,7 @@ def test_vector_resize(self, writable, htable, uniques, dtype, if safely_resizes: htable.get_labels(vals, uniques, 0, -1) else: - with tm.assert_raises_regex(ValueError, 'external reference.*'): + with pytest.raises(ValueError, match='external reference.*'): htable.get_labels(vals, uniques, 0, -1) uniques.to_array() # should not raise here @@ -1459,7 +1459,7 @@ def test_too_many_ndims(self): arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]) msg = "Array with ndim > 2 are not supported" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): algos.rank(arr) @@ -1664,27 +1664,27 @@ def test_int64_add_overflow(): m = np.iinfo(np.int64).max n = np.iinfo(np.int64).min - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), m) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([n, n]), n) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([n, n]), np.array([n, n])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, n]), np.array([n, n])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): 
algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True]), b_mask=np.array([False, True])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): with tm.assert_produces_warning(RuntimeWarning): algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m])) @@ -1692,19 +1692,13 @@ def test_int64_add_overflow(): # Check that the nan boolean arrays override whether or not # the addition overflows. We don't check the result but just # the fact that an OverflowError is not raised. - with pytest.raises(AssertionError): - with tm.assert_raises_regex(OverflowError, msg): - algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), - arr_mask=np.array([True, True])) - with pytest.raises(AssertionError): - with tm.assert_raises_regex(OverflowError, msg): - algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), - b_mask=np.array([True, True])) - with pytest.raises(AssertionError): - with tm.assert_raises_regex(OverflowError, msg): - algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), - arr_mask=np.array([True, False]), - b_mask=np.array([False, True])) + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), + arr_mask=np.array([True, True])) + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), + b_mask=np.array([True, True])) + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), + arr_mask=np.array([True, False]), + b_mask=np.array([False, True])) class TestMode(object): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 07d357b70f94b..084477d8202b1 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -47,9 +47,9 @@ class CheckImmutable(object): mutable_regex = re.compile('does not support mutable operations') def check_mutable_error(self, *args, **kwargs): - # Pass whatever function you normally would to assert_raises_regex + # Pass whatever function you normally would to pytest.raises # (after the Exception kind). 
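The nan-mask hunk above drops the old double-negative idiom of wrapping tm.assert_raises_regex in pytest.raises(AssertionError) to prove that no OverflowError occurs: under pytest a bare call is the whole assertion, since any unexpected exception fails the test by itself. A minimal sketch with a hypothetical checked_add, not the real algos.checked_add_with_arr signature:

    import pytest

    def checked_add(a, b, mask=False):
        # hypothetical stand-in; masked entries are exempt from the check
        if not mask and a + b > 100:
            raise OverflowError("Overflow in int64 addition")
        return a + b

    # must not overflow: a bare call is sufficient
    checked_add(99, 99, mask=True)

    # must overflow
    with pytest.raises(OverflowError, match="int64 addition"):
        checked_add(99, 99)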
- tm.assert_raises_regex( - TypeError, self.mutable_regex, *args, **kwargs) + with pytest.raises(TypeError, match=self.mutable_regex): + args[0](*args[1:], **kwargs) def test_no_mutable_funcs(self): @@ -848,9 +848,9 @@ def test_duplicated_drop_duplicates_index(self): result = idx.drop_duplicates(keep=False) tm.assert_index_equal(result, idx[~expected]) - with tm.assert_raises_regex( - TypeError, r"drop_duplicates\(\) got an unexpected " - "keyword argument"): + with pytest.raises(TypeError, + match=(r"drop_duplicates\(\) got an " + r"unexpected keyword argument")): idx.drop_duplicates(inplace=True) else: @@ -1036,10 +1036,10 @@ def test_transpose(self): def test_transpose_non_default_axes(self): for obj in self.objs: - tm.assert_raises_regex(ValueError, self.errmsg, - obj.transpose, 1) - tm.assert_raises_regex(ValueError, self.errmsg, - obj.transpose, axes=1) + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(1) + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(axes=1) def test_numpy_transpose(self): for obj in self.objs: @@ -1048,8 +1048,8 @@ def test_numpy_transpose(self): else: tm.assert_series_equal(np.transpose(obj), obj) - tm.assert_raises_regex(ValueError, self.errmsg, - np.transpose, obj, axes=1) + with pytest.raises(ValueError, match=self.errmsg): + np.transpose(obj, axes=1) class TestNoNewAttributesMixin(object): diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index fbc0faa4c929a..c5ea69b5ec46f 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -4,7 +4,6 @@ import pandas # noqa import pandas as pd from pandas.errors import AbstractMethodError -import pandas.util.testing as tm @pytest.mark.parametrize( @@ -62,13 +61,13 @@ def method(self): def test_AbstractMethodError_classmethod(): xpr = "This classmethod must be defined in the concrete class Foo" - with tm.assert_raises_regex(AbstractMethodError, xpr): + with pytest.raises(AbstractMethodError, match=xpr): Foo.classmethod() xpr = "This property must be defined in the concrete class Foo" - with tm.assert_raises_regex(AbstractMethodError, xpr): + with pytest.raises(AbstractMethodError, match=xpr): Foo().property xpr = "This method must be defined in the concrete class Foo" - with tm.assert_raises_regex(AbstractMethodError, xpr): + with pytest.raises(AbstractMethodError, match=xpr): Foo().method() diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index a7b9bf9c9a351..4a61ce930cbab 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -379,22 +379,22 @@ def test_bool_ops_raise_on_arithmetic(self): f = getattr(operator, name) err_msg = re.escape(msg % op) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df, df) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df.a, df.b) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df.a, True) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(False, df.a) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(False, df) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df, True) def test_bool_ops_warn_on_arithmetic(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index
9829c04ea108f..2717b92e05a29 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -325,7 +325,7 @@ def test_frame_getitem_setitem_boolean(self): np.putmask(values[:-1], values[:-1] < 0, 2) tm.assert_almost_equal(df.values, values) - with tm.assert_raises_regex(TypeError, 'boolean values only'): + with pytest.raises(TypeError, match='boolean values only'): df[df * 0] = 2 def test_frame_getitem_setitem_slice(self): @@ -772,8 +772,8 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - tm.assert_raises_regex( - TypeError, 'hierarchical', df.count, level=0) + with pytest.raises(TypeError, match='hierarchical'): + df.count(level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) @@ -809,10 +809,9 @@ def test_count_level_corner(self): tm.assert_frame_equal(result, expected) def test_get_level_number_out_of_bounds(self): - with tm.assert_raises_regex(IndexError, "Too many levels"): + with pytest.raises(IndexError, match="Too many levels"): self.frame.index._get_level_number(2) - with tm.assert_raises_regex(IndexError, - "not a valid level number"): + with pytest.raises(IndexError, match="not a valid level number"): self.frame.index._get_level_number(-3) def test_unstack(self): @@ -1029,17 +1028,16 @@ def test_stack_names_and_numbers(self): unstacked = self.ymd.unstack(['year', 'month']) # Can't use mixture of names and numbers to stack - with tm.assert_raises_regex(ValueError, "level should contain"): + with pytest.raises(ValueError, match="level should contain"): unstacked.stack([0, 'month']) def test_stack_multiple_out_of_bounds(self): # nlevels == 3 unstacked = self.ymd.unstack(['year', 'month']) - with tm.assert_raises_regex(IndexError, "Too many levels"): + with pytest.raises(IndexError, match="Too many levels"): unstacked.stack([2, 3]) - with tm.assert_raises_regex(IndexError, - "not a valid level number"): + with pytest.raises(IndexError, match="not a valid level number"): unstacked.stack([-4, -3]) def test_unstack_period_series(self): @@ -1327,10 +1325,10 @@ def test_reorder_levels(self): expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) tm.assert_frame_equal(result, expected) - with tm.assert_raises_regex(TypeError, 'hierarchical axis'): + with pytest.raises(TypeError, match='hierarchical axis'): self.ymd.reorder_levels([1, 2], axis=1) - with tm.assert_raises_regex(IndexError, 'Too many levels'): + with pytest.raises(IndexError, match='Too many levels'): self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): @@ -2351,9 +2349,9 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, df) # gh-16120: already existing column - with tm.assert_raises_regex(ValueError, - (r"cannot insert \('A', ''\), " - "already exists")): + with pytest.raises(ValueError, + match=(r"cannot insert \('A', ''\), " + "already exists")): df.rename_axis('A').reset_index() # gh-16164: multiindex (tuple) full key @@ -2368,9 +2366,9 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, expected) # with index name which is a too long tuple... - with tm.assert_raises_regex(ValueError, - ("Item must have length equal to number " - "of levels.")): + with pytest.raises(ValueError, + match=("Item must have length equal " + "to number of levels.")): df.rename_axis([('C', 'c', 'i')]).reset_index() # or too short... 
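The reset_index hunks above also show the convention for long patterns: adjacent string literals inside match=(...), with the raw-string prefix and escapes confined to the fragments that actually contain regex metacharacters. A minimal sketch, using a hypothetical insert_existing in place of the rename_axis/reset_index round-trip:

    import pytest

    def insert_existing(name):
        # hypothetical stand-in raising the message checked above
        raise ValueError("cannot insert ('A', ''), already exists")

    # only the first fragment needs r"" and escaped parentheses
    msg = (r"cannot insert \('A', ''\), "
           "already exists")
    with pytest.raises(ValueError, match=msg):
        insert_existing(("A", ""))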
@@ -2384,9 +2382,9 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, expected) # ... which is incompatible with col_fill=None - with tm.assert_raises_regex(ValueError, - ("col_fill=None is incompatible with " - r"incomplete column name \('C', 'c'\)")): + with pytest.raises(ValueError, + match=("col_fill=None is incompatible with " + r"incomplete column name \('C', 'c'\)")): df2.rename_axis([('C', 'c')]).reset_index(col_fill=None) # with col_level != 0 diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 775fcc2684f42..bc644071e914f 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -177,8 +177,8 @@ def wrapper(x): # Unimplemented numeric_only parameter. if 'numeric_only' in signature(f).args: - tm.assert_raises_regex(NotImplementedError, name, f, - numeric_only=True) + with pytest.raises(NotImplementedError, match=name): + f(numeric_only=True) @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") @@ -221,10 +221,10 @@ def test_get_axis_number(self): assert self.panel._get_axis_number('major') == 1 assert self.panel._get_axis_number('minor') == 2 - with tm.assert_raises_regex(ValueError, "No axis named foo"): + with pytest.raises(ValueError, match="No axis named foo"): self.panel._get_axis_number('foo') - with tm.assert_raises_regex(ValueError, "No axis named foo"): + with pytest.raises(ValueError, match="No axis named foo"): self.panel.__ge__(self.panel, axis='foo') def test_get_axis_name(self): @@ -502,10 +502,9 @@ def test_setitem(self): # bad shape p = Panel(np.random.randn(4, 3, 2)) - with tm.assert_raises_regex(ValueError, - r"shape of value must be " - r"\(3, 2\), shape of given " - r"object was \(4, 2\)"): + msg = (r"shape of value must be \(3, 2\), " + r"shape of given object was \(4, 2\)") + with pytest.raises(ValueError, match=msg): p[0] = np.random.randn(4, 2) def test_setitem_ndarray(self): @@ -853,9 +852,8 @@ def test_get_value(self): assert_almost_equal(result, expected) with catch_warnings(): simplefilter("ignore", FutureWarning) - with tm.assert_raises_regex(TypeError, - "There must be an argument " - "for each axis"): + msg = "There must be an argument for each axis" + with pytest.raises(TypeError, match=msg): self.panel.get_value('a') def test_set_value(self): @@ -880,7 +878,7 @@ def test_set_value(self): msg = ("There must be an argument for each " "axis plus the value provided") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): self.panel.set_value('a') @@ -1015,7 +1013,8 @@ def _check_dtype(panel, dtype): _check_dtype(panel, dtype) def test_constructor_fails_with_not_3d_input(self): - with tm.assert_raises_regex(ValueError, "The number of dimensions required is 3"): # noqa + msg = "The number of dimensions required is 3" + with pytest.raises(ValueError, match=msg): Panel(np.random.randn(10, 2)) def test_consolidate(self): @@ -1144,36 +1143,24 @@ def test_from_dict_mixed_orient(self): assert panel['A'].values.dtype == np.float64 def test_constructor_error_msgs(self): - def testit(): + msg = (r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(4, 5, 5\)") + with pytest.raises(ValueError, match=msg): Panel(np.random.randn(3, 4, 5), lrange(4), lrange(5), lrange(5)) - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(4, 5, 5\)", - testit) - - def testit(): + msg = (r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(5, 4, 5\)") + with pytest.raises(ValueError, match=msg): 
Panel(np.random.randn(3, 4, 5), lrange(5), lrange(4), lrange(5)) - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(5, 4, 5\)", - testit) - - def testit(): + msg = (r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(5, 5, 4\)") + with pytest.raises(ValueError, match=msg): Panel(np.random.randn(3, 4, 5), lrange(5), lrange(5), lrange(4)) - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(5, 5, 4\)", - testit) - def test_conform(self): df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) conformed = self.panel.conform(df) @@ -1634,12 +1621,12 @@ def test_transpose(self): assert_panel_equal(result, expected) # duplicate axes - with tm.assert_raises_regex(TypeError, - 'not enough/duplicate arguments'): + with pytest.raises(TypeError, + match='not enough/duplicate arguments'): self.panel.transpose('minor', maj='major', minor='items') - with tm.assert_raises_regex(ValueError, - 'repeated axis in transpose'): + with pytest.raises(ValueError, + match='repeated axis in transpose'): self.panel.transpose('minor', 'major', major='minor', minor='items') @@ -1833,8 +1820,9 @@ def test_to_panel_duplicates(self): # #2441 df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) idf = df.set_index(['a', 'b']) - tm.assert_raises_regex( - ValueError, 'non-uniquely indexed', idf.to_panel) + + with pytest.raises(ValueError, match='non-uniquely indexed'): + idf.to_panel() def test_panel_dups(self): @@ -1954,8 +1942,8 @@ def test_tshift(self): shifted3 = ps.tshift(freq=BDay()) assert_panel_equal(shifted, shifted3) - tm.assert_raises_regex(ValueError, 'does not match', - ps.tshift, freq='M') + with pytest.raises(ValueError, match='does not match'): + ps.tshift(freq='M') # DatetimeIndex panel = make_test_panel() @@ -2067,7 +2055,8 @@ def test_numpy_round(self): assert_panel_equal(expected, result) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.round, p, out=p) + with pytest.raises(ValueError, match=msg): + np.round(p, out=p) # removing Panel before NumPy enforces, so just ignore @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") @@ -2493,8 +2482,8 @@ def test_to_string(self): def test_to_sparse(self): if isinstance(self.panel, Panel): msg = 'sparsifying is not supported' - tm.assert_raises_regex(NotImplementedError, msg, - self.panel.to_sparse) + with pytest.raises(NotImplementedError, match=msg): + self.panel.to_sparse() def test_truncate(self): dates = self.panel.index.levels[0] diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index 33b9798b7606a..acc18ed7ad049 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -85,5 +85,5 @@ class Bad(object): def __init__(self, data): raise AttributeError("whoops") - with tm.assert_raises_regex(AttributeError, "whoops"): + with pytest.raises(AttributeError, match="whoops"): pd.Series([]).bad diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index ed29e20fd5ca5..756385f0cfb56 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -158,7 +158,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['D']) pytest.raises(KeyError, g.__getitem__, ['A', 'D']) - with tm.assert_raises_regex(KeyError, '^[^A]+$'): + with pytest.raises(KeyError, match='^[^A]+$'): # A should not be referenced as a bad column...
# will have to rethink regex if you change message! g[['A', 'D']] @@ -940,11 +940,10 @@ def test_numpy_compat(self): for func in ('min', 'max', 'sum', 'prod', 'mean', 'var', 'std'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, func), - func, 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, func), axis=1) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(func, 1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(axis=1) def test_resample_how_callables(self): # GH 7929 @@ -3308,11 +3307,10 @@ def test_fails_on_no_datetime_index(self): for name, func in zip(index_names, index_funcs): index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) - with tm.assert_raises_regex(TypeError, - "Only valid with " - "DatetimeIndex, TimedeltaIndex " - "or PeriodIndex, but got an " - "instance of %r" % name): + + msg = ("Only valid with DatetimeIndex, TimedeltaIndex " + "or PeriodIndex, but got an instance of %r" % name) + with pytest.raises(TypeError, match=msg): df.groupby(TimeGrouper('D')) def test_aaa_group_order(self): diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index aa5d0016eca95..22e758a0e59a7 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -423,14 +423,14 @@ def test_unsortable(self): pytest.raises(TypeError, safe_sort, arr) def test_exceptions(self): - with tm.assert_raises_regex(TypeError, - "Only list-like objects are allowed"): + with pytest.raises(TypeError, + match="Only list-like objects are allowed"): safe_sort(values=1) - with tm.assert_raises_regex(TypeError, - "Only list-like objects or None"): + with pytest.raises(TypeError, + match="Only list-like objects or None"): safe_sort(values=[0, 1, 2], labels=1) - with tm.assert_raises_regex(ValueError, - "values should be unique"): + with pytest.raises(ValueError, + match="values should be unique"): safe_sort(values=[0, 1, 2, 1], labels=[0, 1]) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f0873eb7683e9..7cd9182b4dff4 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -36,8 +36,7 @@ def test_api(self): # GH 9184 invalid = Series([1]) - with tm.assert_raises_regex(AttributeError, - "only use .str accessor"): + with pytest.raises(AttributeError, match="only use .str accessor"): invalid.str assert not hasattr(invalid, 'str') @@ -143,13 +142,13 @@ def test_str_cat(self, box): rgx = 'All arrays must be same length, except those having an index.*' z = Series(['1', '2', '3']) - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(z) - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(z.values) - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(list(z)) @pytest.mark.parametrize('box', [Series, Index]) @@ -157,9 +156,9 @@ def test_str_cat_raises_intuitive_error(self, box): # GH 11334 s = box(['a', 'b', 'c', 'd']) message = "Did you mean to supply a `sep` keyword?" 
- with tm.assert_raises_regex(ValueError, message): + with pytest.raises(ValueError, match=message): s.str.cat('|') - with tm.assert_raises_regex(ValueError, message): + with pytest.raises(ValueError, match=message): s.str.cat(' ') @pytest.mark.parametrize('sep', ['', None]) @@ -262,23 +261,23 @@ def test_str_cat_mixed_inputs(self, box): e = concat([z, z], axis=1) # DataFrame - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(e) # two-dimensional ndarray - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(e.values) # list of Series - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([z, s]) # list of list-likes - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([z.values, s.values]) # mixed list of Series/list-like - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([z.values, s]) # errors for incorrect arguments in list-like @@ -287,33 +286,33 @@ def test_str_cat_mixed_inputs(self, box): u = Series(['a', np.nan, 'c', None]) # mix of string and Series - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, 'u']) # DataFrame in list - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, d]) # 2-dim ndarray in list - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, d.values]) # nested lists - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, [u, d]]) # forbidden input type: set # GH 23009 - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat(set(u)) # forbidden input type: set in list # GH 23009 - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, set(u)]) # other forbidden input type, e.g. 
int - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat(1) @pytest.mark.parametrize('join', ['left', 'outer', 'inner', 'right']) @@ -371,17 +370,17 @@ def test_str_cat_align_mixed_inputs(self, join): z = Series(['1', '2', '3']).values # unindexed object of wrong length - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(z, join=join) # unindexed object of wrong length in list - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([t, z], join=join) def test_str_cat_raises(self): # non-strings hiding behind object dtype s = Series([1, 2, 3, 4], dtype='object') - with tm.assert_raises_regex(TypeError, "unsupported operand type.*"): + with pytest.raises(TypeError, match="unsupported operand type.*"): s.str.cat(s) def test_str_cat_special_cases(self): @@ -739,15 +738,15 @@ def test_replace_callable(self): r'(?(3)required )positional arguments?') repl = lambda: None - with tm.assert_raises_regex(TypeError, p_err): + with pytest.raises(TypeError, match=p_err): values.str.replace('a', repl) repl = lambda m, x: None - with tm.assert_raises_regex(TypeError, p_err): + with pytest.raises(TypeError, match=p_err): values.str.replace('a', repl) repl = lambda m, x, y=None: None - with tm.assert_raises_regex(TypeError, p_err): + with pytest.raises(TypeError, match=p_err): values.str.replace('a', repl) # test regex named groups @@ -800,16 +799,16 @@ def test_replace_compiled_regex(self): values = Series(['fooBAD__barBAD__bad', NA]) pat = re.compile(r'BAD[_]*') - with tm.assert_raises_regex(ValueError, - "case and flags cannot be"): + with pytest.raises(ValueError, + match="case and flags cannot be"): result = values.str.replace(pat, '', flags=re.IGNORECASE) - with tm.assert_raises_regex(ValueError, - "case and flags cannot be"): + with pytest.raises(ValueError, + match="case and flags cannot be"): result = values.str.replace(pat, '', case=False) - with tm.assert_raises_regex(ValueError, - "case and flags cannot be"): + with pytest.raises(ValueError, + match="case and flags cannot be"): result = values.str.replace(pat, '', case=True) # test with callable @@ -908,8 +907,8 @@ def test_match(self): def test_extract_expand_None(self): values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_raises_regex(ValueError, - 'expand must be True or False'): + with pytest.raises(ValueError, + match='expand must be True or False'): values.str.extract('.*(BAD[_]+).*(BAD)', expand=None) def test_extract_expand_unspecified(self): @@ -948,7 +947,7 @@ def test_extract_expand_False(self): # Index only works with one regex group since # multi-group would expand to a frame idx = Index(['A1', 'A2', 'A3', 'A4', 'B5']) - with tm.assert_raises_regex(ValueError, "supported"): + with pytest.raises(ValueError, match="supported"): idx.str.extract('([AB])([123])', expand=False) # these should work for both Series and Index @@ -1446,7 +1445,7 @@ def test_extractall_errors(self): # no capture groups. 
(it returns DataFrame with one column for # each capture group) s = Series(['a3', 'b3', 'd4c2'], name='series_name') - with tm.assert_raises_regex(ValueError, "no capture groups"): + with pytest.raises(ValueError, match="no capture groups"): s.str.extractall(r'[a-z]') def test_extract_index_one_two_groups(self): @@ -1817,12 +1816,12 @@ def test_find(self): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assert_raises_regex(TypeError, - "expected a string object, not int"): + with pytest.raises(TypeError, + match="expected a string object, not int"): result = values.str.find(0) - with tm.assert_raises_regex(TypeError, - "expected a string object, not int"): + with pytest.raises(TypeError, + match="expected a string object, not int"): result = values.str.rfind(0) def test_find_nan(self): @@ -1892,13 +1891,11 @@ def _check(result, expected): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assert_raises_regex(ValueError, - "substring not found"): + with pytest.raises(ValueError, match="substring not found"): result = s.str.index('DE') - with tm.assert_raises_regex(TypeError, - "expected a string " - "object, not int"): + msg = "expected a string object, not int" + with pytest.raises(TypeError, match=msg): result = s.str.index(0) # test with nan @@ -1982,25 +1979,22 @@ def test_pad_fillchar(self): exp = Series(['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): + msg = "fillchar must be a character, not str" + with pytest.raises(TypeError, match=msg): result = values.str.pad(5, fillchar='XY') - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): + msg = "fillchar must be a character, not int" + with pytest.raises(TypeError, match=msg): result = values.str.pad(5, fillchar=5) - def test_pad_width(self): - # GH 13598 + @pytest.mark.parametrize("f", ['center', 'ljust', 'rjust', 'zfill', 'pad']) + def test_pad_width(self, f): + # see gh-13598 s = Series(['1', '22', 'a', 'bb']) + msg = "width must be of integer type, not*" - for f in ['center', 'ljust', 'rjust', 'zfill', 'pad']: - with tm.assert_raises_regex(TypeError, - "width must be of " - "integer type, not*"): - getattr(s.str, f)('f') + with pytest.raises(TypeError, match=msg): + getattr(s.str, f)('f') def test_translate(self): @@ -2031,8 +2025,8 @@ def _check(result, expected): expected = klass(['abcde', 'abcc', 'cddd', 'cde']) _check(result, expected) else: - with tm.assert_raises_regex( - ValueError, "deletechars is not a valid argument"): + msg = "deletechars is not a valid argument" + with pytest.raises(ValueError, match=msg): result = s.str.translate(table, deletechars='fg') # Series with non-string values @@ -2120,35 +2114,25 @@ def test_center_ljust_rjust_fillchar(self): # If fillchar is not a charatter, normal str raises TypeError # 'aaa'.ljust(5, 'XY') # TypeError: must be char, not str - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): - result = values.str.center(5, fillchar='XY') - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): - result = values.str.ljust(5, fillchar='XY') - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): - result = values.str.rjust(5, fillchar='XY') - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): - result = values.str.center(5, 
fillchar=1) - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): - result = values.str.ljust(5, fillchar=1) - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): - result = values.str.rjust(5, fillchar=1) + template = "fillchar must be a character, not {dtype}" + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.center(5, fillchar='XY') + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.ljust(5, fillchar='XY') + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.rjust(5, fillchar='XY') + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.center(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.ljust(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.rjust(5, fillchar=1) def test_zfill(self): values = Series(['1', '22', 'aaa', '333', '45678']) @@ -2342,7 +2326,7 @@ def test_split_to_dataframe(self): index=['preserve', 'me']) tm.assert_frame_equal(result, exp) - with tm.assert_raises_regex(ValueError, "expand must be"): + with pytest.raises(ValueError, match="expand must be"): s.str.split('_', expand="not_a_boolean") def test_split_to_multiindex_expand(self): @@ -2367,7 +2351,7 @@ def test_split_to_multiindex_expand(self): tm.assert_index_equal(result, exp) assert result.nlevels == 6 - with tm.assert_raises_regex(ValueError, "expand must be"): + with pytest.raises(ValueError, match="expand must be"): idx.str.split('_', expand="not_a_boolean") def test_rsplit_to_dataframe_expand(self): @@ -3038,8 +3022,7 @@ def test_normalize(self): result = s.str.normalize('NFC') tm.assert_series_equal(result, expected) - with tm.assert_raises_regex(ValueError, - "invalid normalization form"): + with pytest.raises(ValueError, match="invalid normalization form"): s.str.normalize('xxx') s = Index([u'ABC', u'123', u'アイエ']) @@ -3082,9 +3065,9 @@ def test_index_str_accessor_visibility(self): for values, tp in cases: idx = Index(values) message = 'Can only use .str accessor with string values' - with tm.assert_raises_regex(AttributeError, message): + with pytest.raises(AttributeError, match=message): Series(values).str - with tm.assert_raises_regex(AttributeError, message): + with pytest.raises(AttributeError, match=message): idx.str assert idx.inferred_type == tp @@ -3092,14 +3075,14 @@ def test_index_str_accessor_visibility(self): idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')]) assert idx.inferred_type == 'mixed' message = 'Can only use .str accessor with Index, not MultiIndex' - with tm.assert_raises_regex(AttributeError, message): + with pytest.raises(AttributeError, match=message): idx.str def test_str_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(list('aabbcde')) - with tm.assert_raises_regex(AttributeError, - "You cannot add any new attribute"): + with pytest.raises(AttributeError, + match="You cannot add any new attribute"): s.str.xlabel = "a" def test_method_on_bytes(self): diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index ade847923c083..69150ee3c5454 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -90,7 +90,7 @@ def test_1d_with_out(self, dtype_can_hold_na, writeable): expected[3] = np.nan tm.assert_almost_equal(out, expected) else: - with tm.assert_raises_regex(TypeError, self.fill_error): + 
with pytest.raises(TypeError, match=self.fill_error): algos.take_1d(data, indexer, out=out) # No Exception otherwise. @@ -146,8 +146,7 @@ def test_2d_with_out(self, dtype_can_hold_na, writeable): tm.assert_almost_equal(out1, expected1) else: for i, out in enumerate([out0, out1]): - with tm.assert_raises_regex(TypeError, - self.fill_error): + with pytest.raises(TypeError, match=self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # No Exception otherwise. @@ -226,8 +225,7 @@ def test_3d_with_out(self, dtype_can_hold_na): tm.assert_almost_equal(out2, expected2) else: for i, out in enumerate([out0, out1, out2]): - with tm.assert_raises_regex(TypeError, - self.fill_error): + with pytest.raises(TypeError, match=self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # No Exception otherwise. diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index c7cd04deac6c8..31ea5c11f5bd1 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -90,7 +90,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] - with tm.assert_raises_regex(KeyError, '^[^A]+$'): + with pytest.raises(KeyError, match='^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! g[['A', 'C']] @@ -116,7 +116,7 @@ def test_skip_sum_object_raises(self): df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'}) r = df.rolling(window=3) - with tm.assert_raises_regex(TypeError, 'cannot handle this type'): + with pytest.raises(TypeError, match='cannot handle this type'): r.sum() def test_agg(self): @@ -410,10 +410,10 @@ def test_numpy_compat(self, method): msg = "numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(w, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(w, method), dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(dtype=np.float64) class TestRolling(Base): @@ -507,10 +507,10 @@ def test_numpy_compat(self, method): msg = "numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, method), dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(dtype=np.float64) def test_closed(self): df = DataFrame({'A': [0, 1, 2, 3, 4]}) @@ -686,10 +686,10 @@ def test_numpy_compat(self, method): msg = "numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) @pytest.mark.parametrize( 'expander', @@ -812,10 +812,10 @@ def test_numpy_compat(self, method): msg = "numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), 
dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) # gh-12373 : rolling functions error on float32 data @@ -1999,12 +1999,12 @@ def test_no_pairwise_with_other(self, f): tm.assert_index_equal(result.index, expected_index) tm.assert_index_equal(result.columns, expected_columns) else: - tm.assert_raises_regex( - ValueError, "'arg1' columns are not unique", f, df, - self.df2) - tm.assert_raises_regex( - ValueError, "'arg2' columns are not unique", f, - self.df2, df) + with pytest.raises(ValueError, + match="'arg1' columns are not unique"): + f(df, self.df2) + with pytest.raises(ValueError, + match="'arg2' columns are not unique"): + f(self.df2, df) @pytest.mark.parametrize( 'f', [lambda x, y: x.expanding().cov(y), diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 43c7d0951bf6c..a0a1364f4617e 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -54,7 +54,7 @@ def test_series_numeric(self): def test_error(self): s = pd.Series([1, -3.14, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors='raise') res = to_numeric(s, errors='ignore') @@ -67,13 +67,13 @@ def test_error(self): s = pd.Series(['orange', 1, -3.14, 'apple']) msg = 'Unable to parse string "orange" at position 0' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors='raise') def test_error_seen_bool(self): s = pd.Series([True, False, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors='raise') res = to_numeric(s, errors='ignore') @@ -166,7 +166,7 @@ def test_type_check(self, errors): # see gh-11776 df = pd.DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]}) kwargs = dict(errors=errors) if errors is not None else dict() - error_ctx = tm.assert_raises_regex(TypeError, "1-d array") + error_ctx = pytest.raises(TypeError, match="1-d array") with error_ctx: to_numeric(df, **kwargs) @@ -269,7 +269,7 @@ def test_non_hashable(self): res = pd.to_numeric(s, errors='ignore') tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) - with tm.assert_raises_regex(TypeError, "Invalid object type"): + with pytest.raises(TypeError, match="Invalid object type"): pd.to_numeric(s) @pytest.mark.parametrize("data", [ @@ -283,7 +283,7 @@ def test_downcast_basic(self, data): invalid_downcast = "unsigned-integer" msg = "invalid downcasting method provided" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.to_numeric(data, downcast=invalid_downcast) expected = np.array([1, 2, 3], dtype=np.int64) @@ -436,5 +436,5 @@ def test_coerce_uint64_conflict(self): tm.assert_series_equal(result, s) msg = "Unable to parse string" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors="raise") diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index 223298dc42544..2f17a61917320 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -7,8 +7,6 @@ from dateutil.relativedelta import relativedelta import pytest -import 
pandas.util.testing as tm - from pandas import Timestamp from pandas.tseries.frequencies import get_offset from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG @@ -44,9 +42,9 @@ def test_get_offset_name(): def test_get_offset(): - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): get_offset('gibberish') - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): get_offset('QS-JAN-B') pairs = [ diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index f90c8e449f92c..a8def56aa06d4 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -105,8 +105,7 @@ def test_to_offset_multiple(self): assert (result == expected) # malformed - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 2h20m'): + with pytest.raises(ValueError, match='Invalid frequency: 2h20m'): frequencies.to_offset('2h20m') def test_to_offset_negative(self): @@ -128,23 +127,17 @@ def test_to_offset_negative(self): def test_to_offset_invalid(self): # GH 13930 - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: U1'): + with pytest.raises(ValueError, match='Invalid frequency: U1'): frequencies.to_offset('U1') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: -U'): + with pytest.raises(ValueError, match='Invalid frequency: -U'): frequencies.to_offset('-U') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 3U1'): + with pytest.raises(ValueError, match='Invalid frequency: 3U1'): frequencies.to_offset('3U1') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: -2-3U'): + with pytest.raises(ValueError, match='Invalid frequency: -2-3U'): frequencies.to_offset('-2-3U') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: -2D:3H'): + with pytest.raises(ValueError, match='Invalid frequency: -2D:3H'): frequencies.to_offset('-2D:3H') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 1.5.0S'): + with pytest.raises(ValueError, match='Invalid frequency: 1.5.0S'): frequencies.to_offset('1.5.0S') # split offsets with spaces are valid @@ -157,11 +150,9 @@ def test_to_offset_invalid(self): # special cases assert frequencies.to_offset('2SMS-15') == offsets.SemiMonthBegin(2) - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 2SMS-15-15'): + with pytest.raises(ValueError, match='Invalid frequency: 2SMS-15-15'): frequencies.to_offset('2SMS-15-15') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 2SMS-15D'): + with pytest.raises(ValueError, match='Invalid frequency: 2SMS-15D'): frequencies.to_offset('2SMS-15D') def test_to_offset_leading_zero(self): @@ -183,7 +174,7 @@ def test_to_offset_leading_plus(self): assert (result.n == 150) for bad_freq in ['+-1d', '-+1h', '+1', '-7', '+d', '-m']: - with tm.assert_raises_regex(ValueError, 'Invalid frequency:'): + with pytest.raises(ValueError, match='Invalid frequency:'): frequencies.to_offset(bad_freq) def test_to_offset_pd_timedelta(self): @@ -270,8 +261,7 @@ def test_anchored_shortcuts(self): 'SMS-BAR', 'SMS-BYR' 'BSMS', 'SMS--2'] for invalid_anchor in invalid_anchors: - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: '): + with pytest.raises(ValueError, match='Invalid frequency: '): frequencies.to_offset(invalid_anchor) @@ -464,13 +454,13 @@ def test_frequency_misc(self): expected = offsets.Minute(5) assert result == expected - with 
tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid frequency'): frequencies.get_freq_code((5, 'baz')) - with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid frequency'): frequencies.to_offset('100foo') - with tm.assert_raises_regex(ValueError, 'Could not evaluate'): + with pytest.raises(ValueError, match='Could not evaluate'): frequencies.to_offset(('', '')) @@ -799,8 +789,8 @@ def test_legacy_offset_warnings(self): msg = INVALID_FREQ_ERR_MSG for freq in freqs: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): frequencies.get_offset(freq) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range('2011-01-01', periods=5, freq=freq) diff --git a/pandas/tests/tslibs/test_libfrequencies.py b/pandas/tests/tslibs/test_libfrequencies.py index f4083dfb2bd1c..18840fe1fd9b9 100644 --- a/pandas/tests/tslibs/test_libfrequencies.py +++ b/pandas/tests/tslibs/test_libfrequencies.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import pandas.util.testing as tm +import pytest from pandas.tseries import offsets from pandas._libs.tslibs.frequencies import (get_rule_month, @@ -14,7 +14,7 @@ def assert_aliases_deprecated(freq, expected, aliases): assert (_period_str_to_code(freq) == expected) for alias in aliases: - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): _period_str_to_code(alias) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 466a22e5916e9..2762fb9cbe000 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -60,13 +60,13 @@ def test_does_not_convert_mixed_integer(self): def test_parsers_quarterly_with_freq(self): msg = ('Incorrect quarterly string is given, quarter ' 'must be between 1 and 4: 2013Q5') - with tm.assert_raises_regex(parsing.DateParseError, msg): + with pytest.raises(parsing.DateParseError, match=msg): parsing.parse_time_string('2013Q5') # GH 5418 msg = ('Unable to retrieve month information from given freq: ' 'INVLD-L-DEC-SAT') - with tm.assert_raises_regex(parsing.DateParseError, msg): + with pytest.raises(parsing.DateParseError, match=msg): parsing.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') cases = {('2013Q2', None): datetime(2013, 4, 1), diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 11dd2e98adda2..9f5b4f7b90d9f 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -47,7 +47,7 @@ def test_hash_array_mixed(self): @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) def test_hash_array_errors(self, val): msg = 'must pass a ndarray-like' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): hash_array(val) def check_equal(self, obj, **kwargs): @@ -104,7 +104,7 @@ def test_hash_scalar(self, val): @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) def test_hash_tuples_err(self, val): msg = 'must be convertible to a list-of-tuples' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): hash_tuples(val) def test_multiindex_unique(self): @@ -238,7 +238,7 @@ def test_hash_keys(self): def test_invalid_key(self): # this only matters for object dtypes msg = 'key should be a 16-byte string encoded' - with tm.assert_raises_regex(ValueError, msg): + with 
pytest.raises(ValueError, match=msg): hash_pandas_object(Series(list('abc')), hash_key='foo') def test_alread_encoded(self): diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index c10ad72d39f8e..a886579ee913f 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from pandas.compat import raise_with_traceback import pandas.util._test_decorators as td import pandas as pd @@ -13,7 +14,7 @@ import pandas.util.testing as tm from pandas.util.testing import ( RNGContext, assert_almost_equal, assert_frame_equal, assert_index_equal, - assert_numpy_array_equal, assert_series_equal, raise_with_traceback) + assert_numpy_array_equal, assert_series_equal) class TestAssertAlmostEqual(object): @@ -152,13 +153,13 @@ def test_assert_almost_equal_object(self): class TestUtilTesting(object): def test_raise_with_traceback(self): - with tm.assert_raises_regex(LookupError, "error_text"): + with pytest.raises(LookupError, match="error_text"): try: raise ValueError("THIS IS AN ERROR") except ValueError as e: e = LookupError("error_text") raise_with_traceback(e) - with tm.assert_raises_regex(LookupError, "error_text"): + with pytest.raises(LookupError, match="error_text"): try: raise ValueError("This is another error") except ValueError: @@ -189,18 +190,18 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5])) # scalar comparison expected = """Expected type <class 'numpy.ndarray'>""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(1, 2) expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(1, 2) # array / scalar array comparison expected = """numpy array are different numpy array classes are different \\[left\\]: ndarray \\[right\\]: int""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): # numpy_array_equal only accepts np.ndarray assert_numpy_array_equal(np.array([1]), 1) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1]), 1) # scalar / array comparison expected = """numpy array are different numpy array classes are different \\[left\\]: int \\[right\\]: ndarray""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(1, np.array([1])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(1, np.array([1])) expected = """numpy array are different numpy array values are different \\(66\\.66667 %\\) \\[left\\]: \\[nan, 2\\.0, 3\\.0\\] \\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) - with tm.assert_raises_regex(AssertionError, expected): + with
pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) @@ -247,9 +248,9 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1, 2]), np.array([1, 3])) expected = """numpy array are different @@ -258,7 +259,7 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1\\.1, 2\\.000001\\] \\[right\\]: \\[1\\.1, 2.0\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal( np.array([1.1, 2.000001]), np.array([1.1, 2.0])) @@ -271,10 +272,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) @@ -284,10 +285,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) @@ -298,10 +299,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') @@ -316,10 +317,10 @@ def test_numpy_array_equal_unicode_message(self): \\[left\\]: \\[á, à, ä\\] \\[right\\]: \\[á, à, å\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([u'á', u'à', u'ä']), np.array([u'á', u'à', u'å'])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([u'á', u'à', u'ä']), np.array([u'á', u'à', u'å'])) @@ -335,9 +336,9 @@ def test_numpy_array_equal_object_message(self): \\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\] \\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(a, b) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(a, b) def test_numpy_array_equal_copy_flag(self): @@ 
-345,10 +346,10 @@ def test_numpy_array_equal_copy_flag(self): b = a.copy() c = a.view() expected = r'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)' - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(a, b, check_same='same') expected = r'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)' - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(a, c, check_same='copy') def test_assert_almost_equal_iterable_message(self): @@ -359,7 +360,7 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: 2 \\[right\\]: 3""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal([1, 2], [3, 4, 5]) expected = """Iterable are different @@ -368,7 +369,7 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal([1, 2], [1, 3]) @@ -386,7 +387,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, exact=False) expected = """MultiIndex level \\[1\\] are different @@ -399,9 +400,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -412,9 +413,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3, 4]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -425,9 +426,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3.0]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, exact=True) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, exact=True, check_exact=False) expected = """Index are different @@ -438,7 +439,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0000000001]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) # must success @@ -452,9 +453,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0001]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with 
pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) # must success assert_index_equal(idx1, idx2, check_exact=False, @@ -468,9 +469,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 4]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_less_precise=True) expected = """MultiIndex level \\[1\\] are different @@ -483,9 +484,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) def test_index_equal_metadata_message(self): @@ -498,7 +499,7 @@ def test_index_equal_metadata_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3], name='x') - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) # same name, should pass @@ -515,7 +516,7 @@ def test_index_equal_metadata_message(self): idx1 = pd.Index([1, 2, 3], name=np.nan) idx2 = pd.Index([1, 2, 3], name=pd.NaT) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) def test_categorical_index_equality(self): @@ -526,7 +527,7 @@ def test_categorical_index_equality(self): \\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ ordered=False\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), pd.Index(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']))) @@ -619,7 +620,7 @@ def test_series_equal_message(self): \\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) \\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) expected = """Series are different @@ -628,9 +629,9 @@ def test_series_equal_message(self): \\[left\\]: \\[1, 2, 3\\] \\[right\\]: \\[1, 2, 4\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), check_less_precise=True) @@ -642,7 +643,7 @@ def test_categorical_series_equality(self): \\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ ordered=False\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']))) @@ -709,7 +710,7 @@ def test_frame_equal_message(self): \\[left\\]: \\(3, 2\\) \\[right\\]: \\(3, 1\\)""" - with 
tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3]})) @@ -719,7 +720,7 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -731,7 +732,7 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'b': [4, 5, 6]}, @@ -743,11 +744,11 @@ def test_frame_equal_message(self): \\[left\\]: \\[4, 5, 6\\] \\[right\\]: \\[4, 5, 7\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]})) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}), by_blocks=True) @@ -763,13 +764,13 @@ def test_frame_equal_message_unicode(self): \\[left\\]: \\[é, è, ë\\] \\[right\\]: \\[é, è, e̊\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'ë']}), pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'e̊']})) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'ë']}), pd.DataFrame({'A': [u'á', u'à', u'ä'], @@ -782,13 +783,13 @@ def test_frame_equal_message_unicode(self): \\[left\\]: \\[á, à, ä\\] \\[right\\]: \\[a, a, a\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'ë']}), pd.DataFrame({'A': ['a', 'a', 'a'], 'E': ['e', 'e', 'e']})) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'ë']}), pd.DataFrame({'A': ['a', 'a', 'a'], @@ -808,7 +809,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 5]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): tm.assert_categorical_equal(a, b) expected = """Categorical\\.codes are different @@ -819,7 +820,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): tm.assert_categorical_equal(a, b) expected = """Categorical are 
different @@ -830,7 +831,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4], ordered=False) b = pd.Categorical([1, 2, 3, 4], ordered=True) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): tm.assert_categorical_equal(a, b) @@ -845,7 +846,7 @@ def test_interval_array_equal_message(self): IntervalArray.left values are different \\(100.0 %\\) \\[left\\]: Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""") - with tm.assert_raises_regex(AssertionError, msg): + with pytest.raises(AssertionError, match=msg): tm.assert_interval_array_equal(a, b) @@ -883,3 +884,13 @@ def test_create_temp_directory(): assert os.path.exists(path) assert os.path.isdir(path) assert not os.path.exists(path) + + +def test_assert_raises_regex_deprecated(): + # see gh-23592 + + with tm.assert_produces_warning(FutureWarning): + msg = "Not equal!" + + with tm.assert_raises_regex(AssertionError, msg): + assert 1 == 2, msg diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index 032ee5eb22aaa..a6cb54ee43909 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -108,7 +108,7 @@ class TestValidateArgs(object): def test_bad_min_fname_arg_count(self): msg = "'max_fname_arg_count' must be non-negative" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): validate_args(self.fname, (None,), -1, 'foo') def test_bad_arg_length_max_value_single(self): @@ -123,7 +123,7 @@ def test_bad_arg_length_max_value_single(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -140,7 +140,7 @@ def test_bad_arg_length_max_value_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -159,7 +159,7 @@ def test_not_all_defaults(self): arg_vals = (1, -1, 3) for i in range(1, 3): - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): validate_args(self.fname, arg_vals[:i], 2, compat_args) def test_validation(self): @@ -188,7 +188,7 @@ def test_bad_kwarg(self): r"keyword argument '{arg}'".format( fname=self.fname, arg=badarg)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_kwargs(self.fname, kwargs, compat_args) def test_not_all_none(self): @@ -209,7 +209,7 @@ def test_not_all_none(self): kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i])) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): validate_kwargs(self.fname, kwargs, compat_args) def test_validation(self): @@ -228,11 +228,11 @@ def test_validate_bool_kwarg(self): for name in arg_names: for value in invalid_values: - with tm.assert_raises_regex(ValueError, - "For argument \"%s\" " - "expected type bool, " - "received type %s" % - (name, type(value).__name__)): + msg = ("For argument \"%s\" " + "expected type bool, " + "received type %s" % + (name, type(value).__name__)) + with pytest.raises(ValueError, match=msg): validate_bool_kwarg(value, name) for value in valid_values: @@ -255,7 +255,7 @@ def test_invalid_total_length_max_length_one(self): 
.format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -273,7 +273,7 @@ def test_invalid_total_length_max_length_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -292,17 +292,15 @@ def test_no_args_with_kwargs(self): args = () kwargs = {'foo': -5, bad_arg: 2} - tm.assert_raises_regex(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - min_fname_arg_count, compat_args) + with pytest.raises(ValueError, match=msg): + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, compat_args) args = (-5, 2) kwargs = {} - tm.assert_raises_regex(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - min_fname_arg_count, compat_args) + with pytest.raises(ValueError, match=msg): + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, compat_args) def test_duplicate_argument(self): min_fname_arg_count = 2 @@ -316,7 +314,7 @@ def test_duplicate_argument(self): msg = (r"{fname}\(\) got multiple values for keyword " r"argument '{arg}'".format(fname=self.fname, arg='foo')) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -343,7 +341,7 @@ def test_cannot_create_instance_of_stolenbuffer(self): ``move_into_mutable_buffer`` which has a bunch of checks in it. """ msg = "cannot create 'pandas.util._move.stolenbuf' instances" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): stolenbuf() def test_more_than_one_ref(self): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c6457545038e0..fd7012c87040f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2541,6 +2541,9 @@ def assert_raises_regex(_exception, _regexp, _callable=None, for use by `re.search()`. This is a port of the `assertRaisesRegexp` function from unittest in Python 2.7. + .. deprecated:: 0.24.0 + Use `pytest.raises` instead. + Examples -------- >>> assert_raises_regex(ValueError, 'invalid literal for.*XYZ', int, 'XYZ') @@ -2570,6 +2573,10 @@ def assert_raises_regex(_exception, _regexp, _callable=None, AssertionError: "banana" does not match "'str' object does not support \ item assignment" """ + warnings.warn(("assert_raises_regex has been deprecated and will " + "be removed in the next release. 
Please use " + "`pytest.raises` instead."), FutureWarning, stacklevel=2) + manager = _AssertRaisesContextmanager(exception=_exception, regexp=_regexp) if _callable is not None: with manager: From cffdb0ec102948956f419d209069d1ec85529dba Mon Sep 17 00:00:00 2001 From: Markus Meier Date: Sun, 11 Nov 2018 02:05:54 +0100 Subject: [PATCH 083/122] DOC: Remove incorrect periods at the end of parameter types (#23600) --- pandas/core/dtypes/inference.py | 22 +++++++++++----------- pandas/core/generic.py | 4 ++-- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/core/series.py | 2 +- pandas/core/strings.py | 4 ++-- pandas/io/clipboards.py | 2 +- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index d56bd83f01236..5f35a040d7d47 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -73,7 +73,7 @@ def is_string_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Examples -------- @@ -127,7 +127,7 @@ def is_iterator(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -172,7 +172,7 @@ def is_file_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -203,7 +203,7 @@ def is_re(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -227,7 +227,7 @@ def is_re_compilable(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -261,7 +261,7 @@ def is_list_like(obj, allow_sets=True): Parameters ---------- - obj : The object to check. + obj : The object to check allow_sets : boolean, default True If this parameter is False, sets will not be considered list-like @@ -310,7 +310,7 @@ def is_array_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -343,7 +343,7 @@ def is_nested_list_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -384,7 +384,7 @@ def is_dict_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -408,7 +408,7 @@ def is_named_tuple(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -468,7 +468,7 @@ def is_sequence(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cfdc6b34274bf..b7ead5a098880 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5248,11 +5248,11 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): the same type. Alternatively, use {col: dtype, ...}, where col is a column label and dtype is a numpy.dtype or Python type to cast one or more of the DataFrame's columns to column-specific types. - copy : bool, default True. + copy : bool, default True Return a copy when ``copy=True`` (be very careful setting ``copy=False`` as changes to values then may propagate to other pandas objects). - errors : {'raise', 'ignore'}, default 'raise'. + errors : {'raise', 'ignore'}, default 'raise' Control raising of exceptions on invalid data for provided dtype. 
- ``raise`` : allow exceptions to be raised diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 56ab9b6c020c0..8da0672559006 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -501,7 +501,7 @@ def to_series(self, keep_tz=False, index=None, name=None): Parameters ---------- - keep_tz : optional, defaults False. + keep_tz : optional, defaults False return the data keeping the timezone. If keep_tz is True: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9c981c24190a4..01304cce507f0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1885,7 +1885,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): ascending : boolean, default True False to sort in descending order Can also be a list to specify a directed ordering - sort_remaining : sort by the remaining levels after level. + sort_remaining : sort by the remaining levels after level Returns ------- diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d1b5645928921..e4c177a08462e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -38,7 +38,7 @@ class RangeIndex(Int64Index): Parameters ---------- - start : int (default: 0), or other RangeIndex instance. + start : int (default: 0), or other RangeIndex instance If int and "stop" is not given, interpreted as "stop" instead. stop : int (default: 0) step : int (default: 1) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6971b0b0c78e0..b9f4b848b2ed7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3025,7 +3025,7 @@ def reorder_levels(self, order): Parameters ---------- - order : list of int representing new level order. + order : list of int representing new level order (reference level by number or key) Returns diff --git a/pandas/core/strings.py b/pandas/core/strings.py index bf0c93437f4dc..a12605aaed554 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -659,7 +659,7 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan): If True, case sensitive flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE - na : default NaN, fill value for missing values. + na : default NaN, fill value for missing values Returns ------- @@ -2665,7 +2665,7 @@ def encode(self, encoding, errors="strict"): Parameters ---------- - to_strip : str or None, default None. + to_strip : str or None, default None Specifying the set of characters to be removed. All combinations of this set of characters will be stripped. If None then whitespaces are removed. diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index c6108f30a560a..23a2b04214e4e 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -16,7 +16,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover Parameters ---------- - sep : str, default '\s+'. + sep : str, default '\s+' A string or regex delimiter. The default of '\s+' denotes one or more whitespace characters. 
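The docstring patch that ends above and the one that begins below converge on the same numpydoc layout: the parameter type line carries no trailing period, and the Returns and See Also sections come before Examples. A minimal sketch of that shape in Python — the function, its sections, and `is_other_thing` are hypothetical, shown for illustration only:

    def is_thing(obj):
        """
        Check whether `obj` is a thing.

        Parameters
        ----------
        obj : object
            The object to check.

        Returns
        -------
        bool
            Whether `obj` is a thing.

        See Also
        --------
        is_other_thing : Related check.

        Examples
        --------
        >>> is_thing(1)
        True
        """
        return True  # trivial body; only the docstring layout matters here

Generator-producing methods get a Yields section in place of Returns, which is exactly the change the next patch makes to `DataFrame.iterrows`.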
From b265308cbca31750c02b98bab5914add6159d5d4 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 11 Nov 2018 01:28:18 +0000 Subject: [PATCH 084/122] DOC: Fixes to docstring to add validation to CI (#23560) --- pandas/core/frame.py | 38 +++++++++++++++++++++----------------- pandas/core/generic.py | 3 +-- pandas/core/panel.py | 16 ++++++++-------- pandas/core/series.py | 1 - 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b24f79e89902a..6b29725ba2bea 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -864,12 +864,17 @@ def iterrows(self): data types, the iterator returns a copy and not a view, and writing to it will have no effect. - Returns - ------- + Yields + ------ + index : label or tuple of label + The index of the row. A tuple for a `MultiIndex`. + data : Series + The data of the row as a Series. + it : generator A generator that iterates over the rows of the frame. - See also + See Also -------- itertuples : Iterate over DataFrame rows as namedtuples of the values. iteritems : Iterate over (column name, Series) pairs. @@ -3951,6 +3956,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False, necessary. Setting to False will improve the performance of this method + Returns + ------- + DataFrame + Examples -------- >>> df = pd.DataFrame({'month': [1, 4, 7, 10], @@ -3991,10 +4000,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False, 2 2014 4 40 3 2013 7 84 4 2014 10 31 - - Returns - ------- - dataframe : DataFrame """ inplace = validate_bool_kwarg(inplace, 'inplace') if not isinstance(keys, list): @@ -6694,6 +6699,15 @@ def round(self, decimals=0, *args, **kwargs): of `decimals` which are not columns of the input will be ignored. + Returns + ------- + DataFrame + + See Also + -------- + numpy.around + Series.round + Examples -------- >>> df = pd.DataFrame(np.random.random([3, 3]), @@ -6719,15 +6733,6 @@ def round(self, decimals=0, *args, **kwargs): first 0.0 1 0.17 second 0.0 1 0.58 third 0.9 0 0.49 - - Returns - ------- - DataFrame object - - See Also - -------- - numpy.around - Series.round """ from pandas.core.reshape.concat import concat @@ -6793,7 +6798,6 @@ def corr(self, method='pearson', min_periods=1): Examples -------- - >>> import numpy as np >>> histogram_intersection = lambda a, b: np.minimum(a, b ... ).sum().round(decimals=1) >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b7ead5a098880..34f25c5634d5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9691,8 +9691,7 @@ def nanptp(values, axis=0, skipna=True): cls.ptp = _make_stat_function( cls, 'ptp', name, name2, axis_descr, - """ - Returns the difference between the maximum value and the + """Returns the difference between the maximum value and the minimum value in the object. This is the equivalent of the ``numpy.ndarray`` method ``ptp``. diff --git a/pandas/core/panel.py b/pandas/core/panel.py index eb841e6398976..c878d16fac2e9 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -106,14 +106,14 @@ def panel_index(time, panels, names=None): class Panel(NDFrame): """ - Represents wide format panel data, stored as 3-dimensional array - - .. deprecated:: 0.20.0 - The recommended way to represent 3-D data are with a MultiIndex on a - DataFrame via the :attr:`~Panel.to_frame()` method or with the - `xarray package `__. - Pandas provides a :attr:`~Panel.to_xarray()` method to automate this - conversion. 
+ Represents wide format panel data, stored as 3-dimensional array. + + .. deprecated:: 0.20.0 + The recommended way to represent 3-D data are with a MultiIndex on a + DataFrame via the :attr:`~Panel.to_frame()` method or with the + `xarray package `__. + Pandas provides a :attr:`~Panel.to_xarray()` method to automate this + conversion. Parameters ---------- diff --git a/pandas/core/series.py b/pandas/core/series.py index b9f4b848b2ed7..20e4720a3bde7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1913,7 +1913,6 @@ def corr(self, other, method='pearson', min_periods=None): Examples -------- - >>> import numpy as np >>> histogram_intersection = lambda a, b: np.minimum(a, b ... ).sum().round(decimals=1) >>> s1 = pd.Series([.2, .0, .6, .2]) From f2eac446f4c2ce96b6dba58d6a214e5c054e767a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 08:24:42 -0600 Subject: [PATCH 085/122] TST: Fix integer ops comparison test (#23619) The `op(Series[integer], other)` path was being tested twice. The `op(IntegerArray, other)` path was not being tested. Closes https://github.com/pandas-dev/pandas/issues/22096 --- pandas/tests/arrays/test_integer.py | 10 ++++------ setup.cfg | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 10f54458e4980..51cd139a6ccad 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -314,11 +314,11 @@ def test_rpow_one_to_na(self): class TestComparisonOps(BaseOpsUtil): - def _compare_other(self, s, data, op_name, other): + def _compare_other(self, data, op_name, other): op = self.get_op_from_name(op_name) # array - result = op(s, other) + result = pd.Series(op(data, other)) expected = pd.Series(op(data._data, other)) # fill the nan locations @@ -340,14 +340,12 @@ def _compare_other(self, s, data, op_name, other): def test_compare_scalar(self, data, all_compare_operators): op_name = all_compare_operators - s = pd.Series(data) - self._compare_other(s, data, op_name, 0) + self._compare_other(data, op_name, 0) def test_compare_array(self, data, all_compare_operators): op_name = all_compare_operators - s = pd.Series(data) other = pd.Series([0] * len(data)) - self._compare_other(s, data, op_name, other) + self._compare_other(data, op_name, other) class TestCasting(object): diff --git a/setup.cfg b/setup.cfg index 4726a0ddb2fb2..2e07182196d5b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,7 +90,7 @@ known_post_core=pandas.tseries,pandas.io,pandas.plotting sections=FUTURE,STDLIB,THIRDPARTY,PRE_CORE,DTYPES,FIRSTPARTY,POST_CORE,LOCALFOLDER known_first_party=pandas -known_third_party=Cython,numpy,python-dateutil,pytz,pyarrow +known_third_party=Cython,numpy,python-dateutil,pytz,pyarrow,pytest multi_line_output=4 force_grid_wrap=0 combine_as_imports=True From fef6d7a791ea2911f513d76aa990576d059303e6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 08:48:37 -0600 Subject: [PATCH 086/122] TST: Unskip some Categorical Tests (#23613) --- pandas/tests/extension/test_categorical.py | 95 +++++++++------------- 1 file changed, 40 insertions(+), 55 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 7fd389e19325c..279bfb5dc8eab 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -72,10 +72,10 @@ class TestDtype(base.BaseDtypeTests): class TestInterface(base.BaseInterfaceTests): -
@pytest.mark.skip(reason="Memory usage doesn't match") - def test_memory_usage(self): + @pytest.mark.skip(reason="Memory usage doesn't match", strict=True) + def test_memory_usage(self, data): # Is this deliberate? - pass + super(TestInterface, self).test_memory_usage(data) class TestConstructors(base.BaseConstructorsTests): @@ -83,69 +83,56 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_concat_columns(self, data, na_value): - pass - - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_align(self, data, na_value): - pass - - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_align_frame(self, data, na_value): - pass - - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_merge(self, data, na_value): - pass + pass class TestGetitem(base.BaseGetitemTests): - skip_take = pytest.mark.skip(reason="GH-20664.") + skip_take = pytest.mark.skip(reason="GH-20664.", strict=True) - @pytest.mark.skip(reason="Backwards compatibility") - def test_getitem_scalar(self): + @pytest.mark.skip(reason="Backwards compatibility", strict=True) + def test_getitem_scalar(self, data): # CategoricalDtype.type isn't "correct" since it should # be a parent of the elements (object). But don't want # to break things by changing. - pass + super(TestGetitem, self).test_getitem_scalar(data) @skip_take - def test_take(self): + def test_take(self, data, na_value, na_cmp): # TODO remove this once Categorical.take is fixed - pass + super(TestGetitem, self).test_take(data, na_value, na_cmp) @skip_take - def test_take_negative(self): - pass + def test_take_negative(self, data): + super().test_take_negative(data) @skip_take - def test_take_pandas_style_negative_raises(self): - pass + def test_take_pandas_style_negative_raises(self, data, na_value): + super().test_take_pandas_style_negative_raises(data, na_value) @skip_take - def test_take_non_na_fill_value(self): - pass + def test_take_non_na_fill_value(self, data_missing): + super().test_take_non_na_fill_value(data_missing) @skip_take - def test_take_out_of_bounds_raises(self): - pass + def test_take_out_of_bounds_raises(self, data, allow_fill): + return super().test_take_out_of_bounds_raises(data, allow_fill) - @pytest.mark.skip(reason="GH-20747. Unobserved categories.") - def test_take_series(self): - pass + @pytest.mark.skip(reason="GH-20747. 
Unobserved categories.", strict=True) + def test_take_series(self, data): + super().test_take_series(data) @skip_take - def test_reindex_non_na_fill_value(self): - pass + def test_reindex_non_na_fill_value(self, data_missing): + super().test_reindex_non_na_fill_value(data_missing) - @pytest.mark.skip(reason="Categorical.take buggy") - def test_take_empty(self): - pass + @pytest.mark.skip(reason="Categorical.take buggy", strict=True) + def test_take_empty(self, data, na_value, na_cmp): + super().test_take_empty(data, na_value, na_cmp) - @pytest.mark.skip(reason="test not written correctly for categorical") - def test_reindex(self): - pass + @pytest.mark.skip(reason="test not written correctly for categorical", + strict=True) + def test_reindex(self, data, na_value): + super().test_reindex(data, na_value) class TestSetitem(base.BaseSetitemTests): @@ -154,13 +141,13 @@ class TestSetitem(base.BaseSetitemTests): class TestMissing(base.BaseMissingTests): - @pytest.mark.skip(reason="Not implemented") - def test_fillna_limit_pad(self): - pass + @pytest.mark.skip(reason="Not implemented", strict=True) + def test_fillna_limit_pad(self, data_missing): + super().test_fillna_limit_pad(data_missing) - @pytest.mark.skip(reason="Not implemented") - def test_fillna_limit_backfill(self): - pass + @pytest.mark.skip(reason="Not implemented", strict=True) + def test_fillna_limit_backfill(self, data_missing): + super().test_fillna_limit_backfill(data_missing) class TestReduce(base.BaseNoReduceTests): @@ -168,11 +155,9 @@ class TestReduce(base.BaseNoReduceTests): class TestMethods(base.BaseMethodsTests): - pass - - @pytest.mark.skip(reason="Unobserved categories included") + @pytest.mark.skip(reason="Unobserved categories included", strict=True) def test_value_counts(self, all_data, dropna): - pass + return super().test_value_counts(all_data, dropna) def test_combine_add(self, data_repeated): # GH 20825 @@ -190,9 +175,9 @@ def test_combine_add(self, data_repeated): expected = pd.Series([a + val for a in list(orig_data1)]) self.assert_series_equal(result, expected) - @pytest.mark.skip(reason="Not Applicable") + @pytest.mark.skip(reason="Not Applicable", strict=True) def test_fillna_length_mismatch(self, data_missing): - pass + super().test_fillna_length_mismatch(data_missing) class TestCasting(base.BaseCastingTests): From c80ff12a68c86b1e948ce2977ce103c2ec2c9c22 Mon Sep 17 00:00:00 2001 From: Thein Oo Date: Sun, 11 Nov 2018 09:50:26 -0500 Subject: [PATCH 087/122] DOC: Fix Order of parameters in docstrings (#23611) --- pandas/core/reshape/pivot.py | 4 ++-- pandas/core/window.py | 35 +++++++++++++++++------------------ pandas/io/json/normalize.py | 2 +- pandas/tseries/offsets.py | 4 ++-- pandas/util/testing.py | 8 ++++---- 5 files changed, 26 insertions(+), 27 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index ec4cdffc56435..d12dbb81765d8 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -407,12 +407,12 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, values : array-like, optional Array of values to aggregate according to the factors. Requires `aggfunc` be specified. 
- aggfunc : function, optional - If specified, requires `values` be specified as well rownames : sequence, default None If passed, must match number of row arrays passed colnames : sequence, default None If passed, must match number of column arrays passed + aggfunc : function, optional + If specified, requires `values` be specified as well margins : boolean, default False Add row/column margins (subtotals) margins_name : string, default 'All' diff --git a/pandas/core/window.py b/pandas/core/window.py index 5256532a31870..be28a3bcccec6 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -136,7 +136,7 @@ def _gotitem(self, key, ndim, subset=None): Parameters ---------- - key : string / list of selections + key : str / list of selections ndim : 1,2 requested ndim of result subset : object, default None @@ -464,15 +464,16 @@ class Window(_Window): (otherwise result is NA). For a window that is specified by an offset, `min_periods` will default to 1. Otherwise, `min_periods` will default to the size of the window. - center : boolean, default False + center : bool, default False Set the labels at the center of the window. - win_type : string, default None + win_type : str, default None Provide a window type. If ``None``, all points are evenly weighted. See the notes below for further information. - on : string, optional + on : str, optional For a DataFrame, column on which to calculate the rolling window, rather than the index - closed : string, default None + axis : int or str, default 0 + closed : str, default None Make the interval closed on the 'right', 'left', 'both' or 'neither' endpoints. For offset-based windows, it defaults to 'right'. @@ -481,8 +482,6 @@ class Window(_Window): .. versionadded:: 0.20.0 - axis : int or string, default 0 - Returns ------- a Window or Rolling sub-classed for the particular operation @@ -661,7 +660,7 @@ def _apply_window(self, mean=True, **kwargs): Parameters ---------- - mean : boolean, default True + mean : bool, default True If True computes weighted mean, else weighted sum Returns @@ -819,11 +818,11 @@ def _apply(self, func, name=None, window=None, center=None, Parameters ---------- - func : string/callable to apply - name : string, optional + func : str/callable to apply + name : str, optional name of this function window : int/array, default to _get_window() - center : boolean, default to self.center + center : bool, default to self.center check_minp : function, default to _use_window Returns @@ -1816,9 +1815,9 @@ class Expanding(_Rolling_and_Expanding): min_periods : int, default 1 Minimum number of observations in window required to have a value (otherwise result is NA). - center : boolean, default False + center : bool, default False Set the labels at the center of the window. - axis : int or string, default 0 + axis : int or str, default 0 Returns ------- @@ -2062,7 +2061,7 @@ def _constructor(self): Parameters ---------- -bias : boolean, default False +bias : bool, default False Use a standard estimation bias correction """ @@ -2079,7 +2078,7 @@ def _constructor(self): will be a MultiIndex DataFrame in the case of DataFrame inputs. In the case of missing elements, only complete pairwise observations will be used. -bias : boolean, default False +bias : bool, default False Use a standard estimation bias correction """ @@ -2110,10 +2109,10 @@ class EWM(_Rolling): min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). 
- adjust : boolean, default True + adjust : bool, default True Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings (viewing EWMA as a moving average) - ignore_na : boolean, default False + ignore_na : bool, default False Ignore missing values when calculating weights; specify True to reproduce pre-0.15.0 behavior @@ -2242,7 +2241,7 @@ def _apply(self, func, **kwargs): Parameters ---------- - func : string/callable to apply + func : str/callable to apply Returns ------- diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index ce07a795017e5..af046d9f309e7 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -110,10 +110,10 @@ def json_normalize(data, record_path=None, meta=None, assumed to be an array of records meta : list of paths (string or list of strings), default None Fields to use as metadata for each record in resulting table + meta_prefix : string, default None record_prefix : string, default None If True, prefix records with dotted (?) path, e.g. foo.bar.field if path to records is ['foo', 'bar'] - meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' * 'ignore' : will ignore KeyError if keys listed in meta are not diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 6fb562e301ac2..53719b71d1180 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -807,7 +807,6 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): Parameters ---------- n : int, default 1 - offset : timedelta, default timedelta(0) normalize : bool, default False Normalize start/end dates to midnight before generating date range weekmask : str, Default 'Mon Tue Wed Thu Fri' @@ -816,6 +815,7 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` calendar : pd.HolidayCalendar or np.busdaycalendar + offset : timedelta, default timedelta(0) """ _prefix = 'C' _attributes = frozenset(['n', 'normalize', @@ -958,7 +958,6 @@ class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): Parameters ---------- n : int, default 1 - offset : timedelta, default timedelta(0) normalize : bool, default False Normalize start/end dates to midnight before generating date range weekmask : str, Default 'Mon Tue Wed Thu Fri' @@ -967,6 +966,7 @@ class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` calendar : pd.HolidayCalendar or np.busdaycalendar + offset : timedelta, default timedelta(0) """ _attributes = frozenset(['n', 'normalize', 'weekmask', 'holidays', 'calendar', 'offset']) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index fd7012c87040f..748f3bbc5b497 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1242,18 +1242,18 @@ def assert_series_equal(left, right, check_dtype=True, check_less_precise : bool or int, default False Specify comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare - check_exact : bool, default False - Whether to compare number exactly. + If int, then specify the digits to compare. check_names : bool, default True Whether to check the Series and Index names attribute. + check_exact : bool, default False + Whether to compare number exactly. 
check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. obj : str, default 'Series' Specify object name being compared, internally used to show appropriate - assertion message + assertion message. """ __tracebackhide__ = True From ff8130bc4263d57233bb07eaddf6231063df3159 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Nov 2018 07:08:14 -0800 Subject: [PATCH 088/122] CLN: use float64_t consistently instead of double, double_t (#23583) --- pandas/_libs/algos.pxd | 3 - pandas/_libs/algos.pyx | 18 +- pandas/_libs/algos_common_helper.pxi.in | 4 +- pandas/_libs/algos_rank_helper.pxi.in | 10 +- pandas/_libs/algos_take_helper.pxi.in | 4 +- pandas/_libs/groupby.pyx | 34 ++- pandas/_libs/groupby_helper.pxi.in | 22 +- pandas/_libs/hashtable.pyx | 8 +- pandas/_libs/hashtable_class_helper.pxi.in | 4 +- pandas/_libs/index.pyx | 6 +- pandas/_libs/index_class_helper.pxi.in | 4 +- pandas/_libs/interval.pyx | 15 +- pandas/_libs/intervaltree.pxi.in | 25 +-- pandas/_libs/join.pyx | 6 +- pandas/_libs/lib.pyx | 26 ++- pandas/_libs/missing.pyx | 21 +- pandas/_libs/parsers.pyx | 36 ++-- pandas/_libs/sparse.pyx | 67 ------ pandas/_libs/sparse_op_helper.pxi.in | 4 +- pandas/_libs/tslib.pyx | 16 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/_libs/window.pyx | 196 ++++++++++-------- pandas/tests/arrays/sparse/test_array.py | 4 +- pandas/tests/frame/test_operators.py | 3 +- pandas/tests/frame/test_repr_info.py | 5 +- pandas/tests/frame/test_timeseries.py | 7 +- pandas/tests/frame/test_to_csv.py | 3 +- pandas/tests/groupby/aggregate/test_cython.py | 5 +- pandas/tests/series/test_operators.py | 9 +- 29 files changed, 251 insertions(+), 316 deletions(-) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index 0888cf3c85f2f..5df1e381ea3ce 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -1,9 +1,6 @@ from util cimport numeric -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil - - cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: cdef: numeric t diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 075e2c5129579..e77899507833f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -15,8 +15,7 @@ from numpy cimport (ndarray, NPY_FLOAT32, NPY_FLOAT64, NPY_OBJECT, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t, - double_t) + uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -32,10 +31,9 @@ import missing cdef float64_t FP_ERR = 1e-13 -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN -cdef int64_t iNaT = get_nat() +cdef int64_t NPY_NAT = get_nat() tiebreakers = { 'average': TIEBREAK_AVERAGE, @@ -199,7 +197,7 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): @cython.boundscheck(False) @cython.wraparound(False) -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil: +def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: cdef: Py_ssize_t i, j, l, m, n = a.shape[0] numeric x @@ -812,7 +810,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (timelike and arr[0] == iNaT): + if arr[0] != arr[0] or (timelike and arr[0] == NPY_NAT): # single value is NaN return False, False, True else: @@ -820,7 +818,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): elif n < 2: return True, True, True - if timelike and arr[0] 
== iNaT: + if timelike and arr[0] == NPY_NAT: return False, False, True if algos_t is not object: @@ -828,7 +826,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break @@ -853,7 +851,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index c2b0a4119e6e5..3708deb1a4b76 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -84,9 +84,9 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # ensure_dtype -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef int PLATFORM_INT = (np.arange(0, dtype=np.intp)).descr.type_num diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index fcb052e8be63b..4d144dcf2808a 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -74,9 +74,9 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT - # create copy in case of iNaT + # create copy in case of NPY_NAT # values are mutated inplace if mask.any(): values = values.copy() @@ -149,7 +149,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{if dtype != 'uint64'}} isnan = sorted_mask[i] if isnan and keep_na: - ranks[argsorted[i]] = nan + ranks[argsorted[i]] = NaN continue {{endif}} @@ -257,7 +257,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT {{endif}} np.putmask(values, mask, nan_value) @@ -317,7 +317,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{else}} if (val == nan_value) and keep_na: {{endif}} - ranks[i, argsorted[i, j]] = nan + ranks[i, argsorted[i, j]] = NaN {{if dtype == 'object'}} infs += 1 diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index bd5feef1ff2b0..2fea8b17fd9d7 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for take WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # take_1d, take_2d -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 83ded64b742ed..7c16b29f3e42b 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1,14 +1,13 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import 
cython +from cython import Py_ssize_t from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp from numpy cimport (ndarray, - double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -20,10 +19,9 @@ from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN, TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE) from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers -cdef int64_t iNaT = get_nat() +cdef int64_t NPY_NAT = get_nat() -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN cdef inline float64_t median_linear(float64_t* a, int n) nogil: @@ -67,13 +65,13 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result -# TODO: Is this redundant with algos.kth_smallest? +# TODO: Is this redundant with algos.kth_smallest cdef inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n) nogil: cdef: Py_ssize_t i, j, l, m - double_t x, t + float64_t x, t l = 0 m = n - 1 @@ -109,7 +107,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, cdef: Py_ssize_t i, j, N, K, ngroups, size ndarray[int64_t] _counts - ndarray data + ndarray[float64_t, ndim=2] data float64_t* ptr assert min_count == -1, "'min_count' only used in add and prod" @@ -139,8 +137,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) def group_cumprod_float64(float64_t[:, :] out, - float64_t[:, :] values, - int64_t[:] labels, + const float64_t[:, :] values, + const int64_t[:] labels, bint is_datetimelike, bint skipna=True): """ @@ -177,7 +175,7 @@ def group_cumprod_float64(float64_t[:, :] out, @cython.wraparound(False) def group_cumsum(numeric[:, :] out, numeric[:, :] values, - int64_t[:] labels, + const int64_t[:] labels, is_datetimelike, bint skipna=True): """ @@ -217,7 +215,7 @@ def group_cumsum(numeric[:, :] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, +def group_shift_indexer(int64_t[:] out, const int64_t[:] labels, int ngroups, int periods): cdef: Py_ssize_t N, i, j, ii @@ -291,7 +289,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, """ cdef: Py_ssize_t i, N - ndarray[int64_t] sorted_labels + int64_t[:] sorted_labels int64_t idx, curr_fill_idx=-1, filled_vals=0 N = len(out) @@ -327,10 +325,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, @cython.boundscheck(False) @cython.wraparound(False) -def group_any_all(ndarray[uint8_t] out, - ndarray[int64_t] labels, - ndarray[uint8_t] values, - ndarray[uint8_t] mask, +def group_any_all(uint8_t[:] out, + const int64_t[:] labels, + const uint8_t[:] values, + const uint8_t[:] mask, object val_test, bint skipna): """Aggregated boolean values to show truthfulness of group elements diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 484a4b069305f..523d43f893aad 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" _int64_max = np.iinfo(np.int64).max # ---------------------------------------------------------------------- @@ -268,16 +268,16 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, {{endfor}} 
-#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group_nth, group_last, group_rank -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: # name, c_type, nan_val dtypes = [('float64', 'float64_t', 'NAN'), ('float32', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'iNaT'), + ('int64', 'int64_t', 'NPY_NAT'), ('object', 'object', 'NAN')] def get_dispatch(dtypes): @@ -527,7 +527,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # to the result where appropriate if keep_na and mask[_as[i]]: for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = nan + out[_as[j], 0] = NaN grp_na_count = dups elif tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): @@ -630,7 +630,7 @@ def group_max(ndarray[groupby_t, ndim=2] out, if groupby_t is int64_t: # Note: evaluated at compile-time maxx[:] = -_int64_max - nan_val = iNaT + nan_val = NPY_NAT else: maxx[:] = -np.inf nan_val = NAN @@ -692,7 +692,7 @@ def group_min(ndarray[groupby_t, ndim=2] out, minx = np.empty_like(out) if groupby_t is int64_t: minx[:] = _int64_max - nan_val = iNaT + nan_val = NPY_NAT else: minx[:] = np.inf nan_val = NAN @@ -762,8 +762,8 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, # val = nan if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val < mval: @@ -809,8 +809,8 @@ def group_cummax(ndarray[groupby_t, ndim=2] out, val = values[i, j] if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val > mval: diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index d38b72ccebbb2..9aa887727a765 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -9,11 +9,11 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint32_t +from numpy cimport ndarray, uint8_t, uint32_t, float64_t cnp.import_array() cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" from khash cimport ( @@ -42,9 +42,7 @@ cimport util from missing cimport checknull -nan = np.nan - -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() _SIZE_HINT_LIMIT = (1 << 20) + 7 diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 36ed8a88aa78b..a71023ed34f44 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -251,9 +251,9 @@ cdef class HashTable: {{py: # name, dtype, float_group, default_na_value -dtypes = [('Float64', 'float64', True, 'nan'), +dtypes = [('Float64', 'float64', True, 'np.nan'), ('UInt64', 'uint64', False, 0), - ('Int64', 'int64', False, 'iNaT')] + ('Int64', 'int64', False, 'NPY_NAT')] }} diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index d418ac63a4ac8..7930f583274b5 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -25,7 +25,7 @@ from pandas._libs import algos, hashtable as _hash from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib from pandas._libs.missing import checknull -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() cdef inline bint 
is_definitely_invalid_key(object val): @@ -520,7 +520,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, (datetime, np.datetime64, date)): return Timestamp(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timestamp(value).value raise ValueError("cannot set a Timestamp with a non-timestamp") @@ -531,7 +531,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, timedelta): return Timedelta(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timedelta(value).value raise ValueError("cannot set a Timedelta with a non-timedelta") diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index c19812efaaa35..ff95917f6643a 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -4,9 +4,9 @@ Template for functions of IndexEngine subclasses. WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IndexEngine Subclass Methods -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index a395fdbabeca2..dae88d3b707bf 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,20 +1,27 @@ # -*- coding: utf-8 -*- import numbers +from operator import le, lt from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, PyObject_RichCompare) -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np -from numpy cimport ndarray +cimport numpy as cnp +from numpy cimport ( + int64_t, int32_t, float64_t, float32_t, uint64_t, + ndarray, + PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) +cnp.import_array() -from operator import le, lt cimport util util.import_array() +from hashtable cimport Int64Vector, Int64VectorData + from tslibs import Timestamp from tslibs.timezones cimport tz_compare diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index f9427fbbcd900..aa53f5086b894 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -4,21 +4,6 @@ Template for intervaltree WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -from numpy cimport ( - int64_t, int32_t, float64_t, float32_t, uint64_t, - ndarray, - PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) -import numpy as np - -cimport cython -from cython cimport Py_ssize_t - -cimport numpy as cnp -cnp.import_array() - -from hashtable cimport Int64Vector, Int64VectorData - - ctypedef fused scalar_t: float64_t float32_t @@ -26,10 +11,9 @@ ctypedef fused scalar_t: int32_t uint64_t - -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IntervalTree -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef class IntervalTree(IntervalMixin): """A centered interval tree @@ -203,9 +187,10 @@ cdef sort_values_and_indices(all_values, all_indices, subset): sorted_indices = take(indices, sorter) return sorted_values, sorted_indices 
-#---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- # Nodes -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # we need specialized nodes and leaves to optimize for different dtype and # closed values diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 748f3f265dd34..54dfeeff1452d 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -10,10 +10,6 @@ from numpy cimport (ndarray, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() - -cdef double NaN = np.NaN -cdef double nan = NaN - from pandas._libs.algos import groupsort_indexer, ensure_platform_int from pandas.core.algorithms import take_nd @@ -673,7 +669,7 @@ ctypedef fused asof_t: int32_t int64_t float - double + float64_t ctypedef fused by_t: object diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9e0fcbc4a826..cfc60256e97a3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -45,13 +45,14 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "src/parse_helper.h": - int floatify(object, double *result, int *maybe_int) except -1 + int floatify(object, float64_t *result, int *maybe_int) except -1 cimport util from util cimport (is_nan, UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN) from tslib import array_to_datetime +from tslibs.nattype cimport NPY_NAT from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 @@ -67,11 +68,8 @@ cdef object oINT64_MAX = INT64_MAX cdef object oINT64_MIN = INT64_MIN cdef object oUINT64_MAX = UINT64_MAX -cdef int64_t NPY_NAT = util.get_nat() -iNaT = util.get_nat() - cdef bint PY2 = sys.version_info[0] == 2 -cdef double nan = np.NaN +cdef float64_t NaN = np.NaN def values_from_object(obj: object): @@ -104,7 +102,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t: # ---------------------------------------------------------------------- -def is_scalar(val: object) -> bint: +def is_scalar(val: object) -> bool: """ Return True if given value is scalar. @@ -628,7 +626,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, nat_count = 0 if hasnans: - mask = values == iNaT + mask = values == NPY_NAT nat_count = np.sum(mask) values = values[~mask] @@ -1816,7 +1814,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if val.__hash__ is not None and val in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif util.is_float_object(val): fval = val if fval != fval: @@ -1847,11 +1845,11 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, seen.bool_ = True elif val is None: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif hasattr(val, '__len__') and len(val) == 0: if convert_empty or seen.coerce_numeric: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: raise ValueError('Empty string encountered') elif util.is_complex_object(val): @@ -1866,7 +1864,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if fval in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: if fval != fval: seen.null_ = True @@ -1899,7 +1897,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, elif "uint64" in str(e): # Exception from check functions. 
raise seen.saw_null() - floats[i] = nan + floats[i] = NaN if seen.check_uint64_conflict(): return values @@ -1967,10 +1965,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, floats[i] = complexes[i] = fnan elif val is NaT: if convert_datetime: - idatetimes[i] = iNaT + idatetimes[i] = NPY_NAT seen.datetime_ = 1 if convert_timedelta: - itimedeltas[i] = iNaT + itimedeltas[i] = NPY_NAT seen.timedelta_ = 1 if not (convert_datetime or convert_timedelta): seen.object_ = 1 diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index b8791359241ad..1fdb04dd10d8e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -5,16 +5,17 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t, uint8_t +from numpy cimport ndarray, int64_t, uint8_t, float64_t cnp.import_array() cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value +from tslibs.nattype cimport checknull_with_nat from tslibs.nattype import NaT -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef int64_t NPY_NAT = util.get_nat() @@ -295,9 +296,7 @@ def isneginf_scalar(val: object) -> bool: cdef inline bint is_null_datetime64(v): # determine if we have a null for a datetime (or integer versions), # excluding np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: + if checknull_with_nat(v): return True elif util.is_datetime64_object(v): return v.view('int64') == NPY_NAT @@ -307,9 +306,7 @@ cdef inline bint is_null_datetime64(v): cdef inline bint is_null_timedelta64(v): # determine if we have a null for a timedelta (or integer versions), # excluding np.datetime64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: + if checknull_with_nat(v): return True elif util.is_timedelta64_object(v): return v.view('int64') == NPY_NAT @@ -319,8 +316,4 @@ cdef inline bint is_null_timedelta64(v): cdef inline bint is_null_period(v): # determine if we have a null for a Period (or integer versions), # excluding np.datetime64('nat') and np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: - return True - return False + return checknull_with_nat(v) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 391de339ad60e..3870a55c22fd6 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -65,8 +65,8 @@ CParserError = ParserError cdef bint PY3 = (sys.version_info[0] >= 3) -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef extern from "errno.h": @@ -182,10 +182,10 @@ cdef extern from "parser/tokenizer.h": int64_t skip_first_N_rows int64_t skipfooter # pick one, depending on whether the converter requires GIL - double (*double_converter_nogil)(const char *, char **, - char, char, char, int) nogil - double (*double_converter_withgil)(const char *, char **, - char, char, char, int) + float64_t (*double_converter_nogil)(const char *, char **, + char, char, char, int) nogil + float64_t (*double_converter_withgil)(const char *, char **, + char, char, char, int) # error handling char *warn_msg @@ -233,12 +233,12 @@ cdef extern from "parser/tokenizer.h": uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, uint64_t uint_max, int *error, char tsep) nogil - double xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double 
precise_xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double round_trip(const char *p, char **q, char decimal, char sci, + float64_t xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing) nogil + float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing) nogil + float64_t round_trip(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing) nogil int to_boolean(const char *item, uint8_t *val) nogil @@ -1697,8 +1697,8 @@ cdef _try_double(parser_t *parser, int64_t col, coliter_t it const char *word = NULL char *p_end - double *data - double NA = na_values[np.float64] + float64_t *data + float64_t NA = na_values[np.float64] kh_float64_t *na_fset ndarray result khiter_t k @@ -1706,7 +1706,7 @@ cdef _try_double(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.float64) - data = result.data + data = result.data na_fset = kset_float64_from_list(na_flist) if parser.double_converter_nogil != NULL: # if it can run without the GIL with nogil: @@ -1717,8 +1717,8 @@ cdef _try_double(parser_t *parser, int64_t col, else: assert parser.double_converter_withgil != NULL error = _try_double_nogil(parser, - parser.double_converter_withgil, col, line_start, line_end, na_filter, na_hashset, use_na_flist, @@ -1730,14 +1730,14 @@ cdef _try_double(parser_t *parser, int64_t col, cdef inline int _try_double_nogil(parser_t *parser, - double (*double_converter)( + float64_t (*double_converter)( const char *, char **, char, char, char, int) nogil, int col, int line_start, int line_end, bint na_filter, kh_str_t *na_hashset, bint use_na_flist, const kh_float64_t *na_flist, - double NA, double *data, + float64_t NA, float64_t *data, int *na_count) nogil: cdef: int error, diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index b8ca744ac88c4..668bd0ae6bbb7 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -22,9 +22,6 @@ _np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') cdef float64_t NaN = np.NaN cdef float64_t INF = np.inf -cdef inline int int_max(int a, int b): return a if a >= b else b -cdef inline int int_min(int a, int b): return a if a <= b else b - # ----------------------------------------------------------------------------- @@ -673,13 +670,6 @@ cdef class BlockMerge(object): self.yi = xi -cdef class BlockIntersection(BlockMerge): - """ - not done yet - """ - pass - - cdef class BlockUnion(BlockMerge): """ Object-oriented approach makes sharing state between recursive functions a @@ -805,63 +795,6 @@ cdef class BlockUnion(BlockMerge): include "sparse_op_helper.pxi" -# ----------------------------------------------------------------------------- -# Indexing operations - -def get_reindexer(ndarray[object, ndim=1] values, dict index_map): - cdef: - object idx - Py_ssize_t i - Py_ssize_t new_length = len(values) - ndarray[int32_t, ndim=1] indexer - - indexer = np.empty(new_length, dtype=np.int32) - - for i in range(new_length): - idx = values[i] - if idx in index_map: - indexer[i] = index_map[idx] - else: - indexer[i] = -1 - - return indexer - -# def reindex_block(ndarray[float64_t, ndim=1] values, -# BlockIndex sparse_index, -# ndarray[int32_t, ndim=1] indexer): -# cdef: -# Py_ssize_t i, length -# ndarray[float64_t, ndim=1] out - -# out = np.empty(length, dtype=np.float64) - -# for i in range(length): -# if indexer[i] == -1: -# pass - - -# cdef class 
SparseCruncher(object): -# """ -# Class to acquire float pointer for convenient operations on sparse data -# structures -# """ -# cdef: -# SparseIndex index -# float64_t* buf - -# def __init__(self, ndarray[float64_t, ndim=1, mode='c'] values, -# SparseIndex index): - -# self.index = index -# self.buf = values.data - - -def reindex_integer(ndarray[float64_t, ndim=1] values, - IntIndex sparse_index, - ndarray[int32_t, ndim=1] indexer): - pass - - # ----------------------------------------------------------------------------- # SparseArray mask create operations diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index d02a985de1d61..1f41096a3f194 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for sparse ops WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Sparse op -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- ctypedef fused sparse_t: float64_t diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9012ebefe0975..e346eb7e598ed 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import cython from cython import Py_ssize_t from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, @@ -37,7 +38,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject, get_datetime64_nanos, tz_convert_utc_to_tzlocal) -from tslibs.nattype import NaT, nat_strings, iNaT +# many modules still look for NaT and iNaT here despite them not being needed +from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821 from tslibs.nattype cimport checknull_with_nat, NPY_NAT from tslibs.offsets cimport to_offset @@ -71,6 +73,8 @@ cdef inline object create_time_from_ts( return time(dts.hour, dts.min, dts.sec, dts.us, tz) +@cython.wraparound(False) +@cython.boundscheck(False) def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -213,6 +217,8 @@ def _test_parse_iso8601(object ts): return Timestamp(obj.value) +@cython.wraparound(False) +@cython.boundscheck(False) def format_array_from_datetime(ndarray[int64_t] values, object tz=None, object format=None, object na_rep=None): """ @@ -335,7 +341,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): # then need to iterate try: iresult = values.astype('i8', casting='same_kind', copy=False) - mask = iresult == iNaT + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False @@ -351,7 +357,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): "'{unit}'".format(unit=unit)) result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') - iresult[mask] = iNaT + iresult[mask] = NPY_NAT return result result = np.empty(n, dtype='M8[ns]') @@ -449,6 +455,8 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): return oresult +@cython.wraparound(False) +@cython.boundscheck(False) cpdef array_to_datetime(ndarray[object] values, errors='raise', dayfirst=False, yearfirst=False, format=None, utc=None, @@ -752,6 +760,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return 
array_to_datetime_object(values, is_raise, dayfirst, yearfirst) +@cython.wraparound(False) +@cython.boundscheck(False) cdef array_to_datetime_object(ndarray[object] values, bint is_raise, dayfirst=False, yearfirst=False): """ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e2914957d01cd..457f5003cb9a5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import enum import warnings from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, @@ -23,7 +24,6 @@ cimport ccalendar from conversion import tz_localize_to_utc, normalize_i8_timestamps from conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) -import enum from fields import get_start_end_field, get_date_name_field from nattype import NaT from nattype cimport NPY_NAT diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index bb7af67d14585..f517e0933264a 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -9,15 +9,15 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport ndarray, double_t, int64_t, float64_t, float32_t +from numpy cimport ndarray, int64_t, float64_t, float32_t cnp.import_array() cdef extern from "src/headers/cmath" namespace "std": - bint isnan(double) nogil - bint notnan(double) nogil - int signbit(double) nogil - double sqrt(double x) nogil + bint isnan(float64_t) nogil + bint notnan(float64_t) nogil + int signbit(float64_t) nogil + float64_t sqrt(float64_t x) nogil cimport util from util cimport numeric @@ -32,7 +32,7 @@ cdef float64_t MINfloat64 = np.NINF cdef float32_t MAXfloat32 = np.inf cdef float64_t MAXfloat64 = np.inf -cdef double NaN = np.NaN +cdef float64_t NaN = np.NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b @@ -80,6 +80,7 @@ def _check_minp(win, minp, N, floor=None): return max(minp, floor) + # original C implementation by N. Devillard. # This code in public domain. 
# Function : kth_smallest() @@ -352,19 +353,20 @@ def get_window_indexer(values, win, minp, index, closed, right_closed, index, floor) return indexer.get_data() + # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware -def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_count(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, count_x = 0.0 + float64_t val, count_x = 0.0 int64_t s, e, nobs, N Py_ssize_t i, j ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, _ = get_window_indexer(values, win, minp, index, closed) @@ -406,12 +408,15 @@ def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling sum -cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: - cdef double result +cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, + float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: result = sum_x @@ -421,7 +426,7 @@ cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: return result -cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogil: """ add a value from the sum calc """ # Not NaN @@ -430,7 +435,8 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] + val -cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void remove_sum(float64_t val, + int64_t *nobs, float64_t *sum_x) nogil: """ remove a value from the sum calc """ if notnan(val): @@ -438,15 +444,15 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] - val -def roll_sum(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_sum(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, sum_x = 0 + float64_t val, prev_x, sum_x = 0 int64_t s, e, range_endpoint int64_t nobs = 0, i, j, N bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -511,16 +517,18 @@ def roll_sum(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling mean -cdef inline double calc_mean(int64_t minp, Py_ssize_t nobs, - Py_ssize_t neg_ct, double sum_x) nogil: - cdef double result +cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, + Py_ssize_t neg_ct, float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: - result = sum_x / nobs + result = sum_x / <float64_t>nobs if neg_ct == 0 and result < 0: # all positive result = 0 @@ -534,7 +542,7 @@ cdef inline double calc_mean(int64_t minp, Py_ssize_t nobs, return result -cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ add a value from the mean calc """ @@ -546,7 +554,7 @@ cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] + 1 -cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ remove a value from the mean calc """ @@ -557,15 +565,15 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_mean(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, result, sum_x = 0 + float64_t val, prev_x, result, sum_x = 0 int64_t s, e bint is_variable Py_ssize_t nobs = 0, i, j, neg_ct = 0, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -627,13 +635,15 @@ def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling variance -cdef inline double calc_var(int64_t minp, int ddof, double nobs, - double ssqdm_x) nogil: - cdef double result +cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, + float64_t ssqdm_x) nogil: + cdef: + float64_t result # Variance is unchanged if no observation is added or removed if (nobs >= minp) and (nobs > ddof): @@ -642,7 +652,7 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, if nobs == 1: result = 0 else: - result = ssqdm_x / (nobs - ddof) + result = ssqdm_x / (nobs - <float64_t>ddof) if result < 0: result = 0 else: @@ -651,10 +661,12 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, return result -cdef inline void add_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ add a value from the var calc """ - cdef double delta + cdef: + float64_t delta + # `isnan` instead of equality as fix for GH-21813, msvc 2017 bug if isnan(val): return @@ -667,10 +679,11 @@ cdef inline void add_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0] -cdef inline void remove_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ remove a value from the var calc """ - cdef double delta + cdef: + float64_t delta if notnan(val): nobs[0] = nobs[0] - 1 @@ -685,18 +698,19 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = 0 -def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_var(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed, int ddof=1): """ Numerically stable implementation using Welford's method. """ cdef: - double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old + float64_t mean_x = 0, ssqdm_x = 0, nobs = 0 + float64_t val, prev, delta, mean_x_old int64_t s, e bint is_variable Py_ssize_t i, j, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -785,13 +799,15 @@ def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, # ---------------------------------------------------------------------- # Rolling skewness -cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, - double xxx) nogil: - cdef double result, dnobs - cdef double A, B, C, R +cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, R if nobs >= minp: - dnobs = <double>nobs + dnobs = <float64_t>nobs A = x / dnobs B = xx / dnobs - A * A C = xxx / dnobs - A * A * A - 3 * A * B @@ -817,8 +833,9 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void add_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ add a value from the skew calc """ # Not NaN @@ -831,8 +848,9 @@ cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] + val * val * val -cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void remove_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ remove a value from the skew calc """ # Not NaN @@ -845,16 +863,16 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] - val * val * val -def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_skew(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -915,17 +933,20 @@ def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling kurtosis -cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, - double xxx, double xxxx) nogil: - cdef double result, dnobs - cdef double A, B, C, D, R, K +cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, D, R, K if nobs >= minp: - dnobs = <double>nobs + dnobs = <float64_t>nobs A = x / dnobs R = A * A B = xx / dnobs - R @@ -954,8 +975,9 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void add_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ add a value from the kurtosis calc """ # Not NaN @@ -969,8 +991,9 @@ cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] + val * val * val * val -cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void remove_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ remove a value from the kurtosis calc """ # Not NaN @@ -984,16 +1007,16 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] - val * val * val * val -def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_kurt(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0, xxxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -1050,6 +1073,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling median, min, max @@ -1057,7 +1081,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, res, prev + float64_t val, res, prev bint err = 0, is_variable int ret = 0 skiplist_t *sl @@ -1065,7 +1089,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, int64_t nobs = 0, N, s, e int midpoint ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs @@ -1130,6 +1154,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, raise MemoryError("skiplist_insert failed") return output + # ---------------------------------------------------------------------- # Moving maximum / minimum code taken from Bottleneck under the terms @@ -1167,7 +1192,8 @@ cdef inline void remove_mm(numeric aold, Py_ssize_t *nobs) nogil: cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, numeric value) nogil: - cdef numeric result + cdef: + numeric result if numeric in cython.floating: if nobs >= minp: @@ -1252,7 +1278,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, Py_ssize_t nobs = 0 deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) Q = deque[int64_t]() @@ -1335,7 +1361,7 @@ cdef _roll_min_max_fixed(ndarray[numeric] values, numeric* minvalue numeric* end numeric* last - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) # setup the rings of death! 
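The rolling kernels above all share one scheme: an ``add_*`` helper folds the value entering the window into a set of running aggregates, ``remove_*`` subtracts the value that drops out, and ``calc_*`` turns the aggregates into an output value (``NaN`` until ``minp`` observations are present). A minimal pure-Python sketch of the scheme for the variance kernel, mirroring the ``add_var``/``remove_var``/``calc_var`` recurrences shown in the diff above (``roll_var_sketch`` is an illustrative name, not part of this patch):

import numpy as np

def roll_var_sketch(values, win, minp, ddof=1):
    """Rolling variance via the add/remove/calc scheme (pure-Python sketch)."""
    nobs = 0        # observations currently in the window
    mean_x = 0.0    # running mean (Welford's method)
    ssqdm_x = 0.0   # running sum of squared differences from the mean
    out = np.empty(len(values))

    for i, val in enumerate(values):
        # add_var: fold the entering value into the aggregates
        if val == val:                      # NaN-aware
            nobs += 1
            delta = val - mean_x
            mean_x += delta / nobs
            ssqdm_x += (nobs - 1) * delta ** 2 / nobs

        # remove_var: drop the value leaving the window
        if i >= win:
            old = values[i - win]
            if old == old:
                nobs -= 1
                if nobs:
                    delta = old - mean_x
                    mean_x -= delta / nobs
                    ssqdm_x -= (nobs + 1) * delta ** 2 / nobs
                else:
                    mean_x = ssqdm_x = 0.0

        # calc_var: emit NaN until enough observations are present
        if nobs >= minp and nobs > ddof:
            out[i] = 0.0 if nobs == 1 else max(ssqdm_x / (nobs - ddof), 0.0)
        else:
            out[i] = np.nan
    return out

Each step touches only the entering and leaving values, so the whole pass is O(N) regardless of window size; the compiled kernels follow the same shape, just with C-level types and ``nogil`` sections.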
@@ -1427,19 +1453,19 @@ interpolation_types = { def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, int64_t minp, object index, object closed, - double quantile, str interpolation): + float64_t quantile, str interpolation): """ O(N log(window)) implementation using skip list """ cdef: - double val, prev, midpoint, idx_with_fraction + float64_t val, prev, midpoint, idx_with_fraction skiplist_t *skiplist int64_t nobs = 0, i, j, s, e, N Py_ssize_t idx bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output - double vlow, vhigh + ndarray[float64_t] output + float64_t vlow, vhigh InterpolationType interpolation_type int ret = 0 @@ -1529,7 +1555,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, elif interpolation_type == MIDPOINT: vlow = skiplist_get(skiplist, idx, &ret) vhigh = skiplist_get(skiplist, idx + 1, &ret) - output[i] = (vlow + vhigh) / 2 + output[i] = <float64_t>(vlow + vhigh) / 2 else: output[i] = NaN @@ -1543,7 +1569,7 @@ def roll_generic(object obj, int offset, object func, bint raw, object args, object kwargs): cdef: - ndarray[double_t] output, counts, bufarr + ndarray[float64_t] output, counts, bufarr ndarray[float64_t, cast=True] arr float64_t *buf float64_t *oldbuf @@ -1642,7 +1668,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, Assume len(weights) << len(values) """ cdef: - ndarray[double_t] output, tot_wgt, counts + ndarray[float64_t] output, tot_wgt, counts Py_ssize_t in_i, win_i, win_n, win_k, in_n, in_k float64_t val_in, val_win, c, w @@ -1703,7 +1729,8 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, # Exponentially weighted moving average -def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): +def ewma(float64_t[:] vals, float64_t com, + int adjust, int ignore_na, int minp): """ Compute exponentially-weighted moving average using center-of-mass. @@ -1722,8 +1749,8 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): cdef: Py_ssize_t N = len(vals) - ndarray[double_t] output = np.empty(N, dtype=float) - double alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + ndarray[float64_t] output = np.empty(N, dtype=float) + float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur Py_ssize_t i, nobs if N == 0: @@ -1767,12 +1794,13 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): return output + # ---------------------------------------------------------------------- # Exponentially weighted moving covariance -def ewmcov(double_t[:] input_x, double_t[:] input_y, - double_t com, int adjust, int ignore_na, int minp, int bias): +def ewmcov(float64_t[:] input_x, float64_t[:] input_y, + float64_t com, int adjust, int ignore_na, int minp, int bias): """ Compute exponentially-weighted moving covariance using center-of-mass. 
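Both ``ewma`` and ``ewmcov`` derive their decay weight from the center of mass (``alpha = 1 / (1 + com)`` in pandas' parametrization). A simplified sketch of the mean recurrence for the ``adjust=False`` case, illustrative only — the compiled loops above additionally handle ``adjust``, ``ignore_na``, ``minp`` and missing values:

def ewma_sketch(vals, com):
    # Decay factor implied by the center of mass: alpha = 1 / (1 + com).
    alpha = 1.0 / (1.0 + com)
    out = []
    for i, val in enumerate(vals):
        if i == 0:
            out.append(val)  # seed with the first observation
        else:
            # adjust=False recurrence: y[t] = (1 - alpha) * y[t-1] + alpha * x[t]
            out.append((1.0 - alpha) * out[-1] + alpha * val)
    return out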
@@ -1793,10 +1821,10 @@ def ewmcov(double_t[:] input_x, double_t[:] input_y, cdef: Py_ssize_t N = len(input_x) - double alpha, old_wt_factor, new_wt, mean_x, mean_y, cov - double sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y Py_ssize_t i, nobs - ndarray[double_t] output + ndarray[float64_t] output if len(input_y) != N: raise ValueError("arrays are of different lengths " diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 04d7f4d498c2b..c15696705ab82 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -3,7 +3,6 @@ import warnings import numpy as np -from numpy import nan import pytest from pandas._libs.sparse import IntIndex @@ -24,7 +23,8 @@ def kind(request): class TestSparseArray(object): def setup_method(self, method): - self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) + self.arr_data = np.array([np.nan, np.nan, 1, 2, 3, + np.nan, 4, 5, np.nan, 6]) self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 89d45639f3e03..bbe4914b5f447 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan import numpy as np from pandas.compat import range @@ -328,7 +327,7 @@ def test_combineFrame(self): frame_copy = self.frame.reindex(self.frame.index[::2]) del frame_copy['D'] - frame_copy['C'][:5] = nan + frame_copy['C'][:5] = np.nan added = self.frame + frame_copy diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 668613c494a47..01dee47fffe49 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -7,7 +7,6 @@ import sys import textwrap -from numpy import nan import numpy as np import pytest @@ -49,8 +48,8 @@ def test_repr_mixed_big(self): biggie = DataFrame({'A': np.random.randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan foo = repr(biggie) # noqa diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 5794630e72419..4f04169d08206 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan from numpy.random import randn import numpy as np @@ -516,8 +515,8 @@ def test_first_last_valid(self, data, idx, expected_first, expected_last): N = len(self.frame.index) mat = randn(N) - mat[:5] = nan - mat[-5:] = nan + mat[:5] = np.nan + mat[-5:] = np.nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() @@ -533,7 +532,7 @@ def test_first_last_valid(self, data, idx, assert empty.first_valid_index() is None # GH17400: no valid entries - frame[:] = nan + frame[:] = np.nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index b56375d0a8670..cd43cfe34d80b 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -6,7 +6,6 @@ import csv import pytest -from numpy import nan import numpy as np from 
pandas.compat import (lmap, range, lrange, StringIO, u) @@ -52,7 +51,7 @@ def test_from_csv_deprecation(self): def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: - self.frame['A'][:5] = nan + self.frame['A'][:5] = np.nan self.frame.to_csv(path) self.frame.to_csv(path, columns=['A', 'B']) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index d0e1f04238366..a0cc653a28b06 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -9,7 +9,6 @@ import pytest import numpy as np -from numpy import nan import pandas as pd from pandas import (bdate_range, DataFrame, Index, Series, Timestamp, @@ -36,11 +35,11 @@ 'max', ]) def test_cythonized_aggers(op_name): - data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], + data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., np.nan, np.nan], 'B': ['A', 'B'] * 6, 'C': np.random.randn(12)} df = DataFrame(data) - df.loc[2:10:2, 'C'] = nan + df.loc[2:10:2, 'C'] = np.nan op = lambda x: getattr(x, op_name)() diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 77e43a346c824..4cce26d135443 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -5,7 +5,6 @@ import operator import numpy as np -from numpy import nan import pytest import pandas.compat as compat @@ -750,12 +749,12 @@ def _check_fill(meth, op, a, b, fill_value=0): with np.errstate(all='ignore'): if amask[i]: if bmask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(fill_value, b[i])) elif bmask[i]: if amask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(a[i], fill_value)) else: @@ -765,8 +764,8 @@ def _check_fill(meth, op, a, b, fill_value=0): expected = Series(exp_values, exp_index) assert_series_equal(result, expected) - a = Series([nan, 1., 2., 3., nan], index=np.arange(5)) - b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6)) + a = Series([np.nan, 1., 2., 3., np.nan], index=np.arange(5)) + b = Series([np.nan, 1, np.nan, 3, np.nan, 4.], index=np.arange(6)) result = op(a, b) exp = equiv_op(a, b) From 9977a087bbbbc25cd3af019c1a139ad1565126b7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 09:22:06 -0600 Subject: [PATCH 089/122] API: DataFrame.__getitem__ returns Series for sparse column (#23561) closes https://github.com/pandas-dev/pandas/issues/23559 --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/dtypes/concat.py | 21 ------------------ pandas/core/frame.py | 3 +-- pandas/tests/frame/test_indexing.py | 27 ++++++++++++++++++----- pandas/tests/sparse/series/test_series.py | 5 ----- 5 files changed, 23 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3dcaef302d564..be795b024c329 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -562,6 +562,7 @@ changes were made: - The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. - ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray. - Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed. 
+- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`). Some new warnings are issued for operations that require or are likely to materialize a large dense array: diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b2999c112e8ab..bb4ab823069ee 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -101,27 +101,6 @@ def _get_frame_result_type(result, objs): ABCSparseDataFrame)) -def _get_sliced_frame_result_type(data, obj): - """ - return appropriate class of Series. When data is sparse - it will return a SparseSeries, otherwise it will return - the Series. - - Parameters - ---------- - data : array-like - obj : DataFrame - - Returns - ------- - Series or SparseSeries - """ - if is_sparse(data): - from pandas.core.sparse.api import SparseSeries - return SparseSeries - return obj._constructor_sliced - - def _concat_compat(to_concat, axis=0): """ provide concatenation of an array of arrays each of which is a single diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b29725ba2bea..7153f5c2e7007 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -72,7 +72,6 @@ is_iterator, is_sequence, is_named_tuple) -from pandas.core.dtypes.concat import _get_sliced_frame_result_type from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex from pandas.core.dtypes.missing import isna, notna @@ -3241,7 +3240,7 @@ def _box_item_values(self, key, values): def _box_col_values(self, values, items): """ provide boxed values for a column """ - klass = _get_sliced_frame_result_type(values, self) + klass = self._constructor_sliced return klass(values, index=self.index, name=items, fastpath=True) def __setitem__(self, key, value): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index b0e7fe2e25a6c..78aa853f68459 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2277,19 +2277,34 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[[1, -1], 0] assert_series_equal(df.loc[0.2, 'a'], expect) + def test_getitem_sparse_column(self): + # https://github.com/pandas-dev/pandas/issues/23559 + data = pd.SparseArray([0, 1]) + df = pd.DataFrame({"A": data}) + expected = pd.Series(data, name="A") + result = df['A'] + tm.assert_series_equal(result, expected) + + result = df.iloc[:, 0] + tm.assert_series_equal(result, expected) + + result = df.loc[:, 'A'] + tm.assert_series_equal(result, expected) + def test_setitem_with_sparse_value(self): # GH8131 df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]}) - sp_series = pd.Series([0, 0, 1]).to_sparse(fill_value=0) - df['new_column'] = sp_series - assert_series_equal(df['new_column'], sp_series, check_names=False) + sp_array = pd.SparseArray([0, 0, 1]) + df['new_column'] = sp_array + assert_series_equal(df['new_column'], + pd.Series(sp_array, name='new_column'), + check_names=False) def test_setitem_with_unaligned_sparse_value(self): df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]}) - sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0]) - .to_sparse(fill_value=0)) + sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0]) df['new_column'] = sp_series - exp = pd.SparseSeries([1, 0, 0], name='new_column') + exp = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column') assert_series_equal(df['new_column'], exp) def 
test_setitem_with_unaligned_tz_aware_datetime_column(self): diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index 9c7dbd85edcbb..fd5dbcd932993 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -160,11 +160,6 @@ def test_construct_DataFrame_with_sp_series(self): df.dtypes str(df) - tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False) - - result = df.iloc[:, 0] - tm.assert_sp_series_equal(result, self.bseries, check_names=False) - # blocking expected = Series({'col': 'float64:sparse'}) result = df.ftypes From 1d861de3e90c8c590d7d4bc6b065cdd50119d72e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 11 Nov 2018 07:40:42 -0800 Subject: [PATCH 090/122] BUG: Delegate more of Excel parsing to CSV (#23544) The idea is that we read the Excel file, get the data, and then let the TextParser handle the parsing. We shouldn't be doing a lot of work that is already defined in parsers.py. In doing so, we identified several bugs: * index_col=None was not being respected * usecols behavior was inconsistent with that of read_csv for list of strings and callable inputs * usecols was not being validated as proper Excel column names when passed as a string Closes gh-18273. Closes gh-20480. --- doc/source/io.rst | 29 +- doc/source/whatsnew/v0.24.0.txt | 3 + pandas/io/excel.py | 194 ++++--- pandas/tests/io/test_excel.py | 955 +++++++++++++++++--------------- 4 files changed, 670 insertions(+), 511 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 5d29e349e2898..beb1c1daba962 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2861,7 +2861,13 @@ to be parsed. read_excel('path_to_file.xls', 'Sheet1', usecols=2) -If `usecols` is a list of integers, then it is assumed to be the file column +You can also specify a comma-delimited set of Excel columns and ranges as a string: + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', usecols='A,C:E') + +If ``usecols`` is a list of integers, then it is assumed to be the file column indices to be parsed. .. code-block:: python @@ -2870,6 +2876,27 @@ indices to be parsed. Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. +.. versionadded:: 0.24 + +If ``usecols`` is a list of strings, it is assumed that each string corresponds +to a column name provided either by the user in ``names`` or inferred from the +document header row(s). Those strings define which columns will be parsed: + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', usecols=['foo', 'bar']) + +Element order is ignored, so ``usecols=['baz', 'joe']`` is the same as ``['joe', 'baz']``. + +.. versionadded:: 0.24 + +If ``usecols`` is callable, the callable function will be evaluated against +the column names, returning names where the callable function evaluates to ``True``. + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', usecols=lambda x: x.isalpha()) Parsing Dates +++++++++++++ diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index be795b024c329..1a5e4144b842b 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -238,6 +238,7 @@ Other Enhancements - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`) +- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) .. _whatsnew_0240.api_breaking: @@ -1302,6 +1303,8 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) - Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`) +- Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected, and index columns were still being parsed (:issue:`20480`) +- Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`) Plotting ^^^^^^^^ diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 7a7b801f4ba4a..2e93c237bb7ea 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -17,8 +17,7 @@ import pandas._libs.json as json import pandas.compat as compat from pandas.compat import ( - OrderedDict, add_metaclass, lrange, map, range, reduce, string_types, u, - zip) + OrderedDict, add_metaclass, lrange, map, range, string_types, u, zip) from pandas.errors import EmptyDataError from pandas.util._decorators import Appender, deprecate_kwarg @@ -93,13 +92,22 @@ .. deprecated:: 0.21.0 Pass in `usecols` instead. -usecols : int or list, default None - * If None then parse all columns, - * If int then indicates last column to be parsed - * If list of ints then indicates list of column numbers to be parsed - * If string then indicates comma separated list of Excel column letters and - column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of +usecols : int, str, list-like, or callable, default None + * If None, then parse all columns. + * If int, then indicates last column to be parsed. + * If string, then indicates comma separated list of Excel column letters + and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of both sides. + * If list of ints, then indicates list of column numbers to be parsed. + * If list of strings, then indicates list of column names to be parsed. + + .. versionadded:: 0.24.0 + + * If callable, then evaluate each column name against it and parse the + column if the callable returns ``True``. + + .. 
versionadded:: 0.24.0 + squeeze : boolean, default False If the parsed data only contains one column then return a Series dtype : Type name or dict of column -> type, default None @@ -466,39 +474,6 @@ def parse(self, convert_float=convert_float, **kwds) - def _should_parse(self, i, usecols): - - def _range2cols(areas): - """ - Convert comma separated list of column names and column ranges to a - list of 0-based column indexes. - - >>> _range2cols('A:E') - [0, 1, 2, 3, 4] - >>> _range2cols('A,C,Z:AB') - [0, 2, 25, 26, 27] - """ - def _excel2num(x): - "Convert Excel column name like 'AB' to 0-based column index" - return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, - x.upper().strip(), 0) - 1 - - cols = [] - for rng in areas.split(','): - if ':' in rng: - rng = rng.split(':') - cols += lrange(_excel2num(rng[0]), _excel2num(rng[1]) + 1) - else: - cols.append(_excel2num(rng)) - return cols - - if isinstance(usecols, int): - return i <= usecols - elif isinstance(usecols, compat.string_types): - return i in _range2cols(usecols) - else: - return i in usecols - def _parse_excel(self, sheet_name=0, header=0, @@ -527,10 +502,6 @@ def _parse_excel(self, raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates is True and index_col is None: - warnings.warn("The 'parse_dates=True' keyword of read_excel was " - "provided without an 'index_col' keyword value.") - import xlrd from xlrd import (xldate, XL_CELL_DATE, XL_CELL_ERROR, XL_CELL_BOOLEAN, @@ -620,17 +591,13 @@ def _parse_cell(cell_contents, cell_typ): sheet = self.book.sheet_by_index(asheetname) data = [] - should_parse = {} + usecols = _maybe_convert_usecols(usecols) for i in range(sheet.nrows): row = [] for j, (value, typ) in enumerate(zip(sheet.row_values(i), sheet.row_types(i))): - if usecols is not None and j not in should_parse: - should_parse[j] = self._should_parse(j, usecols) - - if usecols is None or should_parse[j]: - row.append(_parse_cell(value, typ)) + row.append(_parse_cell(value, typ)) data.append(row) if sheet.nrows == 0: @@ -642,24 +609,22 @@ def _parse_cell(cell_contents, cell_typ): # forward fill and pull out names for MultiIndex column header_names = None - if header is not None: - if is_list_like(header): - header_names = [] - control_row = [True] * len(data[0]) - for row in header: - if is_integer(skiprows): - row += skiprows - - data[row], control_row = _fill_mi_header( - data[row], control_row) - header_name, data[row] = _pop_header_name( - data[row], index_col) - header_names.append(header_name) - else: - data[header] = _trim_excel_header(data[header]) + if header is not None and is_list_like(header): + header_names = [] + control_row = [True] * len(data[0]) + + for row in header: + if is_integer(skiprows): + row += skiprows + + data[row], control_row = _fill_mi_header( + data[row], control_row) + header_name, _ = _pop_header_name( + data[row], index_col) + header_names.append(header_name) if is_list_like(index_col): - # forward fill values for MultiIndex index + # Forward fill values for MultiIndex index. 
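+            # (A MultiIndex level round-tripped through Excel comes back
+            # with blanks for repeated labels, e.g. ['a', '', '', 'b', ''];
+            # the loop below restores it to ['a', 'a', 'a', 'b', 'b'].)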
if not is_list_like(header): offset = 1 + header else: @@ -667,6 +632,7 @@ def _parse_cell(cell_contents, cell_typ): for col in index_col: last = data[offset][col] + for row in range(offset + 1, len(data)): if data[row][col] == '' or data[row][col] is None: data[row][col] = last @@ -693,11 +659,14 @@ def _parse_cell(cell_contents, cell_typ): thousands=thousands, comment=comment, skipfooter=skipfooter, + usecols=usecols, **kwds) output[asheetname] = parser.read(nrows=nrows) + if names is not None: output[asheetname].columns = names + if not squeeze or isinstance(output[asheetname], DataFrame): output[asheetname].columns = output[ asheetname].columns.set_names(header_names) @@ -726,6 +695,97 @@ def __exit__(self, exc_type, exc_value, traceback): self.close() +def _excel2num(x): + """ + Convert Excel column name like 'AB' to 0-based column index. + + Parameters + ---------- + x : str + The Excel column name to convert to a 0-based column index. + + Returns + ------- + num : int + The column index corresponding to the name. + + Raises + ------ + ValueError + Part of the Excel column name was invalid. + """ + index = 0 + + for c in x.upper().strip(): + cp = ord(c) + + if cp < ord("A") or cp > ord("Z"): + raise ValueError("Invalid column name: {x}".format(x=x)) + + index = index * 26 + cp - ord("A") + 1 + + return index - 1 + + +def _range2cols(areas): + """ + Convert comma separated list of column names and ranges to indices. + + Parameters + ---------- + areas : str + A string containing a sequence of column ranges (or areas). + + Returns + ------- + cols : list + A list of 0-based column indices. + + Examples + -------- + >>> _range2cols('A:E') + [0, 1, 2, 3, 4] + >>> _range2cols('A,C,Z:AB') + [0, 2, 25, 26, 27] + """ + cols = [] + + for rng in areas.split(","): + if ":" in rng: + rng = rng.split(":") + cols.extend(lrange(_excel2num(rng[0]), _excel2num(rng[1]) + 1)) + else: + cols.append(_excel2num(rng)) + + return cols + + +def _maybe_convert_usecols(usecols): + """ + Convert `usecols` into a compatible format for parsing in `parsers.py`. + + Parameters + ---------- + usecols : object + The use-columns object to potentially convert. + + Returns + ------- + converted : object + The compatible format of `usecols`. 
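+
+    Examples
+    --------
+    Illustrative conversions:
+
+    >>> _maybe_convert_usecols(3)
+    [0, 1, 2, 3]
+    >>> _maybe_convert_usecols("A,C:D")
+    [0, 2, 3]
+    >>> _maybe_convert_usecols(["foo", "bar"])
+    ['foo', 'bar']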
+ """ + if usecols is None: + return usecols + + if is_integer(usecols): + return lrange(usecols + 1) + + if isinstance(usecols, compat.string_types): + return _range2cols(usecols) + + return usecols + + def _validate_freeze_panes(freeze_panes): if freeze_panes is not None: if ( diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4bff39f8c7efc..49a3a3d58672d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -179,6 +179,65 @@ def test_usecols_str(self, ext): tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) + @pytest.mark.parametrize("usecols", [ + [0, 1, 3], [0, 3, 1], + [1, 0, 3], [1, 3, 0], + [3, 0, 1], [3, 1, 0], + ]) + def test_usecols_diff_positional_int_columns_order(self, ext, usecols): + expected = self.get_csv_refdf("test1")[["A", "C"]] + result = self.get_exceldf("test1", ext, "Sheet1", + index_col=0, usecols=usecols) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.parametrize("usecols", [ + ["B", "D"], ["D", "B"] + ]) + def test_usecols_diff_positional_str_columns_order(self, ext, usecols): + expected = self.get_csv_refdf("test1")[["B", "D"]] + expected.index = range(len(expected)) + + result = self.get_exceldf("test1", ext, "Sheet1", usecols=usecols) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_read_excel_without_slicing(self, ext): + expected = self.get_csv_refdf("test1") + result = self.get_exceldf("test1", ext, "Sheet1", index_col=0) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str(self, ext): + expected = self.get_csv_refdf("test1")[["C", "D"]] + result = self.get_exceldf("test1", ext, "Sheet1", + index_col=0, usecols="A,D:E") + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str_invalid(self, ext): + msg = "Invalid column name: E1" + + with pytest.raises(ValueError, match=msg): + self.get_exceldf("test1", ext, "Sheet1", usecols="D:E1") + + def test_index_col_label_error(self, ext): + msg = "list indices must be integers.*, not str" + + with pytest.raises(TypeError, match=msg): + self.get_exceldf("test1", ext, "Sheet1", index_col=["A"], + usecols=["A", "C"]) + + def test_usecols_pass_non_existent_column(self, ext): + msg = ("Usecols do not match columns, " + "columns expected but not found: " + r"\['E'\]") + + with pytest.raises(ValueError, match=msg): + self.get_exceldf("test1", ext, usecols=["E"]) + + def test_usecols_wrong_type(self, ext): + msg = ("'usecols' must either be list-like of " + "all strings, all unicode, all integers or a callable.") + + with pytest.raises(ValueError, match=msg): + self.get_exceldf("test1", ext, usecols=["E1", 0]) + def test_excel_stop_iterator(self, ext): parsed = self.get_exceldf('test2', ext, 'Sheet1') @@ -446,63 +505,48 @@ def test_read_excel_blank_with_header(self, ext): actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') - # GH 12292 : error when read one empty column from excel file - def test_read_one_empty_col_no_header(self, ext): + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize("header,expected", [ + (None, DataFrame([np.nan] * 4)), + (0, DataFrame({"Unnamed: 0": [np.nan] * 3})) + ]) + def test_read_one_empty_col_no_header(self, ext, header, expected): + # xref gh-12292 + filename = "no_header" df = pd.DataFrame( [["", 1, 100], ["", 2, 
200], ["", 3, 300], ["", 4, 400]] ) + with ensure_clean(ext) as path: - df.to_excel(path, 'no_header', index=False, header=False) - actual_header_none = read_excel( - path, - 'no_header', - usecols=[0], - header=None - ) - - actual_header_zero = read_excel( - path, - 'no_header', - usecols=[0], - header=0 - ) - expected = DataFrame() - tm.assert_frame_equal(actual_header_none, expected) - tm.assert_frame_equal(actual_header_zero, expected) + df.to_excel(path, filename, index=False, header=False) + result = read_excel(path, filename, usecols=[0], header=header) - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') - def test_read_one_empty_col_with_header(self, ext): + tm.assert_frame_equal(result, expected) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize("header,expected", [ + (None, DataFrame([0] + [np.nan] * 4)), + (0, DataFrame([np.nan] * 4)) + ]) + def test_read_one_empty_col_with_header(self, ext, header, expected): + filename = "with_header" df = pd.DataFrame( [["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]] ) + with ensure_clean(ext) as path: df.to_excel(path, 'with_header', index=False, header=True) - actual_header_none = read_excel( - path, - 'with_header', - usecols=[0], - header=None - ) - - actual_header_zero = read_excel( - path, - 'with_header', - usecols=[0], - header=0 - ) - expected_header_none = DataFrame(pd.Series([0], dtype='int64')) - tm.assert_frame_equal(actual_header_none, expected_header_none) - expected_header_zero = DataFrame(columns=[0]) - tm.assert_frame_equal(actual_header_zero, expected_header_zero) + result = read_excel(path, filename, usecols=[0], header=header) + + tm.assert_frame_equal(result, expected) @td.skip_if_no('openpyxl') @td.skip_if_no('xlwt') @@ -539,29 +583,33 @@ def test_date_conversion_overflow(self, ext): result = self.get_exceldf('testdateoverflow', ext) tm.assert_frame_equal(result, expected) - @td.skip_if_no('xlrd', '1.0.1') # GH-22682 + @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 def test_sheet_name_and_sheetname(self, ext): - # GH10559: Minor improvement: Change "sheet_name" to "sheetname" - # GH10969: DOC: Consistent var names (sheetname vs sheet_name) - # GH12604: CLN GH10559 Rename sheetname variable to sheet_name - # GH20920: ExcelFile.parse() and pd.read_xlsx() have different - # behavior for "sheetname" argument - dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', ext, - sheet_name='Sheet1') # doc + # gh-10559: Minor improvement: Change "sheet_name" to "sheetname" + # gh-10969: DOC: Consistent var names (sheetname vs sheet_name) + # gh-12604: CLN GH10559 Rename sheetname variable to sheet_name + # gh-20920: ExcelFile.parse() and pd.read_xlsx() have different + # behavior for "sheetname" argument + filename = "test1" + sheet_name = "Sheet1" + + df_ref = self.get_csv_refdf(filename) + df1 = self.get_exceldf(filename, ext, + sheet_name=sheet_name, index_col=0) # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2 = self.get_exceldf('test1', ext, - sheetname='Sheet1') # bkwrd compat + df2 = self.get_exceldf(filename, ext, index_col=0, + sheetname=sheet_name) # backward compat - excel = self.get_excelfile('test1', ext) - df1_parse = excel.parse(sheet_name='Sheet1') # doc + excel = self.get_excelfile(filename, ext) + df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2_parse = excel.parse(sheetname='Sheet1') # bkwrd compat + df2_parse = 
excel.parse(index_col=0, + sheetname=sheet_name) # backward compat - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - tm.assert_frame_equal(df1_parse, dfref, check_names=False) - tm.assert_frame_equal(df2_parse, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + tm.assert_frame_equal(df1_parse, df_ref, check_names=False) + tm.assert_frame_equal(df2_parse, df_ref, check_names=False) def test_sheet_name_both_raises(self, ext): with pytest.raises(TypeError, match="Cannot specify both"): @@ -594,20 +642,24 @@ def test_excel_read_buffer(self, ext): actual = read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) - @td.skip_if_no('xlwt') - def test_read_xlrd_Book(self, ext): + @td.skip_if_no("xlwt") + def test_read_xlrd_book(self, ext): import xlrd - df = self.frame - with ensure_clean('.xls') as pth: - df.to_excel(pth, "SheetA") + + engine = "xlrd" + sheet_name = "SheetA" + + with ensure_clean(ext) as pth: + df.to_excel(pth, sheet_name) book = xlrd.open_workbook(pth) - with ExcelFile(book, engine="xlrd") as xl: - result = read_excel(xl, "SheetA") + with ExcelFile(book, engine=engine) as xl: + result = read_excel(xl, sheet_name, index_col=0) tm.assert_frame_equal(df, result) - result = read_excel(book, sheet_name="SheetA", engine="xlrd") + result = read_excel(book, sheet_name=sheet_name, + engine=engine, index_col=0) tm.assert_frame_equal(df, result) @tm.network @@ -618,17 +670,18 @@ def test_read_from_http_url(self, ext): local_table = self.get_exceldf('test1', ext) tm.assert_frame_equal(url_table, local_table) - @td.skip_if_no('s3fs') + @td.skip_if_no("s3fs") @td.skip_if_not_us_locale def test_read_from_s3_url(self, ext): - boto3 = pytest.importorskip('boto3') - moto = pytest.importorskip('moto') + moto = pytest.importorskip("moto") + boto3 = pytest.importorskip("boto3") with moto.mock_s3(): conn = boto3.resource("s3", region_name="us-east-1") conn.create_bucket(Bucket="pandas-test") file_name = os.path.join(self.dirpath, 'test1' + ext) - with open(file_name, 'rb') as f: + + with open(file_name, "rb") as f: conn.Bucket("pandas-test").put_object(Key="test1" + ext, Body=f) @@ -695,17 +748,18 @@ def test_reader_closes_file(self, ext): assert f.closed - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") def test_creating_and_reading_multiple_sheets(self, ext): - # Test reading multiple sheets, from a runtime created excel file - # with multiple sheets. - # See PR #9450 - def tdf(sheetname): + # see gh-9450 + # + # Test reading multiple sheets, from a runtime + # created Excel file with multiple sheets. 
+ def tdf(col_sheet_name): d, i = [11, 22, 33], [1, 2, 3] - return DataFrame(d, i, columns=[sheetname]) + return DataFrame(d, i, columns=[col_sheet_name]) - sheets = ['AAA', 'BBB', 'CCC'] + sheets = ["AAA", "BBB", "CCC"] dfs = [tdf(s) for s in sheets] dfs = dict(zip(sheets, dfs)) @@ -714,7 +768,9 @@ def tdf(sheetname): with ExcelWriter(pth) as ew: for sheetname, df in iteritems(dfs): df.to_excel(ew, sheetname) - dfs_returned = read_excel(pth, sheet_name=sheets) + + dfs_returned = read_excel(pth, sheet_name=sheets, index_col=0) + for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) @@ -756,206 +812,206 @@ def test_reader_seconds(self, ext): tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex(self, ext): - # GH 4679 - mi = MultiIndex.from_product([['foo', 'bar'], ['a', 'b']]) - mi_file = os.path.join(self.dirpath, 'testmultiindex' + ext) - - expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], - [2, 3.5, pd.Timestamp('2015-01-02'), False], - [3, 4.5, pd.Timestamp('2015-01-03'), False], - [4, 5.5, pd.Timestamp('2015-01-04'), True]], + # see gh-4679 + mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]]) + mi_file = os.path.join(self.dirpath, "testmultiindex" + ext) + + # "mi_column" sheet + expected = DataFrame([[1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True]], columns=mi) - actual = read_excel(mi_file, 'mi_column', header=[0, 1]) - tm.assert_frame_equal(actual, expected) - actual = read_excel(mi_file, 'mi_column', header=[0, 1], index_col=0) + actual = read_excel(mi_file, "mi_column", header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - expected.columns = ['a', 'b', 'c', 'd'] + # "mi_index" sheet expected.index = mi - actual = read_excel(mi_file, 'mi_index', index_col=[0, 1]) + expected.columns = ["a", "b", "c", "d"] + + actual = read_excel(mi_file, "mi_index", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) + # "both" sheet expected.columns = mi - actual = read_excel(mi_file, 'both', index_col=[0, 1], header=[0, 1]) + + actual = read_excel(mi_file, "both", index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) - expected.index = mi.set_names(['ilvl1', 'ilvl2']) - expected.columns = ['a', 'b', 'c', 'd'] - actual = read_excel(mi_file, 'mi_index_name', index_col=[0, 1]) + # "mi_index_name" sheet + expected.columns = ["a", "b", "c", "d"] + expected.index = mi.set_names(["ilvl1", "ilvl2"]) + + actual = read_excel(mi_file, "mi_index_name", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) + # "mi_column_name" sheet expected.index = list(range(4)) - expected.columns = mi.set_names(['c1', 'c2']) - actual = read_excel(mi_file, 'mi_column_name', + expected.columns = mi.set_names(["c1", "c2"]) + actual = read_excel(mi_file, "mi_column_name", header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - # Issue #11317 + # see gh-11317 + # "name_with_int" sheet expected.columns = mi.set_levels( - [1, 2], level=1).set_names(['c1', 'c2']) - actual = read_excel(mi_file, 'name_with_int', + [1, 2], level=1).set_names(["c1", "c2"]) + + actual = read_excel(mi_file, "name_with_int", index_col=0, header=[0, 1]) tm.assert_frame_equal(actual, expected) - expected.columns = mi.set_names(['c1', 'c2']) - expected.index = mi.set_names(['ilvl1', 'ilvl2']) - actual = read_excel(mi_file, 'both_name', - index_col=[0, 1], header=[0, 1]) - 
tm.assert_frame_equal(actual, expected) + # "both_name" sheet + expected.columns = mi.set_names(["c1", "c2"]) + expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = read_excel(mi_file, 'both_name', + actual = read_excel(mi_file, "both_name", index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected) - actual = read_excel(mi_file, 'both_name_skiprows', index_col=[0, 1], + # "both_skiprows" sheet + actual = read_excel(mi_file, "both_name_skiprows", index_col=[0, 1], header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) - @td.skip_if_no('xlsxwriter') + @td.skip_if_no("xlsxwriter") def test_read_excel_multiindex_empty_level(self, ext): - # GH 12453 - with ensure_clean('.xlsx') as path: + # see gh-12453 + with ensure_clean(ext) as path: df = DataFrame({ - ('One', 'x'): {0: 1}, - ('Two', 'X'): {0: 3}, - ('Two', 'Y'): {0: 7}, - ('Zero', ''): {0: 0} + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", ""): {0: 0} }) expected = DataFrame({ - ('One', u'x'): {0: 1}, - ('Two', u'X'): {0: 3}, - ('Two', u'Y'): {0: 7}, - ('Zero', 'Unnamed: 3_level_1'): {0: 0} + ("One", u"x"): {0: 1}, + ("Two", u"X"): {0: 3}, + ("Two", u"Y"): {0: 7}, + ("Zero", "Unnamed: 4_level_1"): {0: 0} }) df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1]) + actual = pd.read_excel(path, header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) df = pd.DataFrame({ - ('Beg', ''): {0: 0}, - ('Middle', 'x'): {0: 1}, - ('Tail', 'X'): {0: 3}, - ('Tail', 'Y'): {0: 7} + ("Beg", ""): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7} }) expected = pd.DataFrame({ - ('Beg', 'Unnamed: 0_level_1'): {0: 0}, - ('Middle', u'x'): {0: 1}, - ('Tail', u'X'): {0: 3}, - ('Tail', u'Y'): {0: 7} + ("Beg", "Unnamed: 1_level_1"): {0: 0}, + ("Middle", u"x"): {0: 1}, + ("Tail", u"X"): {0: 3}, + ("Tail", u"Y"): {0: 7} }) df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1]) + actual = pd.read_excel(path, header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - @td.skip_if_no('xlsxwriter') - def test_excel_multindex_roundtrip(self, ext): - # GH 4679 - with ensure_clean('.xlsx') as pth: - for c_idx_names in [True, False]: - for r_idx_names in [True, False]: - for c_idx_levels in [1, 3]: - for r_idx_levels in [1, 3]: - # column index name can't be serialized unless - # MultiIndex - if (c_idx_levels == 1 and c_idx_names): - continue - - # empty name case current read in as unnamed - # levels, not Nones - check_names = True - if not r_idx_names and r_idx_levels > 1: - check_names = False - - df = mkdf(5, 5, c_idx_names, - r_idx_names, c_idx_levels, - r_idx_levels) - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), + @td.skip_if_no("xlsxwriter") + @pytest.mark.parametrize("c_idx_names", [True, False]) + @pytest.mark.parametrize("r_idx_names", [True, False]) + @pytest.mark.parametrize("c_idx_levels", [1, 3]) + @pytest.mark.parametrize("r_idx_levels", [1, 3]) + def test_excel_multindex_roundtrip(self, ext, c_idx_names, r_idx_names, + c_idx_levels, r_idx_levels): + # see gh-4679 + with ensure_clean(ext) as pth: + if c_idx_levels == 1 and c_idx_names: + pytest.skip("Column index name cannot be " + "serialized unless it's a MultiIndex") + + # Empty name case current read in as + # unnamed levels, not Nones. 
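+            # (check_names=False below makes tm.assert_frame_equal skip
+            #  the index/column name comparison; values are still checked.)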
+ check_names = r_idx_names or r_idx_levels <= 1 + + df = mkdf(5, 5, c_idx_names, r_idx_names, + c_idx_levels, r_idx_levels) + df.to_excel(pth) + + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) + tm.assert_frame_equal(df, act, check_names=check_names) - df.iloc[0, :] = np.nan - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), + df.iloc[0, :] = np.nan + df.to_excel(pth) + + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) + tm.assert_frame_equal(df, act, check_names=check_names) - df.iloc[-1, :] = np.nan - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), + df.iloc[-1, :] = np.nan + df.to_excel(pth) + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) + tm.assert_frame_equal(df, act, check_names=check_names) def test_excel_old_index_format(self, ext): # see gh-4679 - filename = 'test_index_name_pre17' + ext + filename = "test_index_name_pre17" + ext in_file = os.path.join(self.dirpath, filename) # We detect headers to determine if index names exist, so # that "index" name in the "names" version of the data will # now be interpreted as rows that include null data. data = np.array([[None, None, None, None, None], - ['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], - ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], - ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], - ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], - ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) - columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] - mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1', - 'R_l0_g2', 'R_l0_g3', 'R_l0_g4'], - ['R1', 'R_l1_g0', 'R_l1_g1', - 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']], + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"]]) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex(levels=[["R0", "R_l0_g0", "R_l0_g1", + "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R1", "R_l1_g0", "R_l1_g1", + "R_l1_g2", "R_l1_g3", "R_l1_g4"]], labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], names=[None, None]) - si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], name=None) + si = Index(["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", + "R_l0_g3", "R_l0_g4"], name=None) expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, 'single_names') + actual = pd.read_excel(in_file, "single_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, 'multi_names') + actual = pd.read_excel(in_file, "multi_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # The analogous versions of the "names" version data # where there are explicitly no names for the indices. 
- data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], - ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], - ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], - ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], - ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) - columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] - mi = MultiIndex(levels=[['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], - ['R_l1_g0', 'R_l1_g1', 'R_l1_g2', - 'R_l1_g3', 'R_l1_g4']], + data = np.array([["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"]]) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex(levels=[["R_l0_g0", "R_l0_g1", "R_l0_g2", + "R_l0_g3", "R_l0_g4"], + ["R_l1_g0", "R_l1_g1", "R_l1_g2", + "R_l1_g3", "R_l1_g4"]], labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], names=[None, None]) - si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], name=None) + si = Index(["R_l0_g0", "R_l0_g1", "R_l0_g2", + "R_l0_g3", "R_l0_g4"], name=None) expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, 'single_no_names') + actual = pd.read_excel(in_file, "single_no_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) + actual = pd.read_excel(in_file, "multi_no_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self, ext): @@ -971,33 +1027,28 @@ def test_read_excel_chunksize(self, ext): pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), chunksize=100) - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") def test_read_excel_parse_dates(self, ext): - # GH 11544, 12051 + # see gh-11544, gh-12051 df = DataFrame( - {'col': [1, 2, 3], - 'date_strings': pd.date_range('2012-01-01', periods=3)}) + {"col": [1, 2, 3], + "date_strings": pd.date_range("2012-01-01", periods=3)}) df2 = df.copy() - df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') + df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") with ensure_clean(ext) as pth: df2.to_excel(pth) - res = read_excel(pth) + res = read_excel(pth, index_col=0) tm.assert_frame_equal(df2, res) - # no index_col specified when parse_dates is True - with tm.assert_produces_warning(): - res = read_excel(pth, parse_dates=True) - tm.assert_frame_equal(df2, res) - - res = read_excel(pth, parse_dates=['date_strings'], index_col=0) + res = read_excel(pth, parse_dates=["date_strings"], index_col=0) tm.assert_frame_equal(df, res) - dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') - res = read_excel(pth, parse_dates=['date_strings'], - date_parser=dateparser, index_col=0) + date_parser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y") + res = read_excel(pth, parse_dates=["date_strings"], + date_parser=date_parser, index_col=0) tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self, ext): @@ -1106,26 +1157,29 @@ class and any subclasses, on account of the `autouse=True` class TestExcelWriter(_WriterBase): # Base class for test cases to run with different Excel writers. 
- def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext): + def test_excel_sheet_by_name_raise(self, *_): import xlrd gt = DataFrame(np.random.randn(10, 2)) gt.to_excel(self.path) + xl = ExcelFile(self.path) - df = read_excel(xl, 0) + df = read_excel(xl, 0, index_col=0) + tm.assert_frame_equal(gt, df) with pytest.raises(xlrd.XLRDError): - read_excel(xl, '0') + read_excel(xl, "0") - def test_excelwriter_contextmanager(self, merge_cells, engine, ext): + def test_excel_writer_context_manager(self, *_): with ExcelWriter(self.path) as writer: - self.frame.to_excel(writer, 'Data1') - self.frame2.to_excel(writer, 'Data2') + self.frame.to_excel(writer, "Data1") + self.frame2.to_excel(writer, "Data2") with ExcelFile(self.path) as reader: - found_df = read_excel(reader, 'Data1') - found_df2 = read_excel(reader, 'Data2') + found_df = read_excel(reader, "Data1", index_col=0) + found_df2 = read_excel(reader, "Data2", index_col=0) + tm.assert_frame_equal(found_df, self.frame) tm.assert_frame_equal(found_df2, self.frame2) @@ -1182,12 +1236,13 @@ def test_mixed(self, merge_cells, engine, ext): recons = read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.mixed_frame, recons) - def test_tsframe(self, merge_cells, engine, ext): + def test_ts_frame(self, *_): df = tm.makeTimeDataFrame()[:5] - df.to_excel(self.path, 'test1') + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) def test_basics_with_nan(self, merge_cells, engine, ext): @@ -1200,21 +1255,25 @@ def test_basics_with_nan(self, merge_cells, engine, ext): @pytest.mark.parametrize("np_type", [ np.int8, np.int16, np.int32, np.int64]) def test_int_types(self, merge_cells, engine, ext, np_type): - # Test np.int values read come back as int (rather than float - # which is Excel's format). + # Test np.int values read come back as int + # (rather than float which is Excel's format). frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + recons = read_excel(reader, "test1", index_col=0) + int_frame = frame.astype(np.int64) tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(self.path, 'test1') + + recons2 = read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) - # test with convert_float=False comes back as float + # Test with convert_float=False comes back as float. float_frame = frame.astype(float) - recons = read_excel(self.path, 'test1', convert_float=False) + recons = read_excel(self.path, "test1", + convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, check_index_type=False, check_column_type=False) @@ -1224,25 +1283,31 @@ def test_int_types(self, merge_cells, engine, ext, np_type): def test_float_types(self, merge_cells, engine, ext, np_type): # Test np.float values read come back as float. 
frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1').astype(np_type) + recons = read_excel(reader, "test1", index_col=0).astype(np_type) + tm.assert_frame_equal(frame, recons, check_dtype=False) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) def test_bool_types(self, merge_cells, engine, ext, np_type): # Test np.bool values read come back as float. frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1').astype(np_type) + recons = read_excel(reader, "test1", index_col=0).astype(np_type) + tm.assert_frame_equal(frame, recons) - def test_inf_roundtrip(self, merge_cells, engine, ext): + def test_inf_roundtrip(self, *_): frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + recons = read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(frame, recons) def test_sheets(self, merge_cells, engine, ext): @@ -1353,37 +1418,41 @@ def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): tm.assert_frame_equal(result, df) assert result.index.name == 'foo' - def test_excel_roundtrip_datetime(self, merge_cells, engine, ext): + def test_excel_roundtrip_datetime(self, merge_cells, *_): # datetime.date, not sure what to test here exactly tsf = self.tsframe.copy() tsf.index = [x.date() for x in self.tsframe.index] - tsf.to_excel(self.path, 'test1', merge_cells=merge_cells) + tsf.to_excel(self.path, "test1", merge_cells=merge_cells) + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + recons = read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(self.tsframe, recons) - # GH4133 - excel output format strings def test_excel_date_datetime_format(self, merge_cells, engine, ext): + # see gh-4133 + # + # Excel output format strings df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)]], - index=['DATE', 'DATETIME'], columns=['X', 'Y']) + index=["DATE", "DATETIME"], columns=["X", "Y"]) df_expected = DataFrame([[datetime(2014, 1, 31), datetime(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)]], - index=['DATE', 'DATETIME'], columns=['X', 'Y']) + index=["DATE", "DATETIME"], columns=["X", "Y"]) with ensure_clean(ext) as filename2: writer1 = ExcelWriter(self.path) writer2 = ExcelWriter(filename2, - date_format='DD.MM.YYYY', - datetime_format='DD.MM.YYYY HH-MM-SS') + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS") - df.to_excel(writer1, 'test1') - df.to_excel(writer2, 'test1') + df.to_excel(writer1, "test1") + df.to_excel(writer2, "test1") writer1.close() writer2.close() @@ -1391,54 +1460,66 @@ def test_excel_date_datetime_format(self, merge_cells, engine, ext): reader1 = ExcelFile(self.path) reader2 = ExcelFile(filename2) - rs1 = read_excel(reader1, 'test1', index_col=None) - rs2 = read_excel(reader2, 'test1', index_col=None) + rs1 = read_excel(reader1, "test1", index_col=0) + rs2 = read_excel(reader2, "test1", index_col=0) tm.assert_frame_equal(rs1, rs2) - # since the reader returns a datetime object for dates, we need - # to use df_expected to check the result + # Since the reader returns a 
datetime object for dates, + # we need to use df_expected to check the result. tm.assert_frame_equal(rs2, df_expected) - def test_to_excel_interval_no_labels(self, merge_cells, engine, ext): - # GH19242 - test writing Interval without labels + def test_to_excel_interval_no_labels(self, *_): + # see gh-19242 + # + # Test writing Interval without labels. frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() - frame['new'] = pd.cut(frame[0], 10) - expected['new'] = pd.cut(expected[0], 10).astype(str) - frame.to_excel(self.path, 'test1') + + frame["new"] = pd.cut(frame[0], 10) + expected["new"] = pd.cut(expected[0], 10).astype(str) + + frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_interval_labels(self, merge_cells, engine, ext): - # GH19242 - test writing Interval with labels + def test_to_excel_interval_labels(self, *_): + # see gh-19242 + # + # Test writing Interval with labels. frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() - intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E', - 'F', 'G', 'H', 'I', 'J']) - frame['new'] = intervals - expected['new'] = pd.Series(list(intervals)) - frame.to_excel(self.path, 'test1') + intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E", + "F", "G", "H", "I", "J"]) + frame["new"] = intervals + expected["new"] = pd.Series(list(intervals)) + + frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_timedelta(self, merge_cells, engine, ext): - # GH 19242, GH9155 - test writing timedelta to xls + def test_to_excel_timedelta(self, *_): + # see gh-19242, gh-9155 + # + # Test writing timedelta to xls. frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - columns=['A'], - dtype=np.int64 - ) + columns=["A"], dtype=np.int64) expected = frame.copy() - frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x)) - expected['new'] = expected['A'].apply( + + frame["new"] = frame["A"].apply(lambda x: timedelta(seconds=x)) + expected["new"] = expected["A"].apply( lambda x: timedelta(seconds=x).total_seconds() / float(86400)) - frame.to_excel(self.path, 'test1') + + frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self, merge_cells, engine, ext): @@ -1543,53 +1624,54 @@ def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, # Test that it is the same as the initial frame. 
tm.assert_frame_equal(frame1, frame3) - def test_to_excel_float_format(self, merge_cells, engine, ext): + def test_to_excel_float_format(self, *_): df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - df.to_excel(self.path, 'test1', float_format='%.2f') + index=["A", "B"], columns=["X", "Y", "Z"]) + df.to_excel(self.path, "test1", float_format="%.2f") reader = ExcelFile(self.path) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) + result = read_excel(reader, "test1", index_col=0) + + expected = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=["A", "B"], columns=["X", "Y", "Z"]) + tm.assert_frame_equal(result, expected) def test_to_excel_output_encoding(self, merge_cells, engine, ext): - # avoid mixed inferred_type - df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], - [u'\u0195', u'\u0196', u'\u0197']], - index=[u'A\u0192', u'B'], - columns=[u'X\u0193', u'Y', u'Z']) - - with ensure_clean('__tmp_to_excel_float_format__.' + ext) as filename: - df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') - result = read_excel(filename, 'TestSheet', encoding='utf8') + # Avoid mixed inferred_type. + df = DataFrame([[u"\u0192", u"\u0193", u"\u0194"], + [u"\u0195", u"\u0196", u"\u0197"]], + index=[u"A\u0192", u"B"], + columns=[u"X\u0193", u"Y", u"Z"]) + + with ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: + df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") + result = read_excel(filename, "TestSheet", + encoding="utf8", index_col=0) tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self, merge_cells, engine, ext): - with ensure_clean(u('\u0192u.') + ext) as filename: + with ensure_clean(u("\u0192u.") + ext) as filename: try: - f = open(filename, 'wb') + f = open(filename, "wb") except UnicodeEncodeError: - pytest.skip('no unicode file names on this system') + pytest.skip("No unicode file names on this system") else: f.close() df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - df.to_excel(filename, 'test1', float_format='%.2f') + index=["A", "B"], columns=["X", "Y", "Z"]) + df.to_excel(filename, "test1", float_format="%.2f") reader = ExcelFile(filename) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) + result = read_excel(reader, "test1", index_col=0) + + expected = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=["A", "B"], columns=["X", "Y", "Z"]) + tm.assert_frame_equal(result, expected) # def test_to_excel_header_styling_xls(self, merge_cells, engine, ext): @@ -1691,106 +1773,83 @@ def test_to_excel_unicode_filename(self, merge_cells, engine, ext): # assert ws.cell(maddr).merged # os.remove(filename) - def test_excel_010_hemstring(self, merge_cells, engine, ext): - if merge_cells: - pytest.skip('Skip tests for merged MI format.') + @pytest.mark.parametrize("use_headers", [True, False]) + @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3]) + @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3]) + def test_excel_010_hemstring(self, merge_cells, engine, ext, + c_idx_nlevels, r_idx_nlevels, use_headers): - from pandas.util.testing import 
makeCustomDataframe as mkdf - # ensure limited functionality in 0.10 - # override of #2370 until sorted out in 0.11 + def roundtrip(data, header=True, parser_hdr=0, index=True): + data.to_excel(self.path, header=header, + merge_cells=merge_cells, index=index) - def roundtrip(df, header=True, parser_hdr=0, index=True): - - df.to_excel(self.path, header=header, - merge_cells=merge_cells, index=index) xf = ExcelFile(self.path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res - - nrows = 5 - ncols = 3 - for use_headers in (True, False): - for i in range(1, 4): # row multindex up to nlevel=3 - for j in range(1, 4): # col "" - df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) - - # this if will be removed once multi column excel writing - # is implemented for now fixing #9794 - if j > 1: - with pytest.raises(NotImplementedError): - res = roundtrip(df, use_headers, index=False) - else: - res = roundtrip(df, use_headers) - - if use_headers: - assert res.shape == (nrows, ncols + i) - else: - # first row taken as columns - assert res.shape == (nrows - 1, ncols + i) + return read_excel(xf, xf.sheet_names[0], header=parser_hdr) - # no nans - for r in range(len(res.index)): - for c in range(len(res.columns)): - assert res.iloc[r, c] is not np.nan + # Basic test. + parser_header = 0 if use_headers else None + res = roundtrip(DataFrame([0]), use_headers, parser_header) - res = roundtrip(DataFrame([0])) - assert res.shape == (1, 1) - assert res.iloc[0, 0] is not np.nan - - res = roundtrip(DataFrame([0]), False, None) assert res.shape == (1, 2) assert res.iloc[0, 0] is not np.nan - def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, - engine, ext): - # This test was failing only for j>1 and header=False, - # So I reproduced a simple test. - if merge_cells: - pytest.skip('Skip tests for merged MI format.') + # More complex tests with multi-index. + nrows = 5 + ncols = 3 from pandas.util.testing import makeCustomDataframe as mkdf # ensure limited functionality in 0.10 - # override of #2370 until sorted out in 0.11 + # override of gh-2370 until sorted out in 0.11 - def roundtrip2(df, header=True, parser_hdr=0, index=True): + df = mkdf(nrows, ncols, r_idx_nlevels=r_idx_nlevels, + c_idx_nlevels=c_idx_nlevels) - df.to_excel(self.path, header=header, - merge_cells=merge_cells, index=index) - xf = ExcelFile(self.path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res + # This if will be removed once multi-column Excel writing + # is implemented. For now fixing gh-9794. + if c_idx_nlevels > 1: + with pytest.raises(NotImplementedError): + roundtrip(df, use_headers, index=False) + else: + res = roundtrip(df, use_headers) - nrows = 5 - ncols = 3 - j = 2 - i = 1 - df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) - with pytest.raises(NotImplementedError): - roundtrip2(df, header=False, index=False) + if use_headers: + assert res.shape == (nrows, ncols + r_idx_nlevels) + else: + # First row taken as columns. + assert res.shape == (nrows - 1, ncols + r_idx_nlevels) + + # No NaNs. 
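+            # (Cell-by-cell scan: a NaN here would mean headers or index
+            #  levels were misaligned during the round-trip.)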
+ for r in range(len(res.index)): + for c in range(len(res.columns)): + assert res.iloc[r, c] is not np.nan - def test_duplicated_columns(self, merge_cells, engine, ext): - # Test for issue #5235 + def test_duplicated_columns(self, *_): + # see gh-5235 write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - colnames = ['A', 'B', 'B'] + col_names = ["A", "B", "B"] - write_frame.columns = colnames - write_frame.to_excel(self.path, 'test1') + write_frame.columns = col_names + write_frame.to_excel(self.path, "test1") + + read_frame = read_excel(self.path, "test1", index_col=0) + read_frame.columns = col_names - read_frame = read_excel(self.path, 'test1') - read_frame.columns = colnames tm.assert_frame_equal(write_frame, read_frame) - # 11007 / #10970 + # see gh-11007, gh-10970 write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=['A', 'B', 'A', 'B']) - write_frame.to_excel(self.path, 'test1') - read_frame = read_excel(self.path, 'test1') - read_frame.columns = ['A', 'B', 'A', 'B'] + columns=["A", "B", "A", "B"]) + write_frame.to_excel(self.path, "test1") + + read_frame = read_excel(self.path, "test1", index_col=0) + read_frame.columns = ["A", "B", "A", "B"] + tm.assert_frame_equal(write_frame, read_frame) - # 10982 - write_frame.to_excel(self.path, 'test1', index=False, header=False) - read_frame = read_excel(self.path, 'test1', header=None) + # see gh-10982 + write_frame.to_excel(self.path, "test1", index=False, header=False) + read_frame = read_excel(self.path, "test1", header=None) + write_frame.columns = [0, 1, 2, 3] tm.assert_frame_equal(write_frame, read_frame) @@ -1805,36 +1864,40 @@ def test_swapped_columns(self, merge_cells, engine, ext): tm.assert_series_equal(write_frame['A'], read_frame['A']) tm.assert_series_equal(write_frame['B'], read_frame['B']) - def test_invalid_columns(self, merge_cells, engine, ext): - # 10982 - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) + def test_invalid_columns(self, *_): + # see gh-10982 + write_frame = DataFrame({"A": [1, 1, 1], + "B": [2, 2, 2]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - write_frame.to_excel(self.path, 'test1', columns=['B', 'C']) - expected = write_frame.reindex(columns=['B', 'C']) - read_frame = read_excel(self.path, 'test1') + write_frame.to_excel(self.path, "test1", columns=["B", "C"]) + + expected = write_frame.reindex(columns=["B", "C"]) + read_frame = read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(expected, read_frame) with pytest.raises(KeyError): - write_frame.to_excel(self.path, 'test1', columns=['C', 'D']) + write_frame.to_excel(self.path, "test1", columns=["C", "D"]) - def test_comment_arg(self, merge_cells, engine, ext): - # Re issue #18735 - # Test the comment argument functionality to read_excel + def test_comment_arg(self, *_): + # see gh-18735 + # + # Test the comment argument functionality to read_excel. - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(self.path, 'test_c') + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], + "B": ["two", "two", "#two"]}) + df.to_excel(self.path, "test_c") + + # Read file without comment arg. 
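+        # (Cells whose values start with "#" are then set to None by
+        #  hand, so result1 matches what comment="#" produces below.)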
+ result1 = read_excel(self.path, "test_c", index_col=0) - # Read file without comment arg - result1 = read_excel(self.path, 'test_c') result1.iloc[1, 0] = None result1.iloc[1, 1] = None result1.iloc[2, 1] = None - result2 = read_excel(self.path, 'test_c', comment='#') + + result2 = read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result1, result2) def test_comment_default(self, merge_cells, engine, ext): @@ -1851,22 +1914,23 @@ def test_comment_default(self, merge_cells, engine, ext): result2 = read_excel(self.path, 'test_c', comment=None) tm.assert_frame_equal(result1, result2) - def test_comment_used(self, merge_cells, engine, ext): - # Re issue #18735 - # Test the comment argument is working as expected when used + def test_comment_used(self, *_): + # see gh-18735 + # + # Test the comment argument is working as expected when used. - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(self.path, 'test_c') + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], + "B": ["two", "two", "#two"]}) + df.to_excel(self.path, "test_c") - # Test read_frame_comment against manually produced expected output - expected = DataFrame({'A': ['one', None, 'one'], - 'B': ['two', None, None]}) - result = read_excel(self.path, 'test_c', comment='#') + # Test read_frame_comment against manually produced expected output. + expected = DataFrame({"A": ["one", None, "one"], + "B": ["two", None, None]}) + result = read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result, expected) - def test_comment_emptyline(self, merge_cells, engine, ext): + def test_comment_empty_line(self, merge_cells, engine, ext): # Re issue #18735 # Test that read_excel ignores commented lines at the end of file @@ -1899,64 +1963,69 @@ def test_datetimes(self, merge_cells, engine, ext): tm.assert_series_equal(write_frame['A'], read_frame['A']) - # GH7074 def test_bytes_io(self, merge_cells, engine, ext): + # see gh-7074 bio = BytesIO() df = DataFrame(np.random.randn(10, 2)) - # pass engine explicitly as there is no file path to infer from + + # Pass engine explicitly, as there is no file path to infer from. writer = ExcelWriter(bio, engine=engine) df.to_excel(writer) writer.save() + bio.seek(0) - reread_df = read_excel(bio) + reread_df = read_excel(bio, index_col=0) tm.assert_frame_equal(df, reread_df) - # GH8188 - def test_write_lists_dict(self, merge_cells, engine, ext): - df = DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], - 'numeric': [1, 2, 3.0], - 'str': ['apple', 'banana', 'cherry']}) + def test_write_lists_dict(self, *_): + # see gh-8188. 
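+        # (Lists and dicts are stringified on write, hence the str-cast
+        #  "mixed" column in the expected frame below.)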
+ df = DataFrame({"mixed": ["a", ["b", "c"], {"d": "e", "f": 2}], + "numeric": [1, 2, 3.0], + "str": ["apple", "banana", "cherry"]}) + df.to_excel(self.path, "Sheet1") + read = read_excel(self.path, "Sheet1", header=0, index_col=0) + expected = df.copy() expected.mixed = expected.mixed.apply(str) - expected.numeric = expected.numeric.astype('int64') + expected.numeric = expected.numeric.astype("int64") - df.to_excel(self.path, 'Sheet1') - read = read_excel(self.path, 'Sheet1', header=0) tm.assert_frame_equal(read, expected) - # GH13347 - def test_true_and_false_value_options(self, merge_cells, engine, ext): - df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2']) - expected = df.replace({'foo': True, - 'bar': False}) + def test_true_and_false_value_options(self, *_): + # see gh-13347 + df = pd.DataFrame([["foo", "bar"]], columns=["col1", "col2"]) + expected = df.replace({"foo": True, "bar": False}) df.to_excel(self.path) - read_frame = read_excel(self.path, true_values=['foo'], - false_values=['bar']) + read_frame = read_excel(self.path, true_values=["foo"], + false_values=["bar"], index_col=0) tm.assert_frame_equal(read_frame, expected) - def test_freeze_panes(self, merge_cells, engine, ext): - # GH15160 - expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) + def test_freeze_panes(self, *_): + # see gh-15160 + expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) - result = read_excel(self.path) - tm.assert_frame_equal(expected, result) - def test_path_pathlib(self, merge_cells, engine, ext): + result = read_excel(self.path, index_col=0) + tm.assert_frame_equal(result, expected) + + def test_path_path_lib(self, merge_cells, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - reader = partial(pd.read_excel) + + reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(ext)) - tm.assert_frame_equal(df, result) + path="foo.{ext}".format(ext=ext)) + tm.assert_frame_equal(result, df) - def test_path_localpath(self, merge_cells, engine, ext): + def test_path_local_path(self, merge_cells, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - reader = partial(pd.read_excel) + + reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(ext)) - tm.assert_frame_equal(df, result) + path="foo.{ext}".format(ext=ext)) + tm.assert_frame_equal(result, df) @td.skip_if_no('openpyxl') From c355f2637b4386e698056013c84623c9a42acb85 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Nov 2018 08:20:04 -0800 Subject: [PATCH 091/122] BUG: Casting tz-aware DatetimeIndex to object-dtype ndarray/Index (#23524) --- doc/source/whatsnew/v0.24.0.txt | 4 ++ pandas/_libs/tslibs/offsets.pyx | 9 +++- pandas/core/arrays/datetimes.py | 10 ++++ pandas/core/indexes/base.py | 14 ++++-- pandas/tests/arrays/test_datetimelike.py | 48 ++++++++++++++++++++ pandas/tests/indexes/test_base.py | 12 ++++- pandas/tests/tseries/offsets/test_offsets.py | 13 ++++++ pandas/tests/tseries/offsets/test_ticks.py | 22 +++++++++ pandas/tseries/offsets.py | 31 ++++++++++--- 9 files changed, 149 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1a5e4144b842b..93c0c0eb745dc 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1128,6 +1128,9 @@ Datetimelike - Bug in 
:class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)
 - Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`)
 - Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`)
+- Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`)
+- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`)
+- Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`)
 
 Timedelta
 ^^^^^^^^^
@@ -1174,6 +1177,7 @@ Offsets
 - Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`)
 - Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`)
 - Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`)
+- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`)
 
 Numeric
 ^^^^^^^
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index 0495202818eb5..7ef38cba0c37f 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -308,8 +308,13 @@ class _BaseOffset(object):
 
     def __eq__(self, other):
         if is_string_object(other):
-            other = to_offset(other)
-
+            try:
+                # GH#23524 if to_offset fails, we are dealing with an
+                #  incomparable type so == is False and != is True
+                other = to_offset(other)
+            except ValueError:
+                # e.g. "infer"
+                return False
         try:
             return self._params == other._params
         except AttributeError:
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 405056c628ceb..08b83598bb6af 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -19,6 +19,7 @@
 from pandas.core.dtypes.common import (
     _NS_DTYPE,
     is_object_dtype,
+    is_int64_dtype,
     is_datetime64tz_dtype,
     is_datetime64_dtype,
     ensure_int64)
@@ -388,6 +389,15 @@ def _resolution(self):
     # ----------------------------------------------------------------
     # Array-like Methods
 
+    def __array__(self, dtype=None):
+        if is_object_dtype(dtype):
+            return np.array(list(self), dtype=object)
+        elif is_int64_dtype(dtype):
+            return self.asi8
+
+        # TODO: warn that conversion may be lossy?
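+        #  (for tz-aware values the underlying data is stored as UTC
+        #  datetime64[ns], so this view drops the timezone information)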
+        return self._data.view(np.ndarray)  # follow Index.__array__
+
     def __iter__(self):
         """
         Return an iterator over the boxed values
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 8470bc6fec490..263de57d32f31 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -301,11 +301,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
                 (dtype is not None and is_datetime64_any_dtype(dtype)) or
                 'tz' in kwargs):
             from pandas import DatetimeIndex
-            result = DatetimeIndex(data, copy=copy, name=name,
-                                   dtype=dtype, **kwargs)
+
             if dtype is not None and is_dtype_equal(_o_dtype, dtype):
-                return Index(result.to_pydatetime(), dtype=_o_dtype)
+                # GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
+                # will raise in the case where `data` is already tz-aware. So
+                # we leave it out of this step and cast to object-dtype after
+                # the DatetimeIndex construction.
+                # Note we can pass copy=False because the .astype below
+                # will always make a copy
+                result = DatetimeIndex(data, copy=False, name=name, **kwargs)
+                return result.astype(object)
             else:
+                result = DatetimeIndex(data, copy=copy, name=name,
+                                       dtype=dtype, **kwargs)
                 return result
 
         elif (is_timedelta64_dtype(data) or
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 3fd03a351de7c..5ba99a48e34ad 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -57,6 +57,54 @@ def timedelta_index(request):
 
 class TestDatetimeArray(object):
 
+    def test_array_object_dtype(self, tz_naive_fixture):
+        # GH#23524
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
+        arr = DatetimeArrayMixin(dti)
+
+        expected = np.array(list(dti))
+
+        result = np.array(arr, dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+
+        # also test the DatetimeIndex method while we're at it
+        result = np.array(dti, dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_array(self, tz_naive_fixture):
+        # GH#23524
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
+        arr = DatetimeArrayMixin(dti)
+
+        expected = dti.asi8.view('M8[ns]')
+        result = np.array(arr)
+        tm.assert_numpy_array_equal(result, expected)
+
+        # check that we are not making copies when setting copy=False
+        result = np.array(arr, copy=False)
+        assert result.base is expected.base
+        assert result.base is not None
+
+    def test_array_i8_dtype(self, tz_naive_fixture):
+        # GH#23524
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
+        arr = DatetimeArrayMixin(dti)
+
+        expected = dti.asi8
+        result = np.array(arr, dtype='i8')
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = np.array(arr, dtype=np.int64)
+        tm.assert_numpy_array_equal(result, expected)
+
+        # check that we are not making copies when setting copy=False
+        result = np.array(arr, dtype='i8', copy=False)
+        assert result.base is expected.base
+        assert result.base is not None
+
     def test_from_dti(self, tz_naive_fixture):
         tz = tz_naive_fixture
         dti = pd.date_range('2016-01-01', periods=3, tz=tz)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 666420a6a9b06..4a3efe22926f7 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -132,7 +132,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype):
     @pytest.mark.parametrize("cast_as_obj", [True, False])
     @pytest.mark.parametrize("index", [
         pd.date_range('2015-01-01 10:00', freq='D', periods=3,
-                      tz='US/Eastern'),  # DTI with tz
+                      tz='US/Eastern', name='Green Eggs & Ham'),  # DTI with tz
         pd.date_range('2015-01-01 10:00', freq='D', periods=3),  # DTI no tz
         pd.timedelta_range('1 days', freq='D', periods=3),  # td
         pd.period_range('2015-01-01', freq='D', periods=3)  # period
@@ -145,8 +145,16 @@ def test_constructor_from_index_dtlike(self, cast_as_obj, index):
 
         tm.assert_index_equal(result, index)
 
-        if isinstance(index, pd.DatetimeIndex) and hasattr(index, 'tz'):
+        if isinstance(index, pd.DatetimeIndex):
             assert result.tz == index.tz
+            if cast_as_obj:
+                # GH#23524 check that Index(dti, dtype=object) does not
+                # incorrectly raise ValueError, and that nanoseconds are not
+                # dropped
+                index += pd.Timedelta(nanoseconds=50)
+                result = pd.Index(index, dtype=object)
+                assert result.dtype == np.object_
+                assert list(result) == list(index)
 
     @pytest.mark.parametrize("index,has_tz", [
         (pd.date_range('2015-01-01 10:00', freq='D', periods=3,
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index cbd3e0903b713..d68dd65c9841b 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -187,6 +187,19 @@ def testMult2(self):
         assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50)
         assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)
 
+    def test_compare_str(self):
+        # GH#23524
+        # comparing to strings that cannot be cast to DateOffsets should
+        #  not raise for __eq__ or __ne__
+        if self._offset is None:
+            return
+        off = self._get_offset(self._offset)
+
+        assert not off == "infer"
+        assert off != "foo"
+        # Note: inequalities are only implemented for Tick subclasses;
+        #  tests for this are in test_ticks
+
 
 class TestCommon(Base):
     # expected value created by Base._get_offset
diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py
index 369c0971f1e9a..128010fe6d32c 100644
--- a/pandas/tests/tseries/offsets/test_ticks.py
+++ b/pandas/tests/tseries/offsets/test_ticks.py
@@ -267,3 +267,25 @@ def test_compare_ticks(cls):
     assert cls(4) > three
     assert cls(3) == cls(3)
     assert cls(3) != cls(4)
+
+
+@pytest.mark.parametrize('cls', tick_classes)
+def test_compare_ticks_to_strs(cls):
+    # GH#23524
+    off = cls(19)
+
+    # These tests should work with any strings, but we particularly are
+    # interested in "infer" as that comparison is convenient to make in
+    # Datetime/Timedelta Array/Index constructors
+    assert not off == "infer"
+    assert not "foo" == off
+
+    for left, right in [("infer", off), (off, "infer")]:
+        with pytest.raises(TypeError):
+            left < right
+        with pytest.raises(TypeError):
+            left <= right
+        with pytest.raises(TypeError):
+            left > right
+        with pytest.raises(TypeError):
+            left >= right
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 53719b71d1180..25c419e485db1 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -2199,9 +2199,18 @@ def apply_index(self, i):
 
 
 def _tick_comp(op):
+    assert op not in [operator.eq, operator.ne]
+
     def f(self, other):
-        return op(self.delta, other.delta)
+        try:
+            return op(self.delta, other.delta)
+        except AttributeError:
+            # comparing with a non-Tick object
+            raise TypeError("Invalid comparison between {cls} and {typ}"
+                            .format(cls=type(self).__name__,
+                                    typ=type(other).__name__))
+
     f.__name__ = '__{opname}__'.format(opname=op.__name__)
     return f
 
@@ -2220,8 +2229,6 @@ def __init__(self, n=1, normalize=False):
     __ge__ = _tick_comp(operator.ge)
     __lt__ = _tick_comp(operator.lt)
     __le__ = _tick_comp(operator.le)
-    __eq__ = _tick_comp(operator.eq)
-    __ne__ = _tick_comp(operator.ne)
 
     def __add__(self, other):
         if isinstance(other, Tick):
@@ -2242,8 +2249,13 @@ def __add__(self, other):
     def __eq__(self, other):
         if isinstance(other, compat.string_types):
             from pandas.tseries.frequencies import to_offset
-
-            other = to_offset(other)
+            try:
+                # GH#23524 if to_offset fails, we are dealing with an
+                #  incomparable type so == is False and != is True
+                other = to_offset(other)
+            except ValueError:
+                # e.g. "infer"
+                return False
 
         if isinstance(other, Tick):
             return self.delta == other.delta
@@ -2258,8 +2270,13 @@ def __hash__(self):
     def __ne__(self, other):
         if isinstance(other, compat.string_types):
             from pandas.tseries.frequencies import to_offset
-
-            other = to_offset(other)
+            try:
+                # GH#23524 if to_offset fails, we are dealing with an
+                #  incomparable type so == is False and != is True
+                other = to_offset(other)
+            except ValueError:
+                # e.g. "infer"
+                return True
 
         if isinstance(other, Tick):
             return self.delta != other.delta
 

From a4c1490273633c82ac305368d515eb969cc7fb0a Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 11 Nov 2018 15:26:59 -0800
Subject: [PATCH 092/122] BUG/REF: TimedeltaIndex.__new__ (#23539)

---
 doc/source/whatsnew/v0.24.0.txt             |   2 +
 pandas/core/arrays/datetimes.py             |   4 +-
 pandas/core/arrays/timedeltas.py            | 179 +++++++++++++++++-
 pandas/core/indexes/datetimes.py            |  31 +--
 pandas/core/indexes/timedeltas.py           |  61 +++---
 pandas/core/tools/timedeltas.py             |  55 +++---
 pandas/tests/arithmetic/test_timedelta64.py |   4 +-
 .../indexes/datetimes/test_construction.py  |   3 +-
 .../indexes/timedeltas/test_arithmetic.py   |  10 +-
 .../indexes/timedeltas/test_construction.py |  75 +++++++-
 pandas/tests/indexes/timedeltas/test_ops.py |   3 +-
 .../tests/scalar/timedelta/test_arithmetic.py |   6 +-
 12 files changed, 330 insertions(+), 103 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 93c0c0eb745dc..73d706ed98416 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -247,6 +247,7 @@ Backwards incompatible API changes
 
 - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`)
 - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
+- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
 
 .. _whatsnew_0240.api_breaking.deps:
 
@@ -969,6 +970,7 @@ Deprecations
 
 - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
 - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
+- Constructing a :class:`TimedeltaIndex` from ``datetime64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23539`)
 
 .. _whatsnew_0240.deprecations.datetimelike_int_ops:
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 08b83598bb6af..b0485cc82f07f 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -234,9 +234,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
         result = cls._simple_new(values, freq=freq, tz=tz)
 
         if freq_infer:
-            inferred = result.inferred_freq
-            if inferred:
-                result.freq = to_offset(inferred)
+            result.freq = to_offset(result.inferred_freq)
 
         # NB: Among other things not yet ported from the DatetimeIndex
         # constructor, this does not call _deepcopy_if_needed
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index cf3ba263d1f81..1f78e0c00bf00 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -1,18 +1,28 @@
 # -*- coding: utf-8 -*-
 from datetime import timedelta
+import warnings
 
 import numpy as np
 
 from pandas._libs import tslibs
-from pandas._libs.tslibs import Timedelta, Timestamp, NaT
+from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT
 from pandas._libs.tslibs.fields import get_timedelta_field
-from pandas._libs.tslibs.timedeltas import array_to_timedelta64
+from pandas._libs.tslibs.timedeltas import (
+    array_to_timedelta64, parse_timedelta_unit)
 
 from pandas import compat
 
 from pandas.core.dtypes.common import (
-    _TD_DTYPE, is_list_like)
-from pandas.core.dtypes.generic import ABCSeries
+    _TD_DTYPE,
+    is_object_dtype,
+    is_string_dtype,
+    is_float_dtype,
+    is_integer_dtype,
+    is_timedelta64_dtype,
+    is_datetime64_dtype,
+    is_list_like,
+    ensure_int64)
+from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
 from pandas.core.dtypes.missing import isna
 
 import pandas.core.common as com
@@ -139,9 +149,7 @@ def __new__(cls, values, freq=None):
         result = cls._simple_new(values, freq=freq)
 
         if freq_infer:
-            inferred = result.inferred_freq
-            if inferred:
-                result.freq = to_offset(inferred)
+            result.freq = to_offset(result.inferred_freq)
 
         return result
 
@@ -397,6 +405,163 @@ def f(x):
 # ---------------------------------------------------------------------
 # Constructor Helpers
 
+def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
+    """
+    Parameters
+    ----------
+    data : list-like
+    copy : bool, default False
+    unit : str, default "ns"
+    errors : {"raise", "coerce", "ignore"}, default "raise"
+
+    Returns
+    -------
+    ndarray[timedelta64[ns]]
+    inferred_freq : Tick or None
+
+    Raises
+    ------
+    ValueError : data cannot be converted to timedelta64[ns]
+
+    Notes
+    -----
+    Unlike `pandas.to_timedelta`, setting `errors=ignore` will not cause
+    errors to be ignored; they are caught and subsequently ignored at a
+    higher level.
+    """
+    inferred_freq = None
+    unit = parse_timedelta_unit(unit)
+
+    # Unwrap whatever we have into a np.ndarray
+    if not hasattr(data, 'dtype'):
+        # e.g. list, tuple
+        if np.ndim(data) == 0:
+            # i.e. generator
+            data = list(data)
+        data = np.array(data, copy=False)
+    elif isinstance(data, ABCSeries):
+        data = data._values
+    elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArrayMixin)):
+        inferred_freq = data.freq
+        data = data._data
+
+    # Convert whatever we have into timedelta64[ns] dtype
+    if is_object_dtype(data) or is_string_dtype(data):
+        # no need to make a copy, need to convert if string-dtyped
+        data = objects_to_td64ns(data, unit=unit, errors=errors)
+        copy = False
+
+    elif is_integer_dtype(data):
+        # treat as multiples of the given unit
+        data, copy_made = ints_to_td64ns(data, unit=unit)
+        copy = copy and not copy_made
+
+    elif is_float_dtype(data):
+        # treat as multiples of the given unit. If after converting to nanos,
+        # there are fractional components left, these are truncated
+        # (i.e. NOT rounded)
+        mask = np.isnan(data)
+        coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
+        data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
+        data[mask] = iNaT
+        copy = False
+
+    elif is_timedelta64_dtype(data):
+        if data.dtype != _TD_DTYPE:
+            # non-nano unit
+            # TODO: watch out for overflows
+            data = data.astype(_TD_DTYPE)
+            copy = False
+
+    elif is_datetime64_dtype(data):
+        # GH#23539
+        warnings.warn("Passing datetime64-dtype data to TimedeltaIndex is "
+                      "deprecated, will raise a TypeError in a future "
+                      "version",
+                      FutureWarning, stacklevel=3)
+        data = ensure_int64(data).view(_TD_DTYPE)
+
+    else:
+        raise TypeError("dtype {dtype} cannot be converted to timedelta64[ns]"
+                        .format(dtype=data.dtype))
+
+    data = np.array(data, copy=copy)
+    assert data.dtype == 'm8[ns]', data
+    return data, inferred_freq
+
+
+def ints_to_td64ns(data, unit="ns"):
+    """
+    Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
+    the integers as multiples of the given timedelta unit.
+
+    Parameters
+    ----------
+    data : np.ndarray with integer-dtype
+    unit : str, default "ns"
+
+    Returns
+    -------
+    ndarray[timedelta64[ns]]
+    bool : whether a copy was made
+    """
+    copy_made = False
+    unit = unit if unit is not None else "ns"
+
+    if data.dtype != np.int64:
+        # converting to int64 makes a copy, so we can avoid
+        # re-copying later
+        data = data.astype(np.int64)
+        copy_made = True
+
+    if unit != "ns":
+        dtype_str = "timedelta64[{unit}]".format(unit=unit)
+        data = data.view(dtype_str)
+
+        # TODO: watch out for overflows when converting from lower-resolution
+        data = data.astype("timedelta64[ns]")
+        # the astype conversion makes a copy, so we can avoid re-copying later
+        copy_made = True
+
+    else:
+        data = data.view("timedelta64[ns]")
+
+    return data, copy_made
+
+
+def objects_to_td64ns(data, unit="ns", errors="raise"):
+    """
+    Convert an object-dtyped or string-dtyped array into a
+    timedelta64[ns]-dtyped array.
+
+    Parameters
+    ----------
+    data : ndarray or Index
+    unit : str, default "ns"
+    errors : {"raise", "coerce", "ignore"}, default "raise"
+
+    Returns
+    -------
+    ndarray[timedelta64[ns]]
+
+    Raises
+    ------
+    ValueError : data cannot be converted to timedelta64[ns]
+
+    Notes
+    -----
+    Unlike `pandas.to_timedelta`, setting `errors=ignore` will not cause
+    errors to be ignored; they are caught and subsequently ignored at a
+    higher level.
+ """ + # coerce Index to np.ndarray, converting string-dtype if necessary + values = np.array(data, dtype=np.object_, copy=False) + + result = array_to_timedelta64(values, + unit=unit, errors=errors) + return result.view('timedelta64[ns]') + + def _generate_regular_range(start, end, periods, offset): stride = offset.nanos if periods is None: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 8da0672559006..c82cff19573e3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -239,6 +239,21 @@ def __new__(cls, data=None, dayfirst=False, yearfirst=False, dtype=None, copy=False, name=None, verify_integrity=True): + if data is None: + # TODO: Remove this block and associated kwargs; GH#20535 + result = cls._generate_range(start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) + result.name = name + return result + + if is_scalar(data): + raise TypeError("{cls}() must be called with a " + "collection of some kind, {data} was passed" + .format(cls=cls.__name__, data=repr(data))) + + # - Cases checked above all return/raise before reaching here - # + # This allows to later ensure that the 'copy' parameter is honored: if isinstance(data, Index): ref_to_data = data._data @@ -253,20 +268,8 @@ def __new__(cls, data=None, # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if data is None: - # TODO: Remove this block and associated kwargs; GH#20535 - result = cls._generate_range(start, end, periods, - freq=freq, tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) - result.name = name - return result - if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): - if is_scalar(data): - raise ValueError('DatetimeIndex() must be called with a ' - 'collection of some kind, %s was passed' - % repr(data)) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) @@ -328,9 +331,7 @@ def __new__(cls, data=None, cls._validate_frequency(subarr, freq, ambiguous=ambiguous) if freq_infer: - inferred = subarr.inferred_freq - if inferred: - subarr.freq = to_offset(inferred) + subarr.freq = to_offset(subarr.inferred_freq) return subarr._deepcopy_if_needed(ref_to_data, copy) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5b077a6984114..35e17c7400892 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -15,7 +15,8 @@ from pandas.core.dtypes.missing import isna from pandas.core.arrays.timedeltas import ( - TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) + TimedeltaArrayMixin, _is_convertible_to_td, _to_m8, + sequence_to_td64ns) from pandas.core.arrays import datetimelike as dtl from pandas.core.indexes.base import Index @@ -33,10 +34,9 @@ TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op, wrap_array_method, wrap_field_accessor) from pandas.core.tools.timedeltas import ( - to_timedelta, _coerce_scalar_to_timedelta_type) + _coerce_scalar_to_timedelta_type) from pandas._libs import (lib, index as libindex, join as libjoin, Timedelta, NaT) -from pandas._libs.tslibs.timedeltas import array_to_timedelta64 class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, @@ -139,12 +139,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, periods=None, closed=None, dtype=None, copy=False, name=None, verify_integrity=True): - if isinstance(data, TimedeltaIndex) and freq is None and name is None: - if copy: - 
return data.copy() - else: - return data._shallow_copy() - freq, freq_infer = dtl.maybe_infer_freq(freq) if data is None: @@ -154,32 +148,31 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, result.name = name return result - if unit is not None: - data = to_timedelta(data, unit=unit, box=False) - if is_scalar(data): - raise ValueError('TimedeltaIndex() must be called with a ' - 'collection of some kind, {data} was passed' - .format(data=repr(data))) - - # convert if not already - if getattr(data, 'dtype', None) != _TD_DTYPE: - data = to_timedelta(data, unit=unit, box=False) - elif copy: - data = np.array(data, copy=True) - - data = np.array(data, copy=False) - if data.dtype == np.object_: - data = array_to_timedelta64(data) - if data.dtype != _TD_DTYPE: - if is_timedelta64_dtype(data): - # non-nano unit - # TODO: watch out for overflows - data = data.astype(_TD_DTYPE) + raise TypeError('{cls}() must be called with a ' + 'collection of some kind, {data} was passed' + .format(cls=cls.__name__, data=repr(data))) + + if isinstance(data, TimedeltaIndex) and freq is None and name is None: + if copy: + return data.copy() else: - data = ensure_int64(data).view(_TD_DTYPE) + return data._shallow_copy() - assert data.dtype == 'm8[ns]', data.dtype + # - Cases checked above all return/raise before reaching here - # + + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) + if inferred_freq is not None: + if freq is not None and freq != inferred_freq: + raise ValueError('Inferred frequency {inferred} from passed ' + 'values does not conform to passed frequency ' + '{passed}' + .format(inferred=inferred_freq, + passed=freq.freqstr)) + elif freq_infer: + freq = inferred_freq + freq_infer = False + verify_integrity = False subarr = cls._simple_new(data, name=name, freq=freq) # check that we are matching freqs @@ -188,9 +181,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, cls._validate_frequency(subarr, freq) if freq_infer: - inferred = subarr.inferred_freq - if inferred: - subarr.freq = to_offset(inferred) + subarr.freq = to_offset(subarr.inferred_freq) return subarr diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 220b14a9cb7c6..fad136b3b5a45 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -6,16 +6,13 @@ import pandas as pd from pandas._libs import tslibs from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64, - array_to_timedelta64, parse_timedelta_unit) -from pandas.core.dtypes.common import ( - ensure_object, - is_integer_dtype, - is_timedelta64_dtype, - is_list_like) +from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass +from pandas.core.arrays.timedeltas import sequence_to_td64ns + def to_timedelta(arg, unit='ns', box=True, errors='raise'): """ @@ -129,31 +126,27 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): """Convert a list of objects to a timedelta index object.""" if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'): - arg = np.array(list(arg), dtype='O') - - # these are shortcut-able - if is_timedelta64_dtype(arg): - value = arg.astype('timedelta64[ns]') - elif is_integer_dtype(arg): - value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype( - 'timedelta64[ns]', copy=False) - else: - try: - value = array_to_timedelta64(ensure_object(arg), - unit=unit, errors=errors) - value = value.astype('timedelta64[ns]', copy=False) - except 
ValueError: - if errors == 'ignore': - return arg - else: - # This else-block accounts for the cases when errors='raise' - # and errors='coerce'. If errors == 'raise', these errors - # should be raised. If errors == 'coerce', we shouldn't - # expect any errors to be raised, since all parsing errors - # cause coercion to pd.NaT. However, if an error / bug is - # introduced that causes an Exception to be raised, we would - # like to surface it. - raise + # This is needed only to ensure that in the case where we end up + # returning arg (errors == "ignore"), and where the input is a + # generator, we return a useful list-like instead of a + # used-up generator + arg = np.array(list(arg), dtype=object) + + try: + value = sequence_to_td64ns(arg, unit=unit, + errors=errors, copy=False)[0] + except ValueError: + if errors == 'ignore': + return arg + else: + # This else-block accounts for the cases when errors='raise' + # and errors='coerce'. If errors == 'raise', these errors + # should be raised. If errors == 'coerce', we shouldn't + # expect any errors to be raised, since all parsing errors + # cause coercion to pd.NaT. However, if an error / bug is + # introduced that causes an Exception to be raised, we would + # like to surface it. + raise if box: from pandas import TimedeltaIndex diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index f92a772f3eaad..50c0e9564e02d 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1054,11 +1054,11 @@ def test_tdi_mul_float_series(self, box_df_fail): idx = tm.box_expected(idx, box) rng5f = np.arange(5, dtype='float64') - expected = TimedeltaIndex(rng5f * (rng5f + 0.1)) + expected = TimedeltaIndex(rng5f * (rng5f + 1.0)) box2 = pd.Series if box is pd.Index else box expected = tm.box_expected(expected, box2) - result = idx * Series(rng5f + 0.1) + result = idx * Series(rng5f + 1.0) tm.assert_equal(result, expected) # TODO: Put Series/DataFrame in others? 
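In short, the consolidated conversion path treats numeric input as multiples of the given unit and truncates leftover fractional nanoseconds. A minimal sketch, assuming a pandas build that includes this change (expected results are noted in comments, following the tests below):

    import numpy as np
    import pandas as pd

    # floats are multiples of the unit; fractional nanoseconds are
    # truncated, not rounded
    pd.TimedeltaIndex([2.3, 9.7])             # 2 and 9 nanoseconds
    pd.TimedeltaIndex([1.5, 2.25], unit='D')  # 1 days 12:00:00, 2 days 06:00:00

    # integers are likewise interpreted as multiples of the unit
    pd.to_timedelta(np.arange(3), unit='s')   # 0, 1 and 2 seconds

    # with errors='ignore', unparseable object input is returned unchanged
    pd.to_timedelta(np.array(['foo'], dtype=object), errors='ignore')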
diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 04b2c4f280588..42a75f277faa6 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -318,7 +318,8 @@ def test_constructor_coverage(self): pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', end='1/10/2000') - pytest.raises(ValueError, DatetimeIndex, '1/1/2000') + with pytest.raises(TypeError): + DatetimeIndex('1/1/2000') # generator expression gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index a03698c9ea0de..82337ac37fbee 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -453,10 +453,16 @@ def test_timedelta_ops_with_missing_values(self): # setup s1 = pd.to_timedelta(Series(['00:00:01'])) s2 = pd.to_timedelta(Series(['00:00:02'])) - sn = pd.to_timedelta(Series([pd.NaT])) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # Passing datetime64-dtype data to TimedeltaIndex is deprecated + sn = pd.to_timedelta(Series([pd.NaT])) + df1 = pd.DataFrame(['00:00:01']).apply(pd.to_timedelta) df2 = pd.DataFrame(['00:00:02']).apply(pd.to_timedelta) - dfn = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # Passing datetime64-dtype data to TimedeltaIndex is deprecated + dfn = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta) + scalar1 = pd.to_timedelta('00:00:01') scalar2 = pd.to_timedelta('00:00:02') timedelta_NaT = pd.to_timedelta('NaT') diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 1abda624777c8..074c8904b55b1 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -5,11 +5,81 @@ import pandas as pd import pandas.util.testing as tm -from pandas import TimedeltaIndex, timedelta_range, to_timedelta +from pandas import TimedeltaIndex, timedelta_range, to_timedelta, Timedelta class TestTimedeltaIndex(object): + def test_int64_nocopy(self): + # GH#23539 check that a copy isn't made when we pass int64 data + # and copy=False + arr = np.arange(10, dtype=np.int64) + tdi = TimedeltaIndex(arr, copy=False) + assert tdi._data.base is arr + + def test_infer_from_tdi(self): + # GH#23539 + # fast-path for inferring a frequency if the passed data already + # has one + tdi = pd.timedelta_range('1 second', periods=10**7, freq='1s') + + result = pd.TimedeltaIndex(tdi, freq='infer') + assert result.freq == tdi.freq + + # check that inferred_freq was not called by checking that the + # value has not been cached + assert "inferred_freq" not in getattr(result, "_cache", {}) + + def test_infer_from_tdi_mismatch(self): + # GH#23539 + # fast-path for invalidating a frequency if the passed data already + # has one and it does not match the `freq` input + tdi = pd.timedelta_range('1 second', periods=100, freq='1s') + + msg = ("Inferred frequency .* from passed values does " + "not conform to passed frequency") + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(tdi, freq='D') + + def test_dt64_data_invalid(self): + # GH#23539 + # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64] + # does not yet, but will in the future + dti = pd.date_range('2016-01-01', 
periods=3) + + msg = "cannot be converted to timedelta64" + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(dti.tz_localize('Europe/Brussels')) + + with tm.assert_produces_warning(FutureWarning): + TimedeltaIndex(dti) + + with tm.assert_produces_warning(FutureWarning): + TimedeltaIndex(np.asarray(dti)) + + def test_float64_ns_rounded(self): + # GH#23539 without specifying a unit, floats are regarded as nanos, + # and fractional portions are truncated + tdi = TimedeltaIndex([2.3, 9.7]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # integral floats are non-lossy + tdi = TimedeltaIndex([2.0, 9.0]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # NaNs get converted to NaT + tdi = TimedeltaIndex([2.0, np.nan]) + expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT]) + tm.assert_index_equal(tdi, expected) + + def test_float64_unit_conversion(self): + # GH#23539 + tdi = TimedeltaIndex([1.5, 2.25], unit='D') + expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) + tm.assert_index_equal(tdi, expected) + def test_construction_base_constructor(self): arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) @@ -63,7 +133,8 @@ def test_constructor_coverage(self): pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') - pytest.raises(ValueError, TimedeltaIndex, '1 days') + with pytest.raises(TypeError): + TimedeltaIndex('1 days') # generator expression gen = (timedelta(i) for i in range(10)) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 2fc0a49d789fd..989955c0d7ee7 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -54,8 +54,7 @@ def test_minmax(self): assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): - dr = pd.date_range(start='2016-01-15', end='2016-01-20') - td = TimedeltaIndex(np.asarray(dr)) + td = timedelta_range('16815 days', '16820 days', freq='D') assert np.min(td) == Timedelta('16815 days') assert np.max(td) == Timedelta('16820 days') diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 65709b0eebaf7..79fa49b564ad6 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -506,6 +506,9 @@ def test_td_rfloordiv_numeric_series(self): # TODO: GH-19761. Change to TypeError. 
ser // td + # ---------------------------------------------------------------- + # Timedelta.__mod__, __rmod__ + def test_mod_timedeltalike(self): # GH#19365 td = Timedelta(hours=37) @@ -545,9 +548,6 @@ def test_mod_offset(self): assert isinstance(result, Timedelta) assert result == Timedelta(hours=2) - # ---------------------------------------------------------------- - # Timedelta.__mod__, __rmod__ - def test_mod_numeric(self): # GH#19365 td = Timedelta(hours=37) From 468df9c64d090008ca98ccadb1f53cc00d54cce6 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 11 Nov 2018 23:36:29 +0000 Subject: [PATCH 093/122] BUILD: Simplifying contributor dependencies (#23522) --- ci/code_checks.sh | 20 +++- ci/environment-dev.yaml | 20 ---- ci/requirements-optional-conda.txt | 28 ------ ci/requirements_dev.txt | 16 ---- doc/source/contributing.rst | 11 +-- environment.yml | 53 ++++++++++ ...s-optional-pip.txt => requirements-dev.txt | 16 +++- scripts/convert_deps.py | 31 ------ scripts/generate_pip_deps_from_conda.py | 96 +++++++++++++++++++ 9 files changed, 181 insertions(+), 110 deletions(-) delete mode 100644 ci/environment-dev.yaml delete mode 100644 ci/requirements-optional-conda.txt delete mode 100644 ci/requirements_dev.txt create mode 100644 environment.yml rename ci/requirements-optional-pip.txt => requirements-dev.txt (63%) delete mode 100755 scripts/convert_deps.py create mode 100755 scripts/generate_pip_deps_from_conda.py diff --git a/ci/code_checks.sh b/ci/code_checks.sh index eba96f0c6c2fc..fac5c211cdad8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -9,16 +9,19 @@ # In the future we may want to add the validation of docstrings and other checks here. # # Usage: -# $ ./ci/code_checks.sh # run all checks -# $ ./ci/code_checks.sh lint # run linting only -# $ ./ci/code_checks.sh patterns # check for patterns that should not exist -# $ ./ci/code_checks.sh doctests # run doctests +# $ ./ci/code_checks.sh # run all checks +# $ ./ci/code_checks.sh lint # run linting only +# $ ./ci/code_checks.sh patterns # check for patterns that should not exist +# $ ./ci/code_checks.sh doctests # run doctests +# $ ./ci/code_checks.sh dependencies # check that dependencies are consistent echo "inside $0" [[ $LINT ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; } -[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unknown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; } +[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" || "$1" == "dependencies" ]] \ + || { echo "Unknown command $1. Usage: $0 [lint|patterns|doctests|dependencies]"; exit 9999; } source activate pandas +BASE_DIR="$(dirname $0)/.." 
RET=0 CHECK=$1 @@ -172,4 +175,11 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then fi +### DEPENDENCIES ### +if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then + MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo $MSG + $BASE_DIR/scripts/generate_pip_deps_from_conda.py --compare + RET=$(($RET + $?)) ; echo $MSG "DONE" +fi + exit $RET diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml deleted file mode 100644 index 2718c1cd582b6..0000000000000 --- a/ci/environment-dev.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: pandas-dev -channels: - - defaults - - conda-forge -dependencies: - - Cython>=0.28.2 - - NumPy - - flake8 - - flake8-comprehensions - - flake8-rst - - hypothesis>=3.58.0 - - isort - - moto - - pytest>=3.6 - - python-dateutil>=2.5.0 - - python=3 - - pytz - - setuptools>=24.2.0 - - sphinx - - sphinxcontrib-spelling diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt deleted file mode 100644 index 8758c8154abca..0000000000000 --- a/ci/requirements-optional-conda.txt +++ /dev/null @@ -1,28 +0,0 @@ -beautifulsoup4>=4.2.1 -blosc -bottleneck>=1.2.0 -fastparquet>=0.1.2 -gcsfs -html5lib -ipython>=5.6.0 -ipykernel -jinja2 -lxml -matplotlib>=2.0.0 -nbsphinx -numexpr>=2.6.1 -openpyxl -pyarrow>=0.7.0 -pymysql -pytables>=3.4.2 -pytest-cov -pytest-xdist -s3fs -scipy>=0.18.1 -seaborn -sqlalchemy -statsmodels -xarray -xlrd -xlsxwriter -xlwt diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt deleted file mode 100644 index a1cb20c265974..0000000000000 --- a/ci/requirements_dev.txt +++ /dev/null @@ -1,16 +0,0 @@ -# This file was autogenerated by scripts/convert_deps.py -# Do not modify directly -Cython>=0.28.2 -NumPy -flake8 -flake8-comprehensions -flake8-rst -hypothesis>=3.58.0 -isort -moto -pytest>=3.6 -python-dateutil>=2.5.0 -pytz -setuptools>=24.2.0 -sphinx -sphinxcontrib-spelling \ No newline at end of file diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 084f710091a1b..514a58456bcd9 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -170,7 +170,7 @@ We'll now kick off a three-step process: .. code-block:: none # Create and activate the build environment - conda env create -f ci/environment-dev.yaml + conda env create -f environment.yml conda activate pandas-dev # or with older versions of Anaconda: @@ -180,9 +180,6 @@ We'll now kick off a three-step process: python setup.py build_ext --inplace -j 4 python -m pip install -e . - # Install the rest of the optional dependencies - conda install -c defaults -c conda-forge --file=ci/requirements-optional-conda.txt - At this point you should be able to import pandas from your locally built version:: $ python # start an interpreter @@ -221,14 +218,12 @@ You'll need to have at least python3.5 installed on your system. . ~/virtualenvs/pandas-dev/bin/activate # Install the build dependencies - python -m pip install -r ci/requirements_dev.txt + python -m pip install -r requirements-dev.txt + # Build and install pandas python setup.py build_ext --inplace -j 4 python -m pip install -e . 
-   # Install additional dependencies
-   python -m pip install -r ci/requirements-optional-pip.txt
-
 Creating a branch
 -----------------
 
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000000000..f66625e6a60c7
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,53 @@
+name: pandas-dev
+channels:
+  - defaults
+  - conda-forge
+dependencies:
+  # required
+  - NumPy
+  - python=3
+  - python-dateutil>=2.5.0
+  - pytz
+
+  # development
+  - Cython>=0.28.2
+  - flake8
+  - flake8-comprehensions
+  - flake8-rst
+  - hypothesis>=3.58.0
+  - isort
+  - moto
+  - pytest>=3.6
+  - setuptools>=24.2.0
+  - sphinx
+  - sphinxcontrib-spelling
+
+  # optional
+  - beautifulsoup4>=4.2.1
+  - blosc
+  - bottleneck>=1.2.0
+  - fastparquet>=0.1.2
+  - gcsfs
+  - html5lib
+  - ipython>=5.6.0
+  - ipykernel
+  - jinja2
+  - lxml
+  - matplotlib>=2.0.0
+  - nbsphinx
+  - numexpr>=2.6.1
+  - openpyxl
+  - pyarrow>=0.7.0
+  - pymysql
+  - pytables>=3.4.2
+  - pytest-cov
+  - pytest-xdist
+  - s3fs
+  - scipy>=0.18.1
+  - seaborn
+  - sqlalchemy
+  - statsmodels
+  - xarray
+  - xlrd
+  - xlsxwriter
+  - xlwt
diff --git a/ci/requirements-optional-pip.txt b/requirements-dev.txt
similarity index 63%
rename from ci/requirements-optional-pip.txt
rename to requirements-dev.txt
index 62f1c555d8544..93145d948c218 100644
--- a/ci/requirements-optional-pip.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,17 @@
-# This file was autogenerated by scripts/convert_deps.py
-# Do not modify directly
+NumPy
+python-dateutil>=2.5.0
+pytz
+Cython>=0.28.2
+flake8
+flake8-comprehensions
+flake8-rst
+hypothesis>=3.58.0
+isort
+moto
+pytest>=3.6
+setuptools>=24.2.0
+sphinx
+sphinxcontrib-spelling
 beautifulsoup4>=4.2.1
 blosc
 bottleneck>=1.2.0
diff --git a/scripts/convert_deps.py b/scripts/convert_deps.py
deleted file mode 100755
index 3ff157e0a0d7b..0000000000000
--- a/scripts/convert_deps.py
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-Convert the conda environment.yaml to a pip requirements.txt
-"""
-import re
-import yaml
-
-exclude = {'python=3'}
-rename = {'pytables': 'tables'}
-
-with open("ci/environment-dev.yaml") as f:
-    dev = yaml.load(f)
-
-with open("ci/requirements-optional-conda.txt") as f:
-    optional = [x.strip() for x in f.readlines()]
-
-required = dev['dependencies']
-required = [rename.get(dep, dep) for dep in required if dep not in exclude]
-optional = [rename.get(dep, dep) for dep in optional if dep not in exclude]
-optional = [re.sub("(?<=[^<>])=", '==', dep) for dep in optional]
-
-
-with open("ci/requirements_dev.txt", 'wt') as f:
-    f.write("# This file was autogenerated by scripts/convert_deps.py\n")
-    f.write("# Do not modify directly\n")
-    f.write('\n'.join(required))
-
-
-with open("ci/requirements-optional-pip.txt", 'wt') as f:
-    f.write("# This file was autogenerated by scripts/convert_deps.py\n")
-    f.write("# Do not modify directly\n")
-    f.write("\n".join(optional))
diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py
new file mode 100755
index 0000000000000..2474214a4a53b
--- /dev/null
+++ b/scripts/generate_pip_deps_from_conda.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+"""
+Convert the conda environment.yml to the pip requirements-dev.txt,
+or check that they have the same packages (for the CI)
+
+Usage:
+
+    Generate `requirements-dev.txt`
+    $ ./generate_pip_deps_from_conda.py
+
+    Compare and fail (exit status != 0) if `requirements-dev.txt` has not been
+    generated with this script:
+    $ ./generate_pip_deps_from_conda.py --compare
+"""
+import argparse
+import os
+import re
+import sys
+import yaml
+
+
+EXCLUDE = {'python=3'}
+RENAME = 
{'pytables': 'tables'}
+
+
+def conda_package_to_pip(package):
+    """
+    Convert a conda package to its pip equivalent.
+
+    In most cases they are the same; these are the exceptions:
+    - Packages that should be excluded (in `EXCLUDE`)
+    - Packages that should be renamed (in `RENAME`)
+    - A package requiring a specific version, which in conda is defined with
+      a single equals sign (e.g. ``pandas=1.0``) and in pip with two
+      (e.g. ``pandas==1.0``)
+    """
+    if package in EXCLUDE:
+        return
+
+    if package in RENAME:
+        return RENAME[package]
+
+    return re.sub('(?<=[^<>])=', '==', package).strip()
+
+
+def main(conda_fname, pip_fname, compare=False):
+    """
+    Generate the pip dependencies file from the conda file, or compare that
+    they are synchronized (``compare=True``).
+
+    Parameters
+    ----------
+    conda_fname : str
+        Path to the conda file with dependencies (e.g. `environment.yml`).
+    pip_fname : str
+        Path to the pip file with dependencies (e.g. `requirements-dev.txt`).
+    compare : bool, default False
+        Whether to generate the pip file (``False``) or to check whether the
+        pip file has been generated with this script and the last version
+        of the conda file (``True``).
+
+    Returns
+    -------
+    bool
+        True if the comparison fails, False otherwise
+    """
+    with open(conda_fname) as conda_fd:
+        deps = yaml.safe_load(conda_fd)['dependencies']
+
+    pip_content = '\n'.join(filter(None, map(conda_package_to_pip, deps)))
+
+    if compare:
+        with open(pip_fname) as pip_fd:
+            return pip_content != pip_fd.read()
+    else:
+        with open(pip_fname, 'w') as pip_fd:
+            pip_fd.write(pip_content)
+        return False
+
+
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser(
+        description='convert (or compare) conda file to pip')
+    argparser.add_argument('--compare',
+                           action='store_true',
+                           help='compare whether the two files are equivalent')
+    args = argparser.parse_args()
+
+    repo_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+    res = main(os.path.join(repo_path, 'environment.yml'),
+               os.path.join(repo_path, 'requirements-dev.txt'),
+               compare=args.compare)
+    if res:
+        sys.stderr.write('`requirements-dev.txt` has to be generated with '
+                         '`{}` after `environment.yml` is modified.\n'.format(
+                             sys.argv[0]))
+    sys.exit(res)
From 6401a4f58e394e8f0b84f5dfdc16dc836a0da2c1 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 11 Nov 2018 16:15:17 -0800
Subject: [PATCH 094/122] TST: Fix xfailing DataFrame arithmetic tests by
 transposing (#23620)

---
 pandas/tests/arithmetic/conftest.py         |  23 ++--
 pandas/tests/arithmetic/test_datetime64.py  |  26 ++--
 pandas/tests/arithmetic/test_period.py      |  43 +++----
 pandas/tests/arithmetic/test_timedelta64.py | 131 +++++++++-----------
 pandas/util/testing.py                      |   7 +-
 5 files changed, 113 insertions(+), 117 deletions(-)

diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py
index cf1abc6f79101..9ee5e05638978 100644
--- a/pandas/tests/arithmetic/conftest.py
+++ b/pandas/tests/arithmetic/conftest.py
@@ -158,19 +158,18 @@ def box_df_fail(request):
     return request.param
 
 
-@pytest.fixture(params=[
-    pd.Index,
-    pd.Series,
-    pytest.param(pd.DataFrame,
-                 marks=pytest.mark.xfail(reason="Tries to broadcast "
-                                                "incorrectly",
-                                         strict=True, raises=ValueError))
-], ids=lambda x: x.__name__)
-def box_df_broadcast_failure(request):
-    """
-    Fixture equivalent to `box` but with the common failing case where
-    the DataFrame operation tries to broadcast incorrectly.
+@pytest.fixture(params=[(pd.Index, False),
+                        (pd.Series, False),
+                        (pd.DataFrame, False),
+                        pytest.param((pd.DataFrame, True),
+                                     marks=pytest.mark.xfail(strict=True))],
+                ids=lambda x: x[0].__name__ + '-' + str(x[1]))
+def box_transpose_fail(request):
+    """
+    Fixture similar to `box` but testing both transpose cases for DataFrame,
+    with the transpose=True case xfailed.
     """
+    # GH#23620
     return request.param
 
 
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index 73921a18ee5c7..b25e9a9a485c2 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -1155,14 +1155,18 @@ def test_dti_add_intarray_no_freq(self, box):
     def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours,
                                    box_with_datetime):
         # GH#22005, GH#22163 check DataFrame doesn't raise TypeError
+        box = box_with_datetime
+
         tz = tz_naive_fixture
         rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
-        rng = tm.box_expected(rng, box_with_datetime)
+
+        # FIXME: calling with transpose=True raises ValueError
+        rng = tm.box_expected(rng, box, transpose=False)
 
         result = rng + two_hours
         expected = pd.date_range('2000-01-01 02:00',
                                  '2000-02-01 02:00', tz=tz)
-        expected = tm.box_expected(expected, box_with_datetime)
+        expected = tm.box_expected(expected, box, transpose=False)
         tm.assert_equal(result, expected)
 
     def test_dti_iadd_timedeltalike(self, tz_naive_fixture, two_hours):
@@ -1192,12 +1196,15 @@ def test_dti_isub_timedeltalike(self, tz_naive_fixture, two_hours):
     def test_dt64arr_add_sub_td64_nat(self, box, tz_naive_fixture):
         # GH#23320 special handling for timedelta64("NaT")
         tz = tz_naive_fixture
+
         dti = pd.date_range("1994-04-01", periods=9, tz=tz, freq="QS")
         other = np.timedelta64("NaT")
         expected = pd.DatetimeIndex(["NaT"] * 9, tz=tz)
 
-        obj = tm.box_expected(dti, box)
-        expected = tm.box_expected(expected, box)
+        # FIXME: fails with transpose=True due to tz-aware DataFrame
+        # transpose bug
+        obj = tm.box_expected(dti, box, transpose=False)
+        expected = tm.box_expected(expected, box, transpose=False)
 
         result = obj + other
         tm.assert_equal(result, expected)
@@ -1450,10 +1457,8 @@ def test_sub_period(self, freq, box_with_datetime):
                                      operator.sub, ops.rsub])
     @pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H'])
     @pytest.mark.parametrize('dti_freq', [None, 'D'])
-    def test_dti_sub_pi(self, dti_freq, pi_freq, op, box_df_broadcast_failure):
+    def test_dti_sub_pi(self, dti_freq, pi_freq, op, box):
         # GH#20049 subtracting PeriodIndex should raise TypeError
-        box = box_df_broadcast_failure
-
         dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=dti_freq)
         pi = dti.to_period(pi_freq)
 
@@ -1782,6 +1787,8 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names):
     def test_dti_add_offset_tzaware(self, tz_aware_fixture, box_with_datetime):
         # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
+        box = box_with_datetime
+
         timezone = tz_aware_fixture
         if timezone == 'US/Pacific':
             dates = date_range('2012-11-01', periods=3, tz=timezone)
@@ -1793,8 +1800,9 @@ def test_dti_add_offset_tzaware(self, tz_aware_fixture, box_with_datetime):
         expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00',
                                   '2010-11-01 07:00'],
                                  freq='H', tz=timezone)
-        dates = tm.box_expected(dates, box_with_datetime)
-        expected = tm.box_expected(expected, box_with_datetime)
+
+        # FIXME: these raise ValueError with transpose=True
+        dates = tm.box_expected(dates, box, transpose=False)
+        expected = tm.box_expected(expected, box, transpose=False)
 
         # TODO: 
parametrize over the scalar being added? radd? sub? offset = dates + pd.offsets.Hour(5) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 687d07082ea33..a26a11cb6be9e 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -70,9 +70,8 @@ def test_parr_cmp_period_scalar(self, freq, box): tm.assert_equal(per >= base, exp) @pytest.mark.parametrize('freq', ['M', '2M', '3M']) - def test_parr_cmp_pi(self, freq, box_df_fail): + def test_parr_cmp_pi(self, freq, box): # GH#13200 - box = box_df_fail xbox = np.ndarray if box is pd.Index else box base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], @@ -108,11 +107,9 @@ def test_parr_cmp_pi(self, freq, box_df_fail): tm.assert_equal(base <= idx, exp) @pytest.mark.parametrize('freq', ['M', '2M', '3M']) - def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_df_fail): + def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box): # GH#13200 # different base freq - box = box_df_fail - base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq=freq) base = tm.box_expected(base, box) @@ -302,9 +299,7 @@ class TestPeriodIndexArithmetic(object): # PeriodIndex - other is defined for integers, timedelta-like others, # and PeriodIndex (with matching freq) - def test_parr_add_iadd_parr_raises(self, box_df_broadcast_failure): - box = box_df_broadcast_failure - + def test_parr_add_iadd_parr_raises(self, box): rng = pd.period_range('1/1/2000', freq='D', periods=5) other = pd.period_range('1/6/2000', freq='D', periods=5) # TODO: parametrize over boxes for other? @@ -346,9 +341,7 @@ def test_pi_sub_pi_with_nat(self): expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off]) tm.assert_index_equal(result, expected) - def test_parr_sub_pi_mismatched_freq(self, box_df_broadcast_failure): - box = box_df_broadcast_failure - + def test_parr_sub_pi_mismatched_freq(self, box): rng = pd.period_range('1/1/2000', freq='D', periods=5) other = pd.period_range('1/6/2000', freq='H', periods=5) # TODO: parametrize over boxes for other? 
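For context, the transpose handling matters because a length-n vector lines up with the columns of a single-row DataFrame but broadcasts along the wrong axis against a single-column one. A minimal sketch, assuming standard pandas broadcasting rules:

    import numpy as np
    import pandas as pd

    ser = pd.Series(pd.timedelta_range('1 Day', periods=3))
    df_col = ser.to_frame()  # shape (3, 1): a single column
    df_row = df_col.T        # shape (1, 3): a single row

    df_row * np.arange(1, 4)  # the vector aligns with the three columns

    try:
        df_col * np.arange(1, 4)  # length-3 vector vs. one column: raises
    except ValueError:
        pass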
@@ -364,9 +357,6 @@ def test_parr_sub_pi_mismatched_freq(self, box_df_broadcast_failure): @pytest.mark.parametrize('op', [operator.add, ops.radd, operator.sub, ops.rsub]) def test_pi_add_sub_float(self, op, other, box): - if box is pd.DataFrame and isinstance(other, np.ndarray): - pytest.xfail(reason="Tries to broadcast incorrectly") - dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') pi = dti.to_period('D') pi = tm.box_expected(pi, box) @@ -563,15 +553,18 @@ def test_pi_sub_isub_offset(self): rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) - def test_pi_add_offset_n_gt1(self, box): + def test_pi_add_offset_n_gt1(self, box_transpose_fail): # GH#23215 # add offset to PeriodIndex with freq.n > 1 + box, transpose = box_transpose_fail + per = pd.Period('2016-01', freq='2M') pi = pd.PeriodIndex([per]) expected = pd.PeriodIndex(['2016-03'], freq='2M') - pi = tm.box_expected(pi, box) - expected = tm.box_expected(expected, box) + + pi = tm.box_expected(pi, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) result = pi + per.freq tm.assert_equal(result, expected) @@ -582,12 +575,14 @@ def test_pi_add_offset_n_gt1(self, box): def test_pi_add_offset_n_gt1_not_divisible(self, box_with_period): # GH#23215 # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0 + box = box_with_period pi = pd.PeriodIndex(['2016-01'], freq='2M') - pi = tm.box_expected(pi, box_with_period) - expected = pd.PeriodIndex(['2016-04'], freq='2M') - expected = tm.box_expected(expected, box_with_period) + + # FIXME: with transposing these tests fail + pi = tm.box_expected(pi, box, transpose=False) + expected = tm.box_expected(expected, box, transpose=False) result = pi + to_offset('3M') tm.assert_equal(result, expected) @@ -801,14 +796,16 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, with pytest.raises(period.IncompatibleFrequency, match=msg): rng -= other - def test_parr_add_sub_td64_nat(self, box): + def test_parr_add_sub_td64_nat(self, box_transpose_fail): # GH#23320 special handling for timedelta64("NaT") + box, transpose = box_transpose_fail + pi = pd.period_range("1994-04-01", periods=9, freq="19D") other = np.timedelta64("NaT") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") - obj = tm.box_expected(pi, box) - expected = tm.box_expected(expected, box) + obj = tm.box_expected(pi, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) result = obj + other tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 50c0e9564e02d..58c8b3b07f723 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -367,10 +367,6 @@ def test_td64arr_add_str_invalid(self, box): operator.sub, ops.rsub], ids=lambda x: x.__name__) def test_td64arr_add_sub_float(self, box, op, other): - if box is pd.DataFrame and isinstance(other, np.ndarray): - pytest.xfail("Tries to broadcast, raising " - "ValueError instead of TypeError") - tdi = TimedeltaIndex(['-1 days', '-1 days']) tdi = tm.box_expected(tdi, box) @@ -393,9 +389,8 @@ def test_td64arr_sub_period(self, box, freq): @pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H']) @pytest.mark.parametrize('tdi_freq', [None, 'H']) - def test_td64arr_sub_pi(self, box_df_broadcast_failure, tdi_freq, pi_freq): + def test_td64arr_sub_pi(self, box, tdi_freq, pi_freq): # GH#20049 subtracting PeriodIndex should raise TypeError - box = 
box_df_broadcast_failure tdi = TimedeltaIndex(['1 hours', '2 hours'], freq=tdi_freq) dti = Timestamp('2018-03-07 17:16:40') + tdi pi = dti.to_period(pi_freq) @@ -427,8 +422,10 @@ def test_td64arr_add_timestamp(self, box, tz_naive_fixture): idx = TimedeltaIndex(['1 day', '2 day']) expected = DatetimeIndex(['2011-01-02', '2011-01-03'], tz=tz) - idx = tm.box_expected(idx, box) - expected = tm.box_expected(expected, box) + # FIXME: fails with transpose=True because of tz-aware DataFrame + # transpose bug + idx = tm.box_expected(idx, box, transpose=False) + expected = tm.box_expected(expected, box, transpose=False) result = idx + other tm.assert_equal(result, expected) @@ -460,9 +457,7 @@ def test_td64arr_add_sub_timestamp(self, box): with pytest.raises(TypeError): tdser - ts - def test_tdi_sub_dt64_array(self, box_df_broadcast_failure): - box = box_df_broadcast_failure - + def test_tdi_sub_dt64_array(self, box): dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) dtarr = dti.values @@ -478,9 +473,7 @@ def test_tdi_sub_dt64_array(self, box_df_broadcast_failure): result = dtarr - tdi tm.assert_equal(result, expected) - def test_tdi_add_dt64_array(self, box_df_broadcast_failure): - box = box_df_broadcast_failure - + def test_tdi_add_dt64_array(self, box): dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) dtarr = dti.values @@ -524,9 +517,8 @@ def test_td64arr_add_int_series_invalid(self, box, tdser): with pytest.raises(err): int_ser - tdser - def test_td64arr_add_intlike(self, box_df_broadcast_failure): + def test_td64arr_add_intlike(self, box): # GH#19123 - box = box_df_broadcast_failure tdi = TimedeltaIndex(['59 days', '59 days', 'NaT']) ser = tm.box_expected(tdi, box) err = TypeError if box is not pd.Index else NullFrequencyError @@ -580,9 +572,6 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser): # TODO: Add DataFrame in here? 
], ids=lambda x: type(x).__name__) def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser): - if box is pd.DataFrame and not isinstance(vec, Series): - raise pytest.xfail(reason="Tries to broadcast incorrectly") - tdser = tm.box_expected(tdser, box) err = TypeError if box is pd.Index and not dtype.startswith('float'): @@ -655,9 +644,7 @@ def test_timedelta64_operations_with_timedeltas(self): # roundtrip tm.assert_series_equal(result + td2, td1) - def test_td64arr_add_td64_array(self, box_df_broadcast_failure): - box = box_df_broadcast_failure - + def test_td64arr_add_td64_array(self, box): dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) tdarr = tdi.values @@ -671,9 +658,7 @@ def test_td64arr_add_td64_array(self, box_df_broadcast_failure): result = tdarr + tdi tm.assert_equal(result, expected) - def test_td64arr_sub_td64_array(self, box_df_broadcast_failure): - box = box_df_broadcast_failure - + def test_td64arr_sub_td64_array(self, box): dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) tdarr = tdi.values @@ -691,10 +676,13 @@ def test_td64arr_sub_td64_array(self, box_df_broadcast_failure): @pytest.mark.parametrize('names', [(None, None, None), ('Egon', 'Venkman', None), ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_td64arr_add_sub_tdi(self, box_df_broadcast_failure, names): + def test_td64arr_add_sub_tdi(self, box, names): # GH#17250 make sure result dtype is correct # GH#19043 make sure names are propagated correctly - box = box_df_broadcast_failure + if box is pd.DataFrame and names[1] == 'Venkman': + pytest.skip("Name propagation for DataFrame does not behave like " + "it does for Index/Series") + tdi = TimedeltaIndex(['0 days', '1 day'], name=names[0]) ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], @@ -825,9 +813,12 @@ def test_timedelta64_operations_with_DateOffset(self): @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) - def test_td64arr_add_offset_index(self, names, box_df_broadcast_failure): + def test_td64arr_add_offset_index(self, names, box): # GH#18849, GH#19744 - box = box_df_broadcast_failure + if box is pd.DataFrame and names[1] == 'bar': + pytest.skip("Name propagation for DataFrame does not behave like " + "it does for Index/Series") + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], name=names[0]) other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], @@ -838,19 +829,21 @@ def test_td64arr_add_offset_index(self, names, box_df_broadcast_failure): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - with tm.assert_produces_warning(PerformanceWarning): + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = PerformanceWarning if box is not pd.DataFrame else None + with tm.assert_produces_warning(warn): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(warn): res2 = other + tdi tm.assert_equal(res2, expected) # TODO: combine with test_td64arr_add_offset_index by parametrizing # over second box? 
- def test_td64arr_add_offset_array(self, box_df_broadcast_failure): + def test_td64arr_add_offset_array(self, box): # GH#18849 - box = box_df_broadcast_failure tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) @@ -860,20 +853,26 @@ def test_td64arr_add_offset_array(self, box_df_broadcast_failure): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - with tm.assert_produces_warning(PerformanceWarning): + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = PerformanceWarning if box is not pd.DataFrame else None + with tm.assert_produces_warning(warn): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(warn): res2 = other + tdi tm.assert_equal(res2, expected) @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) - def test_td64arr_sub_offset_index(self, names, box_df_broadcast_failure): + def test_td64arr_sub_offset_index(self, names, box): # GH#18824, GH#19744 - box = box_df_broadcast_failure + if box is pd.DataFrame and names[1] == 'bar': + pytest.skip("Name propagation for DataFrame does not behave like " + "it does for Index/Series") + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], name=names[0]) other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], @@ -885,13 +884,15 @@ def test_td64arr_sub_offset_index(self, names, box_df_broadcast_failure): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - with tm.assert_produces_warning(PerformanceWarning): + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = PerformanceWarning if box is not pd.DataFrame else None + with tm.assert_produces_warning(warn): res = tdi - other tm.assert_equal(res, expected) - def test_td64arr_sub_offset_array(self, box_df_broadcast_failure): + def test_td64arr_sub_offset_array(self, box): # GH#18824 - box = box_df_broadcast_failure tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) @@ -901,7 +902,10 @@ def test_td64arr_sub_offset_array(self, box_df_broadcast_failure): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - with tm.assert_produces_warning(PerformanceWarning): + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = PerformanceWarning if box is not pd.DataFrame else None + with tm.assert_produces_warning(warn): res = tdi - other tm.assert_equal(res, expected) @@ -943,9 +947,6 @@ def test_td64arr_with_offset_series(self, names, box_df_fail): @pytest.mark.parametrize('obox', [np.array, pd.Index, pd.Series]) def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box): # GH#18824 - if box is pd.DataFrame and obox is not pd.Series: - raise pytest.xfail(reason="Attempts to broadcast incorrectly") - tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) tdi = tm.box_expected(tdi, box) @@ -1023,8 +1024,7 @@ def test_tdi_mul_int_array_zerodim(self, box): result = idx * np.array(5, dtype='int64') tm.assert_equal(result, expected) - def test_tdi_mul_int_array(self, box_df_broadcast_failure): - box = box_df_broadcast_failure + def test_tdi_mul_int_array(self, box): rng5 = 
np.arange(5, dtype='int64') idx = TimedeltaIndex(rng5) expected = TimedeltaIndex(rng5 ** 2) @@ -1035,8 +1035,7 @@ def test_tdi_mul_int_array(self, box_df_broadcast_failure): result = idx * rng5 tm.assert_equal(result, expected) - def test_tdi_mul_int_series(self, box_df_fail): - box = box_df_fail + def test_tdi_mul_int_series(self, box): idx = TimedeltaIndex(np.arange(5, dtype='int64')) expected = TimedeltaIndex(np.arange(5, dtype='int64') ** 2) @@ -1048,8 +1047,7 @@ def test_tdi_mul_int_series(self, box_df_fail): result = idx * pd.Series(np.arange(5, dtype='int64')) tm.assert_equal(result, expected) - def test_tdi_mul_float_series(self, box_df_fail): - box = box_df_fail + def test_tdi_mul_float_series(self, box): idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box) @@ -1069,9 +1067,7 @@ def test_tdi_mul_float_series(self, box_df_fail): pd.Float64Index(range(1, 11)), pd.RangeIndex(1, 11) ], ids=lambda x: type(x).__name__) - def test_tdi_rmul_arraylike(self, other, box_df_broadcast_failure): - box = box_df_broadcast_failure - + def test_tdi_rmul_arraylike(self, other, box): tdi = TimedeltaIndex(['1 Day'] * 10) expected = timedelta_range('1 days', '10 days') @@ -1131,8 +1127,8 @@ def test_td64arr_floordiv_tdscalar(self, box, scalar_td): expected = Series([0, 0, np.nan]) - td1 = tm.box_expected(td1, box) - expected = tm.box_expected(expected, box) + td1 = tm.box_expected(td1, box, transpose=False) + expected = tm.box_expected(expected, box, transpose=False) result = td1 // scalar_td tm.assert_equal(result, expected) @@ -1144,8 +1140,8 @@ def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td): expected = Series([1, 1, np.nan]) - td1 = tm.box_expected(td1, box) - expected = tm.box_expected(expected, box) + td1 = tm.box_expected(td1, box, transpose=False) + expected = tm.box_expected(expected, box, transpose=False) result = scalar_td // td1 tm.assert_equal(result, expected) @@ -1157,8 +1153,8 @@ def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td): expected = Series([1, 1, np.nan]) - td1 = tm.box_expected(td1, box) - expected = tm.box_expected(expected, box) + td1 = tm.box_expected(td1, box, transpose=False) + expected = tm.box_expected(expected, box, transpose=False) # We can test __rfloordiv__ using this syntax, # see `test_timedelta_rfloordiv` @@ -1192,14 +1188,14 @@ def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box): tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None) expected = pd.Index([2.0, 2.0, np.nan]) - tdi = tm.box_expected(tdi, box) - expected = tm.box_expected(expected, box) + tdi = tm.box_expected(tdi, box, transpose=False) + expected = tm.box_expected(expected, box, transpose=False) res = tdi.__rfloordiv__(scalar_td) tm.assert_equal(res, expected) expected = pd.Index([0.0, 0.0, np.nan]) - expected = tm.box_expected(expected, box) + expected = tm.box_expected(expected, box, transpose=False) res = tdi // (scalar_td) tm.assert_equal(res, expected) @@ -1283,11 +1279,10 @@ def test_td64arr_div_numeric_scalar(self, box, two, tdser): Series([20, 30, 40])], ids=lambda x: type(x).__name__) @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) - def test_td64arr_rmul_numeric_array(self, op, box_df_fail, + def test_td64arr_rmul_numeric_array(self, op, box, vector, dtype, tdser): # GH#4521 # divide/multiply by integers - box = box_df_fail # broadcasts incorrectly but doesn't raise vector = vector.astype(dtype) expected = Series(['1180 Days', '1770 Days', 'NaT'], @@ -1301,14 +1296,6 @@ def 
test_td64arr_rmul_numeric_array(self, op, box_df_fail,
         result = op(vector, tdser)
         tm.assert_equal(result, expected)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="broadcasts along "
-                                                    "wrong axis",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
                                        'uint64', 'uint32', 'uint16', 'uint8',
                                        'float64', 'float32', 'float16'])
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 748f3bbc5b497..1fa77f5321038 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -1587,7 +1587,7 @@ def assert_equal(left, right, **kwargs):
         raise NotImplementedError(type(left))
 
 
-def box_expected(expected, box_cls):
+def box_expected(expected, box_cls, transpose=True):
     """
     Helper function to wrap the expected output of a test in a given
     box_class.
@@ -1606,6 +1606,11 @@
         expected = pd.Series(expected)
     elif box_cls is pd.DataFrame:
         expected = pd.Series(expected).to_frame()
+        if transpose:
+            # for vector operations, we need a DataFrame to be a single-row,
+            # not a single-column, in order to operate against non-DataFrame
+            # vectors of the same length.
+            expected = expected.T
     elif box_cls is PeriodArray:
         # the PeriodArray constructor is not as flexible as period_array
         expected = period_array(expected)
From 92b015d3e2db04ba4546d1a83a8a5d88270436ae Mon Sep 17 00:00:00 2001
From: Vincent La
Date: Sun, 11 Nov 2018 16:21:58 -0800
Subject: [PATCH 095/122] DOC: Enhancing pivot / reshape docs (#21038)

---
 doc/source/reshaping.rst | 110 ++++++++++++++++++++++++++++++++++++---
 pandas/core/frame.py     |  72 ++++++++++++++++---------
 2 files changed, 151 insertions(+), 31 deletions(-)

diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
index 6163b6f2ae89a..ff867a2ddfe6d 100644
--- a/doc/source/reshaping.rst
+++ b/doc/source/reshaping.rst
@@ -17,6 +17,8 @@ Reshaping and Pivot Tables
 Reshaping by pivoting DataFrame objects
 ---------------------------------------
 
+.. image:: _static/reshaping_pivot.png
+
 .. ipython::
    :suppress:
 
@@ -33,8 +35,7 @@ Reshaping by pivoting DataFrame objects
 
    In [3]: df = unpivot(tm.makeTimeDataFrame())
 
-Data is often stored in CSV files or databases in so-called "stacked" or
-"record" format:
+Data is often stored in so-called "stacked" or "record" format:
 
 .. ipython:: python
 
@@ -66,8 +67,6 @@ To select out everything for variable ``A`` we could do:
 
    df[df['variable'] == 'A']
 
-.. image:: _static/reshaping_pivot.png
-
 But suppose we wish to do time series operations with the variables. A better
 representation would be where the ``columns`` are the unique variables and an
 ``index`` of dates identifies individual observations. To reshape the data into
@@ -87,7 +86,7 @@ column:
 .. ipython:: python
 
    df['value2'] = df['value'] * 2
-   pivoted = df.pivot('date', 'variable')
+   pivoted = df.pivot(index='date', columns='variable')
   pivoted
 
 You can then select subsets from the pivoted ``DataFrame``:
@@ -99,6 +98,12 @@ You can then select subsets from the pivoted ``DataFrame``:
 Note that this returns a view on the underlying data in the case where the data
 are homogeneously-typed.
 
+.. note::
+    :func:`~pandas.pivot` will error with a ``ValueError: Index contains duplicate
+    entries, cannot reshape`` if the index/column pair is not unique. In this
+    case, consider using :func:`~pandas.pivot_table` which is a generalization
+    of pivot that can handle duplicate values for one index/column pair.
+
 .. 
_reshaping.stacking:
 
 Reshaping by stacking and unstacking
@@ -704,10 +709,103 @@ handling of NaN:
   In [3]: np.unique(x, return_inverse=True)[::-1]
   Out[3]: (array([3, 3, 0, 4, 1, 2]), array([nan, 3.14, inf, 'A', 'B'], dtype=object))
 
-
 .. note::
     If you just want to handle one column as a categorical variable (like R's
    factor), you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or
    ``df["cat_col"] = df["col"].astype("category")``. For full docs on
    :class:`~pandas.Categorical`, see the :ref:`Categorical introduction <categorical>`
    and the :ref:`API documentation <api.categorical>`.
+
+Examples
+--------
+
+In this section, we will review frequently asked questions and examples. The
+column names and relevant column values are named to correspond with how this
+DataFrame will be pivoted in the answers below.
+
+.. ipython:: python
+
+   np.random.seed([3, 1415])
+   n = 20
+
+   cols = np.array(['key', 'row', 'item', 'col'])
+   df = cols + pd.DataFrame((np.random.randint(5, size=(n, 4))
+                             // [2, 1, 2, 1]).astype(str))
+   df.columns = cols
+   df = df.join(pd.DataFrame(np.random.rand(n, 2).round(2)).add_prefix('val'))
+
+   df
+
+Pivoting with Single Aggregations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Suppose we want to pivot ``df`` such that the ``col`` values become the
+columns, the ``row`` values become the index, and the mean of ``val0`` fills
+in the values. In particular, the resulting DataFrame should look like:
+
+.. code-block:: ipython
+
+    col   col0   col1   col2   col3  col4
+    row
+    row0  0.77  0.605    NaN  0.860  0.65
+    row2  0.13    NaN  0.395  0.500  0.25
+    row3   NaN  0.310    NaN  0.545   NaN
+    row4   NaN  0.100  0.395  0.760  0.24
+
+This solution uses :func:`~pandas.pivot_table`. Also note that
+``aggfunc='mean'`` is the default. It is included here to be explicit.
+
+.. ipython:: python
+
+   df.pivot_table(
+       values='val0', index='row', columns='col', aggfunc='mean')
+
+Note that we can also replace the missing values by using the ``fill_value``
+parameter.
+
+.. ipython:: python
+
+   df.pivot_table(
+       values='val0', index='row', columns='col', aggfunc='mean', fill_value=0)
+
+We can pass in other aggregation functions as well. For example, we can also
+pass in ``sum``.
+
+.. ipython:: python
+
+   df.pivot_table(
+       values='val0', index='row', columns='col', aggfunc='sum', fill_value=0)
+
+Another aggregation we can perform is to calculate the frequency with which
+the columns and rows occur together, a.k.a. "cross tabulation". To do this,
+we can pass ``size`` to the ``aggfunc`` parameter.
+
+.. ipython:: python
+
+   df.pivot_table(index='row', columns='col', fill_value=0, aggfunc='size')
+
+Pivoting with Multiple Aggregations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We can also perform multiple aggregations. For example, to perform both a
+``sum`` and ``mean``, we can pass in a list to the ``aggfunc`` argument.
+
+.. ipython:: python
+
+   df.pivot_table(
+       values='val0', index='row', columns='col', aggfunc=['mean', 'sum'])
+
+Note that to aggregate over multiple value columns, we can pass in a list to
+the ``values`` parameter.
+
+.. ipython:: python
+
+   df.pivot_table(
+       values=['val0', 'val1'], index='row', columns='col', aggfunc=['mean'])
+
+Note that to subdivide over multiple columns, we can pass in a list to the
+``columns`` parameter.
+
+.. 
ipython:: python + + df.pivot_table( + values=['val0'], index='row', columns=['item', 'col'], aggfunc=['mean']) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7153f5c2e7007..f8d153327f135 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5518,50 +5518,72 @@ def pivot(self, index=None, columns=None, values=None): ... "C": ["small", "large", "large", "small", ... "small", "large", "small", "small", ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 + A B C D E + 0 foo one small 1 2 + 1 foo one large 2 4 + 2 foo one large 2 5 + 3 foo two small 3 5 + 4 foo two small 3 6 + 5 bar one large 4 6 + 6 bar one small 5 8 + 7 bar two small 6 9 + 8 bar two large 7 9 + + This first example aggregates values by taking the sum. >>> table = pivot_table(df, values='D', index=['A', 'B'], ... columns=['C'], aggfunc=np.sum) >>> table C large small A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 + bar one 4 5 + two 7 6 + foo one 4 1 + two NaN 6 + + We can also fill missing values using the `fill_value` parameter. >>> table = pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) + ... columns=['C'], aggfunc=np.sum, fill_value=0) >>> table C large small A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 + bar one 4 5 + two 7 6 + foo one 4 1 + two 0 6 + + The next example aggregates by taking the mean across multiple columns. + + >>> table = pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': np.mean, + ... 'E': np.mean}) + >>> table + D E + mean mean + A C + bar large 5.500000 7.500000 + small 5.500000 8.500000 + foo large 2.000000 4.500000 + small 2.333333 4.333333 + + We can also calculate multiple types of aggregations for any given + value column. >>> table = pivot_table(df, values=['D', 'E'], index=['A', 'C'], ... aggfunc={'D': np.mean, ... 
'E': [min, max, np.mean]}) >>> table D E - mean max median min + mean max mean min A C - bar large 5.500000 16 14.5 13 - small 5.500000 15 14.5 14 - foo large 2.000000 10 9.5 9 - small 2.333333 12 11.0 8 + bar large 5.500000 9 7.500000 6 + small 5.500000 9 8.500000 8 + foo large 2.000000 5 4.500000 4 + small 2.333333 6 4.333333 2 Returns ------- From 519de0b2ef1b3dcdc7636463467f7e4f351f95fe Mon Sep 17 00:00:00 2001 From: Kyle Prestel Date: Sun, 11 Nov 2018 23:45:26 -0500 Subject: [PATCH 096/122] CLN:Remove unused **kwargs from user facing methods (#23249) --- pandas/io/pytables.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 41e14e482d061..4c28e0f88b1ae 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1674,7 +1674,7 @@ def cvalues(self): def __iter__(self): return iter(self.values) - def maybe_set_size(self, min_itemsize=None, **kwargs): + def maybe_set_size(self, min_itemsize=None): """ maybe set a string col itemsize: min_itemsize can be an integer or a dict with this columns name with an integer size """ @@ -1687,13 +1687,13 @@ def maybe_set_size(self, min_itemsize=None, **kwargs): self.typ = _tables( ).StringCol(itemsize=min_itemsize, pos=self.pos) - def validate(self, handler, append, **kwargs): + def validate(self, handler, append): self.validate_names() def validate_names(self): pass - def validate_and_set(self, handler, append, **kwargs): + def validate_and_set(self, handler, append): self.set_table(handler.table) self.validate_col() self.validate_attr(append) @@ -3772,7 +3772,7 @@ def read_coordinates(self, where=None, start=None, stop=None, **kwargs): return Index(coords) - def read_column(self, column, where=None, start=None, stop=None, **kwargs): + def read_column(self, column, where=None, start=None, stop=None): """return a single column from the table, generally only indexables are interesting """ @@ -4727,7 +4727,7 @@ class Selection(object): """ - def __init__(self, table, where=None, start=None, stop=None, **kwargs): + def __init__(self, table, where=None, start=None, stop=None): self.table = table self.where = where self.start = start From d4be02007a5e04b9976ccdb91dd5657e6685388b Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 12 Nov 2018 05:31:07 +0000 Subject: [PATCH 097/122] CI: Check in the CI that assert_raises_regex is not being used (#23627) Follow-up to gh-23592. --- ci/code_checks.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index fac5c211cdad8..f0772f72d63d4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,6 +122,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then ! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG + ! 
grep -R --exclude=*.pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check for modules that pandas should not import' ; echo $MSG python -c " import sys From d8826bfa472d8bb40f3b5628f602e5142997db0b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 12 Nov 2018 03:42:44 -0800 Subject: [PATCH 098/122] CLN: datetimelike arrays: isort, small reorg (#23587) --- pandas/core/arrays/datetimelike.py | 8 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/period.py | 106 +++++++++--------- pandas/core/arrays/timedeltas.py | 26 +++-- pandas/core/indexes/datetimelike.py | 44 +++----- pandas/core/indexes/datetimes.py | 133 ++++++++++------------- pandas/core/indexes/period.py | 12 +- pandas/core/indexes/timedeltas.py | 61 +++++------ pandas/tests/arrays/test_datetimelike.py | 37 ++++--- pandas/tests/arrays/test_datetimes.py | 1 + pandas/tests/arrays/test_period.py | 2 +- pandas/tests/arrays/test_timedeltas.py | 5 + setup.cfg | 3 - 13 files changed, 211 insertions(+), 229 deletions(-) create mode 100644 pandas/tests/arrays/test_timedeltas.py diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3fa4f503d2dd5..daf2dcccd284b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -124,8 +124,12 @@ def asi8(self): # do not cache or you'll create a memory leak return self._data.view('i8') - # ------------------------------------------------------------------ - # Array-like Methods + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + @property + def nbytes(self): + return self._data.nbytes @property def shape(self): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b0485cc82f07f..a6f688fb0cf7a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -385,7 +385,7 @@ def _resolution(self): return libresolution.resolution(self.asi8, self.tz) # ---------------------------------------------------------------- - # Array-like Methods + # Array-Like / EA-Interface Methods def __array__(self, dtype=None): if is_object_dtype(dtype): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 482968fdb4766..b343d42ef3b7c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -272,10 +272,6 @@ def _concat_same_type(cls, to_concat): # -------------------------------------------------------------------- # Data / Attributes - @property - def nbytes(self): - # TODO(DatetimeArray): remove - return self._data.nbytes @cache_readonly def dtype(self): @@ -286,10 +282,6 @@ def _ndarray_values(self): # Ordinals return self._data - @property - def asi8(self): - return self._data - @property def freq(self): """Return the frequency object for this PeriodArray.""" @@ -330,6 +322,50 @@ def start_time(self): def end_time(self): return self.to_timestamp(how='end') + def to_timestamp(self, freq=None, how='start'): + """ + Cast to DatetimeArray/Index. + + Parameters + ---------- + freq : string or DateOffset, optional + Target frequency. 
The default is 'D' for week or longer, + 'S' otherwise + how : {'s', 'e', 'start', 'end'} + + Returns + ------- + DatetimeArray/Index + """ + from pandas.core.arrays import DatetimeArrayMixin + + how = libperiod._validate_end_alias(how) + + end = how == 'E' + if end: + if freq == 'B': + # roll forward to ensure we land on B date + adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') + return self.to_timestamp(how='start') + adjust + else: + adjust = Timedelta(1, 'ns') + return (self + self.freq).to_timestamp(how='start') - adjust + + if freq is None: + base, mult = frequencies.get_freq_code(self.freq) + freq = frequencies.get_to_timestamp_base(base) + else: + freq = Period._maybe_convert_freq(freq) + + base, mult = frequencies.get_freq_code(freq) + new_data = self.asfreq(freq, how=how) + + new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) + return DatetimeArrayMixin(new_data, freq='infer') + + # -------------------------------------------------------------------- + # Array-like / EA-Interface Methods + def __repr__(self): return '<{}>\n{}\nLength: {}, dtype: {}'.format( self.__class__.__name__, @@ -456,6 +492,8 @@ def value_counts(self, dropna=False): name=result.index.name) return Series(result.values, index=index, name=result.name) + # -------------------------------------------------------------------- + def shift(self, periods=1): """ Shift values by desired number. @@ -567,49 +605,9 @@ def asfreq(self, freq=None, how='E'): return type(self)(new_data, freq=freq) - def to_timestamp(self, freq=None, how='start'): - """ - Cast to DatetimeArray/Index - - Parameters - ---------- - freq : string or DateOffset, optional - Target frequency. The default is 'D' for week or longer, - 'S' otherwise - how : {'s', 'e', 'start', 'end'} - - Returns - ------- - DatetimeArray/Index - """ - from pandas.core.arrays import DatetimeArrayMixin - - how = libperiod._validate_end_alias(how) - - end = how == 'E' - if end: - if freq == 'B': - # roll forward to ensure we land on B date - adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') - return self.to_timestamp(how='start') + adjust - else: - adjust = Timedelta(1, 'ns') - return (self + self.freq).to_timestamp(how='start') - adjust - - if freq is None: - base, mult = frequencies.get_freq_code(self.freq) - freq = frequencies.get_to_timestamp_base(base) - else: - freq = Period._maybe_convert_freq(freq) - - base, mult = frequencies.get_freq_code(freq) - new_data = self.asfreq(freq, how=how) - - new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) - return DatetimeArrayMixin(new_data, freq='infer') - # ------------------------------------------------------------------ # Formatting + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): """ actually format my specific types """ # TODO(DatetimeArray): remove @@ -630,9 +628,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): values = np.array([formatter(dt) for dt in values]) return values + # Delegation... + def strftime(self, date_format): + return self._format_native_types(date_format=date_format) + def repeat(self, repeats, *args, **kwargs): """ - Repeat elements of a Categorical. + Repeat elements of a PeriodArray. See also -------- @@ -643,10 +645,6 @@ def repeat(self, repeats, *args, **kwargs): values = self._data.repeat(repeats) return type(self)(values, self.freq) - # Delegation... 
- def strftime(self, date_format): - return self._format_native_types(date_format=date_format) - def astype(self, dtype, copy=True): # TODO: Figure out something better here... # We have DatetimeLikeArrayMixin -> diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1f78e0c00bf00..9dbdd6ff8b562 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -190,6 +190,9 @@ def _generate_range(cls, start, end, periods, freq, closed=None): return cls._simple_new(index, freq=freq) + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + # ---------------------------------------------------------------- # Arithmetic Methods @@ -412,20 +415,25 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): array : list-like copy : bool, default False unit : str, default "ns" + The timedelta unit to treat integers as multiples of. errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. Returns ------- - ndarray[timedelta64[ns]] + converted : numpy.ndarray + The sequence converted to a numpy array with dtype ``timedelta64[ns]``. inferred_freq : Tick or None + The inferred frequency of the sequence. Raises ------ - ValueError : data cannot be converted to timedelta64[ns] + ValueError : Data cannot be converted to timedelta64[ns]. Notes ----- - Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause + Unlike `pandas.to_timedelta`, if setting ``errors=ignore`` will not cause errors to be ignored; they are caught and subsequently ignored at a higher level. """ @@ -497,12 +505,13 @@ def ints_to_td64ns(data, unit="ns"): Parameters ---------- - data : np.ndarray with integer-dtype + data : numpy.ndarray with integer-dtype unit : str, default "ns" + The timedelta unit to treat integers as multiples of. Returns ------- - ndarray[timedelta64[ns]] + numpy.ndarray : timedelta64[ns] array converted from data bool : whether a copy was made """ copy_made = False @@ -538,15 +547,18 @@ def objects_to_td64ns(data, unit="ns", errors="raise"): ---------- data : ndarray or Index unit : str, default "ns" + The timedelta unit to treat integers as multiples of. errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. Returns ------- - ndarray[timedelta64[ns]] + numpy.ndarray : timedelta64[ns] array converted from data Raises ------ - ValueError : data cannot be converted to timedelta64[ns] + ValueError : Data cannot be converted to timedelta64[ns]. 
Notes ----- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 59429488a7c2f..1d9d3b1d3bd16 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -4,44 +4,32 @@ """ import warnings -from pandas import compat -from pandas.compat.numpy import function as nv -from pandas.core.tools.timedeltas import to_timedelta - import numpy as np -from pandas._libs import lib, iNaT, NaT -from pandas._libs.tslibs.timestamps import round_nsint64, RoundTo +from pandas._libs import NaT, iNaT, lib +from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64 +import pandas.compat as compat +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - ensure_int64, - is_dtype_equal, - is_float, - is_integer, - is_list_like, - is_scalar, - is_bool_dtype, - is_period_dtype, - is_categorical_dtype, - is_datetime_or_timedelta_dtype, - is_float_dtype, - is_integer_dtype, - is_object_dtype, - is_string_dtype) -from pandas.core.dtypes.generic import ( - ABCIndex, ABCSeries, ABCIndexClass) + ensure_int64, is_bool_dtype, is_categorical_dtype, + is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, + is_integer, is_integer_dtype, is_list_like, is_object_dtype, + is_period_dtype, is_scalar, is_string_dtype) +import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna -from pandas.core import common as com, algorithms, ops - -import pandas.io.formats.printing as printing +from pandas.core import algorithms, common as com, ops from pandas.core.arrays import PeriodArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin +import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs -from pandas.util._decorators import Appender, cache_readonly -import pandas.core.dtypes.concat as _concat +from pandas.core.tools.timedeltas import to_timedelta + +import pandas.io.formats.printing as printing -import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c82cff19573e3..b754b2705d034 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1,57 +1,45 @@ # pylint: disable=E1101 from __future__ import division + +from datetime import datetime, time, timedelta import operator import warnings -from datetime import time, datetime, timedelta import numpy as np from pytz import utc -from pandas.core.base import _shared_docs +from pandas._libs import ( + Timestamp, index as libindex, join as libjoin, lib, tslib as libts) +from pandas._libs.tslibs import ( + ccalendar, conversion, fields, parsing, timezones) +import pandas.compat as compat +from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( - _INT64_DTYPE, - _NS_DTYPE, - is_datetime64_dtype, - is_datetimetz, - is_dtype_equal, - is_integer, - is_float, - is_integer_dtype, - is_datetime64_ns_dtype, - is_period_dtype, - is_string_like, - is_list_like, - is_scalar, - pandas_dtype, - ensure_int64) + _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, + is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float, + is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar, + is_string_like, pandas_dtype) +import 
pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna -import pandas.core.dtypes.concat as _concat -from pandas.core.arrays.datetimes import DatetimeArrayMixin, _to_m8 from pandas.core.arrays import datetimelike as dtl - +from pandas.core.arrays.datetimes import ( + DatetimeArrayMixin as DatetimeArray, _to_m8) +from pandas.core.base import _shared_docs +import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.datetimelike import ( + DatelikeOps, DatetimeIndexOpsMixin, TimelikeOps, wrap_array_method, + wrap_field_accessor) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name -import pandas.compat as compat -from pandas.tseries.frequencies import to_offset, Resolution -from pandas.core.indexes.datetimelike import ( - DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, - wrap_field_accessor, wrap_array_method) -from pandas.tseries.offsets import ( - CDay, prefix_mapping) - -from pandas.util._decorators import Appender, cache_readonly, Substitution -import pandas.core.common as com -import pandas.tseries.offsets as offsets import pandas.core.tools.datetimes as tools -from pandas._libs import (lib, index as libindex, tslib as libts, - join as libjoin, Timestamp) -from pandas._libs.tslibs import (timezones, conversion, fields, parsing, - ccalendar) +from pandas.tseries import offsets +from pandas.tseries.frequencies import Resolution, to_offset +from pandas.tseries.offsets import CDay, prefix_mapping def _new_DatetimeIndex(cls, d): @@ -68,7 +56,7 @@ def _new_DatetimeIndex(cls, d): return result -class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, +class DatetimeIndex(DatetimeArray, DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray of datetime64 data, represented internally as int64, and @@ -182,8 +170,6 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, pandas.to_datetime : Convert argument to datetime """ - _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget) - _typ = 'datetimeindex' _join_precedence = 10 @@ -227,8 +213,6 @@ def _join_i8_wrapper(joinf, **kwargs): _is_numeric_dtype = False _infer_as_myclass = True - _timezone = cache_readonly(DatetimeArrayMixin._timezone.fget) - is_normalized = cache_readonly(DatetimeArrayMixin.is_normalized.fget) # -------------------------------------------------------------------- # Constructors @@ -268,8 +252,7 @@ def __new__(cls, data=None, # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if not isinstance(data, (np.ndarray, Index, ABCSeries, - DatetimeArrayMixin)): + if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)): # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) @@ -283,7 +266,7 @@ def __new__(cls, data=None, data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if isinstance(data, DatetimeArrayMixin): + if isinstance(data, DatetimeArray): if tz is None: tz = data.tz elif data.tz is None: @@ -1125,43 +1108,47 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- # Wrapping DatetimeArray - year = wrap_field_accessor(DatetimeArrayMixin.year) - month = wrap_field_accessor(DatetimeArrayMixin.month) - day = wrap_field_accessor(DatetimeArrayMixin.day) - hour = 
wrap_field_accessor(DatetimeArrayMixin.hour) - minute = wrap_field_accessor(DatetimeArrayMixin.minute) - second = wrap_field_accessor(DatetimeArrayMixin.second) - microsecond = wrap_field_accessor(DatetimeArrayMixin.microsecond) - nanosecond = wrap_field_accessor(DatetimeArrayMixin.nanosecond) - weekofyear = wrap_field_accessor(DatetimeArrayMixin.weekofyear) + _timezone = cache_readonly(DatetimeArray._timezone.fget) + is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) + _resolution = cache_readonly(DatetimeArray._resolution.fget) + + year = wrap_field_accessor(DatetimeArray.year) + month = wrap_field_accessor(DatetimeArray.month) + day = wrap_field_accessor(DatetimeArray.day) + hour = wrap_field_accessor(DatetimeArray.hour) + minute = wrap_field_accessor(DatetimeArray.minute) + second = wrap_field_accessor(DatetimeArray.second) + microsecond = wrap_field_accessor(DatetimeArray.microsecond) + nanosecond = wrap_field_accessor(DatetimeArray.nanosecond) + weekofyear = wrap_field_accessor(DatetimeArray.weekofyear) week = weekofyear - dayofweek = wrap_field_accessor(DatetimeArrayMixin.dayofweek) + dayofweek = wrap_field_accessor(DatetimeArray.dayofweek) weekday = dayofweek - weekday_name = wrap_field_accessor(DatetimeArrayMixin.weekday_name) + weekday_name = wrap_field_accessor(DatetimeArray.weekday_name) - dayofyear = wrap_field_accessor(DatetimeArrayMixin.dayofyear) - quarter = wrap_field_accessor(DatetimeArrayMixin.quarter) - days_in_month = wrap_field_accessor(DatetimeArrayMixin.days_in_month) + dayofyear = wrap_field_accessor(DatetimeArray.dayofyear) + quarter = wrap_field_accessor(DatetimeArray.quarter) + days_in_month = wrap_field_accessor(DatetimeArray.days_in_month) daysinmonth = days_in_month - is_month_start = wrap_field_accessor(DatetimeArrayMixin.is_month_start) - is_month_end = wrap_field_accessor(DatetimeArrayMixin.is_month_end) - is_quarter_start = wrap_field_accessor(DatetimeArrayMixin.is_quarter_start) - is_quarter_end = wrap_field_accessor(DatetimeArrayMixin.is_quarter_end) - is_year_start = wrap_field_accessor(DatetimeArrayMixin.is_year_start) - is_year_end = wrap_field_accessor(DatetimeArrayMixin.is_year_end) - is_leap_year = wrap_field_accessor(DatetimeArrayMixin.is_leap_year) - - tz_localize = wrap_array_method(DatetimeArrayMixin.tz_localize, True) - tz_convert = wrap_array_method(DatetimeArrayMixin.tz_convert, True) - to_perioddelta = wrap_array_method(DatetimeArrayMixin.to_perioddelta, + is_month_start = wrap_field_accessor(DatetimeArray.is_month_start) + is_month_end = wrap_field_accessor(DatetimeArray.is_month_end) + is_quarter_start = wrap_field_accessor(DatetimeArray.is_quarter_start) + is_quarter_end = wrap_field_accessor(DatetimeArray.is_quarter_end) + is_year_start = wrap_field_accessor(DatetimeArray.is_year_start) + is_year_end = wrap_field_accessor(DatetimeArray.is_year_end) + is_leap_year = wrap_field_accessor(DatetimeArray.is_leap_year) + + tz_localize = wrap_array_method(DatetimeArray.tz_localize, True) + tz_convert = wrap_array_method(DatetimeArray.tz_convert, True) + to_perioddelta = wrap_array_method(DatetimeArray.to_perioddelta, False) - to_period = wrap_array_method(DatetimeArrayMixin.to_period, True) - normalize = wrap_array_method(DatetimeArrayMixin.normalize, True) - to_julian_date = wrap_array_method(DatetimeArrayMixin.to_julian_date, + to_period = wrap_array_method(DatetimeArray.to_period, True) + normalize = wrap_array_method(DatetimeArray.normalize, True) + to_julian_date = wrap_array_method(DatetimeArray.to_julian_date, False) - 
month_name = wrap_array_method(DatetimeArrayMixin.month_name, True) - day_name = wrap_array_method(DatetimeArrayMixin.day_name, True) + month_name = wrap_array_method(DatetimeArray.month_name, True) + day_name = wrap_array_method(DatetimeArray.day_name, True) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 128068959ebd3..7890f03a1eba7 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -256,8 +256,12 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result._reset_identity() return result + # ------------------------------------------------------------------------ + # Wrapping PeriodArray + # ------------------------------------------------------------------------ # Data + @property def _ndarray_values(self): return self._data._ndarray_values @@ -361,13 +365,6 @@ def asfreq(self, freq=None, how='E'): result = self._data.asfreq(freq=freq, how=how) return self._simple_new(result, name=self.name) - def _nat_new(self, box=True): - # TODO(DatetimeArray): remove this - result = self._data._nat_new(box=box) - if box: - result = self._simple_new(result, name=self.name) - return result - def to_timestamp(self, freq=None, how='start'): from pandas import DatetimeIndex result = self._data.to_timestamp(freq=freq, how=how) @@ -425,6 +422,7 @@ def _maybe_convert_timedelta(self, other): # ------------------------------------------------------------------------ # Indexing + @cache_readonly def _engine(self): return self._engine_type(lambda: self, len(self)) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 35e17c7400892..d9625d38b85de 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -2,44 +2,36 @@ from datetime import datetime import numpy as np + +from pandas._libs import ( + NaT, Timedelta, index as libindex, join as libjoin, lib) +import pandas.compat as compat +from pandas.util._decorators import Appender, Substitution + from pandas.core.dtypes.common import ( - _TD_DTYPE, - is_integer, - is_float, - is_list_like, - is_scalar, - is_timedelta64_dtype, - is_timedelta64_ns_dtype, - pandas_dtype, - ensure_int64) + _TD_DTYPE, ensure_int64, is_float, is_integer, is_list_like, is_scalar, + is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) +import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna +from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.timedeltas import ( - TimedeltaArrayMixin, _is_convertible_to_td, _to_m8, + TimedeltaArrayMixin as TimedeltaArray, _is_convertible_to_td, _to_m8, sequence_to_td64ns) -from pandas.core.arrays import datetimelike as dtl - -from pandas.core.indexes.base import Index -from pandas.core.indexes.numeric import Int64Index -import pandas.compat as compat - -from pandas.tseries.frequencies import to_offset from pandas.core.base import _shared_docs -from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com -from pandas.core.ops import get_op_result_name -import pandas.core.dtypes.concat as _concat -from pandas.util._decorators import Appender, Substitution +from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.datetimelike import ( - TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op, - wrap_array_method, wrap_field_accessor) -from pandas.core.tools.timedeltas import ( - _coerce_scalar_to_timedelta_type) -from 
pandas._libs import (lib, index as libindex, - join as libjoin, Timedelta, NaT) + DatetimeIndexOpsMixin, TimelikeOps, wrap_arithmetic_op, wrap_array_method, + wrap_field_accessor) +from pandas.core.indexes.numeric import Int64Index +from pandas.core.ops import get_op_result_name +from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type + +from pandas.tseries.frequencies import to_offset -class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, +class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, TimelikeOps, Int64Index): """ Immutable ndarray of timedelta64 data, represented internally as int64, and @@ -223,8 +215,7 @@ def _maybe_update_attributes(self, attrs): return attrs def _evaluate_with_timedelta_like(self, other, op): - result = TimedeltaArrayMixin._evaluate_with_timedelta_like(self, other, - op) + result = TimedeltaArray._evaluate_with_timedelta_like(self, other, op) return wrap_arithmetic_op(self, other, result) def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): @@ -236,12 +227,12 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------- # Wrapping TimedeltaArray - days = wrap_field_accessor(TimedeltaArrayMixin.days) - seconds = wrap_field_accessor(TimedeltaArrayMixin.seconds) - microseconds = wrap_field_accessor(TimedeltaArrayMixin.microseconds) - nanoseconds = wrap_field_accessor(TimedeltaArrayMixin.nanoseconds) + days = wrap_field_accessor(TimedeltaArray.days) + seconds = wrap_field_accessor(TimedeltaArray.seconds) + microseconds = wrap_field_accessor(TimedeltaArray.microseconds) + nanoseconds = wrap_field_accessor(TimedeltaArray.nanoseconds) - total_seconds = wrap_array_method(TimedeltaArrayMixin.total_seconds, True) + total_seconds = wrap_array_method(TimedeltaArray.total_seconds, True) # ------------------------------------------------------------------- diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 5ba99a48e34ad..bb4022c9cac9a 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -4,7 +4,8 @@ import pandas as pd from pandas.core.arrays import ( - DatetimeArrayMixin, PeriodArray, TimedeltaArrayMixin) + DatetimeArrayMixin as DatetimeArray, PeriodArray, + TimedeltaArrayMixin as TimedeltaArray) import pandas.util.testing as tm @@ -61,7 +62,7 @@ def test_array_object_dtype(self, tz_naive_fixture): # GH#23524 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = np.array(list(dti)) @@ -76,7 +77,7 @@ def test_array(self, tz_naive_fixture): # GH#23524 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.asi8.view('M8[ns]') result = np.array(arr) @@ -91,7 +92,7 @@ def test_array_i8_dtype(self, tz_naive_fixture): # GH#23524 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.asi8 result = np.array(arr, dtype='i8') @@ -108,7 +109,7 @@ def test_array_i8_dtype(self, tz_naive_fixture): def test_from_dti(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) assert list(dti) == list(arr) # Check that Index.__new__ knows what to do with DatetimeArray @@ -119,7 
+120,7 @@ def test_from_dti(self, tz_naive_fixture): def test_astype_object(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' @@ -129,11 +130,11 @@ def test_astype_object(self, tz_naive_fixture): def test_to_perioddelta(self, datetime_index, freqstr): # GH#23113 dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.to_perioddelta(freq=freqstr) result = arr.to_perioddelta(freq=freqstr) - assert isinstance(result, TimedeltaArrayMixin) + assert isinstance(result, TimedeltaArray) # placeholder until these become actual EA subclasses and we can use # an EA-specific tm.assert_ function @@ -142,7 +143,7 @@ def test_to_perioddelta(self, datetime_index, freqstr): @pytest.mark.parametrize('freqstr', ['D', 'B', 'W', 'M', 'Q', 'Y']) def test_to_period(self, datetime_index, freqstr): dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.to_period(freq=freqstr) result = arr.to_period(freq=freqstr) @@ -156,7 +157,7 @@ def test_to_period(self, datetime_index, freqstr): def test_bool_properties(self, datetime_index, propname): # in this case _bool_ops is just `is_leap_year` dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) assert dti.freq == arr.freq result = getattr(arr, propname) @@ -167,7 +168,7 @@ def test_bool_properties(self, datetime_index, propname): @pytest.mark.parametrize('propname', pd.DatetimeIndex._field_ops) def test_int_properties(self, datetime_index, propname): dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) result = getattr(arr, propname) expected = np.array(getattr(dti, propname), dtype=result.dtype) @@ -178,7 +179,7 @@ def test_int_properties(self, datetime_index, propname): class TestTimedeltaArray(object): def test_from_tdi(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) - arr = TimedeltaArrayMixin(tdi) + arr = TimedeltaArray(tdi) assert list(arr) == list(tdi) # Check that Index.__new__ knows what to do with TimedeltaArray @@ -188,7 +189,7 @@ def test_from_tdi(self): def test_astype_object(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) - arr = TimedeltaArrayMixin(tdi) + arr = TimedeltaArray(tdi) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' @@ -196,7 +197,7 @@ def test_astype_object(self): def test_to_pytimedelta(self, timedelta_index): tdi = timedelta_index - arr = TimedeltaArrayMixin(tdi) + arr = TimedeltaArray(tdi) expected = tdi.to_pytimedelta() result = arr.to_pytimedelta() @@ -205,7 +206,7 @@ def test_to_pytimedelta(self, timedelta_index): def test_total_seconds(self, timedelta_index): tdi = timedelta_index - arr = TimedeltaArrayMixin(tdi) + arr = TimedeltaArray(tdi) expected = tdi.total_seconds() result = arr.total_seconds() @@ -215,7 +216,7 @@ def test_total_seconds(self, timedelta_index): @pytest.mark.parametrize('propname', pd.TimedeltaIndex._field_ops) def test_int_properties(self, timedelta_index, propname): tdi = timedelta_index - arr = TimedeltaArrayMixin(tdi) + arr = TimedeltaArray(tdi) result = getattr(arr, propname) expected = np.array(getattr(tdi, propname), dtype=result.dtype) @@ -248,9 +249,9 @@ def test_to_timestamp(self, how, period_index): pi = period_index arr = PeriodArray(pi) - expected = DatetimeArrayMixin(pi.to_timestamp(how=how)) + expected = 
DatetimeArray(pi.to_timestamp(how=how))
         result = arr.to_timestamp(how=how)
-        assert isinstance(result, DatetimeArrayMixin)
+        assert isinstance(result, DatetimeArray)

         # placeholder until these become actual EA subclasses and we can use
         # an EA-specific tm.assert_ function
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index a15295cfbd81a..2b630b98b69a2 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Tests for DatetimeArray
 """
diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py
index 95a1d1781456c..63b34db13705e 100644
--- a/pandas/tests/arrays/test_period.py
+++ b/pandas/tests/arrays/test_period.py
@@ -190,7 +190,7 @@ def test_setitem_raises_type():
 # ----------------------------------------------------------------------------
 # Ops

-def tet_sub_period():
+def test_sub_period():
     arr = period_array(['2000', '2001'], freq='D')
     other = pd.Period("2000", freq="M")
     with pytest.raises(IncompatibleFrequency, match="freq"):
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
new file mode 100644
index 0000000000000..3ff807daeeab9
--- /dev/null
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+
+class TestTimedeltaArray(object):
+    pass
diff --git a/setup.cfg b/setup.cfg
index 2e07182196d5b..9f5384170a245 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -120,9 +120,6 @@ skip=
    pandas/core/indexes/numeric.py,
    pandas/core/indexes/interval.py,
    pandas/core/indexes/multi.py,
-    pandas/core/indexes/timedeltas.py,
-    pandas/core/indexes/datetimelike.py,
-    pandas/core/indexes/datetimes.py,
    pandas/core/indexes/base.py,
    pandas/core/indexes/accessors.py,
    pandas/core/indexes/period.py,
From 3b87703ce8c5c6c980e9d0ef2904e00b5d1eaa84 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 12 Nov 2018 06:28:45 -0600
Subject: [PATCH 099/122] BUG: astype fill_value for SparseArray.astype (#23547)

---
 pandas/core/arrays/sparse.py             | 105 ++++++++++++++++++++---
 pandas/tests/arrays/sparse/test_array.py |  28 ++++++
 pandas/tests/arrays/sparse/test_dtype.py |  20 +++++
 3 files changed, 139 insertions(+), 14 deletions(-)

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
index a63b3fb53625f..672261c2a407e 100644
--- a/pandas/core/arrays/sparse.py
+++ b/pandas/core/arrays/sparse.py
@@ -284,6 +284,83 @@ def is_dtype(cls, dtype):
             return True
         return isinstance(dtype, np.dtype) or dtype == 'Sparse'

+    def update_dtype(self, dtype):
+        """Convert the SparseDtype to a new dtype.
+
+        This takes care of converting the ``fill_value``.
+
+        Parameters
+        ----------
+        dtype : Union[str, numpy.dtype, SparseDtype]
+            The new dtype to use.
+
+            * For a SparseDtype, it is simply returned
+            * For a NumPy dtype (or str), the current fill value
+              is converted to the new dtype, and a SparseDtype
+              with `dtype` and the new fill value is returned.
+
+        Returns
+        -------
+        SparseDtype
+            A new SparseDtype with the correct `dtype` and fill value
+            for that `dtype`.
+
+        Raises
+        ------
+        ValueError
+            When the current fill value cannot be converted to the
+            new `dtype` (e.g. trying to convert ``np.nan`` to an
+            integer dtype).
+ + + Examples + -------- + >>> SparseDtype(int, 0).update_dtype(float) + Sparse[float64, 0.0] + + >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) + Sparse[float64, nan] + """ + cls = type(self) + dtype = pandas_dtype(dtype) + + if not isinstance(dtype, cls): + fill_value = astype_nansafe(np.array(self.fill_value), + dtype).item() + dtype = cls(dtype, fill_value=fill_value) + + return dtype + + @property + def _subtype_with_str(self): + """ + Whether the SparseDtype's subtype should be considered ``str``. + + Typically, pandas will store string data in an object-dtype array. + When converting values to a dtype, e.g. in ``.astype``, we need to + be more specific, we need the actual underlying type. + + Returns + ------- + + >>> SparseDtype(int, 1)._subtype_with_str + dtype('int64') + + >>> SparseDtype(object, 1)._subtype_with_str + dtype('O') + + >>> dtype = SparseDtype(str, '') + >>> dtype.subtype + dtype('O') + + >>> dtype._subtype_with_str + str + """ + if isinstance(self.fill_value, compat.string_types): + return type(self.fill_value) + return self.subtype + + # ---------------------------------------------------------------------------- # Array @@ -614,7 +691,7 @@ def __array__(self, dtype=None, copy=True): # Can't put pd.NaT in a datetime64[ns] fill_value = np.datetime64('NaT') try: - dtype = np.result_type(self.sp_values.dtype, fill_value) + dtype = np.result_type(self.sp_values.dtype, type(fill_value)) except TypeError: dtype = object @@ -996,7 +1073,7 @@ def _take_with_fill(self, indices, fill_value=None): if len(self) == 0: # Empty... Allow taking only if all empty if (indices == -1).all(): - dtype = np.result_type(self.sp_values, fill_value) + dtype = np.result_type(self.sp_values, type(fill_value)) taken = np.empty_like(indices, dtype=dtype) taken.fill(fill_value) return taken @@ -1009,7 +1086,7 @@ def _take_with_fill(self, indices, fill_value=None): if self.sp_index.npoints == 0: # Avoid taking from the empty self.sp_values taken = np.full(sp_indexer.shape, fill_value=fill_value, - dtype=np.result_type(fill_value)) + dtype=np.result_type(type(fill_value))) else: taken = self.sp_values.take(sp_indexer) @@ -1030,12 +1107,13 @@ def _take_with_fill(self, indices, fill_value=None): result_type = taken.dtype if m0.any(): - result_type = np.result_type(result_type, self.fill_value) + result_type = np.result_type(result_type, + type(self.fill_value)) taken = taken.astype(result_type) taken[old_fill_indices] = self.fill_value if m1.any(): - result_type = np.result_type(result_type, fill_value) + result_type = np.result_type(result_type, type(fill_value)) taken = taken.astype(result_type) taken[new_fill_indices] = fill_value @@ -1061,7 +1139,7 @@ def _take_without_fill(self, indices): # edge case in take... 
# I think just return
         out = np.full(indices.shape, self.fill_value,
-                      dtype=np.result_type(self.fill_value))
+                      dtype=np.result_type(type(self.fill_value)))
         arr, sp_index, fill_value = make_sparse(out,
                                                 fill_value=self.fill_value)
         return type(self)(arr, sparse_index=sp_index,
@@ -1073,7 +1151,7 @@ def _take_without_fill(self, indices):

         if fillable.any():
             # TODO: may need to coerce array to fill value
-            result_type = np.result_type(taken, self.fill_value)
+            result_type = np.result_type(taken, type(self.fill_value))
             taken = taken.astype(result_type)
             taken[fillable] = self.fill_value

@@ -1093,7 +1171,9 @@ def _concat_same_type(cls, to_concat):

         fill_value = fill_values[0]

-        if len(set(fill_values)) > 1:
+        # np.nan isn't a singleton, so we may end up with multiple
+        # NaNs here, so we ignore the all NA case too.
+        if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
             warnings.warn("Concatenating sparse arrays with multiple fill "
                           "values: '{}'. Picking the first and "
                           "converting the rest.".format(fill_values),
@@ -1212,13 +1292,10 @@ def astype(self, dtype=None, copy=True):
         IntIndex
         Indices: array([2, 3], dtype=int32)
         """
-        dtype = pandas_dtype(dtype)
-
-        if not isinstance(dtype, SparseDtype):
-            dtype = SparseDtype(dtype, fill_value=self.fill_value)
-
+        dtype = self.dtype.update_dtype(dtype)
+        subtype = dtype._subtype_with_str
         sp_values = astype_nansafe(self.sp_values,
-                                   dtype.subtype,
+                                   subtype,
                                    copy=copy)
         if sp_values is self.sp_values and copy:
             sp_values = sp_values.copy()
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index c15696705ab82..0e5a8280cc467 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -477,6 +477,34 @@ def test_astype_all(self, any_real_dtype):
             tm.assert_numpy_array_equal(np.asarray(res.values),
                                         vals.astype(typ))

+    @pytest.mark.parametrize('array, dtype, expected', [
+        (SparseArray([0, 1]), 'float',
+         SparseArray([0., 1.], dtype=SparseDtype(float, 0.0))),
+        (SparseArray([0, 1]), bool, SparseArray([False, True])),
+        (SparseArray([0, 1], fill_value=1), bool,
+         SparseArray([False, True], dtype=SparseDtype(bool, True))),
+        pytest.param(
+            SparseArray([0, 1]), 'datetime64[ns]',
+            SparseArray(np.array([0, 1], dtype='datetime64[ns]'),
+                        dtype=SparseDtype('datetime64[ns]',
+                                          pd.Timestamp('1970'))),
+            marks=[pytest.mark.xfail(reason="NumPy-7619", strict=True)],
+        ),
+        (SparseArray([0, 1, 10]), str,
+         SparseArray(['0', '1', '10'], dtype=SparseDtype(str, '0'))),
+        (SparseArray(['10', '20']), float, SparseArray([10.0, 20.0])),
+        (SparseArray([0, 1, 0]), object,
+         SparseArray([0, 1, 0], dtype=SparseDtype(object, 0))),
+    ])
+    def test_astype_more(self, array, dtype, expected):
+        result = array.astype(dtype)
+        tm.assert_sp_array_equal(result, expected)
+
+    def test_astype_nan_raises(self):
+        arr = SparseArray([1.0, np.nan])
+        with pytest.raises(ValueError, match='Cannot convert non-finite'):
+            arr.astype(int)
+
     def test_set_fill_value(self):
         arr = SparseArray([1., np.nan, 2.], fill_value=np.nan)
         arr.fill_value = 2
diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py
index 7c310693cf26c..2d386de0d31a3 100644
--- a/pandas/tests/arrays/sparse/test_dtype.py
+++ b/pandas/tests/arrays/sparse/test_dtype.py
@@ -139,3 +139,23 @@ def test_parse_subtype(string, expected):
 def test_construct_from_string_fill_value_raises(string):
     with pytest.raises(TypeError, match='fill_value in the string is not'):
         SparseDtype.construct_from_string(string)
+
+
+@pytest.mark.parametrize('original, dtype, expected', [ + (SparseDtype(int, 0), float, SparseDtype(float, 0.0)), + (SparseDtype(int, 1), float, SparseDtype(float, 1.0)), + (SparseDtype(int, 1), str, SparseDtype(object, '1')), + (SparseDtype(float, 1.5), int, SparseDtype(int, 1)), +]) +def test_update_dtype(original, dtype, expected): + result = original.update_dtype(dtype) + assert result == expected + + +@pytest.mark.parametrize("original, dtype", [ + (SparseDtype(float, np.nan), int), + (SparseDtype(str, 'abc'), int), +]) +def test_update_dtype_raises(original, dtype): + with pytest.raises(ValueError): + original.update_dtype(dtype) From 999ef436f32ef8584fdef3b2255fd50845401ef0 Mon Sep 17 00:00:00 2001 From: Jason Kiley Date: Mon, 12 Nov 2018 06:31:45 -0600 Subject: [PATCH 100/122] More helpful Stata string length error. (#23629) --- pandas/io/stata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index df0d47b063411..66e996075f1ed 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -461,7 +461,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): excessive_string_length_error = """ Fixed width strings in Stata .dta files are limited to 244 (or fewer) -characters. Column '%s' does not satisfy this restriction. +characters. Column '%s' does not satisfy this restriction. Use the +'version=117' parameter to write the newer (Stata 13 and later) format. """ From 7fc37324fd86c5e22ddb728e2d9319d5aaede0f3 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 12 Nov 2018 05:04:40 -0800 Subject: [PATCH 101/122] DEPR: Deprecate usecols as int in read_excel (#23635) Follow-up to gh-23544. --- doc/source/io.rst | 5 +++++ doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/excel.py | 8 ++++++++ pandas/tests/io/test_excel.py | 33 ++++++++++++++++++++++----------- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index beb1c1daba962..34dc185c200e6 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2854,6 +2854,11 @@ It is often the case that users will insert columns to do temporary computations in Excel and you may not want to read in those columns. ``read_excel`` takes a ``usecols`` keyword to allow you to specify a subset of columns to parse. +.. deprecated:: 0.24.0 + +Passing in an integer for ``usecols`` has been deprecated. Please pass in a list +of ints from 0 to ``usecols`` inclusive instead. + If ``usecols`` is an integer, then it is assumed to indicate the last column to be parsed. diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 73d706ed98416..190456e97d331 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -970,6 +970,7 @@ Deprecations - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) +- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) .. 
_whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 2e93c237bb7ea..c25a7670cce44 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -95,6 +95,10 @@ usecols : int, str, list-like, or callable default None * If None, then parse all columns, * If int, then indicates last column to be parsed + + .. deprecated:: 0.24.0 + Pass in a list of ints instead from 0 to `usecols` inclusive. + * If string, then indicates comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of both sides. @@ -778,6 +782,10 @@ def _maybe_convert_usecols(usecols): return usecols if is_integer(usecols): + warnings.warn(("Passing in an integer for `usecols` has been " + "deprecated. Please pass in a list of ints from " + "0 to `usecols` inclusive instead."), + FutureWarning, stacklevel=2) return lrange(usecols + 1) if isinstance(usecols, compat.string_types): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 49a3a3d58672d..9b147d53c06c4 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -105,23 +105,34 @@ def get_exceldf(self, basename, ext, *args, **kwds): class ReadingTestsBase(SharedItems): # This is based on ExcelWriterBase - @td.skip_if_no('xlrd', '1.0.1') # GH-22682 + @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 def test_usecols_int(self, ext): - dfref = self.get_csv_refdf('test1') - dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols=3) + df_ref = self.get_csv_refdf("test1") + df_ref = df_ref.reindex(columns=["A", "B", "C"]) - with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + # usecols as int + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df1 = self.get_exceldf("test1", ext, "Sheet1", + index_col=0, usecols=3) + + # usecols as int + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], + index_col=0, usecols=3) + + # parse_cols instead of usecols, usecols as int + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df3 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], index_col=0, parse_cols=3) # TODO add index to xls file) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - tm.assert_frame_equal(df3, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + tm.assert_frame_equal(df3, df_ref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_usecols_list(self, ext): From bb9f4eba047eab97289810abae24011b8f1d3549 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 12 Nov 2018 05:12:59 -0800 Subject: [PATCH 102/122] BUG: Don't over-optimize memory with jagged CSV (#23527) With jagged CSV's, we risk being too quick to dump memory that we need to allocate because previous chunks would have indicated much larger rows than we can anticipate in subsequent chunks. Closes gh-23509. 
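
For illustration, a minimal sketch of the failure mode this patch fixes,
adapted from the regression test added below (the exact shape is just an
example, not part of the fix):

    import pandas as pd
    from io import StringIO

    # Seven one-field rows followed by a single ten-field row.  Read in
    # chunks of four rows, the earlier chunks report far fewer words than
    # the final chunk needs, which previously let the parser trim its
    # word buffer below what the widest row required.
    data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)])
    reader = pd.read_csv(StringIO(data), names=range(10), chunksize=4)
    result = pd.concat(reader)  # 8x10 frame, NaN-padded short rows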
--- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/parsers.pyx | 1 + pandas/_libs/src/parser/tokenizer.c | 33 +++++++++++++++++++++++++++-- pandas/_libs/src/parser/tokenizer.h | 1 + pandas/tests/io/parser/common.py | 16 ++++++++++++++ 5 files changed, 50 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 190456e97d331..a9a247cc4fc53 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1299,6 +1299,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) - :func:`read_csv()` and func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`) - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`) +- Bug in :func:`read_csv()` in which memory management was prematurely optimized for the C engine when the data was being read in chunks (:issue:`23509`) - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 3870a55c22fd6..40aa03caa56eb 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -132,6 +132,7 @@ cdef extern from "parser/tokenizer.h": int64_t *word_starts # where we are in the stream int64_t words_len int64_t words_cap + int64_t max_words_cap # maximum word cap encountered char *pword_start # pointer to stream start of current field int64_t word_start # position start of current field diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 2fce241027d56..e46e1e85f1c81 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -197,6 +197,7 @@ int parser_init(parser_t *self) { sz = sz ? sz : 1; self->words = (char **)malloc(sz * sizeof(char *)); self->word_starts = (int64_t *)malloc(sz * sizeof(int64_t)); + self->max_words_cap = sz; self->words_cap = sz; self->words_len = 0; @@ -247,7 +248,7 @@ void parser_del(parser_t *self) { } static int make_stream_space(parser_t *self, size_t nbytes) { - int64_t i, cap; + int64_t i, cap, length; int status; void *orig_ptr, *newptr; @@ -287,8 +288,23 @@ static int make_stream_space(parser_t *self, size_t nbytes) { */ cap = self->words_cap; + + /** + * If we are reading in chunks, we need to be aware of the maximum number + * of words we have seen in previous chunks (self->max_words_cap), so + * that way, we can properly allocate when reading subsequent ones. + * + * Otherwise, we risk a buffer overflow if we mistakenly under-allocate + * just because a recent chunk did not have as many words. 
+ */ + if (self->words_len + nbytes < self->max_words_cap) { + length = self->max_words_cap - nbytes; + } else { + length = self->words_len; + } + self->words = - (char **)grow_buffer((void *)self->words, self->words_len, + (char **)grow_buffer((void *)self->words, length, (int64_t*)&self->words_cap, nbytes, sizeof(char *), &status); TRACE( @@ -1241,6 +1257,19 @@ int parser_trim_buffers(parser_t *self) { int64_t i; + /** + * Before we free up space and trim, we should + * save how many words we saw when parsing, if + * it exceeds the maximum number we saw before. + * + * This is important for when we read in chunks, + * so that we can inform subsequent chunk parsing + * as to how many words we could possibly see. + */ + if (self->words_cap > self->max_words_cap) { + self->max_words_cap = self->words_cap; + } + /* trim words, word_starts */ new_cap = _next_pow2(self->words_len) + 1; if (new_cap < self->words_cap) { diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 9fc3593aaaf5b..c32c061c7fa89 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -142,6 +142,7 @@ typedef struct parser_t { int64_t *word_starts; // where we are in the stream int64_t words_len; int64_t words_cap; + int64_t max_words_cap; // maximum word cap encountered char *pword_start; // pointer to stream start of current field int64_t word_start; // position start of current field diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 18690a18f7cb3..67a3bd6f9b75e 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -458,6 +458,22 @@ def test_read_chunksize_generated_index(self): tm.assert_frame_equal(pd.concat(reader), df) + def test_read_chunksize_jagged_names(self): + # see gh-23509 + data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)]) + reader = self.read_csv(StringIO(data), names=range(10), chunksize=4) + + expected = DataFrame() + + for i in range(10): + if i == 0: + expected[i] = [0] * 8 + else: + expected[i] = [np.nan] * 7 + [0] + + result = pd.concat(reader) + tm.assert_frame_equal(result, expected) + def test_read_text_list(self): data = """A,B,C\nfoo,1,2,3\nbar,4,5,6""" as_list = [['A', 'B', 'C'], ['foo', '1', '2', '3'], ['bar', From 76c078c597d8016cb16621b4d1803847960414f9 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 12 Nov 2018 15:33:47 +0000 Subject: [PATCH 103/122] DOC: Adding validation of the section order in docstrings (#23607) * Adding validation of the section order in docstrings * Updating allowed sections --- scripts/tests/test_validate_docstrings.py | 34 ++++++++++++++++++++ scripts/validate_docstrings.py | 38 +++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index ccd5f56141a6a..c1bdab73c2671 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -350,6 +350,35 @@ def private_classes(self): This mentions NDFrame, which is not correct. """ + def unknown_section(self): + """ + This section has an unknown section title. + + Unknown Section + --------------- + This should raise an error in the validation. + """ + + def sections_in_wrong_order(self): + """ + This docstring has the sections in the wrong order. + + Parameters + ---------- + name : str + This section is in the right position. 
+ + Examples + -------- + >>> print('So far Examples is good, as it goes before Parameters') + So far Examples is good, as it goes before Parameters + + See Also + -------- + function : This should generate an error, as See Also needs to go + before Examples. + """ + class BadSummaries(object): @@ -706,6 +735,11 @@ def test_bad_generic_functions(self, func): ('BadGenericDocStrings', 'private_classes', ("Private classes (NDFrame) should not be mentioned in public " 'docstrings',)), + ('BadGenericDocStrings', 'unknown_section', + ('Found unknown section "Unknown Section".',)), + ('BadGenericDocStrings', 'sections_in_wrong_order', + ('Wrong order of sections. "See Also" should be located before ' + '"Notes"',)), ('BadSeeAlso', 'desc_no_period', ('Missing period at end of description for See Also "Series.iloc"',)), ('BadSeeAlso', 'desc_first_letter_lowercase', diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index ed84e58049cae..7da77a1f60ad5 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -56,6 +56,9 @@ PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] DIRECTIVES = ['versionadded', 'versionchanged', 'deprecated'] +ALLOWED_SECTIONS = ['Parameters', 'Attributes', 'Methods', 'Returns', 'Yields', + 'Other Parameters', 'Raises', 'Warns', 'See Also', 'Notes', + 'References', 'Examples'] ERROR_MSGS = { 'GL01': 'Docstring text (summary) should start in the line immediately ' 'after the opening quotes (not in the same line, or leaving a ' @@ -69,6 +72,10 @@ 'mentioned in public docstrings', 'GL05': 'Tabs found at the start of line "{line_with_tabs}", please use ' 'whitespace only', + 'GL06': 'Found unknown section "{section}". Allowed sections are: ' + '{allowed_sections}', + 'GL07': 'Wrong order of sections. 
"{wrong_section}" should be located ' + 'before "{goes_before}", the right order is: {sorted_sections}', 'SS01': 'No summary found (a short summary in a single line should be ' 'present at the beginning of the docstring)', 'SS02': 'Summary does not start with a capital letter', @@ -353,6 +360,18 @@ def double_blank_lines(self): prev = row.strip() return False + @property + def section_titles(self): + sections = [] + self.doc._doc.reset() + while not self.doc._doc.eof(): + content = self.doc._read_to_next_section() + if (len(content) > 1 + and len(content[0]) == len(content[1]) + and set(content[1]) == {'-'}): + sections.append(content[0]) + return sections + @property def summary(self): return ' '.join(self.doc['Summary']) @@ -580,6 +599,25 @@ def validate_one(func_name): if re.match("^ *\t", line): errs.append(error('GL05', line_with_tabs=line.lstrip())) + unseen_sections = list(ALLOWED_SECTIONS) + for section in doc.section_titles: + if section not in ALLOWED_SECTIONS: + errs.append(error('GL06', + section=section, + allowed_sections=', '.join(ALLOWED_SECTIONS))) + else: + if section in unseen_sections: + section_idx = unseen_sections.index(section) + unseen_sections = unseen_sections[section_idx + 1:] + else: + section_idx = ALLOWED_SECTIONS.index(section) + goes_before = ALLOWED_SECTIONS[section_idx + 1] + errs.append(error('GL07', + sorted_sections=' > '.join(ALLOWED_SECTIONS), + wrong_section=section, + goes_before=goes_before)) + break + if not doc.summary: errs.append(error('SS01')) else: From fdda5d7407d0b62860f44a6574e1eba0af00d0b6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 12 Nov 2018 08:58:52 -0800 Subject: [PATCH 104/122] CLN: remove incorrect usages of com.AbstractMethodError (#23625) --- pandas/core/generic.py | 8 +++++--- pandas/core/groupby/generic.py | 5 +++-- pandas/core/groupby/groupby.py | 5 +++-- pandas/core/groupby/ops.py | 3 ++- pandas/core/indexes/datetimelike.py | 5 +++-- pandas/io/common.py | 7 +++---- pandas/io/html.py | 19 +++++++++---------- pandas/io/json/json.py | 8 ++++---- pandas/io/parquet.py | 6 +++--- pandas/io/parsers.py | 6 +++--- pandas/plotting/_core.py | 3 ++- pandas/tests/io/parser/test_parsers.py | 4 ++-- pandas/tests/test_resample.py | 5 ++--- pandas/tseries/offsets.py | 8 ++++---- 14 files changed, 48 insertions(+), 44 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 34f25c5634d5b..2c7f6ae8e3533 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11,6 +11,8 @@ import pandas as pd from pandas._libs import properties, Timestamp, iNaT +from pandas.errors import AbstractMethodError + from pandas.core.dtypes.common import ( ensure_int64, ensure_object, @@ -200,7 +202,7 @@ def _constructor(self): """Used when a manipulation result has the same dimensions as the original. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def __unicode__(self): # unicode representation based upon iterating over self @@ -221,7 +223,7 @@ def _constructor_sliced(self): """Used when a manipulation result has one lower dimension(s) as the original, such as DataFrame single columns slicing. 
""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @property def _constructor_expanddim(self): @@ -2884,7 +2886,7 @@ def _iget_item_cache(self, item): return lower def _box_item_values(self, key, values): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 451f1199ac8e6..b0477c7d3a8ad 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -18,6 +18,7 @@ import pandas.compat as compat from pandas.compat import lzip, map from pandas.compat.numpy import _np_version_under1p13 +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -240,7 +241,7 @@ def _aggregate_generic(self, func, *args, **kwargs): return self._wrap_generic_output(result, obj) def _wrap_aggregated_output(self, output, names=None): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _aggregate_item_by_item(self, func, *args, **kwargs): # only for axis==0 @@ -1659,4 +1660,4 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): raise ValueError("axis value must be greater than 0") def _wrap_aggregated_output(self, output, names=None): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ea7507799fa9a..12327e1cf148e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -20,6 +20,7 @@ class providing the base-class of operations. import pandas.compat as compat from pandas.compat import callable, range, set_function_name, zip from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.util._validators import validate_kwargs @@ -706,7 +707,7 @@ def _iterate_slices(self): yield self._selection_name, self._selected_obj def transform(self, func, *args, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _cumcount_array(self, ascending=True): """ @@ -861,7 +862,7 @@ def _python_agg_general(self, func, *args, **kwargs): return self._wrap_aggregated_output(output) def _wrap_applied_output(self, *args, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 390334a89cbfe..125bd9a5e855d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -13,6 +13,7 @@ from pandas._libs import NaT, groupby as libgroupby, iNaT, lib, reduction from pandas.compat import lzip, range, zip +from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -841,7 +842,7 @@ def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] def apply(self, f): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SeriesSplitter(DataSplitter): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1d9d3b1d3bd16..4547f47314bad 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ 
-10,6 +10,7 @@ from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64 import pandas.compat as compat from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -21,7 +22,7 @@ from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna -from pandas.core import algorithms, common as com, ops +from pandas.core import algorithms, ops from pandas.core.arrays import PeriodArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin import pandas.core.indexes.base as ibase @@ -531,7 +532,7 @@ def argmax(self, axis=None, *args, **kwargs): @property def _formatter_func(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _format_attrs(self): """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 155cf566b4c40..3a67238a66450 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,12 +10,11 @@ import pandas.compat as compat from pandas.compat import BytesIO, StringIO, string_types, text_type from pandas.errors import ( # noqa - DtypeWarning, EmptyDataError, ParserError, ParserWarning) + AbstractMethodError, DtypeWarning, EmptyDataError, ParserError, + ParserWarning) from pandas.core.dtypes.common import is_file_like, is_number -import pandas.core.common as com - from pandas.io.formats.printing import pprint_thing # gh-12665: Alias for now and remove later. @@ -67,7 +66,7 @@ def __iter__(self): return self def __next__(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) if not compat.PY3: diff --git a/pandas/io/html.py b/pandas/io/html.py index bcbb07c6dddfb..c967bdd29df1f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -12,12 +12,11 @@ from pandas.compat import ( binary_type, iteritems, lmap, lrange, raise_with_traceback, string_types, u) -from pandas.errors import EmptyDataError +from pandas.errors import AbstractMethodError, EmptyDataError from pandas.core.dtypes.common import is_list_like from pandas import Series -import pandas.core.common as com from pandas.io.common import _is_url, _validate_header_arg, urlopen from pandas.io.formats.printing import pprint_thing @@ -256,7 +255,7 @@ def _text_getter(self, obj): text : str or unicode The text from an individual DOM node. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_td(self, obj): """Return the td elements from a row element. @@ -271,7 +270,7 @@ def _parse_td(self, obj): list of node-like These are the elements of each row, i.e., the columns. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_thead_tr(self, table): """ @@ -286,7 +285,7 @@ def _parse_thead_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tbody_tr(self, table): """ @@ -305,7 +304,7 @@ def _parse_tbody_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tfoot_tr(self, table): """ @@ -320,7 +319,7 @@ def _parse_tfoot_tr(self, table): list of node-like These are the row elements of a table. 
""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tables(self, doc, match, attrs): """ @@ -346,7 +345,7 @@ def _parse_tables(self, doc, match, attrs): list of node-like HTML
<table> elements to be parsed into raw data. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _equals_tag(self, obj, tag): """ @@ -365,7 +364,7 @@ boolean Whether `obj`'s tag name is `tag` """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _build_doc(self): """ @@ -376,7 +375,7 @@ node-like The DOM from which to parse the table element. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_thead_tbody_tfoot(self, table_html): """ diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index af7b390de213d..4453416a97f89 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -7,11 +7,11 @@ import pandas._libs.json as json from pandas._libs.tslibs import iNaT from pandas.compat import StringIO, long, to_str, u +from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_period_dtype from pandas import DataFrame, MultiIndex, Series, compat, isna, to_datetime -import pandas.core.common as com from pandas.core.reshape.concat import concat from pandas.io.common import ( @@ -97,7 +97,7 @@ def __init__(self, obj, orient, date_format, double_precision, self._format_axes() def _format_axes(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def write(self): return self._write(self.obj, self.orient, self.double_precision, @@ -658,7 +658,7 @@ def _convert_axes(self): setattr(self.obj, axis, new_axis) def _try_convert_types(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): @@ -771,7 +771,7 @@ def _try_convert_to_date(self, data): return data, False def _try_convert_dates(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SeriesParser(Parser): diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 3d72b1ec3a47f..aad59f9805a3b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -4,9 +4,9 @@ from warnings import catch_warnings from pandas.compat import string_types +from pandas.errors import AbstractMethodError from pandas import DataFrame, get_option -import pandas.core.common as com from pandas.io.common import get_filepath_or_buffer, is_s3_url @@ -67,10 +67,10 @@ def validate_dataframe(df): raise ValueError("Index level names must be strings") def write(self, df, path, compression, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def read(self, path, columns=None, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class PyArrowImpl(BaseImpl): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 12914c10e0655..9fd35effe1b07 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -20,7 +20,8 @@ import pandas.compat as compat from pandas.compat import ( PY3, StringIO, lrange, lzip, map, range, string_types, u, zip) -from pandas.errors import EmptyDataError, ParserError, ParserWarning +from pandas.errors import ( + AbstractMethodError, EmptyDataError, ParserError, ParserWarning) from pandas.util._decorators import Appender from pandas.core.dtypes.cast import astype_nansafe @@ -33,7 +34,6 @@ from pandas.core import algorithms from pandas.core.arrays import Categorical -import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.index import ( Index, MultiIndex, RangeIndex, ensure_index_from_sequences) @@ -1050,7 +1050,7 @@ def 
_make_engine(self, engine='c'): self._engine = klass(self.f, **self.options) def _failover_to_python(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def read(self, nrows=None): nrows = _validate_integer('nrows', nrows) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 405c534e8528b..1c70ece434abb 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -12,6 +12,7 @@ from pandas.util._decorators import cache_readonly, Appender from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat +from pandas.errors import AbstractMethodError import pandas.core.common as com from pandas.core.base import PandasObject @@ -373,7 +374,7 @@ def _compute_plot_data(self): self.data = numeric_data def _make_plot(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _add_table(self): if self.table is False: diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 50d927176a7b4..21286e9b82323 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -6,9 +6,9 @@ from pandas._libs.tslib import Timestamp from pandas.compat import StringIO +from pandas.errors import AbstractMethodError from pandas import DataFrame, read_csv, read_table -import pandas.core.common as com import pandas.util.testing as tm from .c_parser_only import CParserTests @@ -46,7 +46,7 @@ def read_table(self, *args, **kwargs): raise NotImplementedError def float_precision_choices(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @pytest.fixture(autouse=True) def setup_method(self, datapath): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 756385f0cfb56..7e0342e8b987a 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -24,12 +24,11 @@ notna, Timestamp, Timedelta) from pandas.compat import range, lrange, zip, OrderedDict -from pandas.errors import UnsupportedFunctionCall +from pandas.errors import AbstractMethodError, UnsupportedFunctionCall import pandas.tseries.offsets as offsets from pandas.tseries.offsets import Minute, BDay from pandas.core.groupby.groupby import DataError -import pandas.core.common as com from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import period_range, PeriodIndex, Period @@ -599,7 +598,7 @@ def index(self, _index_start, _index_end, _index_freq): @pytest.fixture def _series_name(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @pytest.fixture def _static_values(self, index): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 25c419e485db1..067a7d4622ca2 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -9,7 +9,6 @@ from pandas.core.dtypes.generic import ABCPeriod from pandas.core.tools.datetimes import to_datetime -import pandas.core.common as com # import after tools, dateutil check from dateutil.easter import easter @@ -29,6 +28,7 @@ roll_yearday, shift_month, BaseOffset) +from pandas.errors import AbstractMethodError __all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay', @@ -1097,7 +1097,7 @@ def apply(self, other): def _apply(self, n, other): """Handle specific apply logic for child classes""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @apply_index_wraps def apply_index(self, i): @@ -1137,11 +1137,11 @@ def _get_roll(self, i, before_day_of_month, 
after_day_of_month): The roll array is based on the fact that i gets rolled back to the first day of the month. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _apply_index_days(self, i, roll): """Apply the correct day for each date in i""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SemiMonthEnd(SemiMonthOffset): From d3e43f8f1c639dd4af5d5ab852cba27d84c4f124 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Nov 2018 11:02:51 -0600 Subject: [PATCH 105/122] DOC: avoid SparseArray.take error (#23637) Closes https://github.com/pandas-dev/pandas/issues/22215 SparseArray.take not accepting scalars is already in 0.24.0.txt --- doc/source/whatsnew/v0.18.1.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 34921505a46bf..2445daebb580a 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -266,7 +266,7 @@ These changes conform sparse handling to return the correct types and work to ma ``SparseArray.take`` now returns a scalar for scalar input, ``SparseArray`` for others. Furthermore, it handles a negative indexer with the same rule as ``Index`` (:issue:`10560`, :issue:`12796`) -.. ipython:: python +.. code-block:: python s = pd.SparseArray([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) s.take(0) From c7dc40c9fe6831fea3f7a862be5e6cd63a709b5a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 12 Nov 2018 21:54:49 +0100 Subject: [PATCH 106/122] DOC: clean-up recent doc errors/warnings (#23636) --- doc/source/advanced.rst | 2 +- doc/source/ecosystem.rst | 2 +- doc/source/timeseries.rst | 4 +- doc/source/whatsnew/v0.24.0.txt | 114 +++++++++++++++------------- pandas/_libs/tslibs/timedeltas.pyx | 14 ++-- pandas/core/frame.py | 3 + pandas/core/generic.py | 14 ++-- pandas/core/indexes/datetimelike.py | 3 +- pandas/core/tools/timedeltas.py | 14 ++-- 9 files changed, 91 insertions(+), 79 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 24c117a534209..563c869eff54d 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -702,7 +702,7 @@ Index Types We have discussed ``MultiIndex`` in the previous sections pretty extensively. Documentation about ``DatetimeIndex`` and ``PeriodIndex`` are shown :ref:`here `, -and documentation about ``TimedeltaIndex`` is found :ref:`here `. +and documentation about ``TimedeltaIndex`` is found :ref:`here `. In the following sub-sections we will highlight some other index types. diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index edbd6629a617d..ad389bbe35b71 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying (Note: HTML tables may or may not be compatible with non-HTML Jupyter output formats.) -See :ref:`Options and Settings ` and :ref:`options.available ` +See :ref:`Options and Settings ` and :ref:`options.available` for pandas ``display.`` settings. `quantopian/qgrid `__ diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 42fd356bbe65a..cc377f45c4b8d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2372,7 +2372,8 @@ can be controlled by the ``nonexistent`` argument. The following options are ava * ``shift``: Shifts nonexistent times forward to the closest real time .. 
ipython:: python - dti = date_range(start='2015-03-29 01:30:00', periods=3, freq='H') + + dti = pd.date_range(start='2015-03-29 01:30:00', periods=3, freq='H') # 2:30 is a nonexistent time Localization of nonexistent times will raise an error by default. @@ -2385,6 +2386,7 @@ Localization of nonexistent times will raise an error by default. Transform nonexistent times to ``NaT`` or the closest real time forward in time. .. ipython:: python + dti dti.tz_localize('Europe/Warsaw', nonexistent='shift') dti.tz_localize('Europe/Warsaw', nonexistent='NaT') diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a9a247cc4fc53..358f3e7429394 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -14,11 +14,10 @@ New features ~~~~~~~~~~~~ - :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) -- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups -` for more information (:issue:`15475`, :issue:`15506`) +- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups ` for more information (:issue:`15475`, :issue:`15506`). - :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing -the user to override the engine's default behavior to include or omit the -dataframe's indexes from the resulting Parquet file. (:issue:`20768`) + the user to override the engine's default behavior to include or omit the + dataframe's indexes from the resulting Parquet file. (:issue:`20768`) - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) @@ -227,7 +226,7 @@ Other Enhancements - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) -- :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`22647`) +- :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). 
- :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, @@ -237,7 +236,7 @@ Other Enhancements - Compatibility with Matplotlib 3.0 (:issue:`22790`). - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) -- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) .. _whatsnew_0240.api_breaking: @@ -283,10 +282,10 @@ and replaced it with references to `pyarrow` (:issue:`21639` and :issue:`23053`) .. _whatsnew_0240.api_breaking.csv_line_terminator: `os.linesep` is used for ``line_terminator`` of ``DataFrame.to_csv`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`DataFrame.to_csv` now uses :func:`os.linesep` rather than ``'\n'`` - for the default line terminator (:issue:`20353`). +for the default line terminator (:issue:`20353`). This change only affects when running on Windows, where ``'\r\n'`` was used for line terminator even when ``'\n'`` was passed in ``line_terminator``. @@ -294,26 +293,26 @@ Previous Behavior on Windows: .. code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: # When passing file PATH to to_csv, line_terminator does not work, and csv is saved with '\r\n'. - ...: # Also, this converts all '\n's in the data to '\r\n'. - ...: data.to_csv("test.csv", index=False, line_terminator='\n') + In [2]: # When passing file PATH to to_csv, line_terminator does not work, and csv is saved with '\r\n'. + ...: # Also, this converts all '\n's in the data to '\r\n'. + ...: data.to_csv("test.csv", index=False, line_terminator='\n') -In [3]: with open("test.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\r\n"a\r\nbc","a\r\r\nbc"\r\n' + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\r\n"a\r\nbc","a\r\r\nbc"\r\n' -In [4]: # When passing file OBJECT with newline option to to_csv, line_terminator works. - ...: with open("test2.csv", mode='w', newline='\n') as f: - ...: data.to_csv(f, index=False, line_terminator='\n') + In [4]: # When passing file OBJECT with newline option to to_csv, line_terminator works. 
+ ...: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False, line_terminator='\n') -In [5]: with open("test2.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + In [5]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' New Behavior on Windows: @@ -322,54 +321,54 @@ New Behavior on Windows: - The value of ``line_terminator`` only affects the line terminator of CSV, so it does not change the value inside the data. -.. code-block:: ipython + .. code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: data.to_csv("test.csv", index=False, line_terminator='\n') + In [2]: data.to_csv("test.csv", index=False, line_terminator='\n') -In [3]: with open("test.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' - On Windows, the value of ``os.linesep`` is ``'\r\n'``, so if ``line_terminator`` is not set, ``'\r\n'`` is used for line terminator. - Again, it does not affect the value inside the data. -.. code-block:: ipython + .. code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: data.to_csv("test.csv", index=False) + In [2]: data.to_csv("test.csv", index=False) -In [3]: with open("test.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' - For files objects, specifying ``newline`` is not sufficient to set the line terminator. You must pass in the ``line_terminator`` explicitly, even in this case. -.. code-block:: ipython + .. code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: with open("test2.csv", mode='w', newline='\n') as f: - ...: data.to_csv(f, index=False) + In [2]: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False) -In [3]: with open("test2.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + In [3]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' .. _whatsnew_0240.api_breaking.interval_values: @@ -777,17 +776,20 @@ Previous Behavior: df = pd.DataFrame(arr) .. ipython:: python + # Comparison operations and arithmetic operations both broadcast. df == arr[[0], :] df + arr[[0], :] .. ipython:: python + # Comparison operations and arithmetic operations both broadcast. df == (1, 2) df + (1, 2) .. ipython:: python :okexcept: + # Comparison operations and arithmetic opeartions both raise ValueError. 
df == (1, 2, 3) df + (1, 2, 3) @@ -797,8 +799,9 @@ Previous Behavior: DataFrame Arithmetic Operations Broadcasting Changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + :class:`DataFrame` arithmetic operations when operating with 2-dimensional -``np.ndarray`` objects now broadcast in the same way as ``np.ndarray``s +``np.ndarray`` objects now broadcast in the same way as ``np.ndarray`` broadcast. (:issue:`23000`) Previous Behavior: @@ -817,11 +820,13 @@ Previous Behavior: *Current Behavior*: .. ipython:: python + arr = np.arange(6).reshape(3, 2) df = pd.DataFrame(arr) df .. ipython:: python + df + arr[[0], :] # 1 row, 2 columns df + arr[:, [1]] # 1 column, 3 rows @@ -888,7 +893,7 @@ Current Behavior: ... OverflowError: Trying to coerce negative values to unsigned integers -.. _whatsnew_0240.api.crosstab_dtypes +.. _whatsnew_0240.api.crosstab_dtypes: Crosstab Preserves Dtypes ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1008,6 +1013,7 @@ Current Behavior: .. ipython:: python :okwarning: + per = pd.Period('2016Q1') per + 3 diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c09a8e5b395ee..ca8491726a5f7 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1111,14 +1111,14 @@ class Timedelta(_Timedelta): Parameters ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', - 'hours', hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', - 'T', 'S', 'seconds', 'sec', 'second', 'ms', - 'milliseconds', 'millisecond', 'milli', 'millis', 'L', - 'us', 'microseconds', 'microsecond', 'micro', 'micros', - 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond' - 'N'}, optional + unit : str, optional Denote the unit of the input, if input is an integer. Default 'ns'. + Possible values: + {'Y', 'M', 'W', 'D', 'days', 'day', 'hours', 'hour', 'hr', 'h', + 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', 'sec', 'second', + 'ms', 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', 'U', + 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', 'N'} days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional Values for construction in compat with datetime.timedelta. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f8d153327f135..f6f91ff5081f1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3409,6 +3409,7 @@ def assign(self, **kwargs): Berkeley 25.0 Where the value is a callable, evaluated on `df`: + >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) temp_c temp_f Portland 17.0 62.6 @@ -3416,6 +3417,7 @@ def assign(self, **kwargs): Alternatively, the same behavior can be achieved by directly referencing an existing Series or sequence: + >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) temp_c temp_f Portland 17.0 62.6 @@ -3424,6 +3426,7 @@ def assign(self, **kwargs): In Python 3.6+, you can create multiple columns within the same assign where one of the columns depends on another one defined within the same assign: + >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, ... 
temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) temp_c temp_f temp_k diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2c7f6ae8e3533..2cb3df421325e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6508,16 +6508,16 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, def asof(self, where, subset=None): """ - Return the last row(s) without any `NaN`s before `where`. + Return the last row(s) without any NaNs before `where`. The last row (for each element in `where`, if list) without any - `NaN` is taken. - In case of a :class:`~pandas.DataFrame`, the last row without `NaN` + NaN is taken. + In case of a :class:`~pandas.DataFrame`, the last row without NaN considering only the subset of columns (if not `None`) .. versionadded:: 0.19.0 For DataFrame - If there is no good value, `NaN` is returned for a Series or + If there is no good value, NaN is returned for a Series or a Series of NaN values for a DataFrame Parameters @@ -6526,7 +6526,7 @@ def asof(self, where, subset=None): Date(s) before which the last row(s) are returned. subset : str or array-like of str, default `None` For DataFrame, if not `None`, only use these columns to - check for `NaN`s. + check for NaNs. Notes ----- @@ -6562,7 +6562,7 @@ def asof(self, where, subset=None): 2.0 For a sequence `where`, a Series is returned. The first value is - ``NaN``, because the first element of `where` is before the first + NaN, because the first element of `where` is before the first index value. >>> s.asof([5, 20]) @@ -6571,7 +6571,7 @@ def asof(self, where, subset=None): dtype: float64 Missing values are not considered. The following is ``2.0``, not - ``NaN``, even though ``NaN`` is at the index location for ``30``. + NaN, even though NaN is at the index location for ``30``. >>> s.asof(30) 2.0 diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 4547f47314bad..2ce6a0ec2a7a4 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -91,6 +91,8 @@ class TimelikeOps(object): :ref:`frequency aliases ` for a list of possible `freq` values. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False designates @@ -99,7 +101,6 @@ class TimelikeOps(object): - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - Only relevant for DatetimeIndex .. versionadded:: 0.24.0 nonexistent : 'shift', 'NaT', default 'raise' diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index fad136b3b5a45..58673c80c0c55 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -21,14 +21,14 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): Parameters ---------- arg : string, timedelta, list, tuple, 1-d array, or Series - unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', - 'hours', hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', - 'T', 'S', 'seconds', 'sec', 'second', 'ms', - 'milliseconds', 'millisecond', 'milli', 'millis', 'L', - 'us', 'microseconds', 'microsecond', 'micro', 'micros', - 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond' - 'N'}, optional + unit : str, optional Denote the unit of the input, if input is an integer. Default 'ns'. 
+ Possible values: + {'Y', 'M', 'W', 'D', 'days', 'day', 'hours', 'hour', 'hr', 'h', + 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', 'sec', 'second', + 'ms', 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', 'U', + 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', 'N'} box : boolean, default True - If True returns a Timedelta/TimedeltaIndex of the results - if False returns a np.timedelta64 or ndarray of values of dtype From f7556a419787489c6aca6c019d40cce0626c4a29 Mon Sep 17 00:00:00 2001 From: Ksenia Gueletina Date: Mon, 12 Nov 2018 19:45:50 -0500 Subject: [PATCH 107/122] DOC: Fix name of the See Also section titles in docstrings (#23653) --- pandas/_libs/tslibs/period.pyx | 2 +- pandas/core/accessor.py | 2 +- pandas/core/arrays/categorical.py | 24 ++++++++++++------------ pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/period.py | 2 +- pandas/core/base.py | 4 ++-- pandas/core/frame.py | 18 +++++++++--------- pandas/core/generic.py | 8 ++++---- pandas/core/groupby/generic.py | 4 ++-- pandas/core/groupby/groupby.py | 10 +++++----- pandas/core/indexes/base.py | 14 +++++++------- pandas/core/indexes/datetimelike.py | 8 ++++---- pandas/core/indexes/multi.py | 8 ++++---- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/core/ops.py | 8 ++++---- pandas/core/resample.py | 6 +++--- pandas/core/reshape/merge.py | 4 ++-- pandas/core/reshape/util.py | 2 +- pandas/core/series.py | 18 +++++++++--------- pandas/core/tools/datetimes.py | 2 +- pandas/core/tools/numeric.py | 2 +- pandas/core/tools/timedeltas.py | 2 +- pandas/core/window.py | 10 +++++----- pandas/io/json/table_schema.py | 2 +- pandas/io/sql.py | 10 +++++----- 26 files changed, 88 insertions(+), 88 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a284d8fb544e7..d651e75674239 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1739,7 +1739,7 @@ cdef class _Period(object): ------- Timestamp - See also + See Also -------- Period.end_time : Return the end Timestamp. Period.dayofyear : Return the day of year. diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 6694737737562..d178c66da2cc1 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -247,7 +247,7 @@ def plot(self): >>> ds.geo.plot() # plots data on a map -See also +See Also -------- %(others)s """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4363f3ccb14e2..55f0040344bdc 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -313,7 +313,7 @@ class Categorical(ExtensionArray, PandasObject): See the `user guide `_ for more. - See also + See Also -------- pandas.api.types.CategoricalDtype : Type for categorical data CategoricalIndex : An Index with an underlying ``Categorical`` @@ -457,7 +457,7 @@ def categories(self): If the new categories do not validate as categories or if the number of new categories is unequal the number of old categories - See also + See Also -------- rename_categories reorder_categories @@ -823,7 +823,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, ------- cat : Categorical with reordered categories or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -894,7 +894,7 @@ def rename_categories(self, new_categories, inplace=False): With ``inplace=False``, the new categorical is returned. 
With ``inplace=True``, there is no return value. - See also + See Also -------- reorder_categories add_categories @@ -971,7 +971,7 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False): ------- cat : Categorical with reordered categories or None if inplace. - See also + See Also -------- rename_categories add_categories @@ -1010,7 +1010,7 @@ def add_categories(self, new_categories, inplace=False): ------- cat : Categorical with new categories added or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -1058,7 +1058,7 @@ def remove_categories(self, removals, inplace=False): ------- cat : Categorical with removed categories or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -1100,7 +1100,7 @@ def remove_unused_categories(self, inplace=False): ------- cat : Categorical with unused categories dropped or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -1364,7 +1364,7 @@ def isna(self): ------- a boolean array of whether my values are null - See also + See Also -------- isna : top-level isna isnull : alias of isna @@ -1387,7 +1387,7 @@ def notna(self): ------- a boolean array of whether my values are not null - See also + See Also -------- notna : top-level notna notnull : alias of notna @@ -1503,7 +1503,7 @@ def argsort(self, *args, **kwargs): ------- argsorted : numpy array - See also + See Also -------- numpy.ndarray.argsort @@ -2322,7 +2322,7 @@ def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a Categorical. - See also + See Also -------- numpy.ndarray.repeat diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a6f688fb0cf7a..926228f267049 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -874,7 +874,7 @@ def to_period(self, freq=None): PeriodIndex(['2017-01-01', '2017-01-02'], dtype='period[D]', freq='D') - See also + See Also -------- pandas.PeriodIndex: Immutable ndarray holding ordinal values pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b343d42ef3b7c..faba404faeb23 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -636,7 +636,7 @@ def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a PeriodArray. - See also + See Also -------- numpy.ndarray.repeat """ diff --git a/pandas/core/base.py b/pandas/core/base.py index de368f52b6f00..5533054fcaf8a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -820,7 +820,7 @@ def argmax(self, axis=None): """ return a ndarray of the maximum argument indexer - See also + See Also -------- numpy.ndarray.argmax """ @@ -863,7 +863,7 @@ def argmin(self, axis=None): """ return a ndarray of the minimum argument indexer - See also + See Also -------- numpy.ndarray.argmin """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f6f91ff5081f1..cef811d710a39 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -346,7 +346,7 @@ class DataFrame(NDFrame): 1 4 5 6 2 7 8 9 - See also + See Also -------- DataFrame.from_records : constructor from tuples, also record arrays DataFrame.from_dict : from dicts of Series, arrays, or dicts @@ -1694,7 +1694,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, datetime format based on the first datetime string. If the format can be inferred, there often will be a large parsing speed-up. 
- See also + See Also -------- pandas.read_csv @@ -5592,7 +5592,7 @@ def pivot(self, index=None, columns=None, values=None): ------- table : DataFrame - See also + See Also -------- DataFrame.pivot : pivot without aggregation that can handle non-numeric data @@ -5797,7 +5797,7 @@ def unstack(self, level=-1, fill_value=None): .. versionadded:: 0.18.0 - See also + See Also -------- DataFrame.pivot : Pivot a table based on column values. DataFrame.stack : Pivot a level of the column labels (inverse operation @@ -5867,7 +5867,7 @@ def unstack(self, level=-1, fill_value=None): col_level : int or string, optional If columns are a MultiIndex then use this level to melt. - See also + See Also -------- %(other)s pivot_table @@ -6108,7 +6108,7 @@ def _gotitem(self, 3 NaN dtype: float64 - See also + See Also -------- DataFrame.apply : Perform any type of operations. DataFrame.transform : Perform transformation type operations. @@ -6242,7 +6242,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, side-effects, as they will take effect twice for the first column/row. - See also + See Also -------- DataFrame.applymap: For elementwise operations DataFrame.aggregate: only perform aggregating type operations @@ -6351,7 +6351,7 @@ def applymap(self, func): DataFrame Transformed DataFrame. - See also + See Also -------- DataFrame.apply : Apply a function along input axis of DataFrame @@ -6434,7 +6434,7 @@ def append(self, other, ignore_index=False, those rows to a list and then concatenate the list with the original DataFrame all at once. - See also + See Also -------- pandas.concat : General function to concatenate DataFrame, Series or Panel objects diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2cb3df421325e..80ea084ccd2be 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1850,7 +1850,7 @@ def empty(self): >>> df.dropna().empty True - See also + See Also -------- pandas.Series.dropna pandas.DataFrame.dropna @@ -5309,7 +5309,7 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): 1 2 dtype: int64 - See also + See Also -------- pandas.to_datetime : Convert argument to datetime. pandas.to_timedelta : Convert argument to timedelta. @@ -10090,7 +10090,7 @@ def _doc_parms(cls): """ _all_see_also = """\ -See also +See Also -------- pandas.Series.all : Return True if all elements are True pandas.DataFrame.any : Return True if one (or more) elements are True @@ -10117,7 +10117,7 @@ def _doc_parms(cls): ------- %(outname)s : %(name1)s or %(name2)s\n %(examples)s -See also +See Also -------- pandas.core.window.Expanding.%(accum_func_name)s : Similar functionality but ignores ``NaN`` values. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b0477c7d3a8ad..ee84f8cda07d0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -734,7 +734,7 @@ def _selection_name(self): 1 1 2 2 3 4 - See also + See Also -------- pandas.Series.groupby.apply pandas.Series.groupby.transform @@ -1289,7 +1289,7 @@ class DataFrameGroupBy(NDFrameGroupBy): 1 1 2 0.590716 2 3 4 0.704907 - See also + See Also -------- pandas.DataFrame.groupby.apply pandas.DataFrame.groupby.transform diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 12327e1cf148e..5041449b4d724 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -43,7 +43,7 @@ class providing the base-class of operations. 
_doc_template = """ - See also + See Also -------- pandas.Series.%(name)s pandas.DataFrame.%(name)s @@ -91,7 +91,7 @@ class providing the base-class of operations. -------- {examples} - See also + See Also -------- pipe : Apply function to the full GroupBy object instead of to each group. @@ -253,7 +253,7 @@ class providing the base-class of operations. ------- %(klass)s -See also +See Also -------- aggregate, transform @@ -1624,7 +1624,7 @@ def ngroup(self, ascending=True): 5 0 dtype: int64 - See also + See Also -------- .cumcount : Number the rows in each group. """ @@ -1680,7 +1680,7 @@ def cumcount(self, ascending=True): 5 0 dtype: int64 - See also + See Also -------- .ngroup : Number the groups themselves. """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 263de57d32f31..2b916f35a1173 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -876,7 +876,7 @@ def ravel(self, order='C'): """ return an ndarray of the flattened values of the underlying data - See also + See Also -------- numpy.ndarray.ravel """ @@ -2160,7 +2160,7 @@ def _concat_same_dtype(self, to_concat, name): If allow_fill=True and fill_value is not None, indices specified by -1 is regarded as NA. If Index doesn't hold NA, raise ValueError - See also + See Also -------- numpy.ndarray.take """ @@ -2305,7 +2305,7 @@ def notna(self): numpy.ndarray Boolean array to indicate which entries are not NA. - See also + See Also -------- Index.notnull : alias of notna Index.isna: inverse of notna @@ -2338,7 +2338,7 @@ def putmask(self, mask, value): """ return a new Index of the values set with the mask - See also + See Also -------- numpy.ndarray.putmask """ @@ -2695,7 +2695,7 @@ def argsort(self, *args, **kwargs): Integer indices that would sort the index if used as an indexer. - See also + See Also -------- numpy.argsort : Similar method for NumPy arrays. Index.sort_values : Return sorted copy of Index. @@ -3202,7 +3202,7 @@ def _get_level_values(self, level): values : Index Calling object, as there is only one level in the Index. - See also + See Also -------- MultiIndex.get_level_values : get values for a level of a MultiIndex @@ -3607,7 +3607,7 @@ def isin(self, values, level=None): is_contained : ndarray NumPy array of boolean values. - See also + See Also -------- Series.isin : Same for Series. DataFrame.isin : Same method for DataFrames. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 2ce6a0ec2a7a4..3f9a60f6d5c51 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -436,7 +436,7 @@ def min(self, axis=None, *args, **kwargs): Return the minimum value of the Index or minimum along an axis. - See also + See Also -------- numpy.ndarray.min """ @@ -465,7 +465,7 @@ def argmin(self, axis=None, *args, **kwargs): See `numpy.ndarray.argmin` for more information on the `axis` parameter. - See also + See Also -------- numpy.ndarray.argmin """ @@ -486,7 +486,7 @@ def max(self, axis=None, *args, **kwargs): Return the maximum value of the Index or maximum along an axis. - See also + See Also -------- numpy.ndarray.max """ @@ -515,7 +515,7 @@ def argmax(self, axis=None, *args, **kwargs): See `numpy.ndarray.argmax` for more information on the `axis` parameter. 
- See also + See Also -------- numpy.ndarray.argmax """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 01304cce507f0..7a188dd7ba299 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1176,7 +1176,7 @@ def to_frame(self, index=True, name=None): ------- DataFrame : a DataFrame containing the original MultiIndex data. - See also + See Also -------- DataFrame """ @@ -2204,7 +2204,7 @@ def get_loc(self, key, method=None): or a sequence of such. If you want to use those, use :meth:`MultiIndex.get_locs` instead. - See also + See Also -------- Index.get_loc : get_loc method for (single-level) index. MultiIndex.slice_locs : Get slice location given start label(s) and @@ -2530,7 +2530,7 @@ def get_locs(self, seq): >>> mi.get_locs([[True, False, True], slice('e', 'f')]) array([2], dtype=int64) - See also + See Also -------- MultiIndex.get_loc : Get location for a label or a tuple of labels. MultiIndex.slice_locs : Get slice location given start label(s) and @@ -2657,7 +2657,7 @@ def equals(self, other): Determines if two MultiIndex objects have the same labeling information (the levels themselves do not necessarily have to be the same) - See also + See Also -------- equal_levels """ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 795ffeefa1794..bdd0bf40507f4 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -155,7 +155,7 @@ def insert(self, loc, item): ----- An Index instance can **only** contain hashable objects. - See also + See Also -------- Index : The base pandas Index type """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e4c177a08462e..e689eb4a7d84a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -305,7 +305,7 @@ def argsort(self, *args, **kwargs): ------- argsorted : numpy array - See also + See Also -------- numpy.ndarray.argsort """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fbfdfb9c01237..1f422a6098fa0 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -88,7 +88,7 @@ def _maybe_match_name(a, b): ------- name : str or None - See also + See Also -------- pandas.core.common.consensus_name_attr """ @@ -609,7 +609,7 @@ def _get_op_name(op, special): e NaN dtype: float64 -See also +See Also -------- Series.{reverse} """ @@ -673,7 +673,7 @@ def _get_op_name(op, special): -------- {df_examples} -See also +See Also -------- DataFrame.{reverse} """ @@ -692,7 +692,7 @@ def _get_op_name(op, special): ------- Panel -See also +See Also -------- Panel.{reverse} """ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 36476a8ecb657..0a275c7a3575b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -239,7 +239,7 @@ def pipe(self, func, *args, **kwargs): 2013-01-01 00:00:02 7 4.949747 2013-01-01 00:00:04 5 NaN - See also + See Also -------- pandas.DataFrame.groupby.aggregate pandas.DataFrame.resample.transform @@ -983,7 +983,7 @@ def _upsample(self, method, limit=None, fill_value=None): fill_value : scalar, default None Value to use for missing values - See also + See Also -------- .fillna @@ -1113,7 +1113,7 @@ def _upsample(self, method, limit=None, fill_value=None): fill_value : scalar, default None Value to use for missing values - See also + See Also -------- .fillna diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e09cf0a527ff9..3d6f55c907269 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ 
-199,7 +199,7 @@ def merge_ordered(left, right, on=None, The output type will be the same as 'left', if it is a subclass of DataFrame. - See also + See Also -------- merge merge_asof @@ -447,7 +447,7 @@ def merge_asof(left, right, on=None, 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - See also + See Also -------- merge merge_ordered diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py index 07f7272398777..7f43a0e9719b8 100644 --- a/pandas/core/reshape/util.py +++ b/pandas/core/reshape/util.py @@ -26,7 +26,7 @@ def cartesian_product(X): [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'), array([1, 2, 1, 2, 1, 2])] - See also + See Also -------- itertools.product : Cartesian product of input iterables. Equivalent to nested for-loops. diff --git a/pandas/core/series.py b/pandas/core/series.py index 20e4720a3bde7..6617bf8500a1c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -475,7 +475,7 @@ def ravel(self, order='C'): """ Return the flattened underlying data as an ndarray - See also + See Also -------- numpy.ndarray.ravel """ @@ -487,7 +487,7 @@ def compress(self, condition, *args, **kwargs): .. deprecated:: 0.24.0 - See also + See Also -------- numpy.ndarray.compress """ @@ -538,7 +538,7 @@ def put(self, *args, **kwargs): Applies the `put` method to its `values` attribute if it has one. - See also + See Also -------- numpy.ndarray.put """ @@ -992,7 +992,7 @@ def repeat(self, repeats, *args, **kwargs): Repeat elements of a Series. Refer to `numpy.ndarray.repeat` for more information about the `repeats` argument. - See also + See Also -------- numpy.ndarray.repeat """ @@ -2181,7 +2181,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): list and then concatenate the list with the original Series all at once. - See also + See Also -------- pandas.concat : General function to concatenate DataFrame, Series or Panel objects @@ -2784,7 +2784,7 @@ def argsort(self, axis=0, kind='quicksort', order=None): ------- argsorted : Series, with -1 indicated where nan values are present - See also + See Also -------- numpy.ndarray.argsort @@ -3198,7 +3198,7 @@ def _gotitem(self, key, ndim, subset=None): max 4 dtype: int64 - See also + See Also -------- pandas.Series.apply : Invoke function on a Series. pandas.Series.transform : Transform function producing @@ -3263,7 +3263,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): ------- y : Series or DataFrame if func returns a Series - See also + See Also -------- Series.map: For element-wise operations Series.agg: only perform aggregating type operations @@ -3891,7 +3891,7 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, datetime format based on the first datetime string. If the format can be inferred, there often will be a large parsing speed-up. - See also + See Also -------- pandas.read_csv diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index dcba51d26980f..0eb2ffeab28f1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -543,7 +543,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 1 1960-01-03 2 1960-01-04 - See also + See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_timedelta : Convert argument to timedelta. 
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 4bb5c223d1bcc..12699927141cb 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -88,7 +88,7 @@ def to_numeric(arg, errors='raise', downcast=None): 3 -3.0 dtype: float64 - See also + See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 58673c80c0c55..db93820c6942f 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -68,7 +68,7 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) - See also + See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. diff --git a/pandas/core/window.py b/pandas/core/window.py index be28a3bcccec6..b250851a18f4d 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -50,7 +50,7 @@ ------- same type as input -See also +See Also -------- pandas.Series.%(name)s pandas.DataFrame.%(name)s @@ -733,7 +733,7 @@ def f(arg, *args, **kwargs): 8 -0.096361 0.818139 0.472290 9 0.070889 0.134399 -0.031308 - See also + See Also -------- pandas.DataFrame.rolling.aggregate pandas.DataFrame.aggregate @@ -1640,7 +1640,7 @@ def _validate_freq(self): 8 -0.289082 -1.647453 9 0.212668 -1.647453 - See also + See Also -------- pandas.Series.rolling pandas.DataFrame.rolling @@ -1916,7 +1916,7 @@ def _get_window(self, other=None): 8 0.067236 0.948257 0.163353 9 -0.286980 0.618493 -0.694496 - See also + See Also -------- pandas.DataFrame.expanding.aggregate pandas.DataFrame.rolling.aggregate @@ -2219,7 +2219,7 @@ def _constructor(self): 8 0.067236 0.948257 0.163353 9 -0.286980 0.618493 -0.694496 - See also + See Also -------- pandas.DataFrame.rolling.aggregate diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 3b4ebb638412e..5cbecaf143295 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -290,7 +290,7 @@ def parse_table_schema(json, precise_float): :class:`Index` name of 'index' and :class:`MultiIndex` names starting with 'level_' are not supported. - See also + See Also -------- build_table_schema : inverse function pandas.read_json diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2f411a956dfb8..db405390431c9 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -222,7 +222,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None, ----- Any datetime values with time zone information will be converted to UTC. - See also + See Also -------- read_sql_query : Read SQL query into a DataFrame. read_sql @@ -302,7 +302,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, Any datetime values with time zone information parsed via the `parse_dates` parameter will be converted to UTC. - See also + See Also -------- read_sql_table : Read SQL database table into a DataFrame. read_sql @@ -366,7 +366,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, ------- DataFrame - See also + See Also -------- read_sql_table : Read SQL database table into a DataFrame. read_sql_query : Read SQL query into a DataFrame. 
@@ -1002,7 +1002,7 @@ def read_table(self, table_name, index_col=None, coerce_float=True, ------- DataFrame - See also + See Also -------- pandas.read_sql_table SQLDatabase.read_query @@ -1063,7 +1063,7 @@ def read_query(self, sql, index_col=None, coerce_float=True, ------- DataFrame - See also + See Also -------- read_sql_table : Read SQL database table into a DataFrame read_sql From 951041eed49a8e8c9785a197b5967e0c64108e85 Mon Sep 17 00:00:00 2001 From: Benoit Paquet Date: Mon, 12 Nov 2018 21:11:20 -0500 Subject: [PATCH 108/122] CI: Allow to compile docs with ipython 7.11 #22990 (#23655) --- ci/deps/travis-36-doc.yaml | 2 +- doc/source/conf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/deps/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml index ce095b887f189..6bf8cb38e0b7c 100644 --- a/ci/deps/travis-36-doc.yaml +++ b/ci/deps/travis-36-doc.yaml @@ -11,7 +11,7 @@ dependencies: - html5lib - hypothesis>=3.58.0 - ipykernel - - ipython==6.5.0 + - ipython - ipywidgets - lxml - matplotlib diff --git a/doc/source/conf.py b/doc/source/conf.py index b0501eaf54dc2..3b0b51dd0d648 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -388,6 +388,7 @@ category=FutureWarning) +ipython_warning_is_error = False ipython_exec_lines = [ 'import numpy as np', 'import pandas as pd', From d5d6d916680531cbdb21efe2a390bcea2cca99f4 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Tue, 13 Nov 2018 06:48:57 -0700 Subject: [PATCH 109/122] TST: IntervalTree.get_loc_interval should return platform int (#23660) --- pandas/_libs/intervaltree.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index aa53f5086b894..7be3bdbc1048a 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -116,7 +116,7 @@ cdef class IntervalTree(IntervalMixin): enclosing = self.get_loc(0.5 * (key_left + key_right)) combined = np.concatenate([left_overlap, right_overlap, enclosing]) uniques = pd.unique(combined) - return uniques + return uniques.astype('intp') def get_indexer(self, scalar_t[:] target): """Return the positions corresponding to unique intervals that overlap From 42420775505b59637e006f7892890053c16504bd Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 13 Nov 2018 05:50:45 -0800 Subject: [PATCH 110/122] CLN: Move to_excel to generic.py (#23656) --- pandas/core/frame.py | 18 ------------------ pandas/core/generic.py | 34 +++++++++++++++++++++++++++------- pandas/core/series.py | 13 ------------- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cef811d710a39..3a8ad3f98f8e0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1829,24 +1829,6 @@ def to_panel(self): return self._constructor_expanddim(new_mgr) - @Appender(_shared_docs['to_excel'] % _shared_doc_kwargs) - def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', - float_format=None, columns=None, header=True, index=True, - index_label=None, startrow=0, startcol=0, engine=None, - merge_cells=True, encoding=None, inf_rep='inf', verbose=True, - freeze_panes=None): - - from pandas.io.formats.excel import ExcelFormatter - formatter = ExcelFormatter(self, na_rep=na_rep, cols=columns, - header=header, - float_format=float_format, index=index, - index_label=index_label, - merge_cells=merge_cells, - inf_rep=inf_rep) - formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow, - startcol=startcol, freeze_panes=freeze_panes, - 
engine=engine) - @deprecate_kwarg(old_arg_name='encoding', new_arg_name=None) def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 80ea084ccd2be..d5cc56d9f881d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1977,16 +1977,17 @@ def _repr_latex_(self): # I/O Methods _shared_docs['to_excel'] = """ - Write %(klass)s to an excel sheet. + Write %(klass)s to an Excel sheet. - To write a single %(klass)s to an excel .xlsx file it is only necessary to + To write a single %(klass)s to an Excel .xlsx file it is only necessary to specify a target file name. To write to multiple sheets it is necessary to create an `ExcelWriter` object with a target file name, and specify a sheet - in the file to write to. Multiple sheets may be written to by - specifying unique `sheet_name`. With all data written to the file it is - necessary to save the changes. Note that creating an ExcelWriter object - with a file name that already exists will result in the contents of the - existing file being erased. + in the file to write to. + + Multiple sheets may be written to by specifying unique `sheet_name`. + With all data written to the file it is necessary to save the changes. + Note that creating an `ExcelWriter` object with a file name that already + exists will result in the contents of the existing file being erased. Parameters ---------- @@ -9951,6 +9952,25 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, if path_or_buf is None: return formatter.path_or_buf.getvalue() + @Appender(_shared_docs["to_excel"] % dict(klass="object")) + def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="", + float_format=None, columns=None, header=True, index=True, + index_label=None, startrow=0, startcol=0, engine=None, + merge_cells=True, encoding=None, inf_rep="inf", verbose=True, + freeze_panes=None): + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + from pandas.io.formats.excel import ExcelFormatter + formatter = ExcelFormatter(df, na_rep=na_rep, cols=columns, + header=header, + float_format=float_format, index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep) + formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow, + startcol=startcol, freeze_panes=freeze_panes, + engine=engine) + def _doc_parms(cls): """Return a tuple of the doc parms.""" diff --git a/pandas/core/series.py b/pandas/core/series.py index 6617bf8500a1c..9a3873a41a2da 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3970,19 +3970,6 @@ def to_csv(self, *args, **kwargs): kwargs["header"] = False # Backwards compatibility. 
return self.to_frame().to_csv(**kwargs) - @Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs) - def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', - float_format=None, columns=None, header=True, index=True, - index_label=None, startrow=0, startcol=0, engine=None, - merge_cells=True, encoding=None, inf_rep='inf', verbose=True): - df = self.to_frame() - df.to_excel(excel_writer=excel_writer, sheet_name=sheet_name, - na_rep=na_rep, float_format=float_format, columns=columns, - header=header, index=index, index_label=index_label, - startrow=startrow, startcol=startcol, engine=engine, - merge_cells=merge_cells, encoding=encoding, - inf_rep=inf_rep, verbose=verbose) - @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) def isna(self): return super(Series, self).isna() From c1640c6ac97e2579934cb9fdf23dbafcadabb5f7 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Tue, 13 Nov 2018 08:57:18 -0800 Subject: [PATCH 111/122] Add to_flat_index method to MultiIndex (#22866) --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/base.py | 20 +++++++++++++ pandas/core/indexes/multi.py | 29 +++++++++++++++++++ pandas/tests/indexes/multi/test_conversion.py | 8 +++++ pandas/tests/indexes/test_base.py | 8 +++++ 6 files changed, 67 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index 665649aead33c..81bb420c47a99 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1724,6 +1724,7 @@ MultiIndex Components MultiIndex.set_levels MultiIndex.set_labels MultiIndex.to_hierarchical + MultiIndex.to_flat_index MultiIndex.to_frame MultiIndex.is_lexsorted MultiIndex.sortlevel diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 358f3e7429394..3664bed1b3916 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -238,6 +238,7 @@ Other Enhancements - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) +- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2b916f35a1173..ff2562a4480bc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1113,6 +1113,26 @@ def _format_attrs(self): """ return format_object_attrs(self) + def to_flat_index(self): + """ + Identity method. + + .. versionadded:: 0.24.0 + + This is implemented for compatability with subclass implementations + when chaining. + + Returns + ------- + pd.Index + Caller. + + See Also + -------- + MultiIndex.to_flat_index : Subclass implementation. 
+ """ + return self + def to_series(self, index=None, name=None): """ Create a Series with both index and values equal to the index keys diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7a188dd7ba299..310e7c2bd95d7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -193,6 +193,7 @@ class MultiIndex(Index): set_levels set_labels to_frame + to_flat_index is_lexsorted sortlevel droplevel @@ -1246,6 +1247,34 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): FutureWarning, stacklevel=2) return MultiIndex(levels=levels, labels=labels, names=names) + def to_flat_index(self): + """ + Convert a MultiIndex to an Index of Tuples containing the level values. + + .. versionadded:: 0.24.0 + + Returns + ------- + pd.Index + Index with the MultiIndex data represented in Tuples. + + Notes + ----- + This method will simply return the caller if called by anything other + than a MultiIndex. + + Examples + -------- + >>> index = pd.MultiIndex.from_product( + ... [['foo', 'bar'], ['baz', 'qux']], + ... names=['a', 'b']) + >>> index.to_flat_index() + Index([('foo', 'baz'), ('foo', 'qux'), + ('bar', 'baz'), ('bar', 'qux')], + dtype='object') + """ + return Index(self.values, tupleize_cols=False) + @property def is_all_dates(self): return False diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 79494a7c77cbd..fb734b016518e 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -170,3 +170,11 @@ def test_to_series_with_arguments(idx): assert s.values is not idx.values assert s.index is not idx assert s.name != idx.name + + +def test_to_flat_index(idx): + expected = pd.Index((('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')), + tupleize_cols=False) + result = idx.to_flat_index() + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4a3efe22926f7..619f60a42e0be 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2266,6 +2266,14 @@ def test_tab_complete_warning(self, ip): with provisionalcompleter('ignore'): list(ip.Completer.completions('idx.', 4)) + def test_to_flat_index(self, indices): + # 22866 + if isinstance(indices, MultiIndex): + pytest.skip("Separate expectation for MultiIndex") + + result = indices.to_flat_index() + tm.assert_index_equal(result, indices) + class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ From 8e4bf4cf5ec700b147cd5d97bd7911ad53fe2a9b Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 13 Nov 2018 09:32:51 -0800 Subject: [PATCH 112/122] BUG: Fix read_excel w/parse_cols & empty dataset (#23661) Closes gh-9208. 
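When a sheet has a header row but no data rows, ``offset`` ends up pointing past the end of ``data``, so the forward-fill loop over ``index_col`` in the hunk below dereferenced ``data[offset][col]`` and raised ``IndexError``. A minimal sketch of the fixed behavior — the file name is hypothetical; any workbook whose sheet holds only a header row exercises this path:

    >>> import pandas as pd
    >>> # 'header_only.xlsx' is assumed to contain one header row and
    >>> # no data rows.
    >>> df = pd.read_excel('header_only.xlsx', index_col=0)
    >>> df.empty  # before this fix, the read itself raised IndexError
    True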
--- doc/source/whatsnew/v0.24.0.txt | 6 +++--- pandas/io/excel.py | 19 +++++++++++-------- pandas/tests/io/data/test1.xls | Bin 30720 -> 28160 bytes pandas/tests/io/data/test1.xlsm | Bin 45056 -> 13072 bytes pandas/tests/io/data/test1.xlsx | Bin 44929 -> 12982 bytes pandas/tests/io/test_excel.py | 10 ++++++++++ 6 files changed, 24 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3664bed1b3916..19af38954e282 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1264,9 +1264,6 @@ MultiIndex I/O ^^^ -- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) -- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) - .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine @@ -1302,6 +1299,9 @@ Current Behavior: Notice how we now instead output ``np.nan`` itself instead of a stringified form of it. +- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) +- Bug in :meth:`read_excel()` when ``parse_cols`` is specified with an empty dataset (:issue:`9208`) - :func:`read_html()` no longer ignores all-whitespace ```` within ```` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`) - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) - :func:`read_csv()` and func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index c25a7670cce44..a7e0e48de0a75 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -634,14 +634,17 @@ def _parse_cell(cell_contents, cell_typ): else: offset = 1 + max(header) - for col in index_col: - last = data[offset][col] - - for row in range(offset + 1, len(data)): - if data[row][col] == '' or data[row][col] is None: - data[row][col] = last - else: - last = data[row][col] + # Check if we have an empty dataset + # before trying to collect data. 
+ if offset < len(data):
+ for col in index_col:
+ last = data[offset][col]
+
+ for row in range(offset + 1, len(data)):
+ if data[row][col] == '' or data[row][col] is None:
+ data[row][col] = last
+ else:
+ last = data[row][col]

 has_index_names = is_list_like(header) and len(header) > 1

diff --git a/pandas/tests/io/data/test1.xls b/pandas/tests/io/data/test1.xls
index db0f9dec7d5e42c87dc0b0d297b66305f7af4225..a5940b2cfa6c2481955bca02e074fdb910c96c67 100644
GIT binary patch
literal 28160
[base85-encoded binary payload omitted]

literal 30720
[base85-encoded binary payload omitted]

diff --git a/pandas/tests/io/data/test1.xlsm b/pandas/tests/io/data/test1.xlsm
index 4c873e55a5300797bd9055d29c7820e1c0526da2..981c303b7bd30c736f0e67ee7f1302f2bd738fa6 100644
GIT binary patch
literal 13072
[base85-encoded binary payload omitted]

literal 45056
[base85-encoded binary payload omitted; the patch series is truncated mid-payload]
z`B&tVKt?#RAs`69Z#(_>vwy?lWpCqRBmV96*k2)_Y{X(xKsPe~OE>I!{<0fCU;!ZC z4gu)F9f^jfa0uG^1N!cVLxvB6{?l%I*?YTOy=Lj)EavEDW4E0m+r6LfSj6-GB<6Ns zXgh|V9ft4l5Tum>E=`4;fdOL*0b}g;c_Wws`^)W@%?PbR zJ+1r5+^vYN%}jlv3PL_fG7&2#sKRm;GSrt~OpHo)94A(b0OT>M=mfNNW@Y)~^*@>Vzc@Yr<)@d%Dkyccp@NU3@4^NyXO?3y z#N^yXrJE_#`~qaYqSr>{P!lh=zaYU-BMOFj?ceHm_ho5WAZmAj;$oe%G!hGkpQ_Hi z44ic1=!(EV>y#+vSi07O={9pdbCL2|)|1Yy^*uvLLt&QO;3}2W)UiY*#u$qx5fXMD z2^dc>B~YhNL3_pIsvK%YO!c4){JEYdb2n}*&3`U2|Hm`ox4iQElPS0Z&L$QMZ+E!R-a8WocJg`4b5yjP9@UXvG0AD6+aHBP;z2jag4q0r^!6`*Qwz=9!mG@ z;X&#y8TdIE6iHUN^0!HH)uVd*1_l7|LjVBKA)fKDWp%fAvN5r@xA~)_<*E6{URmh2L_p5z#uH4|&`Cynw3=?8(56f88U_$a zMNOLJE;=ICI7v;#o^e1*texblWt$;hKhq`W!Jr;5#M4lo;+jD>;-J*ec;)fJE-H}Z<5GO8ze2y=r6e1@efhaOV4)5w4>0p za$bB6xZ}|QOzzgkik&=QGV?m19B=wCGdzG|Sh5vPLuU_j8Qw8i?*l2ASUG%`gWKrA|M0=ZBTkvZPRKx}18g<+|LsT%_URF)cK$;tCw zmTjr4gAjn|5~rDHgG8rF!#v=EfAJ_=)t`}wj115}{ZzKY19vUTkeSDn$LK5x3fIvG zdZe=0KQpuN>?~3nvRR>{DxC~U18yMSjB5`>Ya`c&4E#7rDgWV!c^_Iu?|l7}f&%7; z@f{y;hkk+gxq)_+QA@IK5cZwy(V76oQg8WJdvg!y)`w0a1Ul<@(gY2(akadHo$6{d_?6qzOdh=nM zjO9m-CMIWURgJ;e>t+L`4cj~}aA+(a#~nw1qLfz$?vV(Maj`bsj164izl=HbroJ_~ z*=5;lL(V*!Lhp@h5CDe8ADaoU@qg90Zfo3S39bjYo51kmF= zh`^3md|3_dOl6`_6&Y2+vTgap+s`-d#kooC9O=ecMz|R9UvgILz?X*zSv$arl^>*B z-xfHZ5dU3?T0y#}gNEegG$aFw0EkeK?EAAy{c8^XvtmI(v?b)T|GTf2xG`%8WA_~e zw+2u6I>y=oHv9;Cy{?nogF~~2VOYJ&7Cx= zpm!t%eVk@FOEo1V`r+z1`|OK=LOsXbOn^x~eRM*QLn$}8V(|OXN1Ns*J2@Z6EsOVw z6OA&-BlUtr35oU4l)m46G3?{A$uY{`MNovhZt4gugtt)v0D|BA z?Q97Ix;V4`y0QO3=&gm?)@h3(G%NEk64eqFqK0^ToZKkU0Q8~`OT3uKF?Jt zh?Va?`q_izJJuLQ38|lh#{)(euk;}1^D|&ZM%ec3qlVM=W zqEy_k&npWu=<4yU8%a%?Ej-%#an&tuT(aqhvf|AUJH4cN1-#m5O}n_|$v9yuvjxp5 z0EUe5Pjzqjrj27snOnE~69^f)z6bynJ2X}I=ftunCh)wu`ur!71&ExN)T7)NtatsX z!sZZKJ%~sol$>jF}WS%?jmKHQOReB!P zL{+^U?`x7yHRCSYuMWEl$ZL}SYL#Ou;5i{o!oNeJC81{e;*+@J`myB^ZG4|h(=IcTH|6ou9z{^;X#Fro zI4>$Esh!vajb=NgsK2SOF_>bpm$3kn5Xhfd=Hk8&hrJ?kmm*y7F= zjk7otkTaPXg-CHGF2cQ1s}GaR9C{0; z6EA-bJ;?G_MURniYReL!<3^4pjmvĽE;I9emg+?-n2oq?eqyvUKn{Kk^PM6%|a zj%fvJ_!3lBN;V}l7=;G@#f4EMJl2gf^Jj`Oz2Fi3i+sAk&yKMEXZ}@M_pqvAfmcC;*xkQsFsKWL&hGz|k2x4r5`W%L`>u6y_fh5s{#n_J@ z>g)oVt1Q00+Na+LBt31IOBCajQS0F)#@j&Kh$dKrbydv_Fd$#*c7RP#AxeFwXyRQp zaiZ@`5LhY&pL0|4)Z_3&s0v5H7ZlmYWez-V(MPeKuwu zLV?%+Jq2?7NrCzh3dHd>w0aEQa^Jr`d4~X{epOhfQW90yd{Au)HiGlkTB0^9dAJlb zg<(f4la^rvd;4r>cpvH+zF#>wjk>h_!cg5iUjTJ1sG94Rf_rKD_^Evr-DI_7H&=_6 zSU0)y(U#$HdGm5rN~zALaJFxnQ~+mY-f-^RjR36P#OZZ|g*V~it&cSe=qj4$VG9!Ze{DS6zaOJK+{`9sa;e?491k`i|j2RQ3 z+Wg|c41w0~{t3hkU6TUliXHfxzc_FZ-`f$wfhhvK&OaE!nr(S6+COyL1SsvInTZtg zyBMtA4_c;(8ux!;9I@VC9bXf>T{D}W9!nY)P)~P0!W4dY4|1{>q8)S7yv<-7=eHpv zIfKEY{i&T2C8J;DG1IgolxU+TF&~uv~VJ~NIr7>uyI=27arml2uo}GGJqG~`m z^`T11KY8CzzZ*h|>*Y@1q3y;Zm3Ss-r1|iEfugv4JIdrJe&CzL4%nd_}3E zV8y`D66BAhFU)X+D0k0{(p*7jU%8JK=Nmy2wa0PH5-tT8Z*Vok5claNCQvGEBE2Zw zKvnoAbNMV`Z*`ivdx*naJ>9i8F+a3!<+qI3|K`C~rsYC*8%F_vlp_|S0wuzfpgFUx z2s8^^4hJpf)GoAxBVaN>O6NFPI$+GvqWCS3vfDh%7HL9}CEbP;d3=5>65ge(=SnM_!zszqDqA-#zWI$B`|Rqw_U zlBE!gL!)z&dt##Ag2gKo-@odmPmpE6of1gQ{U3klnV@guw*kRzvszd&) zAqLe3!iAp@BQG2wq$X1&e?n}QE}{PWBFU`)dfI1YcX z&%6fFOTlHTyn02^){Wa7&E1(Gpi~Mj=e7iK zN|whRL{Tp;?k&7p&dxZe9T8{@j9gS8J6ySG=_S{mPhm!bsV9c{I&eZye^i})!fie? 
zjDUPlVrzkj)xcW2%*}FWtH0F3t8W_5oeZnoCZth$emvB2sBTRRzae$q4fwdH;?2lV3zEwjW)$cMyslL{!{h8db;wMBYmJwq2BCX$iv--XSDGILwg^Sioqcn9pg!78(~+O zXS22dCQ&gER#RnNIKfL37+juc>rAOU<)(GKdan`roWSj6IrTxGxo29vK!vdoY2j{V zNeVcogn4!4rI!-ogUsLUxvSU(%dp12F_hOgo0F@}kekrRSt&h_kCm0E*we9UW0<&{ zAa)duCSoN!GA7ISe!wV-6=Tdn-Xl**clN6h2CDY;hMw&_(!b*|A-bpneZ$>qXQi?C zF}~KE|C#>z8??<(UQYTj_KgJ&?34ou3%vF#bod;}Is*CXH*CoYi3@Cc=4*7xk9k%f z<@O!+A}r~!_)fEjbC{xeO0EFzXJ@xu_@`|0&T3 zHkuCX#%KFl-)U${s|2Rg&NMZD?Vx@`^3j_;O48c}Y=H?J?-g=2t?Qre5eE5{cwR29 zYn&d{Zk2Jj%n2=9$9X?nau@-6O%OEgqPW#9{N(ak^J2CUm!)e`35nlvuGK@B)8GOH zcCphi0?UeTmf;_90{Zk3EDDsrT%^^BCiyGfK&BjjjV(6xw>AfmCo9kYn1TFi7P(jg zK|t1DufNV2``U8$#5@>n_&fgO7a3YrH2vI^2#HKIp!J^52{^ne&C@KVhfbm@b*_xD zrh(9sQ`t$-t>?-$&td?rx$S+R2yVC&d_HQ;l03=l1Xy251aGoc)a?<%!x@_}dhuHc z=`Yn9%I$h67=vP@6HIn+?zgRSj8L)27MSJ&RAol;W)7hH`^)lmu7sF)^31VKB0IQn zmFvZ>Ak;PX#3WBe4iBD9%3fqU$rcPGAApZ)-Dj;5X4c$fbEq#4$hJqQVVu;3725E7 z*x&Wx>r-NHL~Aq0DVAy5G+ZG+<`Jf7E*-cz*T(d6aFKg2BzIO!ClV7!qOAllwebeq z*YNPA0}<>+@Y)4}kMy{y!K+I78iA%K^7Wj zH|vRCE!kde_jwY@X?@q8JH#J>e)$w2Tza zh8R2BsfS;NNq<*5+vVB`(r8)TUs@nm=mu3oiAT-$@3-KWpU7`kB)t; zFFq1dV9M(&A>(Mmij5ClMel)R1*O(Mb+u@ODBSxgOV^uJ0t-yLUhPUr$dZjbhq}@D z=!Y%ddeY<@8KsI(kF$Jw+!D%LLCJG%@S@Y|Fd@ZU2KP4bYcNKyWf~)yPh>M;_E9JL ztF*mC@Ay}TLai8I^D3z(IYrLWKK#5o;ajs4ieA9LRt09}@+a`+Eyw`V-V`n!eYr3r zTMAGZP7>gW#@+L5@HY-$>B+H86VVv9dvnBa){Ol8ZkzF}MbGcl)l(-TOIFDnH04>~ zOngKEwAG9vY@|^*Vm5|}@cKE93TMv2git9EIFvo&?oinQhDRzF%4S$la6t3*UVitX zl1Zx{YfG=I#ga5FFp*>fB&CL2)||qf=7m<7AZ}9IOLgl}#AlyqJA6i%mf}`59&7F} z+nZS4vSQ-OIHOnHSUeMDnNP=8oWKx_ZlN9=mdBM!?X4C*?|a7b65>?_F-t6c(z*1p)EN<8eMD@$ILGrYsSLJcBJG0Z zN?Q|cu6ivnnog&+b0uLW#&($zVjPQJKJP1qT?342g6K#w8r$iN)JWRJe4>h*oMN;v zeC`kC*yP6esmEP;M^za$s?+ z;^JyXrQym@D4c|t_c)l%GA;q|w$yHH4P>^(=i!ARFeq{3H}HycHMJv*^>uizWnTF} zhYMbF&!9-_A|(r2LW?!pRawH}^n}^B2u+?qo-gT!xar6CvI*7%k7H^ROHd!lnIsos zC`b;G-?_BaL9?iJWg%}NJ@bYhZIwR=unff3@p#r3MrD3#7~^}t`u*dU@gxzvXsx&@ zHJ$AX2Fws!aczzXcjRiX%5$y6QX@0UV#@O2vU!*)B`w3b4xoN8OuR^8nzc$iR4_lO z|Do>((O|7hy2t6hUfNSnclXxDCA7iGPn8LlY7u@wMy>PumoIn-R53@ZpoV@p)tQV~ zw#`Sd!7~;hKd=5 z_Tsd^QZ(mkUX`0WfSU>nR!62`lbA?P?0GU=(K55#dhb-Y9C9^B{C?~_N#Qj?$7Q9M znc!kdg6m6*+F4C(xypT4c{lhq?e8yB+(-FJ2KO^PvU6CBI9xrd29f0gkIiAw+Z>Ql zyNm7(;L@U*8EDPs9wGYbuhsG0xyPGw$de36g?#pBb#!*|v;{is73tt4ihz zbE0zfh+q^ls(fU9r&@*M?Xa(Drqf!*vJ5pQ^effyhCusO>ZoNDw|3KE$^d1B2>01p zIk8leergbl%LvX#wMA)bQ7+xdY4Ajqqo_*E_@#w}O8L)#&+)yh0n*l_ON)#(rA1rB zEDg(-S_Fe8p3Ulu<57TOeT)fc*UsUP!j~H~t>+7ZvG`IvO;z{-axUt}L4pO%+!W<%|y=C z32a=WWA;HTNgCoa*5P=yHx|@r_9BAUV}?#gIYfvXV)6s1V5Y!wZF4oWf*o?Z~MdhlT|TLx^*Ll@J6rD~VRd!Pd$dOj0oX;nI| zyA&-5wp>8edi6bKpQ86Rj=2|mD>!W>i$cWvi!bKr-KFC<@9R7=kBN7qt=Ib;7bmX} z=c+ATvA2&yyzjmwY{ZchBqmXju`(ygJ8)fC?~hL-D?p#4%fwV-Z=UtX-v4c2N6ya2 zY(N4U3Gx)@AAxOY>;yDZb#bz?v-mTY6<$c+O3j5@Je6KMiS`P9;b({%1H!23IHj{a@CyMSA%?5`MN_feJ^^K*l_xE8 zTQ4x*<6|XnFmg-WPWp|v#SH%~vtMU77N^|L23Y0-GR|i-A$=D#X)tj@?!83sGV){J zH5l_5LhlRrqw7sE!;BT_zNEP5@ti?Z8j9S13}CJ}Glcx)zh*l>h(oRu@xJUX}SqIbkh{y^7BvcMdo(V%2&Hzagdk=VvMI)+EQT@ z6=lQwW3+HlDns8;-xGBE^(<6IBNfF6eP56X_hcKqjwBv0Ew%=o7$VfLD%Kt`ZTe~k z^9ibGKkP^Qk_p)5w2w-kjD^VX#uy1@OY;s$9-IW2i0-*xWZ3wk!RCX8hfXh${>}r~ zJxthL{}T`V>c0Kc1CZ$rhQv|mUGTg&HGi0L1dX9A#T@NDK+<3-`$E>Tp62>?XFfzM zy4!SxP2XP||2#6(loF-PPJ|+qQzDhgCtawnB2!D?nV(DuUirXy^N8GKcw~~!I?|f$cKeBfkhKF0Fj*2CJ6koy1vrn5JTWs7 zjU2P+9!f3h4Z{R_f(XH>_%*%s&x=W9EevQdqEDhz8|$qTe!HS9yu-7IAa0bQj$yT=`Gb&Y>Y&kJCZj)jaj=JGp^ zQK48Rr2=~`j%O>>Zq0P3@h4 ze@Gk1GSz=Z(~w~GiG3s2&4v@Y3il{J=$)27!!EYe8Q;1i@|ok9nc7|Vvy2?p!OfAv zk~TjMS)B7>k~8|C#tU`0Pe!GsuF=vsIRMUz(CILe*7!7saOk~cywYN>%|e(+jox$h zKH@JnFiQD$9j}nMe^P_!?VVg)%CiV`{a|}-H=i#?-Y(L8zndU&=)U6T2Pm$41meD; 
zO*U;n0FOvWSmb`guDWKM0br1b!y(sHCriLSdORlpK9Uf`r>M%zarqTEf92Kv`R3!- z7mP1Vm+Ic6p|Sh;1sT@AAL}m8n)cNsQT~9W*a-FAT6N!tSNDOu7R=K9GK;m+WESMR z>WyTwjB8`mH2TxN6DI7A3RmJA_5}N5eU;qN;m|XN*yI>CgOI^0dBmusGq>Z}c5n98 z7bL9H*XNJnL*wEyI(_u56r40K*LgAtoD3f#*8g_QJ^P|hDjpJquOT#y@vlK>?BMX9 z7=#4hKdz5)O7=@^sJ%xp7tc|biFpT=Q3Fe}rTNc38$-p`St#V2W>uNJE&Z@R^}g=S z+`^|JJDk<-Zl&B@JH@CmKIt73x*n$wk*GmYc_{s20;FwvzIcSONO{NXA{3(m>r(!A znQruTF#}NMB$%HuxZX5>zydvs>Vz8#Okl|G!yub_(+62pJ->g&nRMUA$z`@N z_B3Zvcf)w7vx$RLbnp`F1(-qyS?GzC@h!R-(dVNsE0HeAnhB$dy;prZN^Jqnu3=A% zE`Fe!oBPSstCJ`|S%&-FNY+R1?b_Six!}M^Ia4yWpwABB9uAvO?V9{?JFGo>yvH4E zC(*BNGwgY{)1(os&h4$J(^stta8B=~YN8Zux>7=mZg#s+gzCaPED%=`9TwDX7pdty z?Wbb%!Cv34U#y1N+F(a3w|PF`+`KsDd_ZY!G1%PuXxLp^uz8C<-VXUs{w!cn(996w z{P&yW|NQ6vdHd3#0|C`OFr#MeHR(>NTK~&T~zQkYlSDvCg9s2%8NyYqIlwTtBDazAf$#0ZG zyuU^HH9C2U^0Y1f8wHl|7s}Hn`BQ+WorB*1l;pnve)kccnm#?7{WiU&{L}R5N$n}Z z(}T=!1Ptn@KKR>X%~Qaq=YrpWSTz3!z~85Yr+`mY_HRI3+FyWAH1^+W?*BPx7(~fK z!tnnB`b$MDux1ux7|yD8NP~%cYeee>D{Egv%-$J>@J@ zP$GcxuznUBD3SEvZaD+m8-q91b316`}#UuGLw6%=D+@O z)g^|t-KtW`dXfB*7?6IE+4w3+_fpD;vIzgYmSThOCmY4647OMw=TqYCJ-T&bz9$HY z4ljb?>%k;nBArd$wo(+5IOKLO+=I zCUc)wZ#4CBiK`0#F3AWIx#^JL$%a}q?~n_)MNgIu^8C<8$Y8lSc*jhcKJG91rvW08=xLc57z}2>`d>UX;{AnuZpc%oAV=haJQW$>?i;A0yj%Z25B_onAFg$3glN$;-AdHl!J8U8c$_?`li@g*&$-@&FkTLepC95{8QU<%<2fX zyIaouas=cb|L#44;PT7gkg80W0YD7c%Y^Y&`O|?y(E&cLXtdAnPWN|bVuH3i$kYDs zF3orQy&+?YTZt#cehycf+<6lB=Jh@O3coEt-`P>N?nuoftTn<&-Rvl!~o?ZM<%Q+n~pp_w&RQo)5MM6R3&Q)6{WI6KX*h9%X%X^aV zkxirBd5+1yy>w9B9sbxnv#0k=;N4gE&*ZiEpU=8Umzq^)epY?%Rk=`?Uj&z{R>BA^ zv<{v7VJ1^+g;|?medyE;7tWDaMt0Y4n^eCMIQphDqlq92<|1#5@0sd++&AQ_AYwnuwJ#jq#=8$U9-x<<>ER~ z=Yhq9H*!l|Yy15}wM=40l@X%hmHzWRy{UVpD;`wDX$ynf^=>bF-e^BlnDFlAqID<{ zREVv8?0(7v4oE)WsN+n}Yk$FnER8ERO@k8l)@8MKM$UMBc(S~&^uFK8&?;`T{YnOA z@f~U%Jv82g3ZHh+x!O74n+IQ~vrk9xS%!a02RP>qo(>jH{N1#PD&?)~shVXt>vGOwnGH`*P=Bv&lC0Wz~N%QT~t? zdri#bcF@}IK6RFtF?_l4xZu?=1AFwHU#{3kAv2ew^q7#8e&Z-N%lmUX-!*bhwi4JF z8@XU4fyF|^Pt;UHlsC^1Rnw7#>JD?e>ZbFwS)s#Z^KIh^_S7(GPFmpoJ6~@vXGUG? 
zJ@VNj@n}*u2S?WYoon;yx7DRvE9MUOYHc27Rhw9tP!YW*MRs1SFODB)6cHe(} zP|LA1cUV%wF)Tr*U-#+(>lN+Gm=@|UEoiy_J(B442YlOr48n&603eImHHg5Vo4)RW zyAkBxJ&O#;91>RUZ^0CaT9$)#6NYWmw>Jha7F#@b<`!`+N8Hvc8blJ06F*80IbU7M zxv9H)LN@Ye&0%{|bB*ki#HoA%7f#2swsLE|r@}D>UCdfC_M=I08y&-QYpbQ74Jx}P zjrK|*Ple$UJ%%`O_yepJbogc#XE^i6x?Sg`7eQeR{VI*4#7O5}1ci z)>m}h%yHiQAXVaWP9J9Ay8xm^={uLPvz^%Q@t)wuHH?e|-6+d?>>j5t0hLw@3J zdY*&|E+G}4J8J1U=&Ska&f9}GXAe{AYwonP8(;dxa5J-4qU9~^l_N!7G}k6s6mE^# z&2_kUe%tG&CYfUUDzK?0(N81TXrf z?xRTV>F{*J-z#M$Y3wfr(@cInc$TqHnO^a1ymX8#qPy7m*5VZJjku1}**W9izeP&Y zsoQhO`(k@azG>>^+6;7RO~v64AQ$M0OG@39h8Ith| zDN(lJ%2Pe)fx`^(;IC)% zFNDTT-V332f}Y9d$SbH)AHV{)N5!~f59dXWXO~#QGG;c9d7wte{b3dFg>TyB^(^_a ze0Q~4oA4BRE~2R9F?mq;x(ly&OI~6qrt!Q>^MhLI@BGE=l1&{9Tf6Ryp%#J_@%`I` zX!Ewh<5oOd?mE2upMBTPdn)fgvpP5EjXmD9sJ@k|Aa?!*tqgN)r3&Xew6)b zLiu!YY2i+PGp%5Cr#*y9Z9o>nz$uNJuV>Mo5S*Xhul1{BWJQamc{`F4KJczNY5S0#56} zZ>{TpE`rwe^NU?ZT2jnsn$zDu%4~f4`pqB<^$tm9{dnpc+d~a81KWr48+t$P zRgF0;zyIvPb50F4FKZJmtPM{|KQOaTOx&UkTjaAaLMr7(Ld_$A9h zY11>0d!28x+*2^nx0Y$>%6q%FhAOrH#vMM;mW3uJsr4qiVg7FB#ub6W(dzx>=6pz5 zR-0Q2tkkCas=RT!d*2JRS<4zfa*L>om*zQDoD#k0$7$^HC8$I-=Gt4951owjvHpC> z+94geI0ln1*sueLR(xALYooz^h$pN*R0@+GlkM!SS3~(Nc5ZYU9sysHvwR z)eGe@9Q4C+-QHRG*!Yl9PK6-B4!f& z?DH!}uVLd?jw7=ey$wsXrs`Z#IVyLp$&n+5XRh3CxN-V)u^rh4Zt-|;%1C8fUe46I zrLO-&fzzi0y;`bUH@~+Kx7BbP3$?Qko){cH^36=`sdEKO(vd^3p=)3E{oH8E*4%SM z$THypHBLQN{!V*sc_{I9iq=I#*O=4rJ9!PFhG&cOjvYG@=c1^xdhZCuzsu;BkZua~ zo%M|Lpm_EDPiNgDWO~#&tYh4)!#&jt4xNc8Kk@n9{ZJ8I*|Q7Lx0CbjEDmzSN=RPN z(6@BY;|NN+tQK`Mf%eoY{bH0z7N@g%Y|3vgySj>snf`N0Wvo>)uPc7{eLWFPYJZkv zdyBoh=#zi>4foPPG;pqeR{N1*gfrX#EKZ)lk*_APk<7-dRjNM!6S{7!w-;fsrK?k3vDd$iM+chfm<>&pLu>Njw z>n^VV)x=Sz%v4}B8FIgX*OIW*Nnh;a+OtcyA`ZOtrU_a0ru}+f@Tc^E!+vzFC zi#vI9?Z;Qrrd_r)hFNa5YNoFe@q4F__aoY>HS9O2hH%cJB5<1@tq_q5?E0+ziec2> znaF&-o7C>!*2vY*%)|p2>x50zxg(7h>4Wf$b}fTjKBq*JR?XI=&q;6`jP}o1NKZL9 zvn=lczmnSUbxLqh7u&X&$(~-`VtQ`>`g^Qeio}fa+>7q`?5Mo33W3fYvlAOu%ChcX z;5~_i*OMo2=uhSp4Qi|w1vQ{ex{e2CkPJ!3&64!DBoICL!}B~e%;lJRe7wtjVO3Or`FeFYZ|%v|n=J-BPDl2$cu3A}$DLAl?iGx8!@q;4 zU(TU8UNVA9huiXJ-H<4g8glD6xv($!9y~{94^4@6M>Zky8~3(yUXQZP(Xg4}n_Pj; z#y=^sA#t-KiOV04D7X4w!EZ_JRyAn{Zzoyp3B*)bmks%=W#ZxP$DQM||x;fvD z$6?)9&6W+Btu6B6F0PIHYJy5_d)03`O?vF#s(VE8oAhU!e3|q+Wv-{pqh;@=itk4q z%-tparzyW>7W@(;C!fSJu!+tW`hlPQiIz48yN32gE}Tu^IQZq#Rq0q3dziJ*`r(HigcdO`8MmA=N%MOF9CJo$ zOP|WAFE{NsYA%n>@lBpOV%LQAN*r6e9{J_yh{*Gv@8|NYYD$C@b;c*|pOXhSWHT?; zo;M$`()dQynl7HPR}n0Vm7TAUR@`$E@84Lj`n%mr>^3A- za-9EO8+@e0z}4XEzUSgZPAxI@l+>bKz8K$^ z%E~%>?}kFrMEi-V`m$gud6dI%Y79zGE@kP#|@5?5~N8n$+m8yZLQn^u_SKm70AY z(%rI*PcCL{g~(Q(N+0u!o^$zZL*!?e#H`h~`rc zWi@WJ$z>8&AP>e5B_%j9EfVG>ZYy0k;Hr$qHn?gsxB{Ls>NPxrA55Suv@z8`S@4B^1_Z4lgop^Sw;#~PSbAah3O8{z z4758F`)a!I-eUj!weBI5&&k+}x{rY01;3fn9$Kc8T$HZ_bYM6X6j! 
zF>yL&W;OO3?-@JfkQ4U_Q!UeNA7EQj$1Bt9dDV!A4y@mSa0y8MjiBU?!J~-82W<~{oxY*ZV=2I%7Up`ophv6a+x_|H zB#Od2u8l?UmX2G=F30#(TjE{R@5*dni~B)2iNNhY_gSC{Y5Gg4>Wt6vM_%zWxhsP+ zoE{h@Z~H!d%(^d0zvKNrr6xX2VQ)AqB|_2X%a#{AX7XD3SYty#$)#VOM;$vb2>0pC zfvS7sv1A|Sb+&tDvtzbd$63Gj%SmEM`$f{7^@edSA7AD8+k`CcUJLwt8o=1fz&wD` z0Q~=d8t}(9ddqGyeNvMzLS+!gx8|R$b*I2Z=Mpl8&2-RcSa!f6$C>Mu;hBQXy9JDC zgSXR{svNu6e3Nyxb92-l=8QLG|KiilDN*v(;6)jBv|V^Tb>P#iE^TpOGgEz^L-xhV zrK`u(c##(31V`Fd-_p1yaiZn8GGOrOLuX6ix_bH~7X9u;ny;V2(vq*O2Ty&!+n3R_ zw{=qn-wt`iy=i!#qOc@d(m!zh^Hjx~{)z9sLl-LKQ11@~zHFTm4i5J?wu*mptor-p ztNgEZFV0Y{4{~l!+(|f-V};PQt9vWSL)nf`QA7lWYT2*3-_fe8)e(+X{>k04ak*EU zZP>L=X(pOCeDJ}PZgb(Vg;-DOWnsKZ{7rwA3g&D*tR{P=PIde_7Ya+8?u%-Z)#Kr{ zCcF$Ct8>hem!9Uq~Zy-1)Ukn!YreCpv#mHCg0(aZ!&6_l@V0z0z0r%asisp{nY| zm4|WkR{Yez{k1mYMg8ZpS2jm^kDg+#_#UcEP6Xo-7LLTm;6dxJe#++NH^CS%qmkA_PdQg18#%SkXCFYUq9Wol)%jV;+){7Ma@ zNkpagFsUHx3rE_nKk2fLSx-(i7RU`ynhBF@KYA-qO-P3Cn337=8X~}YYV9o~$8Ik0 zD1a}~DCL5J!ng9XFSTbM+}5|@KE>Mj<9`04n(`TKF4@;sMj7IzS2H><8?%+{TaO7J zF$&wa_A9gPqB~*rc{U-M8u3e%DI?9HHp3+lPhE0%oeg{vbeAF z{Q0w}AFA5NCH6Uu$nCKBqrSovD$oA7T@m_xB`spLZ1aKtl{TCL%;fo z4J6Zizr5n{wSC+dvbt}p$?r<0(2t+tFK~(O<1S9ZtJbPY)_Ze|Rbo&tX<^JY-@7Ax zoi ztg|-^rd;g^=T;Dr9Pn|am?ST{b53~N2VTDRu>sYFrZof3_e(e(x7qrS0aTTLie^yg z`lB2IVB!yiA0`68?w>H!b`9-j9e}+YV6&x{#>@2s(zF~G#c z1eih}fI((&Ff%YfTH9C|o1HU)IskzEsmt~2zI&tqz|TJ@z{W&h>e6KgDfV&bS7uS@ z*DW!C%f&75`Wb8MbG!5J;_g2GwQP+3c}YL4^q>3x75l$@|Aa67fpfve0K(bj zmB-D?F8r-M%-_}03WA}rdkTEe=DT!uao)HE8bG=P z4ny#?m%G{S8bkar1J~RQjUZSNf+f9zP5(xp`D|c_?QZ)Ybi%w0EFl=8!yMr4a@HDx z#UMB-0By577sNAjfxEBC?pzRk=AP?8)_>!H`G>!+Ba15#o)dC?MEomp6z$%b-hukqHDtbXb15`(NDE z8w9h8Tyrtp)vYoF!vQ-c7r-4r1Flf*2KWOzfD~W@m;n0F^Ewa!^>_d%z!z$9hkD(i z78DaV-~+Y&XFTgaaQ+Ns5Dd+{%ZDAHe_Jg0=wD-`VgisT zw}A`_#LYjXME|27qVt!;5YOm;)&LL)Y2feDDgCpada=zSfAvUxVrjTCyssAvSp9eU zW&O(fgY^?skF$RH$9UFB)-TX!95@55fbSn1`9WNv{#c2?zwng)MTT|=GYD{jY7&Hp zhIsuur~k5|9{#t*{?W6m3HLNS_7nT%{Fwb!wQyw{}NBpmK^ZiHvzsQ0DE@An-JB1_LIMH$|A{foW<h9b0ASCG^RRaGsiGtgG%E@kHLrmQIC;_EB*M^~U!fP0{OK#2P_Wngz7+#Lr1 zt2Te^Z%hXU{?Qg>0RVbGp_n!Jk2Z&30C*7yZOiKaXgdnUL?Ju?G-$d72Za1d588P9 z0U*#r?ZhA7nfB>Jv1yvYScT#h`&|Ioxx-*=7cdw*g%JHO0MO>kAOS%}7?cM9SXk^v zedwJr7A7$O`qkqP&frYqf5FB~N1*_|+r9??5>Wul-*9$g^={dXuS|bE_dMS7cMK-r zG4tPH;NdPRV-hgfJw5y%m0cqLM+9~UGui< zhnCmvn!AUm7s@*@C^#e(b2BV9EIGezk^Xx7+1P`Bx-n(D_&XZT|9MWnw6d~7ukDLdN zsFP%j$4OE4kM}xOP{k;(Lg1mdDqJ`DW$&ZOsko=z+eiNoy^a8p@;h9~~z_d*gxwKaI}G00a$EoP>!gzee`8X58Il=XWDl7 z7(o0fN+CUYAQnN%A4dF^EMlhbpXaYS7H3#S$3U!uF+SqCcRmY67GHwO3Ux>*#DzYYS9Q32XT&h?VeulmfGe3 z3-yY2=`_*LQ5=tX`*}YBz~!26s|ala(EFkp%f?obiRBBmL$CFhI!RiKpvU?eA(%N%#>(D zP5r$xuX|Cvm;P-fqIf}DJSXDELr{$Ya4bXBogcuzVMw+yvQ9 zN^h5#aYhl%ZWOCR%Clmko$xb1E6Kc5tQ$Uc^j;f0Z>Ze`*9ynQ9S8_6P^q}dXR|$a0Zf0b_DTkIFeoo?w1~L z6V{7JK=Q0vB{WtP7J||pTkbT6m1wE-U%f^z>t36pnv#yBX8?wC?Rd}%g{NhlX8(6Arr+(` zs@~50U^`W=cMQi*IE>YxB&K53sEe~&om{YSF&*kdym_Rb%hC6u4$jwP0Y zuPedWu1K-rwUt!%u3W7o#Wk*FEt_<19W#1IZNmv5G6S;u{|N)VG=0#D@~*QDezLWS z!W&nv;@b=(`!tz9kKnRQAx|t!8nxt3lR0bvs>Qyica0A7y;*os#K#So&8Wl?_o17) zW||g>0LFOX*J?-P!N#V>hQXkD&)2UTJ}!OFzgZj}@XK7i;+L7Uz=x&xU3V>jVEn%= z2YVNRXXhTxhm7aMgmz`&M9bAvEkaFRe)6j+1#h6A);;20ZnTofM7E|XIZ!5#C8f33 zMfKI=?&VAXx{-(&`f)0vmncH-x20X8R>rNj?!%s-RCZ6ICA{dO!!#Wf8O8%9c1`pq z_>l*LdD*cSGrG5~jY5j7HMck`aZ%l)DgDdukWGi+F552vbT5u@AqtK6kyiF&>NnD; zD;bX|p($8BGPjlqRX1N{)bH>};U_=eA?Otu92x%F#V>dxye~B{AZWb!K)tYP*I;vp zR~y0rSJmvp01P7ULq>6j0enx0;$5l~V*rE1VjSrR?)Q_&%-hF$TGzDZk)Roj?&Ss0 z-%%@fxVP--0wxR~R}4V>8wKfXBWiq?O*uVTc`c*^yvG!YUF|_4ex!7ROUoDx#}; z_(Cg^14HO`{^sa-?~CJ3*4FYlg~9sVkLGi((g&j??I~Ov_dg=M7yuvEn=;nP0E8Ao zwldm?NudmYw^@H$2D~Aei6RG8H+@paV8ds`pJI`b`7$8-Ii|4FS&Nd8M$TAMS>_eM 
zu=gCe^SQk45&C8r(;EK2VK}=K)Nvg0XlWR#&EP_1F4#sCp2u5Jgv#%YZHk7hu3W1d zoeeLmH#@ZIiGnH5g|tzAeaP{ovbiwjE^ zCyZRH#$00lIq<)>M&^AW?|Ep?c~aSY5PPnU=!)crC1%MvIqC{+OMUuKz4KV zpWS%@Mko`HUK6z!A(LEep4c)kigRm!&z= zhuc@ZAw{S+VD0GpslvC?o64OqZoOjPHu56KLK$}ML-DooPuWK!<*21STvIaT!5J~v zy&8Qd(v#}L9)piaJEr50N7ptk!#mN%CK^5Sb|>E%FT6Y0s$(i8*|+m*R%J_22}dcb zWdPs&0NAAD3CLYfj6s`T9+GYcx#xeuxM|Y0GNIEBWA&umukR-EVRb;iz}YEJIzIzo zJ2Q(&4pWX-Ne;dt^YDbjJSyB%G)w2^y2Od;jo-8u?T)z%L**+K%X6O;rC$SGLpUEq zyu&&J=(ad!Os(vcFtG(C+@3GsMHHpLoZ^T|(&+ui=Fc4$b6b;VGA+~1^`uQ32Q?;@ zOzhG;hs!m%#V-thZ!}-zdX9;bpC{JIwWm2=0OJcj)6Bq=Ta_Jt`@lSM#QlM8=!Ae8 z0+~Bs6{&=lgRg3jg`cc((lXbwd%0L|f7b+-SgBn8LSwds_{GR~Mgo@108m!U7?m@F zy0+wUT$>myfp(GPWZ&y7I;IkNXr4IU$N*y2^p07LY2buVTGsPb#Xn=+ioLgtzE0dT zD$faykQK={e`MJs9>rr|xl;>gUD(9$Q{Pb|C$>LdWB_R@H-$U$j>46}n-$HWH%b?6 zF&-Pk22m4=FdUU&vaOFVhP883VW5jVs}J$Pb1h3UTBGHLr%Hz`8|^O1fW%M0I6 zVNX${3C${nAZ|HJG*k^@?aRy}4&*$!vXeh_xk5!lYiJ#})ak=It5~!75P@(Yh#2JoFNg8(AfV0IQ-9lRuOdTUB1Nc;fwi zd}-ZWzJ|-El8;b~Qfv&aPb^nnUudfk4!SEhBZ@jbhKoW5F{z#P!i%sR%^dcmW<7YZM3KYB?ajxc z4)x29lflNjHz|`pHa9|tql}(l-3RHXK*x518hE}8bnD6IAE7xu zLpg+|>I%(1QoB%Z;o50PHNIbV`9*)Rsrr4J3V=d6iu`3v%B!rb0u+i>KdkH2NDt0t#$1$0TBn!ECxPj3bu2dy7$BEn`SOoj-O^^sm#`8#Q)ECE zt;u_2q3FP+qSBeInJ*h*Of6rFCZ+bN2s|>_X4@D$|A4u66z2nrkCYtMjd<*nuL&Yc zd=Q*#$6x}qv#%OKd?7nUIqexdM>6{cImf1ZxhA~){CW%Twwh^EzMju{r$EEIeU|xp zZWiH>_nk!e;i9u0vOCMg#>mJH#}+s8%pHOlO0!jPt?7|fkF!a#JH=^eUK^`@E!)*m zMWj8Dq-QMt!%uct*TN_Ki8qJev&&8s9ZX|yscsfv-b{a}Ek&j`R@bxO2SGKp!(p^? zh5;ny3MVhAw+exQU0Y6duSrdlEwWsBUK%Zu48SkpRjHY3)39=1`_27&bq0_;tu$OT6e;-w zeypj!X;4KY?_>}I5C;dk;gW;C>*AB;S{BvKF&)3&F#lLdAl78Wz>k9__SDUeejJGb zq|-!6i+xjwF1XAfTpDFBhD=#L~WMDNq9&Ea-^&>YOUv7A9 zXk}?d4j>6`kU&=1J2T4>a0RR+y%S7)M)aCy8-(v zp%d13&YGWbDfG72Y!jtMPUEWM=-k7RhdQa5Ggt!Kr}Z6^j;+uXH?W_p>W`RhhIP%Jd279zoOcLK2zHX|8p|H^605+MU5N^@!`FmHV#L z3O`Mks+G|!THsSjCUSTo`PU+>oChDjL@zyZzPWU@s(*GUdwFV1GFedC-N<2P_pp3+-4==4j zp1u&NI@D}NS?!wme*dW%sYy?(Lwkn^*^U6_@M71r;80DPV3(TrbySugA3FhS27WBT zN>CoHq|+6&;%H1W%kVkNswS#%GA+=pbbb-{0(qtcB^IPO;bReAwlH!3Qyso|K1=n> zf>_#mmu8*a@WA0GzzjhI8w`d%q3KZUGM<{xo{uQOc$&OwI)LMhJm#GhL!N$j^xL*( zaWSa8YSEe)hFAXC^Yi;?aM$hr!+c%L-&@(igiX3wXE#wCei$4!rVb>Ot3UGTfDR68 zdB`YSO)x%<&I^h=WEkc;ahXQ19m8`f7m|07#uoOy`6>-VJgwxT%RznuJ#&^6dZJGW z!24X+mO4!O{<@HmS^9R?uBBx}-E_Hot^=Ra=>95rbZy=$5BChC{GsEsZ(3bT|NFB8 zbsBEc1uxmFa2@C;xB(Rb97$*vLASPsa&`_vJOj{eHiPmR`&$g)_ZcXqmfaGj{i4dz z`JJJQg>S4tWLV@RqB?tbyMJ`{-3n>tmLg3FO-4rH58~V_li$uFAR7BMN?(EYR0iM- znpa@*dh(ep4s`SgfvKbn@FKsXm$rXXb?4TLT=KNHLwwGlRNY9FZeee0HLM4jjF+Gv z1{pw)_CUfl3an=_BObSxE(PK`;gaC&O}&97DrJDo)=JenfA$JJ*zdY2NZvb=mW$GhA{Suw^*xYj(L$(8cu{vTKl-Dar8Ld4<`el6l;GBQ#iH!4T))qs*6~Z> z+b5U40Y3=hSkav#ER3XQ`F*&{nQf4+PCL4Dmo6UpuC>}eWd#nyUZUbUBV-wX#qip) z6*;@x!l#`9jBDF9`A;7oT|)yX4pQIq;gg8HYm?(&n}&iDVy^OvQ&yHFs3$tC*d3iQ z!yS#0Ib=By%4=(!c3Pte+L-Jfg48Ghi2Uq)e5blRQW}HkcBo4$D(}sppQPziSCW_M z^8FR_+RlljywpA?blU{(bI_s+mm!l?8}VgjFEY^{#?kCo0C`2=bDHT+PV*_sF%q(^ zMa>RGP~Q!=IiCP8Q`AUf%ao<5&Tz=dD8@u?VlR33Q6O=Fl2Z5EE#e`)lQFbG4si`{vf8IPC(T4XK0p7zS|l zQFt3&e(W()1mn|#<80O;SxrR14|ox@KnrTfsv{Mdiu2|LXb32L1i{j6Q7=C`LTk&T z&Vdyw6P2PoT25~z`^h;EznDhzPJzb9rmi!q&DzrhXS)%iSQBdC**vmG4yiI8&lkLs z(t4m-M=7y`H>H01)JK9U?flN&X1(pBS_!p>)a|xp3n{UkTal@uv+C6`-(mcV`imBn zYtX`u=1Ti)JzyV)RWpF-W+Teofo|=wt{uC93t$?xa^nG2xECgN86((+uSie=5SXNWSf%~5B-wJ>*t7R+FA?o4` zJ_#;?LD~~2b1AJPS-vEr5Ij8!6KF$>yWMseI%eBd&pYvl&kREq+y z{KHLyicxUOK?X3B+1PWRjAkvL>qYYUXe)}z=0_d|yXi36wQ6cfG5OdcdgAOHnYFk+ zCe)dxNsWUmP?5{>O1Rp`#VG$+--*<+610A!zU3p^kdmD2z14xkBDP1NAVaD3B zh=T9~%|ua?c48v(AnlYVrG9oL0joBfF+MkgZV!jWP2i$y{K74URj}qHJvZ`~ms4M` 
z7e{=R+#9PXnvdZy48q|hp(eH0CdvboN$2RKGTSvl;iuG#C_HQc#UB+uaHV(5nJcw} zSabhFES0?*#|NG9j)8;u2dQ(3#7mMA6q)Rf*%`DTO$~IUI?j~jk3bH0UnXZ9xAd*(1BV|z`4)C1W0l^=_*$%Q5x<;f)bI%dQ zYs2^BM<0D_kI;Y)(+y_h2Tt?Wt!Idzp0Iq{xOap+=4eT9I^g8imaqGI$n{fNO`&Dx zh5D+G%V@dbyOfTx!3ihFFMYl;rg8~(m-jROS~1zVM^|2i?tOibanovGJgMaXabu(v z{DhfiObGXE(wVZ!a&4_#*dv1;6qPjHn-Q>E&1M3N(~=ET%a&qH-7g6_eaeSo zH|U1ZSj>ueS4VBa&P)8l}Q!m7=*^3aFiT9n>|W*_P?a%B=Na27qG zjoe2w8VTm+YR8MF-yxTu8^?r_(n?Zi{M>q~@cfv{ZRfY@<#I+qA2_>xzzKU9Pl9#e z_xWTYxmrDNF~ogX9A%}w(~y$bo906{bij2X#uv%FZ4!=WTPcLnY7)E@|^5=@131fE^f3ma}H z^0s-%V9C#KjmV63s?O_nNbJR> z5G28>i75sU8~i*R4;=$oE3p#732u~&)cuVbSbQ59bAF3k^hx>ZTmE} zBv~@PpywkdH7LyTVOn)NLG@0k(nE!iw~}#h7H?nw@%|Rv-F5E~=2_%~){I>lRm~Yn z7P^QDM1Cw53MDW|)(+w=I6oA@B;?2m)s*AGutWztC>fa&8}7_iNocKUJ~%A{4N9)f zYpUc&aXkOx2o`XwT*@oG=$^tO4lq6b0k;jTFb4P)ct#Bg?$-Jk6qRH(o53Q5NR(N7eOw+9MA)QOA1B z@qAw}{tH9oVRA{LPW`U+4aLP?>^Z%OD|$QsFL zYDTw%WVO$B=p>eD2d^tQn~_M+q2B+VJ@eTrtrdkApxMeFa^AHaI0oUGnT5%QHkekkmjdd`9NqlGi zQEF%`{n!M`VD>#xfUW_WIPS#P;N#R#M3JCJo}JkYU=M+HequA)eMDm~Tnq+Gez78j zbDP-JeR6NS%G28s0-fp!(ink-;a(UM;Rx356XKf6=h8L=TP=PZnxva9K^aShD?|>% z4qydIN^WF$+6&A_S^!qM=?oZ7A_4{|@>}frr)@#F)M;%64Q@lTqAyX{of+b|=SD&G zNUP^>SMV}5f6QS>8P2_c;0cEAn}ji;opkPGG|ZGf46BCje#p3SxeZl{RkE%em3cNh4LWtQQdQz2 z)iL(%FoCrexo*M8A&1hXIpJ8seysf`UHcc9vGzKGbS)T4_43XROH3K=(wa!PQ|=Rn z?s~3wg6_AMINv6M<3xB8C&k-wu;z$aq_9`3@EkR*G8%y@gG>Yd^j8dgG`CqM%O%3PdApM@>iFY%UNUe}wkHW-H9gXh(Cr!2Nj z^ryd`u{wa2qsS#8yz!#o^BH7^invc$NG%zefY=+X0ysE~gCSdb(p_}5p&AhlD+y>0 zYDEqyl~>UFx|f4}%F>IZ;qPWaO?>rNWNHnCfqB`b3|C)n`>!8>UiVIFNk#TIT?yZwsePp>Mg9`AM#R;pZ z)_pV-)zfkW&bNpZZoZJO1iDb{$tqE;D0A*(reJO9d`2bcM&3+q)_8)cpIPZdbA2(H zMe=}ETxs7 z=5`=^rcwyHetBea8rWD#!Lu>|U%k_jlaaiXiI?W!%2onQs(+0Gzx`)x-C$uwKCkcW%NtK@_Pq%~_rRo}rZXAi10M zs6$*ZPH)8i=HtrAWUGk^Xal;988*xy(TmBvdoW0&PtE7aBIS|dK*XtS7)^4gg0LU4 z93HrB=c6r`szN_ZLxMW9XijioH(A3vm02xsUjB zl*;I}9r!z6jr zHHyN*<_!HgWscmNt}8@pQQ5EoE2*3-J$Ny44OL>Z1Ih1z>D#vNotA7G8tWJ%937^e zFT!e{xTH6l&vbtqO_SSs3fHKN>WQWlk_oNPK?CU8ZY6%2WfY5;tz^X*6Wd#BBZx_n ztOFFsjg}@01Ud4KB7#@FU!}8ke}#IBUAU8Z4?(25X{5${An`m*EYIgQj|$mpX>mK^ z%R(!~hC8sg{~6Z24qVwjtP(@#M6{vPB#%?lD@(EGs;MhIeYgbZzc$ztY9l;J-5DK= zNKTAZw=;JOh_5(9{v@fIgAUf&!S!&zSeTX=Y|~5Zo@NCzW?LufHyo+W_-giEUXJ<5 zJ)nD`4;>B;7lGMrNXi|s@6rf#kFeTtC-Q@&%n%uzb%T7A*()?swu$Q5<>bLF-Yrkg z5bK;d2IDo8D9J1X`_2Ks=Q@yk2nbNR4Swu31=q$qq0&hZ!qYj`wl!$}pgE=YG<6^z zD@?ZHrX8TbpHOla$sg{|ssKNrq+Pjm#yo?)y%1yD<8XF-!C9TE`GgwY6Us5$k87{f z&M$N!bDY*g&d9alCPLd=>+P3eRQq0SSSks;Om0!zc&g?-L9cuazqKJLPaWvwT8W}! za9v?m>FbqzG`r!%Xk9t7PMS)mq#77HS(|19u$c9f$EiFHE#P z!F?8oP8UN*k224|CVfby%dAvpv!O$&=Y}GcXv{+db($pABn{3&JNa=P#zk|aws!P$ z>Yc-EcTPaJvsfdl<2eetOG%9`(3ioD>DCqYE5`77H?k-Eq#>X)L3m#LaP3uI6zERj zBKfyaR@)s$%L*24y|pV7YErl7U8uy2CD*$P-#^HEV3wA2t^C}*Z!Mi|_d)OD&&WD@ z=d?8wRWq8b!V9KutX1#imw>3*mAy1Y95c@7K&J7n? 
zdxfmM-}iZ+=QnNLmXR3yMPA>0!5z`{HxJCZMV~qR=w=56-|vE>Ac#*3HMP++cR_UL z-t=Mmt0|AQPx@&}0g>KbvoKL;L8ucWUOm6dx*S3O`Aw2H&i;6!(Ic>}#V}v1KgWSD z)o}j#3+QYDbrxT6cN2Pbrm?-cps$eGQg9XL6COwaT3>)a-J5V&w^q!FAX>2h9xK1a zfZq)OsgGfn#tTc(mb%Ficb$cEsBV21?Ab0hh_Rzz6Y24PKr`fP0AOu{s;QYHk4IVg zeoUK_k$DSYS&V2|^J(WJNOcPtu#HiijIfmmca{o&?z(D}OjL-tJDw3|%HZkm`&NKn z`dnTvHKLHNhO|ZVQZwu*Trs;1u}#?bF<^)U*<3_?T6G)ISU9l5!|WLnJDgJ?jdZXb zE+1&w_+DHe*umgiLg9Vwe7*UKmpXg46ktwCuFHNCt$uIIm(b1F67ed`5%%*Jp}qHm zAlh(^o4=8=F3j;6@~+@q1?y6)d)UgPv60>VDj_YJJT|l1LBh~@lQOY2Ut9J7$_B7i zZhAk_X4DbK`05n&nUY1ODX3|HYK#?RG+yR~!?m=K6S@to13`5KO|y8VY191^p=gcs z0e|~VnOLMPtP|vjO*lW%(ieDbq>Keu zv4iV?l0-z$(?%J-aE>7T9O#RZIS-&+B2g0@{cEFXK^X(*(2EqPTahOjns3>EQVBvE z@YK_YCfIR}x9$bSQ+rQ%_&yep*Tr4~uje{Rn4@jP+&JQLS4IdaJLTl$6eQ2~~uWLZu+jxc_g84bI zREsw(Psw)dINB_ur5<68cLZDpRMDJ3=qU@~*!nA-!)d&eFFSOF#V3QFSAQ@+xNz$a zqaFu4?Stwq;`3B1767>6GC>C^hhmA=B4=b1h4-L!1EO_+D{`YO*51V~!2}7%&P8Y? zx2Gv8a)tIV=SV>tUO~5uTDXdikxUDS=I=%JO$enjy6gl18vNySbVF~a-|S{^Vkax; zsSYdZ(Rf59&udGFcs<5)R3@ZlDQ1~K(S(pXrK>Pz39D_o%Oke|?vfDog?{kAVBM!! zwDV1{GXj=G{(Ep&wkv+c^tP$sbuaSuZpn$`lK_WwZ(*84zJr)K3;)aCc!@ACy&m=nzGYMjebX5p7PkEBr}X`@{A zRUz{3nFn-+vZc8WCR6P(nv$3;S_F^=X_C{Pr*KhfDMUefbA>w}M1{F#FZv{IP<1kB z>7qq&s5phP5^eJB(>b2NT+u=$5&vOgV=W-y=VF=T-HPy;i*hkR)r^k1r&DZLfFGbq zMq>qw!%!k`gQLK0$4U(8zJ<=7eg)M$YJNYJH97uT)g*aI*`&=SD5Me^G0 z-PJ!2#lElWUCR2{V#f08qFR?CCZBUfOW%x70|+7^Tp1!reavDUc={Nc zluXm<9gPMl&e8XyT8PE;RRz0HADE`ZcOWUs`W(PU()L~@uRTAfKV4?{_9 z!Qu?(mZl%ymqPRUFnA=}#m zMJsC2Sj4?`$Ha!mXsqH6IbO39azm%xM@7Ubx=n21(RKk`qg0qbL7YX`5z{}X!_2vo zv5pxXp>b#~Y&HJQeD4@jodj2aO2-@9qiGl8=2c6l`S#UyfDE~hhAVrGUtcDpt-&Fr`{^n9izb4K}QF9?25Y?@Cb%iP%nfR6#Ojb;)`<1d>0)=$2!D>Z#aN zuo2?q92__!57p<8_M?vOS)9{(q9Ku`>&&llVV;#uxXe`L|0V(~h zaJatOLm(9{8xzP|*)J((NM0I+@IGM7^77%Re1Zh;5J_;XgukB(AnUVWHY->)L?F2fd~Lvl{ap_3mLUP~GjuSq-B1^5syXupTqo}4qpFr*sb26 z=!NxZt5~zUD`-n`_P9lfl-1e?B3H2_IW2W2}*O4_Dj5jyB2qJV;Lkh7Kd- z0KjD@lLRs1+{OuRMJV7#i@JTdEp)u#)PQs8)6`lE2+-oxS;-&98~^PMD)^Jy3Lg4gf2b zlt%rR26)i_m5+#vbYoOop$U8@0CsL@=Wv5Zy1D`BOX~tf=S6W$kp;Rjfz%3g(2HZ~ zdXWy_pUwl;2(e>xsJCPxuGOBPi0h(eip+;a>mUox$`4XJK?bE&Z;94;K0a~K$H9PU1ZOqIJyxmC4j#s48$Rem8YKwxI$$3Q% zTA=xn+xKHj6egSO!#nZ`N#gU4ciBC*yiVp=Rww5%J`Fl7GYQ-`;7IunRsn(|TZ$L@ zUyM@{Q+$~QBa-eAs@>G-sgU&t5Ts>1lt*65J(3b6p?X8kNFojXDi<bOiSl@ye!l1fPkreVcCRlmxa-@2E=Q z-XV2(0Xn?aP0aBzwz8N6n7`wfw| z-w*z9I5=nufOCFe`8o}d&&itT%UFJ`4V>CHBdvnmqSfeb1)vZDZ1HWseWK&gUK*TI z{JfcX`;^$&$;3jycI7BW6PkeXz~Lz zpJs;r0Kek{VM^Gv6u$)mfMkdeso_PJz3zFyIxqD>&WUcx3y*}J?pgmh#GZ5HQU8VP z3IF-O1Lo#%6jq<`qKv7h`|~Pw7Sz_oj9~}!fiJl8L_Buh#IKcJ4U$rl!EB74-pJyU2e_ae#zb44E0VK|y$dl90T!(PXL74;9!cj!_GvawQ!HR&7cXt0CiUUo zGR8gSo)O{UQvkhvg&iBLjv-l(z0b#HcjTO@h*TcKJJ^Y4QCt1-KX9_e zhB|xfFiKEJtsJ@;9c7u2f2erRv$l>3jwt37vFzj|*b+D%(r zTb0t1@01$!PS1DSrst)lKlLYctnesyTK?BFS3NkdO=SkHFHj#sXtU-radI#(5!(!l zr0RQv9QnauYz=B(&iZE{5cLQ2AqP*{7%_GbOmNgdxXNKI%8iGWM<3s|-yAH=3I!XQ z_IQ|dp?1@Kqvw8_E9}3Ewm;L}uEEa8cxls0FAjZ}Ag>ZRBD9(6n)l+`krc?T2HOTd zsZ|sR(*KG0cmk-*ym;wCrrh7?0D9`)?o*0?>z&tDe~h>t(3mijXh&ShiKHdd4akQz zekfh(K2v}0-t!IU$%$=4Tk1HY;T?@}UA>mjk7(Hg)JN8vCR+X6H0P%nl^N@yX5WZt z-D7CK?qyz^$n@nr-D2thY4T;Xo6L0Nh_;Nf9CK~b@^u@c_)8%7GcvptWP%hG!y`&T z+dJ(Y_G+FXN|%dUc|<4@<;h4!kvhx%YA8aw9J8167Eg<8hM=SJr(4JlMQ$U)ZSw9M z+dlGPtC=4ib_4KasTJ(G3pVA;`8gb%Tf%yk{A@liYmRNoTA+>|@z|}Z7PF2_w&x`h128xAk*wF8 z-7mGLSVquI27pFGpfmMB@{Xa^B_G6#RQ1G6Dt=Bd`-!xCifYD7sk;xw$_r`jS@+jJ zl2sEra}w^;(aXJoO-)Svlqr)&$pJ8(C= zKPdgso}|A|DA(S+xr?|ATA?k$@S5l-$`;f|K{sYyhlm1}sZ^1dG|S$^3YX)vu{B7g zd^vrEkOnV|cRES7eQ5kEf@YKrnKk~(wnh>%r+)rm+nZ8fc5X0y$hCQ~{(EMchpKBo z#U5R6Zy^^;a(sNUBpx0BS>kp?$NeA@J_^~8THo`o(5P+ph@IV?d*f?|uSdB*Se)Hg 
z{da5JfA9rD_nOzNN%j3<^6^q^J2RbXrL)Fcu{L@A@=@`*(>_kt(B^#U%w9S~foH}N=xe<(a!gI(hLuwunrqN(ef zP5wUL6B@=i9ief@gbh3m^WY{p@+0IjaJuo#h8!CQoiu9upEje559-bE8HiMigPef5 z?L4rZ{SCDD{|le6A@=Lw@R^>6Q~YHi>PN$_QiHIm+YZ~|%h^Rlv;x31|6z5>IqCC! z?k~_$IId#{OuBYq4frSK4;f?OZG&h=6>B)uqIGQVIO{_9QDk3kzGi&tF460Q$Vi*z zOxcPK9PWqwt64|e>9zB!OiqM=h+1?us=5-kx3aC!Xc<4Fv7&=$8;m;opa; z8Hybbr75|cw52RMDCo%)M|5B};xp^}09#kk;h^^zZb+R2zhN6IN~P>ZZKneV!Q5`} zNoq0;qqG2#t^J=ydr57)QE3a&xL&o4AvVL#RAXM+67hM#5^xR^#z^g|tVUjYJbjcY zL#M}8%mO*e>F)Jcu+(QRCFyjsX3l6;>T^H>Fb zw5T7S5nS`EvWex{M#tCL$0%|W~h%BFpVVTiOX`LYL3+{9iZL~`rTZfiZv6wA%eOICZe>uBXw+20qTvBw@6eqx( z0CeY-3Y5QhAhwq)S~5aDT#2n4W*Y{*3B`7g%v-o*l%oNi2-|R?FSKq`->_1 zFmdLb`>Yk>ClQ-KzU7fb!Z$FreL7xX$_i9+Iyj~*lDtryP2LE1N|e+rk#W@U#PUb5 zdn(%z0CC>|c_I^ws{+zmtC0+BnX@jZ`eX=tTGbKUJY1*uH*^eU&MdnRG0~AyEOZ3( z&}{|GS3AcH3xeqx6j$uWS~R5G?+UChp;)00kQKvn$c8CnP;(g?SMdE?bdHTLh2Bo> z{&w=&8Ol!H-ht8)0p@opZZ)E)h{sx!@HR=;4$37L7)n4kK;@P!!tdz>_7Al*^?{$-9xJ z5Y)?gQo@C2fsh>Vqaz|y2Hhe?px0fEf-{A^0e&l-aClkIv3VW2@TU>I7>!f=K7&a! zOwe$lCvAH^OA<((A_BTy;{}m`RF8l}bpdbyp~`^ZRd`$`Hb$rAodQ`jsXcfSz+|k3 z7T=AJkfOY@3G^zMr^AT-WTru1|EDG=+$YYcq?Kp{B+ncrVAB&Fyc*GKR0%JGZSx2f z-(^)Qd$V<0qKfp;>x$3=-Cc2)7ji2V#3goo)Q=npHipNvXuC)_>=ycqPIuzJBXc3g zAI1Nu_gVBzz$)0=7n^e?v>raly%*^bR{UVvl+7SLwfcpTR#}2 z*6<@ON8$*zjou7P61nDn_AaAI>Wq$G+s5uKazo^6SIuqQ zzXk0&Yrv|+2ch_V001;Pf<9ZHtn8t8ZdM91*=|FC=x*|-E6?XCo}hjV1q?H%}=xGfmOc{shV8d}2yXNq-p#oi^%Fjgb@wZz9T$vG6Pr9*TvXQZai)GlBjTO?FC)LxFouF1Y5(GYtBWZ&ls> z6@tJ&=CzI8zPY|Iwkon=7ttM{mHwLLQG6>5Ujn%d3(qLtFchR2sXW1Dq;-MW-aED( zXmY_}MZuVT!dRvM4raRh2z8ko-SGVmy50FfayVkqex|)>9sHycG3xw*>|ZlJG5uuB zyG_pyqiB;Zh|H#m_)_#*0)VC`)@ttwboQ);E`Rh5!dFOhbl+luK&S8pcK~)MgX7x8 za3;1BA@HzRIgQI%9%FB}XC#KR?E8Edo;<-pt!iJ8bZ`_+Z+_5!FU65s8T>J-(tjjP z2-qeo9Z+tf35ea^n{Cn+ly9&I{(dmYNkb5JhI@O0r|e)V&_V?hlWVXgPCUq61}2IL z)KJxaH7xZ?F@R~m;KmkQe#<|2XM@q3F#~C?Vd_2OPo$-FXZ8<3PdHXlf6nmFa2Xz? 
zl(olBKizWAXQ{8=E0cD8*&81j9 zS{1fA(vU$}yBpYXpFEcjX!GE_BS=>^w)a64XW2|~^B>6Bg&My6PF21A<^%AHjPk?; z3%FZ(@Ky;#h68RoCF-`(pH~3zt@g&}{ouTuJ?huU+EScTF)(lEx5SxQ_1|;O27@ce zM>_+)U+V_iDR^8iZKHAd^%J!SV>i&1npaL>-e?!%F)0@n!bjV!>6Y`+@k$yuCLItI zR68_@WJZfq(9$OCng=QnX|iy`gyDe7HQ~}B?qPnwQ?6f7EDryYW(h^~&lj$#dnzo%UPzW(H=z&k_1f#1KFv;4wRn|Iw95_I>M`00QJbP7H8JkMYzo#jHb0UmUR^Ub_!R4=&R!fh0y}c zd&n&ssA>W+7ikVCJSyF{p$v-YNQ^`-9u1fM{tNKpNNWWtF|X$4f2(nQbE(t1m&9|4 z^{jTluNZ^EP=SDK-RF1ry?Nr#PpFTnSGG5Lz0~q>I8r;=ETgvVhF9$AM>S(v(Ew7bkHk}`s6hk%o;P((!DMCNDam-Q4 zOtgYy%Xk@KPDkT)7SZ9p8bX`ckdZfXDa1cP-u)w<^iup|;e{5K?O)7H>+ZW#m&J>n z*{)*%<<_UH7S$u+I-Aexdr07Hyqsi>GCM>Y(IG`o{5~`T(N|D&gn^{a-j!(nM+E^} z2Q%`5rQAA|@zf#7YFx2Q`f7WZ7+;1mlz3Atww{v?L_@h5@ELhUOG13G_IIK0McErW zd3Y;P4-94|uKoZVNUPGhyrKhOKEDg}0a3h_J02-`rs&NUkCPs~T(V?%M>r4r<+GGTBDfOL&c)d-@bm01MGl+3Ay4xoUkrg@{p z2ToboW~4;HodM?UZUSWu$Lu6a;lKG9O25nEc#Dv*R2X9&^&bpJ3@cr(CnzK9oE=^a z@4{E=3|U4eQ15d!Q=)Cd^Tc!q3fimrewwq294BCf$cyS_K(`&upqj>j-w?S#N`l<( zqc=PA%gTABD6K_j`$YPHRT(lE8@k7yelkBkra5HUR(y>ML=X3>Z{Hr_up`sv?(L0d zvvv(Tugl%(cs4F`4!4EH4}Kt9tLSncs1U z^-l+6<#wwf)fscYQr>#G#e(%&{cSyHbhBitSp!9H4sn&MDo3sZo5aAyfJq3CMsM#`O?0+L$^Vaw}=$u@G@B31$xScT?)(tm)&&FG&|i0vVt5uvvt zo@b8j90c4PQ4t7Q%_2vXs(eG4ZUb-8h^Y3Hk7PjJ3Zg9phN%wX5|nomp1ejr1w7Z31%+6u9R;fv_p#U+3Zb7 z>|Mlp?iyloY*4fol`5Vz6~Dh4o?EZ72SE>884H4}>QAGwH-WaolEn)GCvgh2Wx{z7 z3aN}3w(YnJ$gLNg$MhuX*(;QtQ*3=i+=aJ88H&yl_@@)BOI-_BhQ)FdD*kjse@z9& zv7!En5OH{>=pLVLrUwnrv+@E5nPow-euW{&i#`SjaT>Z}?#^B83`#?&a%*uZwCj(T zqD>GPINoyr9sL8`Gc^?uOv@#^1AFq3rhzfmx>RO$)x2+cU1Z6PH6HuU`uy+!hf3tc znNft$>S5F|&auS2fxU7bHrwFwplrq`wE`OG{*W6OlWK$WWoP25f~V|RU~n89%pVB{(3%2%bcZCP>Z zv?gsyEHgFC5ZH1!cx=;CQD2?CAu*M>oNQDMve#~R^s zWQCU(7K>|#uU$lF9qM3OtLmC)6}m=2OxG<1jIS6p1MZ!8Qxpv4xfE;sG8R{%&Op9L zuTbnp{ghX*i*f;;Bysj@XD=W6gvWQ4v+7=tx(IRPu68NBAHd2L+zYN?ya_|IEGlv- z+osqiO(-9}4Y-TLm^&=(>`mn_VH13r@I2W@(VIH(o?{_e^&IB3>739|Wm8D~C9z|Wy^JLWx5P|jh3$BB zd*BE>lE}*pQLvljn|mm;)su)*djeZ3@8x1N#J#ijgXo)~`4B1|4R0&5=)+N7zl6># zzG|Ii18!vYs;<%xe+B`jqH)LgcY(ir$6H>VU1_?~%qJN4uaCM2#fKxGWuIpc!;@_e zj=&hrv(^nrjUZnz(-*BlLd6}unV`yqr7Bd$uUYN^L!hS%0J1GRn3jg^FR~e`xiCA2 zxe?R&M~2VV4v`PIjbCw@73`u^qHbSsYx(jKZN`|;ZPOy~S z0c%qZ?Re=LtuGT~&BUk--80y$(Y#K!-6#Se!}M4$yB_4o$@lqzYGxaM=?Brq_@FJH z-jl;01dX43h2Cg;_ZpC*>BN6FNjW2P;P9-_x+HpffE3cZ_+IL`v{heoc8Xmvl(ZJ-6qoFwVX;z>*T3d{5MC=q!k_3*Np2;TeMpW%Z&f43Qjac}jL2$_U zXb!!C!nGx$Z`ggndp7$MM5>(9t!_w`kIZCMfPpfVn+Xg|x>X1vjlBjsPz$^%(KSMM zxp6(vPFfR3Y086npfru!Zsv^obr96EaIJa*?oefsZIras3F{_p?SHiuA~%)~P(nA! zdB#zVdF>qiu*gUa3EJ|qo}Lz53bJqNd%0@L;*b=$K@#$Y*|u~iZX#Ke=bm3#?#5d@ghmqFo z<#1eEe0=D6l6{nV>-(_H$nfJmPd`~_GhRWnaojISCPeVtUpbD z?%ho#-eO8xTUb~z6%2sX0SHjL+Hdn8qvbHN5X2wCH zMe@QeaMr#>2y)&vVBIo;_Gur+_=U!GJP|EHD21RKwZ-Dmb%?{rF|?-=OcliRQ|Qr$ zQdY8e9b$Bij1Pa9nBc1kWIqQlmHC33Nzj0UXpmz4MmHqd0JJaEP;@(}{P#~9eWcXc{A|4313H@%)@!!Yqz`fPk zvj5`OQxiE6w53nxQkYYW#UX1F?FFz<{(c6R9S98D`3yeQx%1|e2bEr`~u%K6o zBbqpip)JF%L`zgDq1r&PrMiKIifqW{6cd1tbaU|;x-q5?$IYBdsEPFWG~vk&9-z3f zIERqVKk^#w#b8i0i5!dNwUb69A?ED1t=4)wE%QDC6q$P0Sn+d=R|V0$-jIg1oJROi2f z=1^Y}3YuV-vc;<%q?%Yj`|vw}0#Ol|EHsceAf3$s>=CR8g*6HVydij+T)b=uphvG+sU^!}|BVAk5Qdi*Q! 
z44uk#e}{R(!x{IX{mJTrm)NqqtSp(ADw9IQwhwWlt7%IAhM8hH9uP)ISP3i+0aafm zE3CPjX> zuN59;Q=wqnnq(l3%i$Q*33!Kk6q`-r6~_S-KWlXb)&vPr-orbXZ!s(Ghg#f%g|c_X zO~YnYe1!!G?L?a?0A-E3caTOI-tg^8K=aqJHXo~h3iD?zfPKTNp_Q5HUDHYQuaqz(We={7|W^yT4+ zF&u;U`NTw>CCJP3LyyYlu3a&bG*$K*N3k~#$HZ4N72F)R%4LyQy51$0NNH1~3(1~(1Vsce@~^mZsg zeN2R21K9Q$JM{syUVEFY4=azooPWj5smCu)xT$`I0t<_I#KQX83fhB6;Ly%b<#rIW zGUSs;ScBDlqZbnv+6-|t)fzHG{S)qC?zP(A1=u>xkJwINzd5B@hrSq>7jlzkp!3uS zK@!Dd7}Sv>CeVNtA?}hMPZ_sOkQPkJjG-;Vwj%o;L;gUVNdw6T?a_X1euz`2gF(T5 z+>~gAu2b4@HeUszdbITnQrcK}JIxYssR|%f$#4Lja`hCwE?WdrA(TYBh^y2#5)m>R zAsc9RMR1m1pcqXr&?N!_r|li`1A7x1{ji&qLRq#xuPm~GvOY!MPJK#yLwtdnLM2kq z_QZ9G7SYXsl5|w8X2MPx!lSh^PCCG}SD|OXxfnYFhTTxq>h1)}JvyxW4)s>@({&r^ z?;D4c719)k9n4gs*%mi}Stx5f(AI}r(*H_78%%7;ngfupHdki_k@~;lj9KEPy3LV% z8_oqH7wj#k4riJ=8tnl+lw1nrN+2D^;al`NaQu=KMj04YvmgCyx^+z4+N^)PuTHQC zI{+UPD|>Uo@JLQYPMwzpNU=DhG{Luz>>^%Zzu+$wZF(<#f}iz99D#i5u}>z&0KJM& zqqv}S88h2ho1lje>!fTOP=b zQKeEAKOXtCS!H$-6?BdjUdL%8;7KfNjG(<%C)R$Glt{G{p91qsUDwRNNPL%v`k7`Y zdSh%2Aw_2hj{Hz@j%a1jUX2f{`vmw(Bt+qqE{@)|R^*}^MDSeU8hE_x>Z&+N$L*_} z4lVuAaJ`s2=ANqBf*Dox{Gzwu26w61sFUDpbVx%Lpz71gDx)TB`c(7Id!`R$h7^pC z7i>89wDIYGet+)TzunidC@8~K@81$wbp%#rfY)b-ImRM$s$xLV?*au3eQzwG&xD z#@ag=Ri2>l(;ylkA4ZfdbkkFCXhnPTdL|GqeQaOB+OF`)a+?MIn#zviSKXi}8nJF0 z$9S!d2z&(Wa#!7Fy_KjC*wfxOI8d~ODi_EK?p4FXU8&e%xLdL#N3(;$dF<1LH-;Qh zX90A~gOn|FubOJ)%#!3@@rThH3vn4Lh{Q~YHK-Gtx`5b;>KTi~t6IUV^Ig_}oS!6& zzlb=3Qq~4GjfplD+OF|o_fzeCl7x;*(h}6Ki$nOSZXD0`xXdjNrCm8I9UYliiFPUM zy(QLyxCBZuLuO)LU2i;JJ}{5N-zJkZXR)BBLCR7SzYe+{^X$-nQM>zSeCsxro%~iW zXl4W^Se*7L#?(+=e^{Tl{Pwk0CoqOxQq!)H1OiXag?MWWsw{ap*6)~ zpujs@|96-)`v|DaZQ~L@HFK1tnYXeWJ4Lg2W?sQd=Qx~2kAvxLr^Xh&sTh%n$BjC5 z-vV`8(3vXqcUCZf7LT^`YsC^8F6a$(xUmi?=exJR@zJn{Lm4OF?=QpF*si%n*69d)Xwk4@4~h0#>-h;{{j#%n@;ZeYH8!5UBje?Y_>l?r7}~0NRJ0d}`}q?(FVL z)=f1BxqYV;^5m;8Jw^?_7A&JIkR5EHkal=S)}VZe$QUh5P&OZ80gLl`xNMkP9&vRf zZ60s@;ifos{;6<}-gDhc&J%G5HH~OVjW8zL0pu{0i6iDv>_A?IdtzNqTo~M&Lb2C% z=*)qx!(DScs!>;X{}tNZ?w~(NS2Oaem4MRp(e!2Z5u2Vg1h8NY4jb!Q0lZBAYz1MYHyEl!8dJX~iCl!~|!BDwmF5t*r#Qb}3i19O2Ept-@@zcXgIn91=J>TMVm96auqpKBzGA zM2~J*#{zgKx%<< zJc8oPTBK16U=WRC0WxcaR8xwTu7_^LHpXaZ6H?oDs}J;RN3ss4B`_(N)=UJ01%528F8jb zvjrS!__QjJ&ww(Hy%dHcv@WUz+a5X&dUA#hIrhODe;MESc`=Ko$bE1jLssq7=71AD zsvDZO3_VN^f%HCBq*5>7Z5c&QXmn`^XqxbAZc5l|L8wP{NZZ6Fpd2hXJ`C=)$`bO? 
zJ|PEb+@SLXvdIh`aq9M%p+HW&CdJ2uZtoH~jEKxXG#vG%Jg}NCq~g$KA< z62$JC@X}U%;$XAq9?BEgzx@umi)h5Wzi5^d0Ic zdHS-Rnss{OpWY4+YC7g$dLoo{`X7l;e+!w{N`w#bvp=gjrb7Dt2;vfQq`()}$g$7V z+Hfx0I?RFt042(stI5N3J~EyF0A@8 zItu_O(rgGo-8t=aV|2YnvfvH|;M_2Zs1BAUvsZN#LmO&gRR?}4%2Z!Yl5~hxA|vU- zc$Y`WO*t*SUTlr_wh`?xI~mQEb*WPXGQ7y{<*oNV1O}z<)f@+H zBVWQ2ZX~eu`ooAH$ap6KX(+Z6%A9?yhc?WY&ZC3Xhz+D0&n)-qlj06 zGJOkxYhX)@SEW2pLbROX;K^3^$AFOc!M%_$G~DM|^8Nv5z>O4^?{&`Ja(jm}kY5jy zdaD2UW`oJ>{F~=F$9gVqew%J2(b=FMmCID)E7`sIlUcfT=y}cx)}+FxtryRD%;#Rj zet61BBjT$m>%O`xu10sYWQDHL5m*}E=wFC>Z$1Fu(1q0Dyj2Zm2S%wXsPDSw;#ey_{Aqs%z{E7!~4ot?=3 zO0s565kGmNV%-AV`vtfC7|2s6-RHbi0i;JWEumsPU)`<7qGAxREzbIaQwac(_|a{b z%@|iQ9ZjG4N`mWZD-&+eJq9#K55nh5)V{e?4nr+nLQ`K69oNoyuKhRgo$QdI!RJ=` zlBi%&jzB7G@KAB<)kr{Am2WY+GU{qv27muZjJ}}tYLQt;GB^rOqx|;Z)6(v>c}u`V z6p>p#7x1IW1B%@%|MiuwV=pSUjD5QNi1E0)6#d3G_Rp%*5nJvJmvW4^PxX!N=`kt2 zntXCy-4A^&=1Y#~WtC!9@a<&xjLE*R&`mVk&{tB{Q{YxGY{+WQyLz5?O0M;YKK+Kg zdbLPnm{ArvcCPf&_kFt~BW-&oZFRX<&fZzGV*kvp zogkZS+j~Bo6nT}F9^35hpWLx+JScJ@wj$-zxXDh_WW~lyr_NNMr-%E;|1bOX(ioMS2dbj>efFnq_HXu} zUF(@BAgJ(p6`@4qE9!0UA`f;4eRHyVW9Qjlb`cJuvFA!!?Q5=APc`~ixgV8!Gs+^_ zO9rZoMNJmYL!ScFVdhC2R2Ob5h3Bug_Z#AjBn4{{;vVEMkJ}bvCJfF4 z&NqE!Mmxqz&=Jv;Xknxu6)YI7;^iQGD6(NPI#-|o?p20Im zcgvNA*JUgkSN^m?v##9(WY`+$Ix`h7m-#9O*niy&{(>v{vl{<=*Q(wuGTtypUL6+d z89MB2SPZiKipuQEcqvsDM}|nT+I23mp}Ril2=zBUAN{S0>8$PUzT_#vkFC9Bwg#ud zr!s%DV%o8u-MY5C3flTt3+$~gzhUf{WV+ivLf>{kUX*w0Li83lKaJg<3rp%A_X(Fp zM)|*K3=VzN&;Ua}-vsX2%MEACFGQE*PFz~M|2}TZC`4@-*y8-DaC^J$a6#$ljK<+s zMWk-|qk%2hGtmHX)Kx=Y(h(m07DTp``uulKfeWs18>x4fm5Ye*{&Qc{%djaz+#jr@ z{pUwyO)oK{OQy|kbr-#(cRaJyncna^&0zhQv~t~Gz`OmYlU6Fv?Z19(kqlnEEP?qz zkRDq$IJ>2Ez}an01-dnANB-Hzd$%-3SEDe0Y_{&hx}+s%AElkGzp^G{?3=#gLreK^ zl`HRY$O%rwL_$*cQQM(Ht{OmClJM$fcS!qc8R4L+#f?_aU%cWvl6)*}t%z{c?~RQOUA)CVh{D4sp}nDtFdzwm*7!D6GG`&6-&`k+W05s19IIm+kh_4zw;mvZ!-sh39u--Fx^aK!^+^+6gnj)k zxy3cF_gu4<_($&TP^L#ew(-Y-C>f0(%l&(xoSzMM=VG3U9Hzouf+u2!;M>I);}K0K zDDC)UTPo%lAho8fCE^&3w~OVflp?EvVb1apv{TvI=wH+Hu&gaOB=YK?p$AtcQg^%c zcq=>0M_vjHVIXrR7ZoKx} zZR;ZR$EgbW0&Dxn5o#L4!r*-|F2ssts@ElK1Ar2;xF2M_y(fWj1^0jU z^0#C^z@bczL-vzA<2g{Jswus13mHs4SSiYyl235>WDO@3NBo~&ZM06wLxG)3Zr7+e z1oN(|XP3p@h!udB&a3adJ@a#?)II!;wEIoNyAE8e_<#Bh|Ns7V9TjHC+7Clh7^z z_Zot%C$8A;Njtdg&$|vPupEwm!D*K`Z2f{0k=?o<1ECnxD zsRLc@aSlKzeX^-4o(bq+{lIqp-*wSS-8YEW$M?1hq5=A3>jI^`+}A=<;AnF0I~NUk zpX$-2XHP13=(XK!#a+JkKiYfpcqrHJ-)UsBHIgk8W2=-hNJ>P=c4diD_QGTxV;M%m zP_~q8MJ3tyE!l1eF-Ry|wid?HawRiS4aWFA-0%I~^y|Ohz4!Cm#>Xu0%sKD#p7*TJ z^PF?WFHUw>oD|`p5kd$lO2;DaZ66l3Jam*WI({IqIJ)EEsr%Yf2yq}+lsyf=Kayxg ze`^Cl@^U^1=e=eEcEil1|y#yQF{zlsAR&HxPw}3IT0V9`jkeg!Rilzw02CC z%0YovQZV=H4>kgHxrr`RYx(qH_eU2d;xcd55pg)h=pyHvK82_TT*bNfp!l4_$#{@ik#rIL7I>BWr`h zZ0R}y+O@Y9?s9cz?uzb2n8?Eh!CLHx)Onu!D^OzxiKzrS_mqGnWp!yJgR^=Cnz#Xx z{%haETIowBeOc5;BRkXWKKdY$v4uo;y|&1ToLwT0XW3L6??4~_Zd$j{cIM_(NBDfP z)+U!b)#O3eFXQ#f?m~#$?Ypue<>rx`J2kA~s1s!@PK#$nU%#B(0$Q>%=kks?!R>#% z?+d0!=>}A@=`q#haSX5D>Bb=83-#Mu@~0cPcT!_6WxtrW8^NRoj)PseT9oDUk4uXh zF65ye*)^L~&1KHH&$!!*M4_i{%aVS8QJ;+4%QoUYcB=Nx zv3&<^sRQNdMib9@7`~veMc9Rl_61ysU+#_S_o@o3<8$B8OU-GL{yxQFDQR^yOgJb4 zRA){lQbZEcRP(bKvUFmui(>T%XI0Jh6i^Iy*t&7H#5u3m@|AjSR(DeLYc@TY)W^`9 z_OVFd$A_vlsm)9KfpVfOcYwnwu@*~{LP_t;$~_q#r0fGzGoLSEmFf{0#PkesHVHGJn;WlrUTArcFJzezH1X6Ifbu zOIeGo0m|^iUY+JQVzF(a{6mB}iRiCY+Vk42Q!K!9`G-fmofM-YP4QFNPuwdaL}i$8 zEVHml^i)iGNYYr7P{AE;n}xTO0jl@YL7LjK8r^8d67yUb}n;B1SMID0&LQTSl8jM2LYB21MT;!od0S<)P*KQFi)r zlJR9hyosd%@)KBb@{ghvNd-yekC~JdU0doIO>4h%c-V25FJ5I-NxK^CWzd?(%_22fTGu+_EgT* z4A(cR)Z*t?f_AzvxqERw);3R{z5Q*v?T+W)5z`S9nR zeUhvmW9V#<;;hViGn3@nM$@0zS(5l|k+qj!KX0jCM} zb*{?R6ViKoS`w3Y1T0PRtx8rbaNOD^FpS-mNamhZ!4dY&w2Vyx#X3 
z8cDxoK;xHe+x6)y-s)iD=bavJgAX_SQ{&vM-=@kFKH5w)MRNPqXjG5zd`uz}d&TAs zEX%9hw9}p%eS|8gDZG9(ZFBWML0veStcnaj0=G(F@}LEkVs!dP-R?;?5O7tdRf8sE z`Mc+CP#a_YyFs*q(A*i8@gzWVVk0K&!}CfNy`a3>A_^`q{eHpzuoI1f8&zE^XS*Ou(40WwXHCIj|W zG{wq{wi2*mj;HVIrR1hQKhx$}Z1!{z#@hQtzY03@<%orjVCt#|q5LT9~#^9CRf4 zW4#J+r)EP*)Efh&h4M7`|J(zcjmTf$C<@eb%~h6iD5LqcMdj#@X4fP~sWJ4>h#Lp5 zV&z+=uJE=-b0j&C6!s{1fsU70+Ph^$0I-yyIbd5uR|^P9hbbAl| z#h-zon7vUFK@T3U84VMX%lz%UR!q}klWseR>I;kz5w0j)kP!x`-7SED|d2{=Ku>21^Nn;}?%_G`%KfRh7?Uk5yQ z_h()u*GRw2@(k-QGFKUjM7%AHQh_fCwJga7@PK;l1_G@?JE(l`!e#@+med&7K~-r6 zYcInxpDsCl2N?zG>Ni1a#9uTwfUyko7M2Jp1RCPAP>wL5eBymz0!a!Hk8um z!1j25;hp$k=AmwdG9SwHiF_Nd)$!(woKzfZSDQ!k=4f#ps9gG?@lG^pMQ;*kFpMi8(A0auy2H8WfA6JCIb=K@LqGEfr_WtGHq7v>fdw(Au*BSZIje| zS`Txn@*?5j;uiV#0y;L9Z*actl=!~$I-NxsJH$4bc|6l#JqsHoHK~td7P-HaGEj67 z-Y0<=>hC%Niy7Z{k&H}pjbb7OB+Pog9B@XNQ6HXpQIyO1ajOM!PgqQZ!tEQN z8MqCN{k_GW(vZsAd?2Pb17)9O`1;M8;CX>KD^|2DXTgw~{N=c#LZj4JzpKp9Bd(y^ zBNHrpPfbB*14Q3Zkj;!akWOQEWiWLiT=OV8I@Z}a19Z#p*&!+TRr=)>NWA#na;OT}rW368-s z$b;H0Gd#78kB)@cr%N1MEDu^?;;L8fYR_HQk~ifTOlIXYnY@vYr+k^BxgfL^jM*i& zR(Tu?dDKwnA?{L;m=&h(Q(tK|9&8G_tm;;JkI8Q=#(pU7+^883JCWPtp8U4eP;Be1 zCmhaSUSBeoYS=fF(zv-U;bY38vf~ToCF{G5%R&TK>V4L@Hnalrz~#4etIiO;R1+W8 z%(jQD5}$4bAz>h+4{Htw(BiI@jS#f50*BUkfID- z%lU3QSbUYEL5_WTm1Ja5+{eL~syOoS=8e7*3nKSaO&`<@R~rndYX}60y)CzL7{f`W zjPi|Rmpr~`I_S10<*AhKQDV5@O~&nU$1xRvjGLpt{U5p=yfGA^XDCc4aXfSZ&Z z#iNqb-Nj9M@nVncxsK*ZqFO1&hL8G2 ztfyPfj*a&YA+Cw;wGww1J`k){SgAL^jn*jp`TC*cdwA6;<+#ngk^8mKRgbwz!nN!; z1!iMYzP}e!*mhj;)wW9$IhY6+(`1$%w$DzeA^RmTN(Ta;6etEErh4W|yBlH$X$QOK zrp=lA-btr+-cV8VTSokhlV?83+&c~U)O7;{ZTX&)?eSjN|7Ybt#Wx_{+V!%mVAw2T z2^oLUpaPwcmKfNbY$({-Q&t;Whig{&CBAE6Rsc7>7gc-wS$nfmgk)i9!Mv(E%Dj*_ z45lt3Wy6vGb+`ZSfuL(D_P2S8iukhf;FryYznp49J$2Q^Qet$V%80LNZjFy^!?+q9UjQ}=p zdyEIhWH-@b1}~L#rXpFf<3(9BLW3W8iBI_%QrACWBy{XwR;eqa?0#j41$CiQBMRzI zE}LyPPTXYpkoCN43hwnGrBU7NG=1u^_d0uTmpy!WahLilpW&TU?{*~TFSAvbSyp@x zeKwwqL{J_&&7YYpa&!EKY!Sk4Vbe_AyDTM>5gUI?CJ`SPeDzyk|AdxL6irR;bNj1Z z2ekxE<#VqNhx)NPXnyLLjSB>t^LM=p+prIqls^oON@c8*C*a59K<{q-lluR9XQ_#I zCt%$PwkI!mV7;wZ`O{c`J%Yf_llQm)g4ckdIo2U40H?I|!n%25WLBSl1l&rWxxWAa zbimJq)&ag}T{C#U!xHgcu6B65>*~3Gibtz`LG=JrQW>8L#tF#v3l9fAbcftmtL0Ce@d-cDF7!5i3a z@FM>+^2s72UAF*D@kaPZ`U~=`VIeqSaahdSv)s=RtSrX(-2%f%{ZGTN*!I(5e1`>q ztUbgaH0qYX-3SO;{TEnwF;XHB^lx)ZaPq;~xu0`(lX3CDI{rwJADxyz@hoKjPRt)& zm>)5$xAT9(Lm+$hyJ7sx>i-D6UblV^-JAa%^dFkmSbrm6Gzj7Ze;j~$&K9kH`VTX7 B7yJMK diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 9b147d53c06c4..c79affaff3c1f 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -235,6 +235,16 @@ def test_index_col_label_error(self, ext): self.get_exceldf("test1", ext, "Sheet1", index_col=["A"], usecols=["A", "C"]) + def test_index_col_empty(self, ext): + # see gh-9208 + result = self.get_exceldf("test1", ext, "Sheet3", + index_col=["A", "B", "C"]) + expected = DataFrame(columns=["D", "E", "F"], + index=MultiIndex(levels=[[]] * 3, + labels=[[]] * 3, + names=["A", "B", "C"])) + tm.assert_frame_equal(result, expected) + def test_usecols_pass_non_existent_column(self, ext): msg = ("Usecols do not match columns, " "columns expected but not found: " + r"\['E'\]") From b3a3ac774f255bad4b255338fdefa8f96f5ab274 Mon Sep 17 00:00:00 2001 From: Stephen Childs Date: Tue, 13 Nov 2018 12:52:41 -0500 Subject: [PATCH 113/122] DOC: Update is_sparse docstring (#19983) * BUG: Identify SparseDataFrame as sparse The is_sparse function checks to see if an array-like is spare by checking to see if it is an instance of ABCSparseArray or ABCSparseSeries. 
From b3a3ac774f255bad4b255338fdefa8f96f5ab274 Mon Sep 17 00:00:00 2001
From: Stephen Childs
Date: Tue, 13 Nov 2018 12:52:41 -0500
Subject: [PATCH 113/122] DOC: Update is_sparse docstring (#19983)

* BUG: Identify SparseDataFrame as sparse

The is_sparse function checks to see if an array-like is
sparse by checking to see if it is an instance of
ABCSparseArray or ABCSparseSeries. This commit adds
ABCSparseDataFrame to that list -- so it can detect that a
DataFrame (which is an array-like object) is sparse.

Added a test for this.

* Revert "BUG: Identify SparseDataFrame as sparse"

This reverts commit 10dffd1452b50af101f26b331cbfabd3ea217434.

The previous commit's change was not necessary. Will add a docstring
to clarify the behaviour of the method.

* DOC: Revise is_sparse docstring

Clean up the docstring for is_sparse so it conforms to the
documentation style guide. Add additional examples and clarify that
is_sparse expects a 1-dimensional array-like.

* DOC: Adjust is_sparse docstring.

Responding to pull request comments.
---
 pandas/core/dtypes/common.py | 47 ++++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 94e9b72b001b1..a01266870b8fc 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -150,32 +150,59 @@ def is_object_dtype(arr_or_dtype):

 def is_sparse(arr):
     """
-    Check whether an array-like is a pandas sparse array.
+    Check whether an array-like is a 1-D pandas sparse array.
+
+    Check that the one-dimensional array-like is a pandas sparse array.
+    Returns True if it is a pandas sparse array, not another type of
+    sparse array.

     Parameters
     ----------
     arr : array-like
-        The array-like to check.
+        Array-like to check.

     Returns
     -------
-    boolean : Whether or not the array-like is a pandas sparse array.
+    bool
+        Whether or not the array-like is a pandas sparse array.
+
+    See Also
+    --------
+    DataFrame.to_sparse : Convert DataFrame to a SparseDataFrame.
+    Series.to_sparse : Convert Series to SparseSeries.
+    Series.to_dense : Return dense representation of a Series.

     Examples
     --------
-    >>> is_sparse(np.array([1, 2, 3]))
-    False
-    >>> is_sparse(pd.SparseArray([1, 2, 3]))
+    Returns `True` if the parameter is a 1-D pandas sparse array.
+
+    >>> is_sparse(pd.SparseArray([0, 0, 1, 0]))
     True
-    >>> is_sparse(pd.SparseSeries([1, 2, 3]))
+    >>> is_sparse(pd.SparseSeries([0, 0, 1, 0]))
     True

-    This function checks only for pandas sparse array instances, so
-    sparse arrays from other libraries will return False.
+    Returns `False` if the parameter is not sparse.
+
+    >>> is_sparse(np.array([0, 0, 1, 0]))
+    False
+    >>> is_sparse(pd.Series([0, 1, 0, 0]))
+    False
+
+    Returns `False` if the parameter is not a pandas sparse array.

     >>> from scipy.sparse import bsr_matrix
-    >>> is_sparse(bsr_matrix([1, 2, 3]))
+    >>> is_sparse(bsr_matrix([0, 1, 0, 0]))
     False
+
+    Returns `False` if the parameter has more than one dimension.
+
+    >>> df = pd.SparseDataFrame([389., 24., 80.5, np.nan],
+    ...                         columns=['max_speed'],
+    ...                         index=['falcon', 'parrot', 'lion', 'monkey'])
+    >>> is_sparse(df)
+    False
+    >>> is_sparse(df.max_speed)
+    True
     """
     from pandas.core.arrays.sparse import SparseDtype
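The revised docstring can be exercised directly. A minimal sketch, assuming the 0.24-era sparse containers (``SparseSeries`` and ``SparseDataFrame`` were deprecated in later releases):

.. code-block:: python

    import numpy as np
    import pandas as pd
    from pandas.api.types import is_sparse

    # One-dimensional pandas sparse containers are detected ...
    assert is_sparse(pd.SparseArray([0, 0, 1, 0]))
    assert is_sparse(pd.SparseSeries([0, 0, 1, 0]))

    # ... while dense arrays and third-party sparse matrices are not.
    assert not is_sparse(np.array([0, 0, 1, 0]))

    # A SparseDataFrame has more than one dimension, so it returns False,
    # but each of its columns is a SparseSeries and returns True.
    df = pd.SparseDataFrame({"max_speed": [389.0, 24.0, 80.5, np.nan]})
    assert not is_sparse(df)
    assert is_sparse(df["max_speed"])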
From 7dab45fac2c9ed8c38039145d6dfcb542af41803 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Wed, 14 Nov 2018 04:59:24 -0800
Subject: [PATCH 114/122] DOC: Surface / doc mangle_dupe_cols in read_excel
 (#23678)

xref gh-10523.
---
 pandas/io/excel.py            | 14 +++++++++--
 pandas/tests/io/test_excel.py | 46 ++++++++++++++----------------
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index a7e0e48de0a75..3ce8953a6edb2 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -175,12 +175,16 @@
     convert integral floats to int (i.e., 1.0 --> 1).
     If False, all numeric data will be read in as floats:
     Excel stores all numbers as floats internally
+mangle_dupe_cols : boolean, default True
+    Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
+    'X'...'X'. Passing in False will cause data to be overwritten if there
+    are duplicate names in the columns.

 Returns
 -------
 parsed : DataFrame or Dict of DataFrames
-    DataFrame from the passed in Excel file. See notes in sheet_name
-    argument for more information on when a Dict of Dataframes is returned.
+    DataFrame from the passed in Excel file. See notes in sheet_name
+    argument for more information on when a dict of DataFrames is returned.

 Examples
 --------
@@ -314,6 +318,7 @@ def read_excel(io,
                comment=None,
                skipfooter=0,
                convert_float=True,
+               mangle_dupe_cols=True,
                **kwds):

     # Can't use _deprecate_kwarg since sheetname=None has a special meaning
@@ -349,6 +354,7 @@ def read_excel(io,
                       comment=comment,
                       skipfooter=skipfooter,
                       convert_float=convert_float,
+                      mangle_dupe_cols=mangle_dupe_cols,
                       **kwds)


@@ -441,6 +447,7 @@ def parse(self,
               comment=None,
               skipfooter=0,
               convert_float=True,
+              mangle_dupe_cols=True,
               **kwds):
         """
         Parse specified sheet(s) into a DataFrame
@@ -476,6 +483,7 @@ def parse(self,
                      comment=comment,
                      skipfooter=skipfooter,
                      convert_float=convert_float,
+                     mangle_dupe_cols=mangle_dupe_cols,
                      **kwds)

     def _parse_excel(self,
@@ -498,6 +506,7 @@ def _parse_excel(self,
                      comment=None,
                      skipfooter=0,
                      convert_float=True,
+                     mangle_dupe_cols=True,
                      **kwds):

         _validate_header_arg(header)
@@ -667,6 +676,7 @@ def _parse_cell(cell_contents, cell_typ):
                         comment=comment,
                         skipfooter=skipfooter,
                         usecols=usecols,
+                        mangle_dupe_cols=mangle_dupe_cols,
                         **kwds)

                 output[asheetname] = parser.read(nrows=nrows)
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index c79affaff3c1f..a097e0adbeb7a 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -1846,33 +1846,41 @@ def roundtrip(data, header=True, parser_hdr=0, index=True):

     def test_duplicated_columns(self, *_):
         # see gh-5235
-        write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
-        col_names = ["A", "B", "B"]
-
-        write_frame.columns = col_names
-        write_frame.to_excel(self.path, "test1")
+        df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]],
+                       columns=["A", "B", "B"])
+        df.to_excel(self.path, "test1")
+        expected = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]],
+                             columns=["A", "B", "B.1"])

-        read_frame = read_excel(self.path, "test1", index_col=0)
-        read_frame.columns = col_names
+        # By default, we mangle.
+        result = read_excel(self.path, "test1", index_col=0)
+        tm.assert_frame_equal(result, expected)

-        tm.assert_frame_equal(write_frame, read_frame)
+        # Explicitly, we pass in the parameter.
+        result = read_excel(self.path, "test1", index_col=0,
+                            mangle_dupe_cols=True)
+        tm.assert_frame_equal(result, expected)

         # see gh-11007, gh-10970
-        write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
-                                columns=["A", "B", "A", "B"])
-        write_frame.to_excel(self.path, "test1")
-
-        read_frame = read_excel(self.path, "test1", index_col=0)
-        read_frame.columns = ["A", "B", "A", "B"]
+        df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
+                       columns=["A", "B", "A", "B"])
+        df.to_excel(self.path, "test1")

-        tm.assert_frame_equal(write_frame, read_frame)
+        result = read_excel(self.path, "test1", index_col=0)
+        expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
+                             columns=["A", "B", "A.1", "B.1"])
+        tm.assert_frame_equal(result, expected)

         # see gh-10982
-        write_frame.to_excel(self.path, "test1", index=False, header=False)
-        read_frame = read_excel(self.path, "test1", header=None)
+        df.to_excel(self.path, "test1", index=False, header=False)
+        result = read_excel(self.path, "test1", header=None)

-        write_frame.columns = [0, 1, 2, 3]
-        tm.assert_frame_equal(write_frame, read_frame)
+        expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
+        tm.assert_frame_equal(result, expected)
+
+        msg = "Setting mangle_dupe_cols=False is not supported yet"
+        with pytest.raises(ValueError, match=msg):
+            read_excel(self.path, "test1", header=None, mangle_dupe_cols=False)

     def test_swapped_columns(self, merge_cells, engine, ext):
         # Test for issue #5427.
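The behaviour documented and tested above, sketched as a standalone script. The file name is hypothetical, and writing the workbook assumes an Excel engine such as ``openpyxl`` or ``xlsxwriter`` is installed:

.. code-block:: python

    import pandas as pd

    # Round-trip a frame that has duplicate column labels.
    df = pd.DataFrame([[1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"])
    df.to_excel("dupes.xlsx", sheet_name="test1")

    # By default read_excel mangles duplicates to "B", "B.1", ...
    result = pd.read_excel("dupes.xlsx", "test1", index_col=0)
    assert list(result.columns) == ["A", "B", "B.1"]

    # Disabling the mangling is rejected rather than silently
    # overwriting data, exactly as the new test asserts.
    try:
        pd.read_excel("dupes.xlsx", "test1", index_col=0,
                      mangle_dupe_cols=False)
    except ValueError as exc:
        print(exc)  # Setting mangle_dupe_cols=False is not supported yet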
From 991547ef130956aebd6b14547c8168567537c639 Mon Sep 17 00:00:00 2001
From: justinchan23 <45015017+justinchan23@users.noreply.github.com>
Date: Wed, 14 Nov 2018 08:04:32 -0500
Subject: [PATCH 115/122] Fix errorbar visualization (#23674)

---
 doc/source/visualization.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst
index 569a6fb7b7a0d..dd8ccfcfd28ac 100644
--- a/doc/source/visualization.rst
+++ b/doc/source/visualization.rst
@@ -1405,7 +1405,7 @@ Here is an example of one way to easily plot group means with standard deviation
    # Plot
    fig, ax = plt.subplots()
    @savefig errorbar_example.png
-   means.plot.bar(yerr=errors, ax=ax)
+   means.plot.bar(yerr=errors, ax=ax, capsize=4)

 .. ipython:: python
    :suppress:

From c8ac3bfb2b8c1ad5de7b6080b482377db04e8125 Mon Sep 17 00:00:00 2001
From: Myles Braithwaite
Date: Wed, 14 Nov 2018 08:58:15 -0500
Subject: [PATCH 116/122] DOC: Accessing files from a S3 bucket. (#23639)

---
 doc/source/io.rst | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 34dc185c200e6..92fc28af0281a 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1580,12 +1580,19 @@ You can pass in a URL to a CSV file:

     df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item',
                      sep='\t')

-S3 URLs are handled as well:
+S3 URLs are handled as well but require installing the `S3Fs
+`_ library:

 .. code-block:: python

    df = pd.read_csv('s3://pandas-test/tips.csv')

+If your S3 bucket requires credentials, you will need to set them as
+environment variables or in the ``~/.aws/credentials`` config file; refer
+to the `S3Fs documentation on credentials
+`_.
+
+
 Writing out Data
 ''''''''''''''''

From f9563eaaa64a85d5ab1ebf384db9f7e2e29edb0b Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Wed, 14 Nov 2018 06:24:48 -0800
Subject: [PATCH 117/122] REF: Move Excel names parameter handling to CSV
 (#23690)

---
 pandas/io/excel.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index 3ce8953a6edb2..141d2c79a1927 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -660,6 +660,7 @@ def _parse_cell(cell_contents, cell_typ):
             # GH 12292 : error when read one empty column from excel file
             try:
                 parser = TextParser(data,
+                                    names=names,
                                     header=header,
                                     index_col=index_col,
                                     has_index_names=has_index_names,
@@ -681,9 +682,6 @@ def _parse_cell(cell_contents, cell_typ):

                 output[asheetname] = parser.read(nrows=nrows)

-                if names is not None:
-                    output[asheetname].columns = names
-
                 if not squeeze or isinstance(output[asheetname], DataFrame):
                     output[asheetname].columns = output[
                         asheetname].columns.set_names(header_names)

From 2688cbe2a20b41d199e9f8257384b9d719480eb4 Mon Sep 17 00:00:00 2001
From: Jeremy Schendel
Date: Wed, 14 Nov 2018 07:26:24 -0700
Subject: [PATCH 118/122] BUG: Fix Series/DataFrame.rank(pct=True) with more
 than 2**24 rows (#23688)

---
 doc/source/whatsnew/v0.24.0.txt       | 1 +
 pandas/_libs/algos_rank_helper.pxi.in | 4 ++--
 pandas/tests/frame/test_rank.py       | 7 +++++++
 pandas/tests/series/test_rank.py      | 7 +++++++
 pandas/tests/test_algos.py            | 9 +++++++++
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 19af38954e282..f6a27e4c68ce0 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1205,6 +1205,7 @@ Numeric
 - Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype arithmetic operations with ``ndarray`` with integer dtype incorrectly treating the narray as ``timedelta64[ns]`` dtype (:issue:`23114`)
 - Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`).
 - :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`)
+- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2:sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`)

 Strings
 ^^^^^^^

diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in
index 4d144dcf2808a..329c368e13d6d 100644
--- a/pandas/_libs/algos_rank_helper.pxi.in
+++ b/pandas/_libs/algos_rank_helper.pxi.in
@@ -53,7 +53,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
         int tiebreak = 0
         bint keep_na = 0
         bint isnan
-        float count = 0.0
+        float64_t count = 0.0
     tiebreak = tiebreakers[ties_method]

     {{if dtype == 'float64'}}
@@ -228,7 +228,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
         float64_t sum_ranks = 0
         int tiebreak = 0
         bint keep_na = 0
-        float count = 0.0
+        float64_t count = 0.0

     tiebreak = tiebreakers[ties_method]

diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py
index 078c48539de16..eaba5f7ec7790 100644
--- a/pandas/tests/frame/test_rank.py
+++ b/pandas/tests/frame/test_rank.py
@@ -309,3 +309,10 @@ def test_rank_pct_true(self, method, exp):

         expected = DataFrame(exp)
         tm.assert_frame_equal(result, expected)
+
+    def test_pct_max_many_rows(self):
+        # GH 18271
+        df = DataFrame({'A': np.arange(2**24 + 1),
+                        'B': np.arange(2**24 + 1, 0, -1)})
+        result = df.rank(pct=True).max()
+        assert (result == 1).all()
diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py
index 9772ceecfc7b1..5b0ea37a0bfcf 100644
--- a/pandas/tests/series/test_rank.py
+++ b/pandas/tests/series/test_rank.py
@@ -495,3 +495,10 @@ def test_rank_first_pct(dtype, ser, exp):
     result = s.rank(method='first', pct=True)
     expected = Series(exp).astype(result.dtype)
     assert_series_equal(result, expected)
+
+
+def test_pct_max_many_rows():
+    # GH 18271
+    s = Series(np.arange(2**24 + 1))
+    result = s.rank(pct=True).max()
+    assert result == 1
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 3642c4ee98a9e..ff505f2986b1a 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1462,6 +1462,15 @@ def test_too_many_ndims(self):
         with pytest.raises(TypeError, match=msg):
             algos.rank(arr)

+    @pytest.mark.parametrize('values', [
+        np.arange(2**24 + 1),
+        np.arange(2**25 + 2).reshape(2**24 + 1, 2)],
+        ids=['1d', '2d'])
+    def test_pct_max_many_rows(self, values):
+        # GH 18271
+        result = algos.rank(values, pct=True).max()
+        assert result == 1
+

 def test_pad_backfill_object_segfault():
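The root cause of the bug fixed above: ``count`` was a C ``float``, and a 32-bit float cannot represent every integer beyond 2**24 exactly, so the percentage denominator drifted and ranks could exceed 1.0. A small sketch of the fixed behaviour (note it allocates a few hundred MB):

.. code-block:: python

    import numpy as np
    import pandas as pd

    n = 2 ** 24 + 1  # one element past float32's exact-integer range
    s = pd.Series(np.arange(n))

    # With the float64_t accumulator the maximum percentage rank is
    # exactly 1.0; before the fix it could come out greater than 1.0.
    assert s.rank(pct=True).max() == 1.0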
pandas/core/indexes/datetimelike.py | 23 ++-- pandas/core/indexes/datetimes.py | 17 ++- pandas/tests/arrays/test_datetimelike.py | 138 ++++++++++++++++++++++- 9 files changed, 272 insertions(+), 77 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f6a27e4c68ce0..a7fb2da3db2f7 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1313,6 +1313,8 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) +- Bug in :func:`DataFrame.to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) +- Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) - Bug in :func:`to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index daf2dcccd284b..094c9c3df0bed 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -39,7 +39,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.common as com -from pandas.core.algorithms import checked_add_with_arr +from pandas.core.algorithms import checked_add_with_arr, take, unique1d from .base import ExtensionOpsMixin from pandas.util._decorators import deprecate_kwarg @@ -196,6 +196,67 @@ def astype(self, dtype, copy=True): return self._box_values(self.asi8) return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) + # ------------------------------------------------------------------ + # ExtensionArray Interface + # TODO: + # * _from_sequence + # * argsort / _values_for_argsort + # * _reduce + + def unique(self): + result = unique1d(self.asi8) + return type(self)(result, dtype=self.dtype) + + def _validate_fill_value(self, fill_value): + """ + If a fill_value is passed to `take` convert it to an i8 representation, + raising ValueError if this is not possible. 
+ + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : np.int64 + + Raises + ------ + ValueError + """ + raise AbstractMethodError(self) + + def take(self, indices, allow_fill=False, fill_value=None): + if allow_fill: + fill_value = self._validate_fill_value(fill_value) + + new_values = take(self.asi8, + indices, + allow_fill=allow_fill, + fill_value=fill_value) + + return type(self)(new_values, dtype=self.dtype) + + @classmethod + def _concat_same_type(cls, to_concat): + dtypes = {x.dtype for x in to_concat} + assert len(dtypes) == 1 + dtype = list(dtypes)[0] + + values = np.concatenate([x.asi8 for x in to_concat]) + return cls(values, dtype=dtype) + + def copy(self, deep=False): + values = self.asi8.copy() + return type(self)(values, dtype=self.dtype, freq=self.freq) + + def _values_for_factorize(self): + return self.asi8, iNaT + + @classmethod + def _from_factorized(cls, values, original): + return cls(values, dtype=original.dtype) + # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 926228f267049..7b4e362ac9fa0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -12,7 +12,7 @@ conversion, fields, timezones, resolution as libresolution) -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import cache_readonly, Appender from pandas.errors import PerformanceWarning from pandas import compat @@ -21,8 +21,7 @@ is_object_dtype, is_int64_dtype, is_datetime64tz_dtype, - is_datetime64_dtype, - ensure_int64) + is_datetime64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries @@ -294,7 +293,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if tz is not None and index.tz is None: arr = conversion.tz_localize_to_utc( - ensure_int64(index.values), + index.asi8, tz, ambiguous=ambiguous) index = cls(arr) @@ -317,7 +316,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if not right_closed and len(index) and index[-1] == end: index = index[:-1] - return cls._simple_new(index.values, freq=freq, tz=tz) + return cls._simple_new(index.asi8, freq=freq, tz=tz) # ----------------------------------------------------------------- # Descriptive Properties @@ -419,6 +418,25 @@ def __iter__(self): for v in converted: yield v + # ---------------------------------------------------------------- + # ExtensionArray Interface + + @property + def _ndarray_values(self): + return self._data + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (datetime, np.datetime64)): + self._assert_tzawareness_compat(fill_value) + fill_value = Timestamp(fill_value).value + else: + raise ValueError("'fill_value' should be a Timestamp. 
" + "Got '{got}'.".format(got=fill_value)) + return fill_value + # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index faba404faeb23..e46b00da6161e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -216,14 +216,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): ordinals = libperiod.extract_ordinals(periods, freq) return cls(ordinals, freq=freq) - def _values_for_factorize(self): - return self.asi8, iNaT - - @classmethod - def _from_factorized(cls, values, original): - # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray - return cls(values, freq=original.freq) - @classmethod def _from_datetime64(cls, data, freq, tz=None): """Construct a PeriodArray from a datetime64 array @@ -262,14 +254,6 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq - @classmethod - def _concat_same_type(cls, to_concat): - freq = {x.freq for x in to_concat} - assert len(freq) == 1 - freq = list(freq)[0] - values = np.concatenate([x._data for x in to_concat]) - return cls(values, freq=freq) - # -------------------------------------------------------------------- # Data / Attributes @@ -415,29 +399,20 @@ def __setitem__( raise TypeError(msg) self._data[key] = value - def take(self, indices, allow_fill=False, fill_value=None): - if allow_fill: - if isna(fill_value): - fill_value = iNaT - elif isinstance(fill_value, Period): - if self.freq != fill_value.freq: - msg = DIFFERENT_FREQ_INDEX.format( - self.freq.freqstr, - fill_value.freqstr - ) - raise IncompatibleFrequency(msg) - - fill_value = fill_value.ordinal - else: - msg = "'fill_value' should be a Period. Got '{}'." - raise ValueError(msg.format(fill_value)) - - new_values = algos.take(self._data, - indices, - allow_fill=allow_fill, - fill_value=fill_value) - - return type(self)(new_values, self.freq) + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, Period): + if fill_value.freq != self.freq: + msg = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr, + fill_value.freqstr) + raise IncompatibleFrequency(msg) + fill_value = fill_value.ordinal + else: + raise ValueError("'fill_value' should be a Period. 
" + "Got '{got}'.".format(got=fill_value)) + return fill_value def fillna(self, value=None, method=None, limit=None): # TODO(#20300) @@ -474,9 +449,6 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def copy(self, deep=False): - return type(self)(self._data.copy(), freq=self.freq) - def value_counts(self, dropna=False): from pandas import Series, PeriodIndex diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9dbdd6ff8b562..ad564ca34930f 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -9,6 +9,7 @@ from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, parse_timedelta_unit) +from pandas.util._decorators import Appender from pandas import compat @@ -139,7 +140,7 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result._freq = freq return result - def __new__(cls, values, freq=None): + def __new__(cls, values, freq=None, dtype=_TD_DTYPE): freq, freq_infer = dtl.maybe_infer_freq(freq) @@ -193,6 +194,17 @@ def _generate_range(cls, start, end, periods, freq, closed=None): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (timedelta, np.timedelta64, Tick)): + fill_value = Timedelta(fill_value).value + else: + raise ValueError("'fill_value' should be a Timedelta. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index bb4ab823069ee..ebfb41825ae0a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -476,13 +476,7 @@ def _concat_datetimetz(to_concat, name=None): all inputs must be DatetimeIndex it is used in DatetimeIndex.append also """ - # do not pass tz to set because tzlocal cannot be hashed - if len({str(x.dtype) for x in to_concat}) != 1: - raise ValueError('to_concat must have the same tz') - tz = to_concat[0].tz - # no need to localize because internal repr will not be changed - new_values = np.concatenate([x.asi8 for x in to_concat]) - return to_concat[0]._simple_new(new_values, tz=tz, name=name) + return to_concat[0]._concat_same_dtype(to_concat, name=name) def _concat_index_same_dtype(indexes, klass=None): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3f9a60f6d5c51..39bc7f4b85de2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -18,7 +18,6 @@ is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, is_integer, is_integer_dtype, is_list_like, is_object_dtype, is_period_dtype, is_scalar, is_string_dtype) -import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -215,6 +214,11 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'): class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): """ common ops mixin to support a unified interface datetimelike Index """ + # override DatetimeLikeArrayMixin method + copy = Index.copy + unique = Index.unique + take = Index.take + # DatetimeLikeArrayMixin assumes subclasses are mutable, so 
these are # properties there. They can be made into cache_readonly for Index # subclasses bc they are immutable @@ -685,17 +689,21 @@ def _concat_same_dtype(self, to_concat, name): """ attribs = self._get_attributes_dict() attribs['name'] = name + # do not pass tz to set because tzlocal cannot be hashed + if len({str(x.dtype) for x in to_concat}) != 1: + raise ValueError('to_concat must have the same tz') if not is_period_dtype(self): # reset freq attribs['freq'] = None - - if getattr(self, 'tz', None) is not None: - return _concat._concat_datetimetz(to_concat, name) + # TODO(DatetimeArray) + # - remove the .asi8 here + # - remove the _maybe_box_as_values + # - combine with the `else` block + new_data = self._concat_same_type(to_concat).asi8 else: - new_data = np.concatenate([c.asi8 for c in to_concat]) + new_data = type(self._values)._concat_same_type(to_concat) - new_data = self._maybe_box_as_values(new_data, **attribs) return self._simple_new(new_data, **attribs) def _maybe_box_as_values(self, values, **attribs): @@ -704,7 +712,6 @@ def _maybe_box_as_values(self, values, **attribs): # but others are not. When everyone is an ExtensionArray, this can # be removed. Currently used in # - sort_values - # - _concat_same_dtype return values def astype(self, dtype, copy=True): @@ -761,7 +768,7 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): try: return np.array(other, copy=False).view('i8') except TypeError: - # period array cannot be coerces to int + # period array cannot be coerced to int other = Index(other) return other.asi8 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b754b2705d034..23446a57e7789 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -551,16 +551,13 @@ def snap(self, freq='S'): # TODO: what about self.name? if so, use shallow_copy? def unique(self, level=None): - # Override here since IndexOpsMixin.unique uses self._values.unique - # For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error - # So we extract the tz-naive DatetimeIndex, unique that, and wrap the - # result with out TZ. 
- if self.tz is not None: - naive = type(self)(self._ndarray_values, copy=False) - else: - naive = self - result = super(DatetimeIndex, naive).unique(level=level) - return self._shallow_copy(result.values) + if level is not None: + self._validate_index_level(level) + + # TODO(DatetimeArray): change dispatch once inheritance is removed + # call DatetimeArray method + result = DatetimeArray.unique(self) + return self._shallow_copy(result._data) def union(self, other): """ diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index bb4022c9cac9a..a1242e2481fed 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -56,7 +56,68 @@ def timedelta_index(request): return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) -class TestDatetimeArray(object): +class SharedTests(object): + index_cls = None + + def test_take(self): + data = np.arange(100, dtype='i8') + np.random.shuffle(data) + + idx = self.index_cls._simple_new(data, freq='D') + arr = self.array_cls(idx) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_take_fill(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D') + arr = self.array_cls(idx) + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT) + assert result[0] is pd.NaT + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2.0) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, + fill_value=pd.Timestamp.now().time) + + def test_concat_same_type(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT) + arr = self.array_cls(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + + tm.assert_index_equal(self.index_cls(result), expected) + + +class TestDatetimeArray(SharedTests): + index_cls = pd.DatetimeIndex + array_cls = DatetimeArray def test_array_object_dtype(self, tz_naive_fixture): # GH#23524 @@ -175,8 +236,60 @@ def test_int_properties(self, datetime_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, datetime_index, tz_naive_fixture): + dti = datetime_index.tz_localize(tz_naive_fixture) + arr = DatetimeArray(dti) + + now = pd.Timestamp.now().tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + with pytest.raises(ValueError): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=pd.Period('2014Q1')) + + tz = None if dti.tz is not None else 'US/Eastern' + now = pd.Timestamp.now().tz_localize(tz) + with pytest.raises(TypeError): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) + + def test_concat_same_type_invalid(self, datetime_index): 
+        # different timezones
+        dti = datetime_index
+        arr = DatetimeArray(dti)
+
+        if arr.tz is None:
+            other = arr.tz_localize('UTC')
+        else:
+            other = arr.tz_localize(None)
+
+        with pytest.raises(AssertionError):
+            arr._concat_same_type([arr, other])
+
+    def test_concat_same_type_different_freq(self):
+        # we *can* concatenate DTI with different freqs.
+        a = DatetimeArray(pd.date_range('2000', periods=2, freq='D',
+                                        tz='US/Central'))
+        b = DatetimeArray(pd.date_range('2000', periods=2, freq='H',
+                                        tz='US/Central'))
+        result = DatetimeArray._concat_same_type([a, b])
+        expected = DatetimeArray(pd.to_datetime([
+            '2000-01-01 00:00:00', '2000-01-02 00:00:00',
+            '2000-01-01 00:00:00', '2000-01-01 01:00:00',
+        ]).tz_localize("US/Central"))
+
+        tm.assert_datetime_array_equal(result, expected)
+
+
+class TestTimedeltaArray(SharedTests):
+    index_cls = pd.TimedeltaIndex
+    array_cls = TimedeltaArray
 
-class TestTimedeltaArray(object):
     def test_from_tdi(self):
         tdi = pd.TimedeltaIndex(['1 Day', '3 Hours'])
         arr = TimedeltaArray(tdi)
@@ -223,8 +336,27 @@ def test_int_properties(self, timedelta_index, propname):
 
         tm.assert_numpy_array_equal(result, expected)
 
+    def test_take_fill_valid(self, timedelta_index):
+        tdi = timedelta_index
+        arr = TimedeltaArray(tdi)
+
+        td1 = pd.Timedelta(days=1)
+        result = arr.take([-1, 1], allow_fill=True, fill_value=td1)
+        assert result[0] == td1
+
+        now = pd.Timestamp.now()
+        with pytest.raises(ValueError):
+            # fill_value Timestamp invalid
+            arr.take([0, 1], allow_fill=True, fill_value=now)
+
+        with pytest.raises(ValueError):
+            # fill_value Period invalid
+            arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D'))
+
 
-class TestPeriodArray(object):
+class TestPeriodArray(SharedTests):
+    index_cls = pd.PeriodIndex
+    array_cls = PeriodArray
 
     def test_from_pi(self, period_index):
         pi = period_index

From 8459936adc84351619cf1d11912a8ab16d9eb8be Mon Sep 17 00:00:00 2001
From: JustinZhengBC
Date: Wed, 14 Nov 2018 09:13:15 -0800
Subject: [PATCH 121/122] reapply changes

---
 doc/source/whatsnew/v0.24.0.txt        | 1 -
 pandas/tests/io/formats/test_format.py | 7 ++-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 956ab47bceda7..e489f6afbb2a8 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1312,7 +1312,6 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`)
 - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
 - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
-- Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
 - Bug in :func:`DataFrame.to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
 - Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
 - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. 
Now just the value of the index is written (:issue:`19589`). diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 148ef9cf83b85..25c8d2859b13f 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -305,14 +305,10 @@ def test_repr_non_interactive(self): assert not has_truncated_repr(df) assert not has_expanded_repr(df) - def test_repr_truncates_terminal_size(self): + def test_repr_truncates_terminal_size(self, mock): # https://github.com/pandas-dev/pandas/issues/21180 # TODO: use mock fixutre. # This is being backported, so doing it directly here. - try: - from unittest import mock - except ImportError: - mock = pytest.importorskip("mock") terminal_size = (118, 96) p1 = mock.patch('pandas.io.formats.console.get_terminal_size', @@ -343,6 +339,7 @@ def test_repr_truncates_terminal_size(self): assert df2.columns[0] in result.split('\n')[0] + def test_repr_truncates_terminal_size(self, mock): # GH 22984 ensure entire window is filled terminal_size = (80, 24) df = pd.DataFrame(np.random.rand(1, 7)) From 0f7aa4bab11b74a14bf8ea83e8740497631296aa Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Wed, 14 Nov 2018 09:18:49 -0800 Subject: [PATCH 122/122] r --- pandas/tests/io/formats/test_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 25c8d2859b13f..b8ca8cb73c7e9 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -339,7 +339,7 @@ def test_repr_truncates_terminal_size(self, mock): assert df2.columns[0] in result.split('\n')[0] - def test_repr_truncates_terminal_size(self, mock): + def test_repr_truncates_terminal_size_full(self, mock): # GH 22984 ensure entire window is filled terminal_size = (80, 24) df = pd.DataFrame(np.random.rand(1, 7))
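
A note on PATCH 118: the whole fix is ``float count`` -> ``float64_t count`` in
the two Cython templates. A C ``float`` carries a 24-bit significand, so once
``count`` reaches 2**24 adding 1.0 no longer changes it, while the float64
ranks keep growing past it; dividing a rank by the saturated count then yields
percentages above 1.0. A minimal sketch of the mechanism, using ``np.float32``
as a stand-in for the C ``float`` (an illustration, not the pandas internals):

    import numpy as np

    # float32 has a 24-bit significand: 2**24 + 1 is not representable,
    # so a running count silently stops growing ("saturates") at 2**24
    count = np.float32(2 ** 24)
    assert count + np.float32(1) == count

    # the largest rank, tracked in 64 bits, still reaches 2**24 + 1,
    # so rank / count exceeds 1.0 -- the symptom reported in GH 18271
    pct = np.float64(2 ** 24 + 1) / np.float64(count)
    print(pct)  # 1.0000000596046448, i.e. greater than 1.0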
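A note on PATCH 120: ``take`` for the datetime-like arrays is reduced to one
shared code path: convert the scalar ``fill_value`` to the int64 ("i8")
representation the arrays store (``iNaT`` for missing values, raising
``ValueError`` for scalars of the wrong type), then delegate to the shared
take algorithm on ``self.asi8``. A sketch of that contract using the public
``pandas.api.extensions.take`` wrapper on raw i8 values (illustrating the
pattern only, not the array classes themselves):

    import numpy as np
    from pandas.api.extensions import take

    iNaT = np.iinfo(np.int64).min  # the i8 sentinel pandas uses for NaT

    # two timestamps as i8 nanosecond offsets from the epoch
    i8 = np.array([0, 86400000000000], dtype=np.int64)

    # with allow_fill=True an index of -1 means "missing"; because the
    # fill value is already a valid i8 scalar, the result stays int64
    # instead of being upcast, which is why _validate_fill_value runs first
    result = take(i8, [1, -1], allow_fill=True, fill_value=iNaT)
    print(result)  # [86400000000000, -9223372036854775808]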
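A note on PATCHES 121 and 122: the GH 22984 assertion is split out of
``test_repr_truncates_terminal_size`` (and renamed in the final commit so it
no longer shadows the existing test). The pattern it relies on, runnable as a
standalone check against a pandas of this vintage (the two patch targets are
taken from the diff above and may have moved in later versions):

    import numpy as np
    import pandas as pd
    from unittest import mock

    # pin both places that ask for the terminal size, so repr() always
    # sees an 80x24 window no matter where this runs
    terminal_size = (80, 24)
    p1 = mock.patch('pandas.io.formats.console.get_terminal_size',
                    return_value=terminal_size)
    p2 = mock.patch('pandas.io.formats.format.get_terminal_size',
                    return_value=terminal_size)

    df = pd.DataFrame(np.random.rand(1, 7))
    with p1, p2:
        # a 1x7 frame fits in 80 columns, so nothing should be elided
        assert '...' not in repr(df)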